// Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved.

// Package common define common utils
package common

import (
"bufio"
"context"
"errors"
"io"
"os"
"path"
"strconv"
"strings"
"sync"
"time"

"github.com/gin-gonic/gin"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"

"huawei.com/mindxdl/base/common/constants"
"huawei.com/npu-exporter/hwlog"
)

// logChanMap holds the stop channel of every running log record goroutine,
// keyed by srcModule + "-" + svcName. StartLogRecord adds entries and
// StopLogRecord signals, closes and removes them. No lock is taken around it here.
var logChanMap map[string]chan bool

// LogQueryResp log query response
type LogQueryResp struct {
// LogContent is the selected log lines joined with "\n"
LogContent string `json:"logContent"`
// RowNumber is the number of log lines contained in LogContent
RowNumber uint64 `json:"rowNumber"`
}

const (
// checkServicePeriod is the polling interval of the log record loop, in seconds
checkServicePeriod = 5
// byteToMB is the right-shift width used to turn a byte count into MB (size >> 20)
byteToMB = 20
// maxLogSize is the largest log file, in MB, that may be downloaded;
// it is also used as the initial capacity of the line buffer of a log query
maxLogSize = 50
// FileMode is the permission mode passed to os.OpenFile for log files
FileMode = 0640
// FolderMode is the permission mode passed to os.MkdirAll for log folders
FolderMode = 0750
// runningStatus and succeededStatus are the pod phases whose logs are recorded
runningStatus = "Running"
succeededStatus = "Succeeded"

// LogFolder is the name of the folder, under the storage path, that holds service logs
LogFolder = "ServiceLogs"
)

// GetLogFilePath builds the path of a pod log file: the elem parts are joined
// into a directory and "<podName>.log" is appended to it.
func GetLogFilePath(podName string, elem ...string) string {
	dir := path.Join(elem...)
	fileName := podName + ".log"
	return path.Join(dir, fileName)
}

// getStoragePath returns the storage path obtained from GetUserPath(b).
// srcModule is only written to the run log; it does not affect the result.
func getStoragePath(b BaseCtx, srcModule string) string {
	hwlog.RunLog.Infof("current component is %v", srcModule)
	userPath := GetUserPath(b)
	return userPath
}

// getLogParentPath returns the folder that holds the pod log files of a
// namespace: <storage path>/<LogFolder>/<namespace>.
func getLogParentPath(b BaseCtx, srcModule, namespace string) string {
	storageRoot := getStoragePath(b, srcModule)
	return path.Join(storageRoot, LogFolder, namespace)
}

// getPodLogFilePath returns the full path of the log file of podName inside
// the log folder of the given namespace.
func getPodLogFilePath(b BaseCtx, srcModule, namespace, podName string) string {
	parentDir := getLogParentPath(b, srcModule, namespace)
	return GetLogFilePath(podName, parentDir)
}

// logToFile copies the pod log stream podLog into a newly created file at
// logFilePath and calls wg.Done when it returns.
//
// Nothing is written when the file already exists. Failures are written to the
// run log (with the underlying error) and are not returned. The caller keeps
// ownership of podLog; this function does not close it.
func logToFile(podLog io.ReadCloser, logFilePath string, wg *sync.WaitGroup) {
	defer wg.Done()
	fileName := path.Base(logFilePath)

	// O_EXCL makes "create only if it does not exist yet" a single atomic step,
	// so there is no window between checking for the file and creating it.
	logFile, err := os.OpenFile(logFilePath, os.O_CREATE|os.O_EXCL|os.O_WRONLY|os.O_APPEND, FileMode)
	if err != nil {
		if os.IsExist(err) {
			// the log of this pod has already been recorded
			return
		}
		hwlog.RunLog.Errorf("create %s log file failed, err: %v", fileName, err)
		return
	}
	defer func() {
		if closeErr := logFile.Close(); closeErr != nil {
			hwlog.RunLog.Errorf("close %s log file failed, err: %v", fileName, closeErr)
		}
	}()

	logWriter := bufio.NewWriter(logFile)
	if _, err := io.Copy(logWriter, podLog); err != nil {
		hwlog.RunLog.Errorf("write log file failed, path: %v, err: %v", fileName, err)
	}
	// flush even after a copy error so that the bytes already buffered reach the file
	if err := logWriter.Flush(); err != nil {
		hwlog.RunLog.Errorf("flush %s log file failed, err: %v", fileName, err)
	}
}

// closePodLogStream closes every log reader held in podLogList. A failed close
// is written to the run log and does not stop the remaining readers from being
// closed. The map itself is left unchanged.
func closePodLogStream(podLogList map[string]io.ReadCloser) {
	for name := range podLogList {
		closeErr := podLogList[name].Close()
		if closeErr == nil {
			continue
		}
		hwlog.RunLog.Errorf("close %s log reader failed", name)
	}
}

// deleteOldLogFile removes the log file of every pod named in oldPodNameList
// from logDir. A failed removal is written to the run log and the remaining
// files are still processed.
func deleteOldLogFile(logDir string, oldPodNameList []string) {
	for i := range oldPodNameList {
		name := oldPodNameList[i]
		filePath := GetLogFilePath(name, logDir)
		removeErr := os.Remove(filePath)
		if removeErr == nil {
			continue
		}
		hwlog.RunLog.Errorf("remove log file failed, name: %v err: %v, please delete it manually",
			name, removeErr)
	}
}

// findFirst returns the index of the first entry of list that equals element,
// or -1 when list does not contain it.
func findFirst(element string, list []string) int {
	for idx := 0; idx < len(list); idx++ {
		if list[idx] == element {
			return idx
		}
	}
	return -1
}

// getNewPodNameList lists the pods labelled app=<svcName> in svcNamespace and
// compares the Running/Succeeded ones with oldPodNameList.
//
// The first result holds the names that are not in oldPodNameList (newly seen
// pods). The second result is oldPodNameList with every still Running/Succeeded
// pod removed, i.e. only the names of pods that are gone remain. The backing
// array of oldPodNameList is modified in place.
//
// When listing the pods fails the error is written to the run log and
// (nil, nil) is returned.
func getNewPodNameList(clientSet *kubernetes.Clientset, svcNamespace, svcName string,
	oldPodNameList []string) ([]string, []string) {
	labelSelector := labels.Set(map[string]string{"app": svcName}).AsSelector().String()
	podList, err := clientSet.CoreV1().Pods(svcNamespace).List(context.TODO(), metav1.ListOptions{
		LabelSelector: labelSelector,
	})
	if err != nil {
		hwlog.RunLog.Errorf("list pods of service %v failed, err: %v", svcName, err)
		return nil, nil
	}
	var podNameList []string
	// index instead of ranging by value so that the Pod structs are not copied
	for i := range podList.Items {
		pod := &podList.Items[i]
		if pod.Status.Phase != runningStatus && pod.Status.Phase != succeededStatus {
			continue
		}
		index := findFirst(pod.Name, oldPodNameList)
		if index == -1 {
			podNameList = append(podNameList, pod.Name)
			continue
		}
		// the pod is still alive: drop it from the list of removed pods
		oldPodNameList = append(oldPodNameList[:index], oldPodNameList[index+1:]...)
	}

	return podNameList, oldPodNameList
}

// updatePodLogList brings podLogList in line with the current pods: it opens a
// following log stream for every pod in podNameList and stores it under the pod
// name, then closes and removes the stream of every pod in oldPodNameList.
//
// podLogList must be a non-nil map owned by the caller; with a nil map nothing
// is done, because streams stored in a locally created map could never be
// closed by the caller. Streams that cannot be opened or closed are reported in
// the run log and skipped.
func updatePodLogList(clientSet *kubernetes.Clientset, svcNamespace string, podNameList, oldPodNameList []string,
	podLogList map[string]io.ReadCloser) {
	if podLogList == nil {
		return
	}
	opt := &corev1.PodLogOptions{
		Follow: true,
	}
	for _, podName := range podNameList {
		if _, exists := podLogList[podName]; exists {
			// keep the stream that is already open instead of leaking it
			continue
		}
		logRequest := clientSet.CoreV1().Pods(svcNamespace).GetLogs(podName, opt)
		podLog, err := logRequest.Stream(context.TODO())
		if err != nil {
			hwlog.RunLog.Errorf("open %s log stream failed, err: %v", podName, err)
			continue
		}
		podLogList[podName] = podLog
	}
	for _, podName := range oldPodNameList {
		podLog, ok := podLogList[podName]
		if !ok {
			continue
		}
		// guard against a nil reader: calling Close on it would panic
		if podLog != nil {
			if err := podLog.Close(); err != nil {
				hwlog.RunLog.Errorf("close %s log reader failed", podName)
			}
		}
		delete(podLogList, podName)
	}
}

// logRecordGoroutine records the logs of the pods of service svcName in
// namespace svcNameSpace into logDir until a value is received from stopCh (or
// stopCh is closed).
//
// Every checkServicePeriod seconds it looks for newly started pods, opens a log
// stream for each of them and starts one logToFile goroutine per stream; the
// streams and log files of pods that disappeared are closed and deleted. On
// stop it waits for the writer goroutines, closes all streams and deletes the
// log files of the pods that were still tracked.
func logRecordGoroutine(svcNameSpace, svcName, logDir string, stopCh <-chan bool) {
	var clientSet *kubernetes.Clientset
	var cliErr error
	podLogList := make(map[string]io.ReadCloser)
	var podNameList, oldPodNameList []string
	var wg sync.WaitGroup
	for {
		select {
		case <-stopCh:
			// NOTE(review): the writers read streams opened with Follow, so this wait
			// presumably lasts until the pods end their streams — confirm that stopping
			// while pods are still running is not expected to return promptly.
			wg.Wait()
			closePodLogStream(podLogList)
			deleteOldLogFile(logDir, oldPodNameList)
			return
		default:
		}
		if clientSet == nil {
			if clientSet, cliErr = K8sClient(""); cliErr != nil {
				hwlog.RunLog.Errorf("get k8s client failed, err: %v", cliErr)
				// back off before retrying instead of spinning on the failure
				time.Sleep(checkServicePeriod * time.Second)
				continue
			}
		}
		podNameList, oldPodNameList = getNewPodNameList(clientSet, svcNameSpace, svcName, oldPodNameList)
		updatePodLogList(clientSet, svcNameSpace, podNameList, oldPodNameList, podLogList)
		deleteOldLogFile(logDir, oldPodNameList)
		// from here on oldPodNameList is the set of pods that currently have an open stream
		oldPodNameList = nil
		for podName := range podLogList {
			oldPodNameList = append(oldPodNameList, podName)
		}

		for _, podName := range podNameList {
			podLog, ok := podLogList[podName]
			if !ok {
				// no stream could be opened for this pod; it is retried on the next round
				continue
			}
			// count only goroutines that are really started, otherwise wg.Wait never returns
			wg.Add(1)
			go logToFile(podLog, GetLogFilePath(podName, logDir), &wg)
		}
		time.Sleep(checkServicePeriod * time.Second)
	}
}

// StartLogRecord creates the log folder of svcNamespace and starts a goroutine
// that records the pod logs of service svcName into it. The stop channel of the
// goroutine is stored in logChanMap under srcModule + "-" + svcName so that
// StopLogRecord can find it. When the folder cannot be created a warning is
// logged and nothing is started.
//
// NOTE(review): logChanMap is accessed without a lock, and an entry already
// stored under the same key is overwritten; confirm that callers never start the
// same service twice or call this concurrently.
func StartLogRecord(svcNamespace, svcName, srcModule string, b BaseCtx) {
	logParentDir := getLogParentPath(b, srcModule, svcNamespace)
	mkErr := os.MkdirAll(logParentDir, FolderMode)
	if mkErr != nil {
		hwlog.OpLog.WarnfWithCtx(b.Ctx, "log record failed: create the log folder failed, %s", mkErr.Error())
		return
	}

	if logChanMap == nil {
		logChanMap = make(map[string]chan bool)
	}
	stopCh := make(chan bool, 1)
	logChanMap[srcModule+"-"+svcName] = stopCh
	hwlog.RunLog.InfofWithCtx(b.Ctx, "start to record log of service %v", svcName)
	go logRecordGoroutine(svcNamespace, svcName, logParentDir, stopCh)
}

// StopLogRecord signals the log record goroutine registered under
// srcModule + "-" + svcName to stop, closes its stop channel and removes it from
// logChanMap. When no such goroutine is registered only the log line is written.
func StopLogRecord(svcName, srcModule string, b BaseCtx) {
	mapKey := srcModule + "-" + svcName
	// a missing key yields a nil channel, so one nil check covers both cases
	if stopCh := logChanMap[mapKey]; stopCh != nil {
		stopCh <- true
		close(stopCh)
		delete(logChanMap, mapKey)
	}
	hwlog.RunLog.InfofWithCtx(b.Ctx, "stop to record log of service %v", svcName)
}

// logQuery reads the log file at logPath and returns at most limit lines,
// starting at the zero-based line number offset.
//
// Lines are counted by line terminator: a line longer than the reader buffer
// arrives in several fragments and is still counted as one line. The returned
// LogContent is the selected lines joined with "\n" and RowNumber is their
// count. Reaching the end of the file early is not an error; any other read
// error, or a failure to open the file, is logged and returned.
func logQuery(logPath string, offset, limit uint64, b BaseCtx) (*LogQueryResp, error) {
	logFile, err := os.OpenFile(logPath, os.O_RDONLY, FileMode)
	if err != nil {
		hwlog.RunLog.ErrorWithCtx(b.Ctx, "Fail to get log file")
		return nil, err
	}
	defer logFile.Close()

	// end is the first line number that is no longer wanted; clamp it on overflow
	end := offset + limit
	if end < offset {
		end = ^uint64(0)
	}

	logReader := bufio.NewReader(logFile)
	logStrs := make([]string, 0, maxLogSize)
	var pending []byte // fragments of the line that is currently being assembled
	partial := false   // true while pending belongs to a line whose end was not seen yet
	for line := uint64(0); line < end; {
		fragment, isPrefix, err := logReader.ReadLine()
		if err != nil {
			if err != io.EOF {
				hwlog.RunLog.ErrorfWithCtx(b.Ctx, "An error occurred while reading log file: %v", err)
				return nil, err
			}
			// keep a last line that ended exactly at a buffer boundary without a terminator
			if partial && line >= offset {
				logStrs = append(logStrs, string(pending))
			}
			break
		}
		// lines before offset are only counted, not buffered
		if line >= offset {
			pending = append(pending, fragment...)
		}
		if isPrefix {
			partial = true
			continue
		}
		if line >= offset {
			logStrs = append(logStrs, string(pending))
			pending = pending[:0]
		}
		partial = false
		// advance the counter only once the whole line has been read
		line++
	}
	logQueryResp := LogQueryResp{
		LogContent: strings.Join(logStrs, "\n"),
		RowNumber:  uint64(len(logStrs)),
	}
	return &logQueryResp, nil
}

// getOffsetAndLimit converts the query parameters logOffset and logLimit to
// unsigned integers and checks them with validLogLimitOffset.
//
// It returns the offset, the limit and a result code: Success when both values
// are usable, ParamConvert2IntegerFailed when one of them cannot be parsed, and
// ParamInvalid when validLogLimitOffset rejects them. On failure both numbers
// are 0 and the reason is written to the operation log.
func getOffsetAndLimit(logOffset, logLimit string, b BaseCtx) (uint64, uint64, string) {
	offset, offsetErr := strconv.ParseUint(logOffset, BaseHex, BitSize64)
	if offsetErr != nil {
		hwlog.OpLog.ErrorfWithCtx(b.Ctx, "get service log offset convert to integer failed, err: %v", offsetErr)
		return 0, 0, ParamConvert2IntegerFailed
	}

	limit, limitErr := strconv.ParseUint(logLimit, BaseHex, BitSize64)
	if limitErr != nil {
		hwlog.OpLog.ErrorfWithCtx(b.Ctx, "get service log limit convert to integer failed, err: %v", limitErr)
		return 0, 0, ParamConvert2IntegerFailed
	}

	checkErr := validLogLimitOffset(offset, limit)
	if checkErr != nil {
		hwlog.OpLog.ErrorfWithCtx(b.Ctx, "get service log param invalid: %v", checkErr)
		return 0, 0, ParamInvalid
	}
	return offset, limit, Success
}

// validPodNameNamespace checks that podName and podNamespace are well formed,
// that the pod exists, and that the service the pod belongs to is accepted by
// search for the requesting user (b.HdInfo.UserID).
//
// The service name is read from the pod labels: from "dbjob-name" when the pod
// carries the constants.TrainManageName label (whose value is passed to search
// as the service type), otherwise from "app" with an empty service type.
//
// It returns "" when every check passes, GetK8sClientFailed when no client
// could be obtained, and ParamInvalid for every other failure.
func validPodNameNamespace(podName, podNamespace string,
	search func(name, svcType string, uid uint64) error, b BaseCtx) string {
	if !ValidName("podName", podName, b) {
		hwlog.RunLog.ErrorWithCtx(b.Ctx, "podName invalid")
		return ParamInvalid
	}
	if !ValidName("namespace", podNamespace, b) {
		hwlog.RunLog.ErrorWithCtx(b.Ctx, "pod namespace invalid")
		return ParamInvalid
	}

	clientSet, cliErr := K8sClient("")
	if cliErr != nil {
		hwlog.RunLog.ErrorfWithCtx(b.Ctx, "Failed to get customClient err: %s", cliErr.Error())
		return GetK8sClientFailed
	}
	pod, getErr := clientSet.CoreV1().Pods(podNamespace).Get(context.Background(),
		podName, metav1.GetOptions{})
	if getErr != nil {
		hwlog.RunLog.ErrorWithCtx(b.Ctx, "the pod does not exist")
		return ParamInvalid
	}

	podLabels := pod.Labels
	svcType, isTrainPod := podLabels[constants.TrainManageName]
	if !isTrainPod {
		// pod without the train manager label: the service name is in the "app" label
		appName, found := podLabels["app"]
		if !found {
			hwlog.RunLog.ErrorWithCtx(b.Ctx, "failed to get service name of the pod")
			return ParamInvalid
		}
		if searchErr := search(appName, "", b.HdInfo.UserID); searchErr != nil {
			hwlog.RunLog.ErrorfWithCtx(b.Ctx, "the pod does not belong to task manager, %v", searchErr)
			return ParamInvalid
		}
		return ""
	}

	jobName, found := podLabels["dbjob-name"]
	if !found {
		hwlog.RunLog.ErrorWithCtx(b.Ctx, "failed to get service name of the pod")
		return ParamInvalid
	}
	if searchErr := search(jobName, svcType, b.HdInfo.UserID); searchErr != nil {
		hwlog.RunLog.ErrorWithCtx(b.Ctx, "the pod does not belong to train manager")
		return ParamInvalid
	}
	return ""
}

// QueryServiceLog handles a log query request: it returns a range of lines of
// the recorded log file of one pod.
//
// The query parameters "offset" and "limit" select the lines, "podName" and
// "namespace" select the pod. search is used to verify that the pod's service
// belongs to the requesting user. The outcome is written with ConstructResp:
// the code reported by the failing check, QueryK8sPodLogFailed when the log
// file cannot be read, or Success together with the LogQueryResp.
func QueryServiceLog(srcModule string,
	search func(name, svcType string, uid uint64) error, b BaseCtx, c *gin.Context) {
	hwlog.OpLog.InfoWithCtx(b.Ctx, "start to query service log")
	offset, limit, errCode := getOffsetAndLimit(c.Query("offset"), c.Query("limit"), b)
	if errCode != Success {
		ConstructResp(c, errCode, "", nil)
		return
	}
	podName := c.Query("podName")
	podNamespace := c.Query("namespace")
	// report the code returned by the check so that a client failure
	// (GetK8sClientFailed) is not disguised as an invalid parameter
	if code := validPodNameNamespace(podName, podNamespace, search, b); code != "" {
		ConstructResp(c, code, "", nil)
		return
	}
	logPath := getPodLogFilePath(b, srcModule, podNamespace, podName)
	log, err := logQuery(logPath, offset, limit, b)
	if err != nil {
		hwlog.OpLog.ErrorfWithCtx(b.Ctx, "query service pod log failed, err: %v", err)
		ConstructResp(c, QueryK8sPodLogFailed, "", nil)
		return
	}
	hwlog.OpLog.InfoWithCtx(b.Ctx, "query pod log succeed")
	ConstructResp(c, Success, "", log)
}

// downloadLogFile copies the log file at logPath to writer and returns the
// number of bytes copied.
//
// A file whose size in MB (size >> byteToMB) exceeds maxLogSize is rejected
// with an error before anything is written. A failed size lookup is ignored;
// the file is then opened anyway and an open failure is logged and returned.
func downloadLogFile(writer io.Writer, logPath string, b BaseCtx) (int64, error) {
	info, statErr := os.Stat(logPath)
	if statErr == nil && info.Size()>>byteToMB > maxLogSize {
		return 0, errors.New("the log file is too large, please download it by other way")
	}

	logFile, openErr := os.OpenFile(logPath, os.O_RDONLY, FileMode)
	if openErr != nil {
		hwlog.RunLog.ErrorWithCtx(b.Ctx, "Fail to get log file")
		return 0, openErr
	}
	defer logFile.Close()

	copied, copyErr := io.Copy(writer, logFile)
	return copied, copyErr
}

// DownloadServiceLog handles a log download request: it streams the recorded
// log file of one pod to the client as an attachment named "<podName>.log".
//
// The query parameters "podName" and "namespace" select the pod; search is used
// to verify that the pod's service belongs to the requesting user. The
// attachment headers are set only after the parameters have been validated, and
// the length header is filled in before the body is written, because headers
// changed after the first body byte are no longer sent.
//
// Failures are reported with ConstructResp: the code returned by the pod check,
// or DownloadPodLogFileFailed when the file cannot be streamed.
func DownloadServiceLog(srcModule string,
	search func(name, svcType string, uid uint64) error, b BaseCtx, c *gin.Context) {
	podName := c.Query("podName")
	podNamespace := c.Query("namespace")
	if code := validPodNameNamespace(podName, podNamespace, search, b); code != "" {
		ConstructResp(c, code, "", nil)
		return
	}

	hwlog.OpLog.InfofWithCtx(b.Ctx, "get pod log podName(%v), namespace(%v)", podName, podNamespace)
	logPath := getPodLogFilePath(b, srcModule, podNamespace, podName)

	// best effort: the size stays 0 when the file cannot be inspected, and the
	// file may still grow while it is being recorded
	logFileSize := int64(0)
	if fi, statErr := os.Stat(logPath); statErr == nil {
		logFileSize = fi.Size()
	}
	c.Header(ContentDisposition, "attachment; filename="+podName+".log")
	c.Header(ContentType, "application/text/plain")
	c.Header(AcceptLength, strconv.FormatInt(logFileSize, BaseHex))

	if _, err := downloadLogFile(c.Writer, logPath, b); err != nil {
		hwlog.OpLog.ErrorfWithCtx(b.Ctx, "download service pod log failed, err: %v", err)
		ConstructResp(c, DownloadPodLogFileFailed, "", nil)
		return
	}
	hwlog.OpLog.InfoWithCtx(b.Ctx, "pod log downloaded")
	// NOTE(review): the file content has already been written to the response at
	// this point; confirm that ConstructResp does not append a second body to it.
	ConstructResp(c, Success, "", nil)
}

mindxdl--common--log_record.go的更多相关文章

  1. Socket聊天程序——Common

    写在前面: 上一篇记录了Socket聊天程序的客户端设计,为了记录的完整性,这里还是将Socket聊天的最后一个模块--Common模块记录一下.Common的设计如下: 功能说明: Common模块 ...

  2. angularjs 1 开发简单案例(包含common.js,service.js,controller.js,page)

    common.js var app = angular.module('app', ['ngFileUpload']) .factory('SV_Common', function ($http) { ...

  3. Common Bugs in C Programming

    There are some Common Bugs in C Programming. Most of the contents are directly from or modified from ...

  4. ANSI Common Lisp Practice - My Answers - Chapter - 3

    Ok, Go ahead. 1 (a) (b) (c) (d) 2 注:union 在 Common Lisp 中的作用就是求两个集合的并集.但是这有一个前提,即给的两个列表已经满足集合的属性了.具体 ...

  5. [LeetCode] Lowest Common Ancestor of a Binary Tree 二叉树的最小共同父节点

    Given a binary tree, find the lowest common ancestor (LCA) of two given nodes in the tree. According ...

  6. [LeetCode] Lowest Common Ancestor of a Binary Search Tree 二叉搜索树的最小共同父节点

    Given a binary search tree (BST), find the lowest common ancestor (LCA) of two given nodes in the BS ...

  7. [LeetCode] Longest Common Prefix 最长共同前缀

    Write a function to find the longest common prefix string amongst an array of strings. 这道题让我们求一系列字符串 ...

  8. 48. 二叉树两结点的最低共同父结点(3种变种情况)[Get lowest common ancestor of binary tree]

    [题目] 输入二叉树中的两个结点,输出这两个结点在数中最低的共同父结点. 二叉树的结点定义如下:  C++ Code  123456   struct BinaryTreeNode {     int ...

  9. 动态规划求最长公共子序列(Longest Common Subsequence, LCS)

    1. 问题描述 子串应该比较好理解,至于什么是子序列,这里给出一个例子:有两个母串 cnblogs belong 比如序列bo, bg, lg在母串cnblogs与belong中都出现过并且出现顺序与 ...

  10. 【leetcode】Longest Common Prefix

    题目简述: Write a function to find the longest common prefix string amongst an array of strings. 解题思路: c ...

随机推荐

  1. kingbaseES V8R3数据安全案例之---审计记录清除案例

    ​ 案例说明: 对于KingbaseES V8R3数据库,默认用户无权限删除审计记录,只有对审计记录做了转储以后会自动清理审计记录. 适用版本: KingbaseES V8R3 本案例数据库版本: S ...

  2. KingbaseES R6 集群修改data目录

    案例说明: 本案例是在部署完成KingbaseES R6集群后,由于业务的需求,集群需要修改data(数据存储)目录的测试.本案例分两种修改方式,第一种是离线修改data目录,即关闭整个集群后,修改数 ...

  3. OpenCV读写视频操作

    一.读取视频流 在使用OpenCV读取摄像头,或者处理一些磁盘中保存的视频文件时,通常使用VideoCapture进行读取. std::string video_path("/path/to ...

  4. Base64加密、解密

    #region Base64加密方法 /// <summary> /// Base64加密,采用utf8编码方式加密 /// </summary> /// <param ...

  5. Java SE 6、super关键字,包

    1.super关键字 super代表父类的引用,用于访问父类的属性,方法,构造器 super可以访问父类的属性,但不能访问父类的 private 属性 super.属性名; 可以访问父类的方法,不能访 ...

  6. ACL和NAT

    1 ACL 1.1 ACL的作用 1).用来对数据包做访问控制(丢弃或者放弃) 2).结合其他协议,用来匹配范围 1.2 ACL的工作原理 当数据包从接口经过时,由于接口启用了ACL,此时路由器会对报 ...

  7. 基于 OpenTelemetry 的链路追踪

    链路追踪的前世今生 分布式跟踪(也称为分布式请求跟踪)是一种用于分析和监控应用程序的方法,尤其是使用微服务架构构建的应用程序.分布式跟踪有助于精确定位故障发生的位置以及导致性能差的原因. 起源 链路追 ...

  8. Elasticsearch:创建 API key 接口访问 Elasticsearch

    转载自:https://blog.csdn.net/UbuntuTouch/article/details/107181440 在之前我的文章 "Elastic:使用Postman来访问El ...

  9. 使用coverlet统计单元测试的代码覆盖率

    单元测试是个好东西, 可以在一定程度上兜底 虽然写单元测试这件事情非常麻烦 但是好的单元测试可以显著提高代码质量, 减少bug, 避免无意中的修改导致其他模块出错 写测试用例的过程中, 靠人力去确保所 ...

  10. 《吐血整理》高级系列教程-吃透Fiddler抓包教程(26)-Fiddler如何抓取Android7.0以上的Https包-上篇

    1.简介 众所周知,假如设备是android 7.0+的系统同时应用设置targetSdkVersion >= 24的话,那么应用默认是不信任安装的Fiddler用户证书的,所以你就没法抓到应用 ...