我负责搭建公司的日志分析平台,一直想把CDN日志也纳入日志分析,前些日子终于如愿,现在贴出具体做法:

1、收集日志

  腾讯云CDN日志一般一小时刷新一次,也就是说当前只能下载一小时之前的日志数据;但据本人观察,有时前一小时的日志并不能下载到,所以为了保险起见,可以下载两小时之前的日志数据。下载日志时,先通过腾讯云的API获取日志列表,再按列表中的链接下载。

  腾讯云日志下载API 链接:https://www.qcloud.com/document/product/228/8087

日志采集脚本:

[root@BJVM-2-181 bin]# cat get_cdn_log.py
#!/usr/bin/env python
# coding=utf-8
"""Fetch Tencent Cloud CDN access logs and merge them into per-day files.

For every configured CDN domain the script asks the GetCdnLogList API for the
log package of the hour that started two hours ago, downloads the gzip
archive, decompresses it and appends the content to a per-day log file under
/data/logs/cdn/.

The code runs on Python 2 (the original target) as well as Python 3.
"""
import base64
import gzip
import hashlib
import hmac
import json
import os
import random
import shutil
import sys
import time
from datetime import datetime, timedelta

# Directory holding the merged per-day logs.
LOG_DIR = '/data/logs/cdn/'
# Scratch directory for the downloaded archives and the decompressed hour files.
TEMP_DIR = '/data/logs/cdn/cdn_log_temp/'


def _to_bytes(value):
    """Return ``value`` as bytes (hmac requires bytes on Python 3)."""
    return value if isinstance(value, bytes) else value.encode('utf-8')


class Sign(object):
    """Builds the HMAC-SHA1 request signature used by the v2 cloud API."""

    def __init__(self, secretId, secretKey):
        self.secretId = secretId
        self.secretKey = secretKey

    def make(self, requestHost, requestUri, params, method='GET'):
        """Return the base64 encoded signature for a request.

        The string to sign is ``METHOD + host + uri + '?' + query`` where the
        query is ``key=value`` pairs joined by ``&``, ordered by parameter
        name, with ``_`` in names replaced by ``.``.
        """
        query = "&".join(
            k.replace("_", ".") + "=" + str(params[k])
            for k in sorted(params.keys())
        )
        srcStr = method.upper() + requestHost + requestUri + '?' + query
        hashed = hmac.new(_to_bytes(self.secretKey), _to_bytes(srcStr),
                          hashlib.sha1)
        signature = base64.b64encode(hashed.digest())
        # Python 3 yields bytes here; callers expect a native string.
        if isinstance(signature, str):
            return signature
        return signature.decode('ascii')


class CdnHelper(object):
    """Prepares and sends a signed GetCdnLogList request for one domain."""

    # Credentials are taken from the environment when available so that real
    # keys do not have to live in the source file; the literals are the
    # placeholder values from the original script and keep it backward
    # compatible.
    SecretId = os.environ.get('QCLOUD_SECRET_ID',
                              'AKIDLsldjflsdjflsdjflsdjfpGSO5XoGiY9')
    SecretKey = os.environ.get('QCLOUD_SECRET_KEY',
                               'SeaHjSDFLJSLDFJQIuFJ7rMiz0lGV')
    requestHost = 'cdn.api.qcloud.com'
    requestUri = '/v2/index.php'

    def __init__(self, host, startDate, endDate):
        """Build the signed parameter set.

        host      -- CDN domain whose logs are requested.
        startDate -- start of the time window, "YYYY-MM-DD HH:MM:SS".
        endDate   -- end of the time window, same format.
        """
        self.host = host
        self.startDate = startDate
        self.endDate = endDate
        self.params = {
            'Timestamp': int(time.time()),
            'Action': 'GetCdnLogList',
            'SecretId': CdnHelper.SecretId,
            'Nonce': random.randint(10000000, 99999999),
            'host': self.host,
            'startDate': self.startDate,
            'endDate': self.endDate
        }
        # The signature must be computed before it is added to the params.
        self.params['Signature'] = Sign(
            CdnHelper.SecretId, CdnHelper.SecretKey
        ).make(CdnHelper.requestHost, CdnHelper.requestUri, self.params)
        self.url = 'https://%s%s' % (CdnHelper.requestHost,
                                     CdnHelper.requestUri)

    def GetCdnLogList(self):
        """Call the API and return the decoded JSON response."""
        # Imported here so the pure helpers of this module can be used (and
        # tested) on hosts where the third-party package is not installed.
        import requests
        ret = requests.get(self.url, params=self.params, timeout=30)
        return ret.json()


class GZipTool(object):
    """Compress and decompress gzip files in fixed-size chunks."""

    def __init__(self, bufSize=1024 * 8):
        self.bufSize = bufSize
        self.fin = None
        self.fout = None

    def compress(self, src, dst):
        """Gzip the file ``src`` into ``dst``."""
        self.fin = open(src, 'rb')
        try:
            self.fout = gzip.open(dst, 'wb')
        except Exception:
            self.fin.close()
            raise
        self.__in2out()

    def decompress(self, gzFile, dst):
        """Unpack the gzip file ``gzFile`` into ``dst``."""
        self.fin = gzip.open(gzFile, 'rb')
        try:
            self.fout = open(dst, 'wb')
        except Exception:
            self.fin.close()
            raise
        self.__in2out()

    def __in2out(self):
        """Copy ``self.fin`` to ``self.fout``; both are always closed."""
        try:
            while True:
                buf = self.fin.read(self.bufSize)
                if not buf:
                    break
                self.fout.write(buf)
        finally:
            self.fin.close()
            self.fout.close()


def download(link, name):
    """Download ``link`` into the file ``name``.

    Returns True on success and False on any failure, including a non-2xx
    HTTP status (otherwise an error page would be stored as the archive).
    """
    import requests  # see GetCdnLogList for why this import is local
    try:
        r = requests.get(link, timeout=60)
        r.raise_for_status()
        with open(name, 'wb') as f:
            f.write(r.content)
        return True
    except Exception as e:
        sys.stderr.write('download failed for %s: %s\n' % (link, e))
        return False


def writelog(src, dst):
    """Append the content of ``src`` to the per-day variant of ``dst``.

    The file name part of ``dst`` is expected to look like
    ``<YYYYMMDDHH>-<domain>.log``; the two hour digits are dropped so that
    all hours of one day end up in ``<YYYYMMDD>-<domain>.log``.  Only the
    file name is rewritten and only its first '-' is used as the separator,
    so hyphens in the directory or in the domain are left intact.  A name
    without '-' is used unchanged.
    """
    directory, filename = os.path.split(dst)
    stamp, sep, rest = filename.partition('-')
    if sep:
        filename = stamp[:-2] + '-' + rest
    daily = os.path.join(directory, filename)
    # Binary mode: the log is copied verbatim, whatever its encoding.
    with open(src, 'rb') as f1:
        with open(daily, 'ab') as f2:
            shutil.copyfileobj(f1, f2)


def main():
    """Fetch and merge the log of two hours ago for every configured domain."""
    # The log of the previous hour is not always published yet, so the hour
    # before that one is requested.  Example of the expected format:
    # "2017-02-23 12:00:00".
    tm = datetime.now() + timedelta(hours=-2)
    startDate = endDate = tm.strftime("%Y-%m-%d %H:00:00")
    hosts = [
        'pfcdn.xxx.com',
        'pecdn.xxx.com',
        'pdcdn.xxx.com',
        'pccdn.xxx.com',
        'pbcdn.xxx.com',
        'pacdn.xxx.com',
        'p9cdn.xxx.com',
        'p8cdn.xxx.com',
        'p7cdn.xxx.com',
    ]
    if not os.path.isdir(TEMP_DIR):
        os.makedirs(TEMP_DIR)
    for host in hosts:
        try:
            ret = CdnHelper(host, startDate, endDate).GetCdnLogList()
            entry = ret['data']['list'][0]
            link = entry['link']
            name = entry['name']
            # Downloaded archive.
            gzip_name = TEMP_DIR + name + '.gz'
            # Decompressed single-hour log.
            local_name = TEMP_DIR + name + '.log'
            # Target passed to writelog(), which derives the per-day name.
            real_path = LOG_DIR + name + '.log'
            print('%s %s' % (local_name, real_path))
            if download(link, gzip_name):
                try:
                    GZipTool().decompress(gzip_name, local_name)
                    writelog(local_name, real_path)
                finally:
                    # The .gz archive is kept on purpose; only the
                    # decompressed hour file is removed.
                    if os.path.exists(local_name):
                        os.remove(local_name)
        except Exception as e:
            # One failing domain must not stop the remaining ones.
            print(e)
            continue


if __name__ == '__main__':
    main()

get_cdn_log.py

放到定时任务,每小时执行一次

# CDN logs: run at minute 0 of every hour (a crontab entry needs five time fields).
# cron normally runs commands with /bin/sh, where the bash-only "&>" is not a
# redirection, so the portable "> file 2>&1" form is used to discard all output.
0 * * * * /usr/bin/python /root/bin/get_cdn_log.py > /dev/null 2>&1

下图为解压后的日志:每个域名保存为一个文件,并按天分割。

  

2、filebeat配置(具体含义查看官方文档)

[root@BJ-- bin]# cat /usr/local/app/filebeat-1.2.-x86_64/nginx-php.yml
# Filebeat 1.x configuration: ship the merged per-day CDN logs to Logstash.
filebeat:
  prospectors:
    -
      paths:
        - /data/logs/cdn/*.log
      # Becomes the event "type", which the Logstash pipeline branches on.
      document_type: cdn-log
      input_type: log
      #tail_files: true
      multiline:
        # NOTE(review): the published snippet had no "pattern", which the
        # multiline section needs. This one assumes every record starts with a
        # 14-digit yyyyMMddHHmmss timestamp - confirm against real log lines.
        pattern: '^[0-9]{14} '
        # Lines NOT matching the pattern are appended to the preceding line.
        negate: true
        match: after
output:
  logstash:
    hosts: ["10.80.2.181:5048", "10.80.2.182:5048"]
# The sections below were left empty in the original snippet (defaults apply).
shipper:
logging:
  files:

3、logstash配置

日志格式:

 61.135.234.125 cdn.xxx.com /game////57037f7fc1a0dde9091d4fe6502a6c53.jpg     http://www.xxx.com/ 5 "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; NetworkBench/7.0.0.282-5004888-124025)" "(null)" GET HTTP/1.1 hit

日志内容依次包括:请求时间、访问域名的客户端IP、被访问域名、文件请求路径、本次访问字节数大小、省份、运营商、http返回码、referer信息、request-time(毫秒)、User-Agent、range、HTTP Method、HTTP协议标识、缓存Hit/Miss。

配置文件

# /usr/local/app/logstash-2.3.4/conf.d/logstash.conf
input {
  beats {
    # Must be the port Filebeat ships to (its output.logstash.hosts use :5048).
    port => 5048
    host => "0.0.0.0"
  }
}

filter {
  # ... (the preceding `if [type] == ...` branches are omitted in the article) ...
  else if [type] == "cdn-log" {
    # Split one CDN access-log line into named fields.
    grok {
      patterns_dir => ["./patterns"]
      match => { "message" => "%{DATESTAMP_EVENTLOG:timestamp} %{IPORHOST:client_ip} %{IPORHOST:server_name} %{NOTSPACE:request} %{NUMBER:bytes} %{NUMBER:province} %{NUMBER:operator} %{NUMBER:status} (?:%{URI:referrer}|%{WORD:referrer}) %{NUMBER:request_time} %{QS:agent} \"\(%{WORD:range}\)\" %{WORD:method} HTTP/%{NUMBER:protocol} %{WORD:cache}" }
    }
    # Use the request time from the log line as the event time.
    date {
      match => [ "timestamp", "yyyyMMddHHmmss"]
      target => "@timestamp"
    }
    # Replace the numeric province / ISP codes with readable names.
    # Each entry is: field, value to match, replacement.
    # NOTE(review): the numeric codes were lost in the published snippet (every
    # match value was ""); they are restored here from Tencent Cloud's CDN log
    # code tables - verify them against the current official documentation.
    alter {
      condrewrite => [
        "province", "22", "北京",
        "province", "86", "内蒙古",
        "province", "146", "山西",
        "province", "1069", "河北",
        "province", "1177", "天津",
        "province", "119", "宁夏",
        "province", "152", "陕西",
        "province", "1208", "甘肃",
        "province", "1467", "青海",
        "province", "1468", "新疆",
        "province", "145", "黑龙江",
        "province", "1445", "吉林",
        "province", "1464", "辽宁",
        "province", "2", "福建",
        "province", "120", "江苏",
        "province", "121", "安徽",
        "province", "122", "山东",
        "province", "1050", "上海",
        "province", "1442", "浙江",
        "province", "182", "河南",
        "province", "1135", "湖北",
        "province", "1465", "江西",
        "province", "1466", "湖南",
        "province", "118", "贵州",
        "province", "153", "云南",
        "province", "1051", "重庆",
        "province", "1068", "四川",
        "province", "1155", "西藏",
        "province", "4", "广东",
        "province", "173", "广西",
        "province", "1441", "海南",
        "province", "0", "其他",
        "province", "1", "港澳台",
        "province", "-1", "海外",
        "operator", "2", "中国电信",
        "operator", "26", "中国联通",
        "operator", "38", "教育网",
        "operator", "43", "长城宽带",
        "operator", "1046", "中国移动",
        "operator", "3947", "中国铁通",
        "operator", "-1", "海外运营商",
        "operator", "0", "其他运营商"
      ]
    }
  }
} # filter

output {
  # Keep lines that grok could not parse in a local file for inspection.
  if "_grokparsefailure" in [tags] {
    file { path => "/var/log/logstash/grokparsefailure-%{[type]}-%{+YYYY.MM.dd}.log" }
  }
  # ... (other branches omitted in the article) ...
  else if [type] == "cdn-log" {
    elasticsearch {
      hosts => ["10.80.2.13:9200","10.80.2.14:9200","10.80.2.15:9200","10.80.2.16:9200"]
      sniffing => true
      manage_template => true
      template_overwrite => true
      template_name => "cdn"
      template => "/usr/local/app/logstash-2.3.4/templates/cdn.json"
      # One index per event type and day.
      index => "%{[type]}-%{+YYYY.MM.dd}"
      document_type => "%{[type]}"
    }
  }
  # ... (other branches omitted in the article) ...
} # output

4 效果图(一小时数据)

cdn使用量效果图

cdn访问情况统计

状态码统计

Python脚本收集腾讯云CDN日志,并入ELK日志分析的更多相关文章

  1. 腾讯云CDN python SDK

    腾讯云CDN python SDK 博主在开发时偶尔要用到CDN,感觉适合学生党的应该是腾讯云的CDN了,还提供了每月10G的流量,博主平时学习使用已经足够了. 代码 #coding=utf-8 fr ...

  2. 图片流量节省大杀器:基于腾讯云CDN的sharpP自适应图片技术实践

    目前移动端运营素材大部分依赖图片,基于对图片流量更少,渲染速度更快的诉求,我们推动CDN,X5内核,即通产品部共同推出了一套业务透明,无痛接入的CDN图片优化方案:基于CDN的sharpP自适应图片无 ...

  3. 借助腾讯云CDN开启全站https及问题解决分享

    版权声明:本文由张戈原创文章,转载请注明出处: 文章原文链接:https://www.qcloud.com/community/article/78 来源:腾云阁 https://www.qcloud ...

  4. 腾讯云--腾讯云sdk-实现脚本修改腾讯云负载均衡权重

    一.请确认你的当前python环境为python 2.x 获取 python 版本的方法 (linux shell) # python -v python 2.7.11 二.CLB SDK下载与配置 ...

  5. EasyNVR结合阿里云/腾讯云CDN实现微信/小程序直播的方案

    背景需求: 许多客户有这样的需求:微信公众号做为平台来对摄像机进行直播:可以让用户随时随地打开公共号就可以观看:保证画面的流畅性:保证视频的并发访问量等. 问题分析: 虽然需求看似很简单,其实真正实现 ...

  6. EasyNVR完美搭配腾讯云CDN/阿里云CDN进行RTMP、HLS直播加速的使用说明

    1.相关资料入口 腾讯云LVB EasyNVR.com 2.加速说明 2.1. 腾讯LVB加速 2.1.1. 开通服务 腾讯云视频LVB开通入口 2.1.2. 登录进入控制台 腾讯云直播控制台 2.1 ...

  7. python socket编程腾讯云下报错[Errno 99] Cannot assign requested address的解决方式

    先写服务端server.py: import socket import time HOST = '172.17.xx.xx' #服务器的私网IP #HOST = 'localhost' PORT = ...

  8. 腾讯云EMR大数据实时OLAP分析案例解析

    OLAP(On-Line Analytical Processing),是数据仓库系统的主要应用形式,帮助分析人员多角度分析数据,挖掘数据价值.本文基于QQ音乐海量大数据实时分析场景,通过QQ音乐与腾 ...

  9. Springboot项目使用aop切面保存详细日志到ELK日志平台

    上一篇讲过了将Springboot项目中logback日志插入到ELK日志平台,它只是个示例.这一篇来看一下实际使用中,我们应该怎样通过aop切面,拦截所有请求日志插入到ELK日志系统.同时,由于往往 ...

随机推荐

  1. mysql授权远程用户连接(权限最小化原则)

    1.进入MySQL,创建一个新用户root,密码为root: 格式:grant 权限 on 数据库名.表名 to 用户@登录主机 identified by "用户密码"; gra ...

  2. 2.9. Scalar Properties for Primitive Data Types 选项(Core Data 应用程序实践指南)

    该选项的意思是,“用Scalar特性来表示原始数据类型”.什么意思,妈妈米呀,这是我学这门课程遇到的最难懂的概念. scalar properties,是复数,也就是说是 “分等级的属性”.那么,大概 ...

  3. social relation & recommender system

    由于社交网络盛行,现在许多关于推荐系统的研究都考虑了如何使用social relation来改进推荐系统.虽然有很多论文都成功的使用social relation改进了推荐效果,然而,也有一些尝试失败 ...

  4. js原生封装getClassName()方法-ie不支持getElementsByClassName,所以要自己实现获取类名为className的所有元素

    <html> <head> <script type="text/javascript"> window.onload = function() ...

  5. Java线程:锁

    一.锁的原理 Java中每个对象都有一个内置锁,当程序运行到非静态的synchronized同步方法上时,自动获得与正在执行的代码类的当前实例(this实例)有关的锁.获得一个对象的锁也称为获取锁.锁 ...

  6. 外部IIS/Apache/Nginx来代理FMS的http服务

    默认FMS在安装的时候,会安装Apache2.2,并监听8134端口,代理http服务器:当如也可以用外部的服务器,此时建立站点,并指向目录:C:\Program Files\Adobe\Flash ...

  7. 表单验证--通过原生js模仿ajax的异步交互

    今天给大家带来个福利,我也是刚刚学习的很实用的一个东西,通过原生js模仿ajax的异步交互. 我的博客只是给那些新手看的大神勿喷,写的不好可留言,请指出. 因为当初自己学的时候一个问题不会找人问,知道 ...

  8. loadrunner Analysis :SLA(Service Level Agreement服务水平协议)

    SLA是为负载场景定义的具体目标,用于与实际负载结果比较,确定系统是否达到性能目标. 1.1.1     设置SLA(以Transaction Response Time(Average)为例) 可以 ...

  9. LoadRunner面试题

    在LoadRunner中为什么要设置思考时间和pacing 答: 录制时记录的是客户端和服务端的交互,如果要精确模拟 用户的行为,那么客户操作客户端时花费了很多时间要怎么模拟呢?录入 填写提交的内容, ...

  10. css3 2d转换3d转换以及动画的知识点汇总

    css3 2d转换 2d转换的方法: 1.移动 translate(x, y) 可以改变元素的位置,x.y可为负值: 2.缩放 scale(x, y) 可以对元素进行水平和垂直方向的缩放,x.y的取值 ...