rabbit_消费者
# Standard-library imports.
# (Originally mangled by scraping: several `import` statements were fused onto
# single lines, and `time` / `json` were each imported twice — deduplicated.)
import ast
import hashlib
import json
import logging
import os
import time
import uuid
from logging import handlers  # TimedRotatingFileHandler lives here

# Third-party imports.
import pika
import pymysql
import redis
class Logger(object):
    """Thin wrapper around a stdlib logger that writes simultaneously to the
    console and to a time-rotated log file."""

    # Map level names accepted by __init__ to stdlib logging levels.
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL,
    }

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        """Build a logger named after *filename*.

        filename  -- log file path; also used as the logger name.
        level     -- key into level_relations ('debug' .. 'crit').
        when      -- rotation interval unit for TimedRotatingFileHandler:
                     S seconds, M minutes, H hours, D days,
                     W weekday (interval==0 means Monday), 'midnight' daily.
        backCount -- number of rotated backups kept; older files are deleted.
        fmt       -- log record format string.
        """
        self.logger = logging.getLogger(filename)
        format_str = logging.Formatter(fmt)
        self.logger.setLevel(self.level_relations.get(level))
        # Console handler.
        sh = logging.StreamHandler()
        sh.setFormatter(format_str)
        # Rotating file handler: a new file every `when`, keep `backCount`.
        th = handlers.TimedRotatingFileHandler(filename=filename, when=when,
                                               backupCount=backCount,
                                               encoding='utf-8')
        th.setFormatter(format_str)
        # NOTE(review): repeated Logger(...) calls with the same filename
        # reuse the same underlying logger and stack duplicate handlers.
        self.logger.addHandler(sh)
        self.logger.addHandler(th)


# Module-wide logger writing to clear/all.log.
# NOTE(review): assumes a 'clear' directory already exists — confirm.
file = 'all'
log = Logger('clear' + os.sep + '%s.log' % file)
logger = log.logger

# Toggle between the local test environment and production back-ends.
DEBUG = False


class ClearLog(object):
    """Clean raw tracking-log events and persist them into month-partitioned
    MySQL tables, using Redis to de-duplicate page views and to schedule
    delayed clean-up of per-page bookkeeping."""

    # Connections are created at class-definition time.
    if DEBUG:
        DATABASE = 'unionlog'
        # Local test environment.
        poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=5, password='', decode_responses=True)
        conn = pymysql.connect(host='192.168.10.5', user='root',
                               password='root',
                               database=DATABASE, charset='utf8')
        cursor = conn.cursor()
    else:
        DATABASE = 'log'
        # Production.
        poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=5, password='', decode_responses=True)
        conn = pymysql.connect(host='', user='datacenter',
                               password='kbs11zx@',
                               database=DATABASE, charset='utf8')
        cursor = conn.cursor()

    CONN = redis.Redis(connection_pool=poll)

    # Redis keys: pid -> content-hash (dedup), pid -> last-seen time, and a
    # work queue (plus temp queue) of pids awaiting delayed deletion.
    REDIS_PID_HASH = "tarsier.log.clear.pid.hash"
    REDIS_PID_DELETE_HASH = "tarsier.log.delete.pid.hash"
    REDIS_PID_DELETE_LIST = "tarsier.log.delete.pid.list"
    REDIS_PID_DELETE_LIST_TEMP = "tarsier.log.delete.pid.list.temp"

    table_list = []  # cached result of SHOW TABLES
    table = 'tarsier_log_details'  # current target table (template name)
    instance = None  # singleton cache

    def __new__(cls, *args, **kwargs):
        # Singleton: create on first call, then reuse.  (Original never
        # assigned cls.instance, so the cache was dead code — fixed.)
        if cls.instance is None:
            cls.instance = super().__new__(cls)
        return cls.instance

    def __init__(self):
        # ClearLog.main()
        pass

    def __call__(self, *args, **kwargs):
        pass

    @staticmethod
    def get_table_list(table):
        """Ensure *table* exists (creating it LIKE the template table if
        missing) and refresh the cached table list."""
        ClearLog.table = table
        if table in ClearLog.table_list:
            pass  # already known to exist
        else:
            ClearLog.cursor.execute("SHOW TABLES")
            res = ClearLog.cursor.fetchall()
            table_temp = []
            for i in res:
                table_temp.append(i[0])
            ClearLog.table_list = table_temp
            if table in ClearLog.table_list:
                pass  # existed, cache was just stale
            else:
                # Create the month partition from the template table.
                sql = """create table %s like tarsier_log_details""" % (table)
                try:
                    print('创建表')
                    ClearLog.cursor.execute(sql)
                except Exception as e:
                    pass  # best-effort: a racing worker may have created it
                ClearLog.cursor.execute("SHOW TABLES")
                res = ClearLog.cursor.fetchall()
                table_temp = []
                for i in res:
                    table_temp.append(i[0])
                ClearLog.table_list = table_temp

    @staticmethod
    def updata_db(data):
        """Upsert *data* (column -> value dict) into this month's partition
        via INSERT ... ON DUPLICATE KEY UPDATE."""
        table = "tarsier_log_details_%s" % ClearLog.timestamp_to_str(format="%Y%m")
        ClearLog.get_table_list(table)
        keys = ', '.join(data.keys())
        values = ', '.join(['%s'] * len(data))
        # INSERT with ON DUPLICATE KEY UPDATE: update when the key exists.
        sql = 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE'.format(table=table, keys=keys,
                                                                                             values=values)
        update = ','.join([" {key} = %s".format(key=key) for key in data])
        sql += update
        try:
            # Values are passed twice: once for INSERT, once for UPDATE.
            ClearLog.cursor.execute(sql, tuple(data.values()) * 2)
            print('update Successful')
            ClearLog.conn.commit()
        except Exception as e:
            logger.error(e)
            print('update Failed')

    @staticmethod
    def update_db_sql(sql):
        """Execute a raw SQL statement, committing on success and rolling
        back (and logging) on failure."""
        try:
            ClearLog.cursor.execute(sql)
            ClearLog.conn.commit()
            print('更新成功')
        except Exception as e:
            print("ERROR:{}".format(str(e)))
            ClearLog.conn.rollback()
            logger.info('error:%s' % str(e))

    @staticmethod
    def md5_me(key):
        """Return the hex MD5 digest of str(key)."""
        md5 = hashlib.md5()
        md5.update(str(key).encode('utf-8'))
        value = md5.hexdigest()
        return value

    @staticmethod
    def main():
        """Batch mode: replay a dump file where each line is a Python-literal
        event dict, cleaning every record."""
        with open('20201110.log', encoding='utf-8') as f:
            count = 0
            for item in f:
                line = item.strip()
                data = ast.literal_eval(line)
                ClearLog.clear_log(data)
                count += 1
                if count % 10000 == 0:
                    print(count)  # progress indicator

    @staticmethod
    def main2(data):
        """Stream mode: clean a single already-parsed event dict."""
        ClearLog.clear_log(data)

    @staticmethod
    def clear_log(data):
        """Normalise one raw event dict into the DB row shape and persist it.
        'view' (or empty-type) events are de-duplicated per page id via the
        Redis content hash before being written."""
        res_data = {}
        rsUid = data.get('rsUid', '')
        rsPageId = data.get('rsPageId', '')
        rshyuid = data.get('rshyuid', '')
        pageid = data.get('pageid', '')
        pageUrl = data.get('pageUrl', '')
        userAgent = data.get('userAgent', '')
        referrer = data.get('referrer', '')
        nowDate = data.get('nowDate', '')
        device = data.get('device', '')
        rshyType = data.get('rshyType', '')
        targetDataset = str(data.get('targetDataset', ''))
        targetValue = data.get('targetValue', '')
        targetClassName = data.get('targetClassName', '')
        inputData = str(data.get('inputData', ''))
        rshyUserIp = data.get('rshyUserIp', '')
        netloc = data.get('netloc', '')
        urlPath = data.get('urlPath', '')
        siteName = data.get('siteName', '')
        TIME = ClearLog.timestamp_to_str()
        ID = ClearLog.get_uuid()
        rshyTime = data.get('rshyTime', '')
        try:
            # rshyTime is expected like 'YYYY-MM-DD HH:MM:SS' — TODO confirm.
            rsdate = rshyTime.split()[0]
            temp = rshyTime.split()[1]
            rshour = temp.split(':')[0]
        except Exception:
            rsdate = ''
            rshour = 0
        res_data.update({
            "id": ID,
            "rsuid": rsUid,
            "rshytime": rshyTime,
            "rshour": rshour,
            "rsdate": rsdate,
            "rspageid": rsPageId,
            "rshyuid": rshyuid,
            "pageid": pageid,
            "pageurl": pageUrl,
            "useragent": userAgent,
            "referrer": referrer,
            "device": device,
            "rshytype": rshyType,
            "targetvalue": targetValue,
            "targetdataset": targetDataset,
            "targetclassname": targetClassName,
            "inputdata": inputData,
            "starttime": nowDate,
            "rshyuserip": rshyUserIp,
            "netloc": netloc,
            "urlpath": urlPath,
            "sitename": siteName,
            "createtime": TIME,
            "updatetime": TIME,
        })
        if rshyType == 'view' or rshyType == '':
            rsUidKey = rsPageId  # page id keys all Redis bookkeeping
            if not rsPageId:
                return
            # Keep refreshing the last-seen time for this pid.
            ClearLog.CONN.hset(ClearLog.REDIS_PID_DELETE_HASH, rsUidKey, nowDate)
            # Fingerprint of the view; identical repeats are not re-stored.
            res_temp = rsUid + pageUrl + referrer + userAgent + device
            res_rs_uid = ClearLog.md5_me(res_temp)
            exist_uid = ClearLog.CONN.hget(ClearLog.REDIS_PID_HASH, rsUidKey)
            if not exist_uid or res_rs_uid != str(exist_uid):
                ClearLog.CONN.hset(ClearLog.REDIS_PID_HASH, rsUidKey, res_rs_uid)
                # Persist the row.
                ClearLog.write_data(res_data)
                # Record the time and enqueue the pid for delayed deletion.
                ClearLog.CONN.hset(ClearLog.REDIS_PID_DELETE_HASH, rsUidKey, nowDate)
                data_temp = {"pid": rsUidKey, "date": nowDate}
                ClearLog.CONN.lpush(ClearLog.REDIS_PID_DELETE_LIST, json.dumps(data_temp))
            return
        # Non-view events are always written.
        ClearLog.write_data(res_data)

    @staticmethod
    def write_data(data):
        """Write the row to MySQL and append it to today's .clear.log file."""
        ClearLog.updata_db(data)
        file_name = ClearLog.timestamp_to_str_m()
        with open('clear{}{}.clear.log'.format(os.sep, file_name), 'a+', encoding='utf-8') as f:
            f.write(str(data) + '\n')

    @staticmethod
    def str_to_timestamp(str_time=None, format='%Y-%m-%d %H:%M:%S'):
        """Parse a formatted time string to an int unix timestamp; with no
        argument, return the current timestamp."""
        if str_time:
            time_tuple = time.strptime(str_time, format)
            result = time.mktime(time_tuple)
            return int(result)
        return int(time.time())

    @staticmethod
    def timestamp_to_str(timestamp=None, format='%Y-%m-%d %H:%M:%S'):
        """Format a unix timestamp as local time; with no timestamp, format
        the current time."""
        if timestamp:
            time_tuple = time.localtime(timestamp)
            result = time.strftime(format, time_tuple)
            return result
        else:
            return time.strftime(format)

    @staticmethod
    def timestamp_to_str_m(timestamp=None, format='%Y-%m-%d'):
        """Same as timestamp_to_str but defaulting to a date-only format."""
        if timestamp:
            time_tuple = time.localtime(timestamp)
            result = time.strftime(format, time_tuple)
            return result
        else:
            return time.strftime(format)

    @staticmethod
    def get_uuid():
        """Return a random UUID4 hex string without dashes."""
        res = str(uuid.uuid4())
        UUID = ''.join(res.split('-'))
        return UUID

    @staticmethod
    def del_tarsier_log_pid_hash():
        """Intended to run every 5 minutes: for each pid whose last-seen time
        is older than 60s, write its end time to MySQL and drop the pid from
        the delete hash."""
        # NOTE(review): get_table_list() overwrites ClearLog.table with an
        # already month-suffixed name, so this may double-append the month
        # suffix — confirm against the table naming scheme.
        table = ClearLog.table + '_%s' % ClearLog.timestamp_to_str_m(format='%Y%m')
        print('每5分钟删除一次hash中的值,并将停留时间算出')
        get_pid_list = ClearLog.CONN.hgetall(ClearLog.REDIS_PID_DELETE_HASH)
        for hash_pid_item in get_pid_list:
            redisDate = ClearLog.CONN.hget(ClearLog.REDIS_PID_DELETE_HASH, hash_pid_item)
            try:
                redis_data_time = ClearLog.str_to_timestamp(redisDate)
                now_data_time = time.time()
                chufatime = now_data_time - redis_data_time
                if chufatime >= 60:
                    # Page considered closed: record its last-seen time.
                    sql = """update {} set endtime='{}' where rspageid='{}'""".format(table, redisDate,
                                                                                      hash_pid_item)
                    print(sql)
                    ClearLog.update_db_sql(sql)
                    ClearLog.CONN.hdel(ClearLog.REDIS_PID_DELETE_HASH, hash_pid_item)
            except Exception as e:
                pass  # malformed date or transient Redis error: skip this pid
        print('====================================')

    @staticmethod
    def del_tarsier_log_pid_list():
        """Intended to run daily: drop dedup-hash entries older than 24h;
        younger entries are parked in a temp list and then pushed back."""
        logger.info('每一天清除一次队列信息')
        res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST)
        while res_str:
            try:
                # Entries look like {"pid": ..., "date": ...}.
                res_json = json.loads(res_str)
                nowDate = res_json.get("date", '')
                rsUidKey = res_json.get("pid", '')
                redis_data_time = ClearLog.str_to_timestamp(nowDate)
                now_data_time = time.time()
                chufatime = now_data_time - redis_data_time
                if chufatime >= 24 * 60 * 60:
                    # Older than a day: forget the dedup fingerprint.
                    ClearLog.CONN.hdel(ClearLog.REDIS_PID_HASH, rsUidKey)
                else:
                    # Too young: park it so it is re-queued afterwards.
                    ClearLog.CONN.rpush(ClearLog.REDIS_PID_DELETE_LIST_TEMP, json.dumps(res_json))
                res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST)
            except Exception:
                pass  # NOTE(review): a bad entry leaves res_str unchanged and
                      # can loop forever — confirm whether that is acceptable
        # Move parked entries back onto the main queue.
        res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST_TEMP)
        while res_str:
            res_json = json.loads(res_str)
            ClearLog.CONN.rpush(ClearLog.REDIS_PID_DELETE_LIST, json.dumps(res_json))
            res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST_TEMP)
        logger.info('清除完毕')
# 天
def timestamp_to_str_day(timestamp=None, format='%Y%m%d'):
    """Format a unix *timestamp* (seconds) as a local-time day string, e.g.
    '20201110'.  With no argument, format the current local time.

    Fixed: the original used `if timestamp:`, so timestamp=0 (the epoch)
    silently formatted "now" instead of 1970-01-01.
    (`format` shadows the builtin; name kept for caller compatibility.)
    """
    if timestamp is not None:
        time_tuple = time.localtime(timestamp)  # timestamp -> struct_time
        return time.strftime(format, time_tuple)
    return time.strftime(format)  # current local time
# --- RabbitMQ connection setup --------------------------------------------
rabbit_host = "192.168.2.129"
rabbit_username = 'rshy'
rabbit_password = 'root1234@AWJSW'
queue_topic = 'logs.collect.statistics'

user = pika.PlainCredentials(rabbit_username, rabbit_password)
connection = pika.BlockingConnection(pika.ConnectionParameters(host=rabbit_host, credentials=user,))  # heartbeat=0
# connection = pika.BlockingConnection(pika.ConnectionParameters(host='192.168.2.129'))
channel = connection.channel()

# Declare the queue so consuming works even if the producer has not yet.
# channel.queue_declare(queue=cfg.QUEUE_TOPIC)
channel.queue_declare(queue=queue_topic)

# Shared cleaner instance used by the consumer callback.
ClearLogObj = ClearLog()
def consumer():
    """Block on the RabbitMQ queue, cleaning each JSON message through
    ClearLogObj and appending the raw record to a per-day log file."""
    print(' [*] Waiting for messages. To exit press CTRL+C')

    def callback(ch, method, properties, body):
        # Process and record one message.
        # print("Method: {}".format(method))
        # print("Properties: {}".format(properties))
        data = json.loads(body)
        print("--data--:", data)
        ClearLogObj.main2(data)
        # Append the raw event to consumer/<YYYYMMDD>.log.
        # NOTE(review): assumes a 'consumer' directory exists — confirm.
        file_name = timestamp_to_str_day()
        with open('consumer' + os.sep + file_name + '.log', 'a+', encoding='utf-8') as f:
            f.write(str(data) + '\n')

    # Listen and receive data from the queue (third positional arg is
    # auto_ack=True: messages are acked on delivery).
    # channel.basic_consume(cfg.QUEUE_TOPIC, callback, True)
    channel.basic_consume(queue_topic, callback, True)
    channel.start_consuming()


if __name__ == '__main__':
    consumer()
rabbit_消费者的更多相关文章
- java简单模拟生产者消费者问题
本文来自:http://www.cnblogs.com/happyPawpaw/archive/2013/01/18/2865957.html 引言 生产者和消费者问题是线程模型中的经典问题:生产者和 ...
- 队列&生产者消费者
Queue是python标准库中的线程安全的队列(FIFO)实现,提供了一个适用多线程的先进先出的数据结构,即队列,用来在生产者和消费者线程之间信息传递. 如果在多线程中,给存放数据,也就是修改同一份 ...
- rocketmq生产者和消费者
1.生产者: package com.ebways.mq.test.mq; import com.alibaba.rocketmq.client.exception.MQClientException ...
- java并发编程(十三)经典问题生产者消费者问题
生产者消费者问题是线程模型中的经典问题:生产者和消费者在同一时间段内共用同一存储空间,生产者向空间里生产数据,而消费者取走数据. 这里实现如下情况的生产--消费模型: 生产者不断交替地生产两组数据&q ...
- java 中多线程之间的通讯之生产者和消费者 (多个线程之间的通讯)
在真实开发 中关于多线程的通讯的问题用到下边的例子是比较多的 不同的地方时if 和while 的区别 如果只是两个线程之间的通讯,使用if是没有问题的. 但是在多个线程之间就会有问题 /* * 这个例 ...
- Competing Consumers Pattern (竞争消费者模式)
Enable multiple concurrent consumers to process messages received on the same messaging channel. Thi ...
- iPhone 6 被盗记录二【写在315前夕:苹果售后福州直信创邺在没有三包的情况下帮小偷翻新、助力小偷换机销赃!无视王法。让人震惊,痛心,憎恨!消费者很受伤很无奈】
投诉公司: 北京直信创邺数码科技有限公司 标题: 写在315前夕:苹果售后在没有三包的情况下帮小偷翻新.助力小偷换机销赃!无视王法.让人震惊,痛心,憎恨!消费者很受伤很无奈 期望: 还我手机,或者赔 ...
- 【Windows】用信号量实现生产者-消费者模型
线程并发的生产者-消费者模型: 1.两个进程对同一个内存资源进行操作,一个是生产者,一个是消费者. 2.生产者往共享内存资源填充数据,如果区域满,则等待消费者消费数据. 3.消费者从共享内存资源取数据 ...
- Java程序设计之消费者和生产者
新建一个Break类,表示食物数量. public class Break { public static final int MAX = 10; //最多一次性煮十个面包 Stack<Inte ...
随机推荐
- 实现spaCy实体标注模型
命名实体识别是指对现实世界中某个对象的名称的识别.与词性标注一样,是自然语言处理的技术基础之一.它的作用主要是通过模型识别出文本中需要的实体,也可以推导出实体之间的关系(实体消歧). 本文介绍的是运用 ...
- OOP-面向对象(二)
面向对象三大特征: 封装 继承 多态 -封装:对类中成员属性和方法的保护,控制外界对内部成员的访问,修改,删除等操作 私有的: private 在本类内部可以访问,类的外部不可以访问.(python中 ...
- 1144 The Missing Number
Given N integers, you are supposed to find the smallest positive integer that is NOT in the given li ...
- Python 并行计算那点事 -- 译文 [原创]
Python 并行计算的那点事1(The Python Concurrency Story) 英文原文:https://powerfulpython.com/blog/python-concurren ...
- HTML5中meta属性的使用详解
meta属性在HTML中占据了很重要的位置.如:针对搜索引擎的SEO,文档的字符编码,设置刷新缓存等.虽然一些网页可能没有使用meta,但是作为正规军,我们还是有必要了解一些meta的属性,并且能够熟 ...
- SpringCloud之服务网关Gateway,入门+实操
SpringCloudAlibaba微服务实战教程系列 Spring Cloud 微服务架构学习记录与示例 一. GateWay简介 Spring Cloud GateWay是Spring Cloud ...
- POJ1135比较有意思的对短路(多米骨牌)
题意: 有一个骨牌游戏,就是推到一个后所有的牌都会被退到的那种游戏,起点是1,有两种骨牌,一种是关键牌,另一种是普通牌,普通牌是连接关键牌用的,给你一些边a b c的意思是关键牌a倒之后c时 ...
- GDOI2021 游记
蹭了个名额去参加 \(\text{GDOI}\) \(\text{tg}\),体验了一下大佬的生活/kk (以下试题皆为 \(\text A\) 卷 DAY -1 不知道要复习什么.本来没有机会来参加 ...
- 数据库导入时出现“2006 - MySQL server has gone away”问题的解决(windows)
1.查到文件my.ini,在文件最后([mysqld]段最后),修改"max_allowed_packet = 50M",添加"interactive_timeout = ...
- 制作一个简单的toast弹框
toast弹框的作用 toast弹框顾名思义,就是为了弹出一个提示框,效果如图: 使用toast弹框可以可用户带来更好的交互体验 toast弹框的使用 Toast组件 制做出toast的样式以及出现的 ...