024--python re、logging、configparser、hashlib模块

一、re模块

　　正则表达式是一种小型的、高度专业化的编程语言，它内嵌在Python中，并通过 re 模块实现。正则表达式模式会被编译成一系列字节码，然后由用 C 编写的匹配引擎执行。

　　1.元字符：. ^ $ * + ? { } [ ] | ( ) \ 共11个元字符

　　元字符： . 匹配任意字符

import re

# "." stands for any single character (newline excepted), so the pattern
# needs exactly two arbitrary characters between "a" and "in".
dot_pattern = re.compile('a..in')
ret = dot_pattern.findall('helloalvin')
print(ret)
#['alvin']

　　元字符： ^ 表示以什么开头

import re

# "^" anchors the match to the start of the string, so the trailing
# "awwwn" is never considered.
start_pattern = re.compile('^a...n')
ret = start_pattern.findall('alvinhelloawwwn')
print(ret)
#['alvin']

　　元字符：$ 表示以什么结尾

import re

# "$" anchors the match to the end of the string, so only the final
# "awwwn" qualifies.
end_pattern = re.compile('a...n$')
ret = end_pattern.findall('alvinhelloawwwn')
print(ret)
#['awwwn']

　　元字符：* 表示*号前面的字符可以出现0次到无穷次

import re

# "*" lets the preceding character repeat 0..infinity times and is greedy:
# it consumes every "c" that is available.
star_pattern = re.compile('abc*')
ret = star_pattern.findall('abcccc')
print(ret)
#['abcccc']

　　元字符：+ 表示+号前面的字符可以出现1次到无穷次

import re

# "+" lets the preceding character repeat 1..infinity times and is greedy.
plus_pattern = re.compile('abc+')
ret = plus_pattern.findall('abccc')
print(ret)
#['abccc']

　　元字符：？表示？前面的字符可以出现[0,1]次

import re

# "?" lets the preceding character appear 0 or 1 time; being greedy it takes
# the one "c" it is allowed to take.
optional_pattern = re.compile('abc?')
ret = optional_pattern.findall('abccc')
print(ret)
#['abc']

　　元字符：{} 可以在{}内自定义前面的字符出现的次数

import re

# "{m,n}" bounds how many times the preceding character may repeat; the
# match is greedy, so all three available "c" characters are taken.
bounded_pattern = re.compile('abc{1,4}')
ret = bounded_pattern.findall('abccc')
print(ret)
#['abccc'] greedy match

　　注意：前面的*,+,?,{}等都是贪婪匹配，也就是尽可能多地匹配；在它们后面加?号可使其变成惰性（非贪婪）匹配，即尽可能少地匹配

import re

# A "?" after a quantifier makes it lazy: "c*?" takes as few "c" characters
# as possible, which here is zero.
lazy_pattern = re.compile('abc*?')
ret = lazy_pattern.findall('abcccccc')
print(ret)
#['ab']

　　元字符：[ ] 是字符集，匹配[ ]中列出的任意一个字符。注意：除 ^ - \ 这3个符号外，其它字符在字符集中都当做普通字符。

import re

# A character class matches exactly one character taken from the listed set.
ret = re.findall('a[bc]d', 'acd')
print(ret)#['acd']

# "-" between two characters denotes a range.
ret = re.findall('[a-z]', 'acd')
print(ret)#['a', 'c', 'd']

# Most metacharacters lose their meaning inside a class: . * + are literal.
ret = re.findall('[.*+]', 'a.cd+')
print(ret)#['.', '+']

# Characters that keep a special meaning inside a class: - ^ \
ret = re.findall('[1-9]', '45dha3')
print(ret)#['4', '5', '3']

# A leading "^" negates the class.
ret = re.findall('[^ab]', '45bdha3')
print(ret)#['4', '5', 'd', 'h', '3']

# Raw string: in a plain literal "\d" is an invalid *string* escape that only
# works by accident and triggers a warning on current Python versions.
ret = re.findall(r'[\d]', '45bdha3')
print(ret)#['4', '5', '3']

　　元字符：\ 反斜杠后边跟元字符去除特殊功能,比如\. 反斜杠后边跟普通字符实现特殊功能,比如\d

　　\d 匹配任何十进制数；它相当于类 [0-9]。
　　\D 匹配任何非数字字符；它相当于类 [^0-9]。
　　\s 匹配任何空白字符；它相当于类 [ \t\n\r\f\v]。
　　\S 匹配任何非空白字符；它相当于类 [^ \t\n\r\f\v]。
　　\w 匹配任何字母数字字符；它相当于类 [a-zA-Z0-9_]。
　　\W 匹配任何非字母数字字符；它相当于类 [^a-zA-Z0-9_]
　　\b 匹配单词边界，即 \w 字符与非 \w 字符（如空格、&、＃等）或字符串首尾之间的位置；它只匹配位置，不消耗字符

import re

# In a plain (non-raw) literal Python itself turns "\b" into the backspace
# character, so the regex engine looks for "I" followed by a backspace and
# finds nothing.
ret=re.findall('I\b','I am LIST')

print(ret)

#[]

# In a raw literal the backslash reaches the regex engine, where "\b" is a
# word boundary.  Only the standalone "I" is followed by one; the "I" inside
# "LIST" is followed by another word character.
ret=re.findall(r'I\b','I am LIST')

print(ret)

#['I']

　　再来看两个匹配：

#-----------------------------eg1:
import re

# Subject text: the six characters  a b c \ l e  (one literal backslash).
# The backslash is doubled so the literal does not rely on an invalid escape.
text = 'abc\\le'

# The Python literal 'c\\l' (the un-doubled spelling 'c\l' is the very same
# string, plus an invalid-escape warning) hands the regex engine  c\l .
# "\l" is not a regex escape: Python 3.7+ raises re.error for it rather than
# returning [], so the failure is caught and reported instead of aborting the
# script.
try:
    ret = re.findall('c\\l', text)
except re.error as exc:
    ret = []
    print('re.error:', exc)
print(ret)
#[]

# Four backslashes in a plain literal -> two in the string -> one literal
# backslash as far as the regex engine is concerned.
ret = re.findall('c\\\\l', text)
print(ret)
#['c\\l']

# A raw literal removes the Python layer of escaping: two backslashes suffice.
ret = re.findall(r'c\\l', text)
print(ret)
#['c\\l']

#-----------------------------eg2:
# "\b" is used here because it is a valid *string* escape (backspace), so the
# plain literal below silently becomes a different pattern.
m = re.findall('\bblow', 'blow')
print(m)
#[]

# Raw literal: the regex engine receives "\b" and treats it as a word boundary.
m = re.findall(r'\bblow', 'blow')
print(m)
#['blow']

　　元字符：( ) 把（）内的代码当做一个整体进行匹配

import re

# When the pattern contains a capturing group, findall returns the group's
# content; a repeated group keeps only its last repetition.
m = re.findall(r'(ad)+', 'add')
print(m)
#['ad']

m = re.findall(r'(ad)+', 'adad')
print(m)
#['ad']

# (?:...) groups without capturing, so findall returns the whole match.
m = re.findall(r'(?:ad)+', 'adad')
print(m)
#['adad']

# (?P<name>...) is a named group.  Raw string: "\d" and "\w" are not valid
# string escapes and must reach the regex engine untouched.
ret = re.search(r'(?P<id>\d{2})/(?P<name>\w{3})', '23/com')
print(ret.group())
#23/com

print(ret.group('id'))
#23

　　元字符：| 表示或的意思

import re

# "|" means "or": either the group (ab) or a single digit.  search returns
# the first position where any alternative matches.  Raw string so that "\d"
# is not treated as an (invalid) string escape.
ret = re.search(r'(ab)|\d', 'rabhdg8sd')
print(ret.group())
#ab

　　re模块常用方法

import re

# findall: return every non-overlapping match as a list.
re.findall('a', 'alvin yuan')

# search: scan the string, stop at the first match and return a match object
# whose group() gives the matched text.  search() itself returns None when
# nothing matches, so an unguarded .group() would then raise AttributeError.
re.search('a', 'alvin yuan').group()

# match: like search, but only tries to match at the start of the string.
re.match('a', 'abc').group()

# split: splits on every character of the class -- on 'a' giving '' and
# 'bcd', then on 'b' giving '' and 'cd'.
ret = re.split('[ab]', 'abcd')
print(ret)#['', '', 'cd']

# sub: replace matches; count limits the number of replacements.  count is
# passed by keyword because passing it positionally is deprecated.
ret = re.sub(r'\d', 'abc', 'alvin5yuan6', count=1)
print(ret)#alvinabcyuan6

# subn: like sub, but also returns how many replacements were made.
ret = re.subn(r'\d', 'abc', 'alvin5yuan6')
print(ret)#('alvinabcyuanabc', 2)

# compile: build a reusable pattern object once, then call its methods.
obj = re.compile(r'\d{3}')
ret = obj.search('abc123eeee')
print(ret.group())#123

　　补充：

import re

# finditer yields match objects lazily instead of building a list.  Raw string
# so that "\d" is not treated as an (invalid) string escape.
ret = re.finditer(r'\d', 'ds3sy4784a')
print(ret)        #<callable_iterator object at 0x10195f940>

# Each next() advances to the following match: first '3', then '4'.
print(next(ret).group())
print(next(ret).group())

　　注意问题：

import re

# findall gives priority to capturing groups: with a group in the pattern it
# returns the group's content rather than the whole match.  The dots are
# escaped so they match a literal "." instead of any character.
ret = re.findall(r'www\.(baidu|oldboy)\.com', 'www.oldboy.com')
print(ret)#['oldboy']

# Turning the group into a non-capturing one (?:...) makes findall return the
# complete match.
ret = re.findall(r'www\.(?:baidu|oldboy)\.com', 'www.oldboy.com')
print(ret)#['www.oldboy.com']

　　补充2：

import re

# (?P=tag_name) is a backreference to the named group: the closing tag must
# repeat whatever the opening tag captured.  findall returns the group ('h1').
print(re.findall(r"<(?P<tag_name>\w+)>\w+</(?P=tag_name)>", "<h1>hello</h1>"))

print(re.search(r"<(?P<tag_name>\w+)>\w+</(?P=tag_name)>", "<h1>hello</h1>"))

# \1 is the numbered equivalent of the named backreference above.
print(re.search(r"<(\w+)>\w+</\1>", "<h1>hello</h1>"))

# Extract all integers from an expression.
# The first alternative swallows decimals without capturing them, so findall
# reports them as ''; the second alternative captures real integers.  Empty
# entries are filtered out, which works for zero, one or many decimals
# (list.remove("") would drop only the first and fail when there is none).
ret = [
    number
    for number in re.findall(r"-?\d+\.\d*|(-?\d+)", "1-2*(60+(-40.35/5)-(-4*3))")
    if number
]
print(ret)

二、logging模块

　　1.介绍：

import logging

# Emit one record per level through the module-level helpers, from least to
# most severe.  With no configuration only WARNING and above are shown.
for level_name in ('debug', 'info', 'warning', 'error', 'critical'):
    emit = getattr(logging, level_name)
    emit('%s message' % level_name)

　　输出：

　　WARNING:root:warning message
　　ERROR:root:error message
　　CRITICAL:root:critical message

　　可见，默认情况下Python的logging模块将日志打印到了标准错误输出（sys.stderr）中，且只显示了大于等于WARNING级别的日志，这说明默认的日志级别设置为WARNING（日志级别等级CRITICAL > ERROR > WARNING > INFO > DEBUG > NOTSET），默认的日志格式为日志级别：Logger名称：用户输出消息。

　　2.配置日志级别、日志格式、输出位置

import logging

# Configure the root logger once: minimum level, record layout, timestamp
# layout, and a file destination opened in "w" mode (truncated on each run).
logging.basicConfig(level=logging.DEBUG,# level constants are upper-case

                    format='%(asctime)s %(filename)s [line:%(lineno)d] %(levelname)s %(message)s',

                    datefmt='%a, %d %b %Y %H:%M:%S',

                    filename='/tmp/test.log',

                    filemode='w')

# With the level lowered to DEBUG all five records below reach the file.
logging.debug('debug message')

logging.info('info message')

logging.warning('warning message')

logging.error('error message')

logging.critical('critical message')

　　输出：
　　cat /tmp/test.log
　　Mon, 05 May 2014 16:29:53 test_logging.py[line:9] DEBUG debug message
　　Mon, 05 May 2014 16:29:53 test_logging.py[line:10] INFO info message
　　Mon, 05 May 2014 16:29:53 test_logging.py[line:11] WARNING warning message
　　Mon, 05 May 2014 16:29:53 test_logging.py[line:12] ERROR error message
　　Mon, 05 May 2014 16:29:53 test_logging.py[line:13] CRITICAL critical message

　　可见在logging.basicConfig()函数中可通过具体参数来更改logging模块默认行为，可用参数有
　　filename：用指定的文件名创建FileHandler（后边会具体讲解handler的概念），这样日志会被存储在指定的文件中。
　　filemode：文件打开方式，在指定了filename时使用这个参数，默认值为“a”还可指定为“w”。
　　format：指定handler使用的日志显示格式。
　　datefmt：指定日期时间格式。
　　level：设置rootlogger（后边会讲解具体概念）的日志级别
　　stream：用指定的stream创建StreamHandler。可以指定输出到sys.stderr,sys.stdout或者文件(f=open('test.log','w'))，默认为sys.stderr。注意：filename和stream两个参数不能同时使用，若同时给出，Python 3 会抛出ValueError（Python 2 中stream参数会被忽略）。

　　format参数中可能用到的格式化串：
　　%(name)s Logger的名字
　　%(levelno)s 数字形式的日志级别
　　%(levelname)s 文本形式的日志级别
　　%(pathname)s 调用日志输出函数的模块的完整路径名，可能没有
　　%(filename)s 调用日志输出函数的模块的文件名
　　%(module)s 调用日志输出函数的模块名
　　%(funcName)s 调用日志输出函数的函数名
　　%(lineno)d 调用日志输出函数的语句所在的代码行
　　%(created)f 当前时间，用UNIX标准的表示时间的浮点数表示
　　%(relativeCreated)d 输出日志信息时的，自Logger创建以来的毫秒数
　　%(asctime)s 字符串形式的当前时间。默认格式是 “2003-07-08 16:49:45,896”。逗号后面的是毫秒
　　%(thread)d 线程ID。可能没有
　　%(threadName)s 线程名。可能没有
　　%(process)d 进程ID。可能没有
　　%(message)s用户输出的消息

　　3.logger对象

上述几个例子中我们了解到了logging.debug()、logging.info()、logging.warning()、logging.error()、logging.critical()（分别用以记录不同级别的日志信息），logging.basicConfig()（用默认日志格式（Formatter）为日志系统建立一个默认的流处理器（StreamHandler），设置基础配置（如日志级别等）并加到root logger（根Logger）中）这几个logging模块级别的函数，另外还有一个模块级别的函数是logging.getLogger([name])（返回一个logger对象，如果没有指定名字将返回root logger）。

import logging

# No name given, so this is the root logger.
logger = logging.getLogger()

# One handler appends to a log file, the other writes to the console.
fh = logging.FileHandler('test.log')
ch = logging.StreamHandler()

# Both destinations share the same record layout.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
for handler in (fh, ch):
    handler.setFormatter(formatter)

# A logger may carry any number of handlers.
for handler in (fh, ch):
    logger.addHandler(handler)

for level_name in ('debug', 'info', 'warning', 'error', 'critical'):
    getattr(logger, level_name)('logger %s message' % level_name)

　　先简单介绍一下，logging库提供了多个组件：Logger、Handler、Filter、Formatter。Logger对象提供应用程序可直接使用的接口，Handler发送日志到适当的目的地，Filter提供了过滤日志信息的方法，Formatter指定日志显示格式。

(1)

Logger是一个树形层级结构，输出信息之前都要获得一个Logger（如果没有显式地获取则自动创建并使用root Logger，如第一个例子所示）。
logger = logging.getLogger()返回一个默认的Logger也即root Logger，并应用默认的日志级别、Handler和Formatter设置。
当然也可以通过Logger.setLevel(lel)指定最低的日志级别，可用的日志级别有logging.DEBUG、logging.INFO、logging.WARNING、logging.ERROR、logging.CRITICAL。
Logger.debug()、Logger.info()、Logger.warning()、Logger.error()、Logger.critical()输出不同级别的日志，只有日志等级大于或等于设置的日志级别的日志才会被输出。

import logging

LEVEL_NAMES = ('debug', 'info', 'warning', 'error', 'critical')

logger = logging.getLogger()

# One handler appends to a log file, the other writes to the console.
fh = logging.FileHandler('test.log')
ch = logging.StreamHandler()

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)

# A Filter built from a name only lets through records from that logger or
# its descendants.  It is bound to name_filter (not "filter") so the builtin
# filter() stays usable.
name_filter = logging.Filter('mylogger')
fh.addFilter(name_filter)
ch.addFilter(name_filter)
# A filter could be attached to the logger itself instead:
# logger.addFilter(name_filter)

logger.addHandler(fh)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)

# Records from the root logger are named "root", so both handlers drop them.
for level_name in LEVEL_NAMES:
    getattr(logger, level_name)('logger %s message' % level_name)

##################################################

# getLogger returns the same object for the same name, so logger1 and logger2
# are one logger and the second setLevel call overrides the first.
logger1 = logging.getLogger('mylogger')
logger1.setLevel(logging.DEBUG)
logger2 = logging.getLogger('mylogger')
logger2.setLevel(logging.INFO)

# addHandler ignores a handler that is already attached, so the second pair
# of calls does not duplicate anything on "mylogger".
logger1.addHandler(fh)
logger1.addHandler(ch)
logger2.addHandler(fh)
logger2.addHandler(ch)

# Each record is handled by "mylogger" and then propagates to the root
# logger, which carries the same two handlers.
for level_name in LEVEL_NAMES:
    getattr(logger1, level_name)('logger1 %s message' % level_name)

for level_name in LEVEL_NAMES:
    getattr(logger2, level_name)('logger2 %s message' % level_name)

示例一

#coding:utf-8

import logging

LEVEL_NAMES = ('debug', 'info', 'warning', 'error', 'critical')

# The root logger plus a chain of named loggers; dots in a name define the
# parent/child hierarchy.  logger1 and logger2 share a name and are therefore
# the same object.
logger = logging.getLogger()
logger1 = logging.getLogger('mylogger')
logger2 = logging.getLogger('mylogger')
logger3 = logging.getLogger('mylogger.child1')
logger4 = logging.getLogger('mylogger.child1.child2')
logger5 = logging.getLogger('mylogger.child1.child2.child3')

# Levels are applied in the original order, so INFO (via logger2) is what
# finally sticks on "mylogger".
for target, level in (
    (logger1, logging.DEBUG),
    (logger2, logging.INFO),
    (logger3, logging.WARNING),
    (logger4, logging.DEBUG),
    (logger5, logging.DEBUG),
):
    target.setLevel(level)

# One handler appends to a log file, the other writes to the console; both use
# the same record layout.
fh = logging.FileHandler('/tmp/test.log')
ch = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)

# Optional experiment: build logging.Filter('mylogger.child1.child2') and
# attach it to a handler or a logger with addFilter to see which records
# survive.

named_loggers = (
    ('logger', logger),
    ('logger1', logger1),
    ('logger2', logger2),
    ('logger3', logger3),
    ('logger4', logger4),
    ('logger5', logger5),
)

# Every logger gets both handlers (file first, console second).
for _label, target in named_loggers:
    target.addHandler(fh)
    target.addHandler(ch)

# Emit one record per level on every logger, in declaration order.
for label, target in named_loggers:
    for level_name in LEVEL_NAMES:
        getattr(target, level_name)('%s %s message' % (label, level_name))

示例二

import os

import time

import logging

from config import settings

def get_logger(card_num, struct_time):
    """Return a logger that writes to a card's record file for one period.

    The record file is named ``<year>_<month>_22``.  Dates before the 23rd use
    the current month; from the 23rd onward the following month is used, and a
    date in December rolls over to January of the next year (previously this
    produced month 13 with an unchanged year).

    Args:
        card_num: Directory name of the card under ``settings.USER_DIR_FOLDER``.
        struct_time: Object exposing ``tm_year``, ``tm_mon`` and ``tm_mday``
            (for example the result of ``time.localtime()``).

    Returns:
        A new ``logging.Logger`` named ``'user_logger'`` at INFO level with a
        single UTF-8 ``FileHandler`` attached.
    """
    year, month = struct_time.tm_year, struct_time.tm_mon
    if struct_time.tm_mday >= 23:
        month += 1
        if month > 12:
            # December rolls into January of the following year.
            year, month = year + 1, 1
    file_name = "%s_%s_%d" % (year, month, 22)

    file_handler = logging.FileHandler(
        os.path.join(settings.USER_DIR_FOLDER, card_num, 'record', file_name),
        encoding='utf-8'
    )
    fmt = logging.Formatter(fmt="%(asctime)s :  %(message)s")
    file_handler.setFormatter(fmt)

    # Logger is instantiated directly rather than through logging.getLogger,
    # so every call yields a fresh object carrying only the handler added here.
    logger1 = logging.Logger('user_logger', level=logging.INFO)
    logger1.addHandler(file_handler)
    return logger1

示例三

三、configparser模块

　　多数软件文档格式如下：

[DEFAULT]

ServerAliveInterval = 45

Compression = yes

CompressionLevel = 9

ForwardX11 = yes

[bitbucket.org]

User = hg

[topsecret.server.com]

Port = 50022

ForwardX11 = no

　　用python生成如下：

import configparser

# ---- Build the configuration shown above and write it to example.ini ----
config = configparser.ConfigParser()

# Values in DEFAULT are inherited by every other section.
config["DEFAULT"] = {'ServerAliveInterval': '45',
                     'Compression': 'yes',
                     'CompressionLevel': '9'}

config['bitbucket.org'] = {}
config['bitbucket.org']['User'] = 'hg'

config['topsecret.server.com'] = {}
topsecret = config['topsecret.server.com']
topsecret['Port'] = '50022'     # mutates the parser
topsecret['ForwardX11'] = 'no'  # same here

config['DEFAULT']['ForwardX11'] = 'yes'

with open('example.ini', 'w') as configfile:
    config.write(configfile)

# ---- Read ----
config = configparser.ConfigParser()

print(config.sections())   #[]

config.read('example.ini')

# DEFAULT is never listed among the sections.
print(config.sections())   #['bitbucket.org', 'topsecret.server.com']

print('bytebong.com' in config)# False

print(config['bitbucket.org']['User']) # hg

print(config['DEFAULT']['Compression']) #yes

print(config['topsecret.server.com']['ForwardX11'])  #no

# Iterating a section yields its own keys followed by the inherited DEFAULT
# keys; option names are lower-cased by the parser.
for key in config['bitbucket.org']:
    print(key)
# user
# serveraliveinterval
# compression
# compressionlevel
# forwardx11

print(config.options('bitbucket.org'))#['user', 'serveraliveinterval', 'compression', 'compressionlevel', 'forwardx11']

print(config.items('bitbucket.org'))  #[('serveraliveinterval', '45'), ('compression', 'yes'), ('compressionlevel', '9'), ('forwardx11', 'yes'), ('user', 'hg')]

print(config.get('bitbucket.org','compression'))#yes

# ---- Delete, modify, add (changes only persist once written back) ----
config.add_section('yuan')

config.remove_section('topsecret.server.com')

config.remove_option('bitbucket.org','user')

config.set('bitbucket.org','k1','')

# The with-block guarantees the file is flushed and closed.
with open('i.cfg', 'w') as configfile:
    config.write(configfile)

增删改查

configparser的增删改查

四、hashlib模块

　　用于哈希（消息摘要）相关的操作，3.x里代替了md5模块和sha模块，主要提供 SHA1, SHA224, SHA256, SHA384, SHA512 ，MD5 算法

import hashlib

# hashlib.sha256() offers the same interface as md5().
m = hashlib.md5()

m.update(b'hello')
print(m.hexdigest())
#5d41402abc4b2a76b9719d911017c592

# update() is cumulative: the digest now covers b'hello' + b'alvin'.
m.update(b'alvin')
print(m.hexdigest())
#92a7e713c30abbb0319fa07da2a5c4af

# Hashing the concatenation in one go yields the same digest.
m2 = hashlib.md5(b'helloalvin')
print(m2.hexdigest())
#92a7e713c30abbb0319fa07da2a5c4af

　　以上哈希算法虽然依然非常厉害，但有时候存在缺陷，即：通过撞库可以反解。所以，有必要在做哈希时加入自定义key（盐）。

import hashlib

# ######## 256 ########

# The constructor argument acts as a custom key/salt that is hashed before the
# real content.  The object is named salted_hash rather than "hash" so the
# builtin hash() is not shadowed.
salted_hash = hashlib.sha256('898oaFs09f'.encode('utf8'))
salted_hash.update('alvin'.encode('utf8'))
print (salted_hash.hexdigest())
#e79e68f070cdedcfe63eaf1a2e92c83b4cfb1b5c6bc452d214c1b7e77cdfd1c7

　　python 还有一个 hmac 模块，它内部对我们创建 key 和内容再进行处理然后再加密:

import hmac

# digestmod is mandatory on Python 3.8+ (omitting it raises TypeError).  MD5
# is named explicitly because it was the old implicit default and yields the
# 32-hex-digit value shown below.
h = hmac.new('alvin'.encode('utf8'), digestmod='md5')
h.update('hello'.encode('utf8'))
print (h.hexdigest())
#320df9832eab4c038b6c1d7ed73a5940

学习资料：