Day6 反射、模块、正则表达式和算法
递归完成阶乘
def func(num):
    """Return the factorial of ``num``, computed recursively.

    Args:
        num: A non-negative integer.

    Returns:
        ``num!``. Both ``0!`` and ``1!`` are 1.

    Raises:
        ValueError: If ``num`` is negative.
    """
    if num < 0:
        raise ValueError("factorial is not defined for negative numbers")
    # The base case covers 0 as well as 1.  Stopping only at exactly 1 made
    # func(0) recurse until RecursionError.
    if num <= 1:
        return 1
    return num * func(num - 1)


x = func(7)
print(x)
反射
commons.py
- def login():
print("炫酷登录页面")- def logout():
print("炫酷退出页面")- def home():
print("炫酷主页面")
- index.py
- import commons
def run():
    """Prompt for a function name and call that function from ``commons``.

    Prints ``404`` when ``commons`` has no callable member with the
    entered name.
    """
    inp = input('请输入要访问的url:')
    # Reflection: the member is looked up by its string name instead of
    # being written out as commons.login / commons.logout.  hasattr,
    # getattr, setattr and delattr all act on the in-memory module object.
    func = getattr(commons, inp, None)
    # Only call real callables: names such as "__name__" exist on every
    # module but are not functions.
    if callable(func):
        func()
    else:
        print('404')


if __name__ == '__main__':
    run()
以字符串形式导入模块
account.py
- def login():
print("炫酷登录页面")
- manager.py
- def order():
print("炫酷的订单页面")
- commons.py
- def logout():
print("炫酷退出页面")
- index.py
def run():
    """Dispatch a ``module/function`` style URL to the matching function.

    Reads a string such as ``account/login`` from stdin, imports the module
    named before the slash and calls the function named after it.

    Prints ``404`` when the input is not exactly ``module/function``, when
    the module cannot be imported, or when the module has no callable
    member with that name.
    """
    inp = input('请输入要访问的url:')
    m, sep, f = inp.partition('/')
    # Exactly one "/" with a non-empty name on each side is required.
    if not (m and sep and f) or '/' in f:
        print('404')
        return
    # NOTE(security): this imports whatever module name the user typed.
    # Restrict ``m`` to a whitelist before using this pattern on untrusted
    # input.
    try:
        # __import__("commons") is the string-based form of `import commons`.
        obj = __import__(m)
    except (ImportError, ValueError):
        print('404')
        return
    # Reflection: fetch the member by name; non-callables count as missing.
    func = getattr(obj, f, None)
    if callable(func):
        func()
    else:
        print('404')


if __name__ == '__main__':
    run()
导入lib目录中的模块
def run():
    """Dispatch a ``module/function`` URL to a function inside package ``lib``.

    Reads a string such as ``commons/login`` from stdin, imports
    ``lib.commons`` by name and calls its ``login``.

    Prints ``404`` when the input is not exactly ``module/function``, when
    ``lib.<module>`` cannot be imported, or when it has no callable member
    with that name.
    """
    # Imported locally so the snippet is self-contained.
    import importlib

    inp = input('请输入要访问的url:')
    m, sep, f = inp.partition('/')
    # Exactly one "/" with a non-empty name on each side is required.
    if not (m and sep and f) or '/' in f:
        print('404')
        return
    # NOTE(security): the module name comes straight from user input.
    # Restrict ``m`` to a whitelist before using this on untrusted input.
    try:
        # import_module returns the submodule itself (lib.<m>).  The older
        # __import__("lib." + m, fromlist=True) spelling passes a bool where
        # a list of names is expected.
        obj = importlib.import_module("lib." + m)
    except (ImportError, ValueError):
        print('404')
        return
    # Reflection: fetch the member by name; non-callables count as missing.
    func = getattr(obj, f, None)
    if callable(func):
        func()
    else:
        print('404')


if __name__ == '__main__':
    run()
总结
- 实例:伪造Web框架的路由系统
- 反射:基于字符串的形式去对象(模块)中操作其成员
- getattr,delattr,setattr,hasattr
- 扩展:导入模块
- import xxx
- from xxx import lll
- obj = __import__("xxx")
- obj = __import__("xxx.lll.ddd", fromlist=True)
模块中的特殊变量
- import sys
import os
"""
我是注释
"""
#获取注释
print(__doc__)
#__cached__:字节码所在路径
#当前py文件所在路径
print(__file__)
#文件的绝对路径
ret = os.path.abspath(__file__)
print(ret)
#找到文件的上级目录
ret1 = os.path.dirname(ret)
print(ret1)
ret2 = os.path.dirname(ret1)
print(ret2)
- from bin import admin
- print(__package__)
print(admin.__package__)
#只有执行当前文件时,当前文件的特殊变量__name__ == "__main__"
print(__name__)
sys模块
- sys.argv #命令行参数List,第一个元素是程序本身路径
sys.exit(n) #退出程序,正常退出时exit(0)
sys.version #获取Python解释程序的版本信息
sys.maxsize #最大的Int值(Python 3 中 sys.maxint 已移除,改用 sys.maxsize)
sys.path #返回模块的搜索路径,初始化时使用PYTHONPATH环境变量的值
sys.platform #返回操作系统平台名称
sys.stdin #输入相关
sys.stdout #输出相关
sys.stderr #错误相关
进度条
import sys
import time


def view_bar(num, total):
    """Redraw a one-line text progress bar on stdout.

    Args:
        num: Units of work finished so far.
        total: Units of work in the whole job; must be non-zero.

    Raises:
        ZeroDivisionError: If ``total`` is 0.
    """
    # Integer arithmetic avoids float truncation: int(29 / 100 * 100) is 28.
    rate_num = int(num * 100 // total)
    # '\r' moves the cursor back to the start of the line, so every call
    # overwrites the previous bar.  The bar length follows the percentage,
    # so it stays at most 100 characters whatever ``total`` is.
    r = '\r%s>%d%%' % ("=" * rate_num, rate_num)
    # write() adds no newline; flush() pushes the text out immediately.
    sys.stdout.write(r)
    sys.stdout.flush()


if __name__ == '__main__':
    for i in range(0, 101):
        time.sleep(0.1)
        view_bar(i, 100)
os模块
- os.getcwd() #获取当前工作目录,即当前python脚本工作的目录路径
os.chdir("dirname") #改变当前脚本工作目录;相当于shell下cd
os.curdir #返回当前目录: ('.')
os.pardir #获取当前目录的父目录字符串名:('..')
os.makedirs('dir1/dir2') #可生成多层递归目录
os.removedirs('dirname1') #若目录为空,则删除,并递归到上一级目录,如若也为空,则删除,依此类推
os.mkdir('dirname') #生成单级目录;相当于shell中mkdir dirname
os.rmdir('dirname') #删除单级空目录,若目录不为空则无法删除,报错;相当于shell中rmdir dirname
os.listdir('dirname') #列出指定目录下的所有文件和子目录,包括隐藏文件,并以列表方式打印
os.remove() #删除一个文件
os.rename("oldname","new") #重命名文件/目录
os.stat('path/filename') #获取文件/目录信息
os.sep #操作系统特定的路径分隔符,win下为"\\",Linux下为"/"
os.linesep #当前平台使用的行终止符,win下为"\r\n",Linux下为"\n"
os.pathsep #用于分割文件路径的字符串
os.name #字符串指示当前使用平台。win->'nt'; Linux->'posix'
os.system("bash command") #运行shell命令,直接显示
os.environ #获取系统环境变量
os.path.abspath(path) #返回path规范化的绝对路径
- os.path.split(path) #将path分割成目录和文件名二元组返回
os.path.dirname(path) #返回path的目录。其实就是os.path.split(path)的第一个元素
- os.path.basename(path) #返回path最后的文件名。如果path以/或\结尾,那么就会返回空值。即os.path.split(path)的第二个元素
os.path.exists(path) #如果path存在,返回True;如果path不存在,返回False
os.path.isabs(path) #如果path是绝对路径,返回True
os.path.isfile(path) #如果path是一个存在的文件,返回True。否则返回False
os.path.isdir(path) #如果path是一个存在的目录,则返回True。否则返回False
os.path.join(path1[, path2[, ...]]) #将多个路径组合后返回,第一个绝对路径之前的参数将被忽略
- os.path.getatime(path) #返回path所指向的文件或者目录的最后存取时间
os.path.getmtime(path) #返回path所指向的文件或者目录的最后修改时间
MD5
- import hashlib
#自定义加密key
obj = hashlib.md5(bytes(bytes('wnddnxnsnxsjnxueldoekcemckkaslkmadlkcecene', encoding='utf-8')))
obj.update(bytes('Radar', encoding='utf-8'))
#加密后的结果
ret = obj.hexdigest()
print(ret)
configparser
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
- import configparser
- #只对应指定的格式操作,如下readme.txt:
- """
- [section1]
- k1 = 123
- k2 = v2
- [section2]
- k1 = 567
- """
- #获取所有节点
- config = configparser.ConfigParser()
- config.read('readme.txt', encoding='utf-8')
- ret = config.sections()
- print(ret)
- #获取指定节点下所有的键值对
- ret1 = config.items('section1')
- print(ret1)
- #获取指定节点下所有的键
- ret2 = config.options('section1')
- print(ret2)
- #获取指定节点下指定key的值
- v = config.get('section1', 'k1')
- # v = config.getint('section1', 'k1')
- #v = config.getfloat('section1', 'k1')
- #v = config.getboolean('section1', 'k1')
- print(v)
- #检查、删除、添加节点(均在内存中操作)
- # 检查
- has_sec = config.has_section('section1')
- print(has_sec)
- # 添加节点
- config.add_section("SEC_1")
- config.write(open('readme.txt', 'w'))
- # 删除节点
- config.remove_section("SEC_1")
- config.write(open('readme.txt', 'w'))
- #检查、删除、设置指定组内的键值对
- # 检查
- has_opt = config.has_option('section1', 'k1')
- print(has_opt)
- # 删除
- config.remove_option('section1', 'k1')
- config.write(open('readme.txt', 'w'))
- # 设置
- config.set('section1', 'k10', "")
- config.write(open('readme.txt', 'w'))
XML模块
xml格式如下:
- <data>
- <country name="Liechtenstein">
- <rank updated="yes">2</rank>
- <year>2023</year>
- <gdppc>141100</gdppc>
- <neighbor direction="E" name="Austria" />
- <neighbor direction="W" name="Switzerland" />
- </country>
- <country name="Singapore">
- <rank updated="yes">5</rank>
- <year>2026</year>
- <gdppc>59900</gdppc>
- <neighbor direction="N" name="Malaysia" />
- </country>
- <country name="Panama">
- <rank updated="yes">69</rank>
- <year>2026</year>
- <gdppc>13600</gdppc>
- <neighbor direction="W" name="Costa Rica" />
- <neighbor direction="E" name="Colombia" />
- </country>
- </data>
xml节点功能
- class Element:
- """An XML element.
- This class is the reference implementation of the Element interface.
- An element's length is its number of subelements. That means if you
- want to check if an element is truly empty, you should check BOTH
- its length AND its text attribute.
- The element tag, attribute names, and attribute values can be either
- bytes or strings.
- *tag* is the element name. *attrib* is an optional dictionary containing
- element attributes. *extra* are additional element attributes given as
- keyword arguments.
- Example form:
- <tag attrib>text<child/>...</tag>tail
- """
- 当前节点的标签名
- tag = None
- """The element's name."""
- 当前节点的属性
- attrib = None
- """Dictionary of the element's attributes."""
- 当前节点的内容
- text = None
- """
- Text before first subelement. This is either a string or the value None.
- Note that if there is no text, this attribute may be either
- None or the empty string, depending on the parser.
- """
- tail = None
- """
- Text after this element's end tag, but before the next sibling element's
- start tag. This is either a string or the value None. Note that if there
- was no text, this attribute may be either None or an empty string,
- depending on the parser.
- """
- def __init__(self, tag, attrib={}, **extra):
- if not isinstance(attrib, dict):
- raise TypeError("attrib must be dict, not %s" % (
- attrib.__class__.__name__,))
- attrib = attrib.copy()
- attrib.update(extra)
- self.tag = tag
- self.attrib = attrib
- self._children = []
- def __repr__(self):
- return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))
- def makeelement(self, tag, attrib):
- 创建一个新节点
- """Create a new element with the same type.
- *tag* is a string containing the element name.
- *attrib* is a dictionary containing the element attributes.
- Do not call this method, use the SubElement factory function instead.
- """
- return self.__class__(tag, attrib)
- def copy(self):
- """Return copy of current element.
- This creates a shallow copy. Subelements will be shared with the
- original tree.
- """
- elem = self.makeelement(self.tag, self.attrib)
- elem.text = self.text
- elem.tail = self.tail
- elem[:] = self
- return elem
- def __len__(self):
- return len(self._children)
- def __bool__(self):
- warnings.warn(
- "The behavior of this method will change in future versions. "
- "Use specific 'len(elem)' or 'elem is not None' test instead.",
- FutureWarning, stacklevel=2
- )
- return len(self._children) != 0 # emulate old behaviour, for now
- def __getitem__(self, index):
- return self._children[index]
- def __setitem__(self, index, element):
- # if isinstance(index, slice):
- # for elt in element:
- # assert iselement(elt)
- # else:
- # assert iselement(element)
- self._children[index] = element
- def __delitem__(self, index):
- del self._children[index]
- def append(self, subelement):
- 为当前节点追加一个子节点
- """Add *subelement* to the end of this element.
- The new element will appear in document order after the last existing
- subelement (or directly after the text, if it's the first subelement),
- but before the end tag for this element.
- """
- self._assert_is_element(subelement)
- self._children.append(subelement)
- def extend(self, elements):
- 为当前节点扩展 n 个子节点
- """Append subelements from a sequence.
- *elements* is a sequence with zero or more elements.
- """
- for element in elements:
- self._assert_is_element(element)
- self._children.extend(elements)
- def insert(self, index, subelement):
- 在当前节点的子节点中插入某个节点,即:为当前节点创建子节点,然后插入指定位置
- """Insert *subelement* at position *index*."""
- self._assert_is_element(subelement)
- self._children.insert(index, subelement)
- def _assert_is_element(self, e):
- # Need to refer to the actual Python implementation, not the
- # shadowing C implementation.
- if not isinstance(e, _Element_Py):
- raise TypeError('expected an Element, not %s' % type(e).__name__)
- def remove(self, subelement):
- 在当前节点在子节点中删除某个节点
- """Remove matching subelement.
- Unlike the find methods, this method compares elements based on
- identity, NOT ON tag value or contents. To remove subelements by
- other means, the easiest way is to use a list comprehension to
- select what elements to keep, and then use slice assignment to update
- the parent element.
- ValueError is raised if a matching element could not be found.
- """
- # assert iselement(element)
- self._children.remove(subelement)
- def getchildren(self):
- 获取所有的子节点(废弃)
- """(Deprecated) Return all subelements.
- Elements are returned in document order.
- """
- warnings.warn(
- "This method will be removed in future versions. "
- "Use 'list(elem)' or iteration over elem instead.",
- DeprecationWarning, stacklevel=2
- )
- return self._children
- def find(self, path, namespaces=None):
- 获取第一个寻找到的子节点
- """Find first matching element by tag name or path.
- *path* is a string having either an element tag or an XPath,
- *namespaces* is an optional mapping from namespace prefix to full name.
- Return the first matching element, or None if no element was found.
- """
- return ElementPath.find(self, path, namespaces)
- def findtext(self, path, default=None, namespaces=None):
- 获取第一个寻找到的子节点的内容
- """Find text for first matching element by tag name or path.
- *path* is a string having either an element tag or an XPath,
- *default* is the value to return if the element was not found,
- *namespaces* is an optional mapping from namespace prefix to full name.
- Return text content of first matching element, or default value if
- none was found. Note that if an element is found having no text
- content, the empty string is returned.
- """
- return ElementPath.findtext(self, path, default, namespaces)
- def findall(self, path, namespaces=None):
- 获取所有的子节点
- """Find all matching subelements by tag name or path.
- *path* is a string having either an element tag or an XPath,
- *namespaces* is an optional mapping from namespace prefix to full name.
- Returns list containing all matching elements in document order.
- """
- return ElementPath.findall(self, path, namespaces)
- def iterfind(self, path, namespaces=None):
- 获取所有指定的节点,并创建一个迭代器(可以被for循环)
- """Find all matching subelements by tag name or path.
- *path* is a string having either an element tag or an XPath,
- *namespaces* is an optional mapping from namespace prefix to full name.
- Return an iterable yielding all matching elements in document order.
- """
- return ElementPath.iterfind(self, path, namespaces)
- def clear(self):
- 清空节点
- """Reset element.
- This function removes all subelements, clears all attributes, and sets
- the text and tail attributes to None.
- """
- self.attrib.clear()
- self._children = []
- self.text = self.tail = None
- def get(self, key, default=None):
- 获取当前节点的属性值
- """Get element attribute.
- Equivalent to attrib.get, but some implementations may handle this a
- bit more efficiently. *key* is what attribute to look for, and
- *default* is what to return if the attribute was not found.
- Returns a string containing the attribute value, or the default if
- attribute was not found.
- """
- return self.attrib.get(key, default)
- def set(self, key, value):
- 为当前节点设置属性值
- """Set element attribute.
- Equivalent to attrib[key] = value, but some implementations may handle
- this a bit more efficiently. *key* is what attribute to set, and
- *value* is the attribute value to set it to.
- """
- self.attrib[key] = value
- def keys(self):
- 获取当前节点的所有属性的 key
- """Get list of attribute names.
- Names are returned in an arbitrary order, just like an ordinary
- Python dict. Equivalent to attrib.keys()
- """
- return self.attrib.keys()
- def items(self):
- 获取当前节点的所有属性值,每个属性都是一个键值对
- """Get element attributes as a sequence.
- The attributes are returned in arbitrary order. Equivalent to
- attrib.items().
- Return a list of (name, value) tuples.
- """
- return self.attrib.items()
- def iter(self, tag=None):
- 在当前节点的子孙中根据节点名称寻找所有指定的节点,并返回一个迭代器(可以被for循环)。
- """Create tree iterator.
- The iterator loops over the element and all subelements in document
- order, returning all elements with a matching tag.
- If the tree structure is modified during iteration, new or removed
- elements may or may not be included. To get a stable set, use the
- list() function on the iterator, and loop over the resulting list.
- *tag* is what tags to look for (default is to return all elements)
- Return an iterator containing all the matching elements.
- """
- if tag == "*":
- tag = None
- if tag is None or self.tag == tag:
- yield self
- for e in self._children:
- yield from e.iter(tag)
- # compatibility
- def getiterator(self, tag=None):
- # Change for a DeprecationWarning in 1.4
- warnings.warn(
- "This method will be removed in future versions. "
- "Use 'elem.iter()' or 'list(elem.iter())' instead.",
- PendingDeprecationWarning, stacklevel=2
- )
- return list(self.iter(tag))
- def itertext(self):
- 在当前节点的子孙中根据节点名称寻找所有指定的节点的内容,并返回一个迭代器(可以被for循环)。
- """Create text iterator.
- The iterator loops over the element and all subelements in document
- order, returning all inner text.
- """
- tag = self.tag
- if not isinstance(tag, str) and tag is not None:
- return
- if self.text:
- yield self.text
- for e in self:
- yield from e.itertext()
- if e.tail:
- yield e.tail
- 节点功能一览表
遍历xml文件中的所有内容
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
- #浏览器返回的字符串
- #1.html
- #2.json
- #3.xml:页面展示(字符串类型的一个XML格式数据)、配置文件(文件、内部数据XML格式)
- from xml.etree import ElementTree as ET
- tree = ET.parse('read.xml')
- root = tree.getroot()
- for child in root:
- print(child.tag, child.attrib)
- for gradechild in child:
- print(gradechild.tag, gradechild.text)
打开xml的两种方式
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
- #浏览器返回的字符串
- #1.html
- #2.json
- #3.xml:页面展示(字符串类型的一个XML格式数据)、配置文件(文件、内部数据XML格式)
- from xml.etree import ElementTree as ET
- #第一种方式:利用ElementTree.XML将字符串解析成xml对象
- # 打开文件,读取XML内容
- str_xml = open('read.xml', 'r').read()
- # 将字符串解析成xml特殊对象,root代指xml文件的根节点
- root = ET.XML(str_xml)
- print(root)
- #第二种方式:利用ElementTree.parse将文件直接解析成xml对象
- # 直接解析xml文件
- tree = ET.parse("read.xml")
- # 获取xml文件的根节点
- root = tree.getroot()
- print(root)
修改节点
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
- #浏览器返回的字符串
- #1.html
- #2.json
- #3.xml:页面展示(字符串类型的一个XML格式数据)、配置文件(文件、内部数据XML格式)
- from xml.etree import ElementTree as ET
- ############ 解析方式一 ############
- #解析字符串方式,修改,保存
- # 打开文件,读取XML内容
- str_xml = open('read.xml', 'r').read()
- # 将字符串解析成xml特殊对象,root代指xml文件的根节点
- root = ET.XML(str_xml)
- ############ 操作 ############
- # 顶层标签
- print(root.tag)
- # 循环所有的year节点
- for node in root.iter('year'):
- # 将year节点中的内容自增一
- new_year = int(node.text) + 1
- node.text = str(new_year)
- # 设置属性
- node.set('name', 'alex')
- node.set('age', '')
- # 删除属性
- del node.attrib['name']
- ############ 保存文件 ############
- tree = ET.ElementTree(root)
- tree.write("newnew.xml", encoding='utf-8')
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
- #浏览器返回的字符串
- #1.html
- #2.json
- #3.xml:页面展示(字符串类型的一个XML格式数据)、配置文件(文件、内部数据XML格式)
- from xml.etree import ElementTree as ET
- ############ 解析方式二 ############
- #解析文件方式,修改,保存
- # 直接解析xml文件
- tree = ET.parse("read.xml")
- # 获取xml文件的根节点
- root = tree.getroot()
- ############ 操作 ############
- # 顶层标签
- print(root.tag)
- # 循环所有的year节点
- for node in root.iter('year'):
- # 将year节点中的内容自增一
- new_year = int(node.text) + 1
- node.text = str(new_year)
- # 设置属性
- node.set('name', 'alex')
- node.set('age', '')
- # 删除属性
- del node.attrib['name']
- ############ 保存文件 ############
- tree.write("newnew2.xml", encoding='utf-8')
创建XML
方式一:
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
from xml.etree import ElementTree as ET

# Creation style 1: build every node with ET.Element and attach it manually.
# Root node.
root = ET.Element("famliy")
# Elder son.
son1 = ET.Element('son', {'name': '儿1'})
# Younger son.
son2 = ET.Element('son', {"name": '儿2'})
# Two grandsons under the elder son.
grandson1 = ET.Element('grandson', {'name': '儿11'})
grandson2 = ET.Element('grandson', {'name': '儿12'})
son1.append(grandson1)
son1.append(grandson2)
# Attach both sons to the root.  Appending son1 twice would duplicate it
# and leave son2 out of the document.
root.append(son1)
root.append(son2)
tree = ET.ElementTree(root)
# short_empty_elements=False writes <son></son> rather than <son />.
tree.write('new_create1.xml', encoding='utf-8', short_empty_elements=False)
方式二
from xml.etree import ElementTree as ET

# Creation style 2: build nodes with makeelement() and attach them manually.
# Root node.
root = ET.Element("famliy")
# Elder son; equivalent to ET.Element('son', {'name': '儿1'}).
son1 = root.makeelement('son', {'name': '儿1'})
# Younger son; equivalent to ET.Element('son', {"name": '儿2'}).
son2 = root.makeelement('son', {"name": '儿2'})
# Two grandsons under the elder son; equivalent to ET.Element('grandson', ...).
grandson1 = son1.makeelement('grandson', {'name': '儿11'})
grandson2 = son1.makeelement('grandson', {'name': '儿12'})
son1.append(grandson1)
son1.append(grandson2)
# Attach both sons to the root.  Appending son1 twice would duplicate it
# and leave son2 out of the document.
root.append(son1)
root.append(son2)
tree = ET.ElementTree(root)
# short_empty_elements=False writes <son></son> rather than <son />.
tree.write('oooo.xml', encoding='utf-8', short_empty_elements=False)
方式三
- from xml.etree import ElementTree as ET
- # 创建根节点
- root = ET.Element("famliy")
- # 创建节点大儿子
- son1 = ET.SubElement(root, "son", attrib={'name': '儿1'})
- # 创建小儿子
- son2 = ET.SubElement(root, "son", attrib={"name": "儿2"})
- # 在大儿子中创建一个孙子
- grandson1 = ET.SubElement(son1, "age", attrib={'name': '儿11'})
- grandson1.text = '孙子'
- et = ET.ElementTree(root) #生成文档对象
- et.write("test.xml", encoding="utf-8", xml_declaration=True, short_empty_elements=False)
带缩进功能创建xml
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
from xml.etree import ElementTree as ET
from xml.dom import minidom


def prettify(elem):
    """Serialize ``elem`` to an XML string indented with tabs.

    ElementTree output here has no indentation, so the element is
    serialized to bytes, re-parsed with minidom and pretty-printed by
    ``toprettyxml``.

    Args:
        elem: The ElementTree element to serialize.

    Returns:
        The indented XML document as a ``str``.
    """
    rough_string = ET.tostring(elem, 'utf-8')
    reparsed = minidom.parseString(rough_string)
    return reparsed.toprettyxml(indent="\t")


# Root node.
root = ET.Element("famliy")
# Elder son; equivalent to ET.Element('son', {'name': '儿1'}).
son1 = root.makeelement('son', {'name': '儿1'})
# Younger son; equivalent to ET.Element('son', {"name": '儿2'}).
son2 = root.makeelement('son', {"name": '儿2'})
# Two grandsons under the elder son; equivalent to ET.Element('grandson', ...).
grandson1 = son1.makeelement('grandson', {'name': '儿11'})
grandson2 = son1.makeelement('grandson', {'name': '儿12'})
son1.append(grandson1)
son1.append(grandson2)
# Attach both sons to the root.  Appending son1 twice would duplicate it
# and leave son2 out of the document.
root.append(son1)
root.append(son2)
raw_str = prettify(root)
# The context manager closes the file even if write() raises.
with open("new_create2.xml", 'w', encoding='utf-8') as f:
    f.write(raw_str)
命名空间
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
- from xml.etree import ElementTree as ET
- ET.register_namespace('com',"http://www.company.com") #some name
- # build a tree structure
- root = ET.Element("{http://www.company.com}STUFF")
- body = ET.SubElement(root, "{http://www.company.com}MORE_STUFF", attrib={"{http://www.company.com}hhh": ""})
- body.text = "STUFF EVERYWHERE!"
- # wrap it in an ElementTree instance, and save as XML
- tree = ET.ElementTree(root)
- tree.write("page.xml",
- xml_declaration=True,
- encoding='utf-8',
- method="xml")
shutil:高级的 文件、文件夹、压缩包 处理模块
1.将文件内容拷贝到另一个文件中
shutil.copyfileobj(fsrc, fdst[, length])
- import shutil
- shutil.copyfileobj(open('old.xml','r'), open('new.xml', 'w'))
2.拷贝文件
shutil.copyfile(src, dst)
- shutil.copyfile('f1.log', 'f2.log')
3.仅拷贝权限。内容、组、用户均不变
shutil.copymode(src, dst)
- shutil.copymode('f1.log', 'f2.log')
4.仅拷贝状态的信息,包括:mode bits, atime, mtime, flags
shutil.copystat(src, dst)
- shutil.copystat('f1.log', 'f2.log')
5.拷贝文件和权限
shutil.copy(src, dst)
- import shutil
- shutil.copy('f1.log', 'f2.log')
6.拷贝文件和状态信息
shutil.copy2(src, dst)
- import shutil
- shutil.copy2('f1.log', 'f2.log')
7.递归的去拷贝文件夹
shutil.ignore_patterns(*patterns)
shutil.copytree(src, dst, symlinks=False, ignore=None)
- import shutil
- shutil.copytree('folder1', 'folder2', ignore=shutil.ignore_patterns('*.pyc', 'tmp*'))
- import shutil
- shutil.copytree('f1', 'f2', symlinks=True, ignore=shutil.ignore_patterns('*.pyc', 'tmp*'))
8.递归的去删除文件
shutil.rmtree(path[, ignore_errors[, onerror]])
- import shutil
- shutil.rmtree('folder1')
9.递归的去移动文件,它类似mv命令,其实就是重命名
shutil.move(src, dst)
- import shutil
- shutil.move('folder1', 'folder3')
10.创建压缩包并返回文件路径,例如:zip、tar
shutil.make_archive(base_name, format,...)
- base_name: 压缩包的文件名,也可以是压缩包的路径。只是文件名时,则保存至当前目录,否则保存至指定路径,
如:www =>保存至当前路径
如:/Users/wupeiqi/www =>保存至/Users/wupeiqi/
- format: 压缩包种类,“zip”, “tar”, “bztar”,“gztar”
- root_dir: 要压缩的文件夹路径(默认当前目录)
- owner: 用户,默认当前用户
- group: 组,默认当前组
- logger: 用于记录日志,通常是logging.Logger对象
- #将 /Users/wupeiqi/Downloads/test 下的文件打包放置当前程序目录
- import shutil
- ret = shutil.make_archive("wwwwwwwwww", 'gztar', root_dir='/Users/wupeiqi/Downloads/test')
- #将 /Users/wupeiqi/Downloads/test 下的文件打包放置 /Users/wupeiqi/目录
- import shutil
- ret = shutil.make_archive("/Users/wupeiqi/wwwwwwwwww", 'gztar', root_dir='/Users/wupeiqi/Downloads/test')
shutil 对压缩包的处理是调用 ZipFile 和 TarFile 两个模块来进行的,详细:
- import zipfile
- # 压缩
- z = zipfile.ZipFile('laxi.zip', 'w')
- z.write('a.log')
- z.write('data.data')
- z.close()
- # 解压
- z = zipfile.ZipFile('laxi.zip', 'r')
- z.extractall()
#z.namelist()获取压缩包中的对象
- z.namelist()
- z.close()
- zipfile解压缩
- import tarfile
- # 压缩
- tar = tarfile.open('your.tar','w')
- tar.add('/Users/wupeiqi/PycharmProjects/bbs2.log', arcname='bbs2.log')
- tar.add('/Users/wupeiqi/PycharmProjects/cmdb.log', arcname='cmdb.log')
- tar.close()
- # 解压
- tar = tarfile.open('your.tar','r')
- tar.extractall() # 可设置解压地址
#obj = tar.getmember("bbs2.log")
#tar.extract(obj) #只解压指定成员(extractall的第一个参数是解压路径,不是成员对象)
- tar.close()
- tarfile解压缩
subprocess
1.执行命令,返回状态码
call
- ret = subprocess.call(["ls", "-l"], shell=False)
- ret = subprocess.call("ls -l", shell=True)
2.执行命令,如果执行状态码是 0 ,则返回0,否则抛异常
check_call
- subprocess.check_call(["ls", "-l"])
- subprocess.check_call("exit 1", shell=True)
3.执行命令,如果状态码是 0 ,则返回执行结果,否则抛异常
check_output
- subprocess.check_output(["echo", "Hello World!"])
- subprocess.check_output("exit 1", shell=True)
subprocess.Popen(...)
用于执行复杂的系统命令
参数:
- args:shell命令,可以是字符串或者序列类型(如:list,元组)
- bufsize:指定缓冲。0 无缓冲,1 行缓冲,其他 缓冲区大小,负值 系统缓冲
- stdin, stdout, stderr:分别表示程序的标准输入、输出、错误句柄
- preexec_fn:只在Unix平台下有效,用于指定一个可执行对象(callable object),它将在子进程运行之前被调用
- close_fds:在windows平台下,如果close_fds被设置为True,则新创建的子进程将不会继承父进程的输入、输出、错误管道。
所以不能将close_fds设置为True同时重定向子进程的标准输入、输出与错误(stdin, stdout, stderr)。
- shell:同上
- cwd:用于设置子进程的当前目录
- env:用于指定子进程的环境变量。如果env = None,子进程的环境变量将从父进程中继承。
- universal_newlines:不同系统的换行符不同,True -> 统一使用 \n
- startupinfo与creationflags只在windows下有效
将被传递给底层的CreateProcess()函数,用于设置子进程的一些属性,如:主窗口的外观,进程的优先级等等
- import subprocess
- ret1 = subprocess.Popen(["mkdir","t1"])
- ret2 = subprocess.Popen("mkdir t2", shell=True)
终端输入的命令分为两种:
- 输入即可得到输出,如:ifconfig
- 输入进行某环境,依赖再输入,如:python
- import subprocess
- obj = subprocess.Popen("mkdir t3", shell=True, cwd='/home/dev',)
- import subprocess
- obj = subprocess.Popen(["python"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
- obj.stdin.write("print(1)\n")
- obj.stdin.write("print(2)")
- obj.stdin.close()
- cmd_out = obj.stdout.read()
- obj.stdout.close()
- cmd_error = obj.stderr.read()
- obj.stderr.close()
- print(cmd_out)
- print(cmd_error)
- import subprocess
- obj = subprocess.Popen(["python"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
- obj.stdin.write("print(1)\n")
- obj.stdin.write("print(2)")
- out_error_list = obj.communicate()
- print(out_error_list)
- import subprocess
- obj = subprocess.Popen(["python"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
- out_error_list = obj.communicate('print("hello")')
- print(out_error_list)
正则表达式
特殊字符:
- #".":匹配除换行外的任意字符,只能匹配一个字符
#"^":起始位置匹配
#"$":末尾位置匹配
#"*":匹配*前的字符,0到多次
#"+":匹配+前的字符,1到多次
#"?":匹配?前的字符,0到1次
#"{}":re.findall('aa.{1,5}b','aaddbbssbbbaajjddseeaall')#匹配aa与b之间的任意字符1到5次
#"[]":re.findall('a[a-z]b','aaddbbssbbbaajjddseeaall')
#re.findall('a[0-9]b','aaddbbssbbbaajjddseeaall')
#re.findall('a[^asd]b','aaddbbssbbbaajjddseeaall')#^代表非
#"\":\d数字,\跟元字符去除特殊功能,\s任何空白字符,\w字母数字[a-z0-9A-Z_],\b匹配位置信息re.findall(r'I\b','I& am abcId')
#"()":
#"|":
- 函数:
- #只匹配起始位置
re.match('com', 'comwwww.eeiiem')
#匹配到一个就结束
re.search('com', 'comwwww.eeicomiem')
#finditer,匹配到后需要循环才能取值
#sub、subn
re.sub("g.t", "have", 'I get A, I got B, I got C',2)#最后的2为替换次数
re.subn("g.t", "have", 'I get A, I got B, I got C')#subn返回元组:(替换后的字符串, 替换次数)
re.split('\d+', 'one1two2three3four4')#['one', 'two', 'three', 'four', '']
- re.findall('\\\\com', 'asssss\comssssaaa')
- re.findall(r'\\com', 'asssss\comssssaaa')
- re.findall(r'\bam', 'I asssss\comsss am saaa')#匹配时都加上r
- 正则表达式分组
- #分组:去已经提取的的内容中提取数据
import re
origin = "has sddflklfsdkfd4562125121"
r = re.match("h(?P<name>\w+)", origin)#设置匹配到的key值
print(r.group())
#分组
print(r.groups())
print(r.groupdict())
- # 无分组
r = re.findall("a\w+", origin)
print(r)
# 有分组
origin = "hello alex bcd abcd lge acd 19"
r = re.findall("a((\w*)c)(d)", origin)
print(r)
- # 与分组无关
origin = "hello alex bcd alex lge alex acd 19"
r = re.sub("a\w+", "999", origin, 2)
print(r)
- 常用正则匹配
- IP:
- ^(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$
- 手机号:
- ^1[3458][0-9]\d{8}$
- 邮箱:
- [a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)
match
- import re
#match只从头开始匹配
m = re.match("abc", "abcefg")
m = re.match("[0-9][0-9]", "885467saawswwcc")
m = re.match("[0-9]{0,10}", "885467saawswwcc")- if m:
print(m)
print(m.group())
findall
- #匹配所有的数字
m = re.findall("[0-9]{1,15}", "885467saawsw4561w2c2c3")
#匹配所有字母
m = re.findall("[a-zA-Z]{1,15}", "885467saawsw4561w1c2c3")
#匹配所有字符
m = re.findall(".+", "885467saawsw4561w1c2c3")
#匹配任意非空
m = re.findall("\S+", "885467s aaw~s%w4&5!61w1c2c3")- if m:
print(m)
search
- #找到第一个匹配的就返回
m = re.search("\d+", "885467s aaw~s%w4&5!61w1c2c3")- if m:
print(m)
print(m.group())
sub
- #替换
m = re.sub("^\d+", "|", "4saa885467s aaw~s%w4&5!61w1c2c3",count=2)
if m:
print(m)
- 对于文件特别大,需要进行正则匹配时,建议先编译再匹配
- import re
- p = re.compile("^[0-9]")
m = p.match('14534Abc')
- print(m.group())
算法
冒泡算法
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng
# Bubble sort is one of the simple comparison sorts.
# Time complexity: the inner loop runs inside the outer loop, so the cost
# grows quadratically, O(n^2).  It does not grow exponentially.
import random
import time


def bubble_sort(array):
    """Sort ``array`` in place in ascending order, then print it.

    Args:
        array: A mutable sequence of mutually comparable items.

    Returns:
        None. The input is modified in place.
    """
    for i in range(len(array)):
        swapped = False
        # After pass i the largest i items are already at the end.
        for j in range(len(array) - 1 - i):
            # Ascending order: push the larger neighbour to the right.
            if array[j] > array[j + 1]:
                array[j], array[j + 1] = array[j + 1], array[j]
                swapped = True
        # A pass with no swap means the list is already sorted.
        if not swapped:
            break
    print(array)


if __name__ == '__main__':
    array = [random.randrange(100000) for _ in range(50000)]
    print(array)
    time_start = time.time()
    bubble_sort(array)
    time_end = time.time()
    print(time_end - time_start)
选择排序
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng
# Selection sort: scan the whole list for the smallest value and swap it
# with the first item, then repeat on the remaining items until done.
# Time complexity: O(n^2) comparisons, but at most n swaps.
import random
import time


def selection_sort(array):
    """Sort ``array`` in place in ascending order, then print it.

    Args:
        array: A mutable sequence of mutually comparable items.

    Returns:
        None. The input is modified in place.
    """
    for i in range(len(array)):
        smallest_index = i
        # Start at i + 1: comparing array[i] with itself is wasted work.
        for j in range(i + 1, len(array)):
            # Only track the index while scanning; swap once after the scan.
            if array[smallest_index] > array[j]:
                smallest_index = j
        array[i], array[smallest_index] = array[smallest_index], array[i]
    print(array)


if __name__ == '__main__':
    array = [random.randrange(100000) for _ in range(50000)]
    time_start = time.time()
    selection_sort(array)
    time_end = time.time()
    print(time_end - time_start)
快速排序
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Author: wanghuafeng
- """
- 设要排序的数组是A[0]……A[N-1],首先任意选取一个数据(通常选用数组的第一个数)作为关键数据,
- 然后将所有比它小的数都放到它前面,所有比它大的数都放到它后面,这个过程称为一趟快速排序。
- 值得注意的是,快速排序不是一种稳定的排序算法,也就是说,多个相同的值的相对位置也许会在算法结束时产生变动
- 注:在待排序的文件中,若存在多个关键字相同的记录,经过排序后这些具有相同关键字的记录之间的相对次序保持不变,
- 该排序方法是稳定的;若具有相同关键字的记录之间的相对次序发生改变,则称这种排序方法是不稳定的。
- 要注意的是,排序算法的稳定性是针对所有输入实例而言的。即在所有可能的输入实例中,
- 只要有一个实例使得算法不满足稳定性要求,则该排序算法就是不稳定的。
- """
import random
import time


def _partition(array, start, end):
    """Partition ``array[start:end + 1]`` around ``array[start]``.

    Returns the final index of the pivot. Afterwards every item left of
    that index is <= the pivot and every item right of it is > the pivot.
    """
    k = array[start]
    left_flag = start
    right_flag = end
    while left_flag < right_flag:
        # Move the right flag leftwards past items greater than the pivot.
        while left_flag < right_flag and array[right_flag] > k:
            right_flag -= 1
        array[left_flag], array[right_flag] = array[right_flag], array[left_flag]
        # Then move the left flag rightwards past items <= the pivot.
        while left_flag < right_flag and array[left_flag] <= k:
            left_flag += 1
        # The left flag now rests on an item greater than the pivot (or on
        # the right flag); swap so that item ends up right of the pivot.
        array[left_flag], array[right_flag] = array[right_flag], array[left_flag]
    return left_flag


def quick_sort(array, start, end):
    """Sort ``array[start:end + 1]`` in place in ascending order.

    Only the smaller partition is handled by recursion; the larger one is
    handled by the loop.  This keeps the recursion depth at O(log n), so
    already-sorted input no longer raises RecursionError.  No debug output
    is printed while sorting.

    Args:
        array: A mutable sequence of mutually comparable items.
        start: Index of the first item to sort.
        end: Index of the last item to sort (inclusive).

    Returns:
        None. The input is modified in place.
    """
    while start < end:
        pivot = _partition(array, start, end)
        if pivot - start < end - pivot:
            # Left half is smaller: recurse on it, loop on the right half.
            quick_sort(array, start, pivot - 1)
            start = pivot + 1
        else:
            # Right half is smaller: recurse on it, loop on the left half.
            quick_sort(array, pivot + 1, end)
            end = pivot - 1


if __name__ == '__main__':
    array = [random.randrange(100000) for _ in range(50000)]
    time_start = time.time()
    quick_sort(array, 0, len(array) - 1)
    time_end = time.time()
    print(time_end - time_start)
collections系列
counter:对字典进行处理,用于计算元素出现的个数
- #!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng- import collections
#计数器,Counter
obj = collections.Counter("asdfghjgioccvbnjrtubvg6782bnsh2bn")
print(obj)
#取前4个数,排序按照从多到少
ret = obj.most_common(4)
print(ret)
#obj:处理完的数据
for k,v in obj.items():
print(k, v)- #elements:原生的值,未经过加工处理的
for i in obj.elements():
print(i)
- #!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng- import collections
obj = collections.Counter(['11', '22', '33', '44', '55'])
print(obj)
#增加
obj.update(['eric', '11', '11'])
print(obj)
#删除
obj.subtract(['eric', '11', '11'])
print(obj)
有序字典
- #!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng- import collections
#列表+字典=有序字典,key取列表中的元素
dic = collections.OrderedDict()
dic['k1'] = 'v1'
dic['k2'] = 'v2'
dic['k3'] = 'v3'
dic['k4'] = 'v4'
dic['k5'] = 'v5'
dic['k6'] = 'v6'
dic['k7'] = 'v7'
dic['k8'] = 'v8'
print(dic)
- #!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng- import collections
#列表+字典=有序字典,key取列表中的元素
dic = collections.OrderedDict()
dic['k1'] = 'v1'
dic['k2'] = 'v2'
dic['k3'] = 'v3'
dic['k4'] = 'v4'
dic['k5'] = 'v5'
dic['k6'] = 'v6'
dic['k7'] = 'v7'
dic['k8'] = 'v8'
print(dic)
#移动到最后
dic.move_to_end('k1')
print(dic)
#后进先出
dic.popitem()
print(dic)
#指定取数,pop将取出的数据为己有
ret = dic.pop('k3')
print(dic)
print(ret)
#设置默认值,如果存在不做任何操作,如果不存在则增加
dic.setdefault('k9','22')
print(dic)
#更新
dic.update({'k1':'v1', 'k10':'v10'})
print(dic)
默认字典
- #!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng- import collections
values = [11, 22, 33, 44, 55, 66, 77, 88]
my_dict = collections.defaultdict(list)- for value in values:
if value > 33:
my_dict['k1'].append(value)
else:
my_dict['k2'].append(value)- print(my_dict)
可命名元祖
- #创建类,namedtuple
MytupleClass = collections.namedtuple('MytupleClass',['x', 'y', 'z'])
print(help(MytupleClass))
obj = MytupleClass(11, 22, 33)
print(obj.x)
print(obj.y)
print(obj.z)
双向队列
- #!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: wanghuafeng- import collections
- d = collections.deque()
#增加一个
d.append('1')
#往左增加
d.appendleft('10')
d.appendleft('1')
print(d)
#统计个数
ret = d.count('1')
print(ret)
#扩展,右边增加
d.extend(['yy', 'qq', '11'])
print(d)
#扩展,左边增加
d.extendleft(['y1y', 'q2q', '131'])
- print(d)
- #从右取数插入左边
d.rotate(3)
print(d)
单向队列
- import queue
- #创建单向队列
q = queue.Queue()
#增加数据
q.put('123')
q.put('456')
print(q.qsize())
#按顺序取数据
print(q.get())
Day6 反射、模块、正则表达式和算法的更多相关文章
- python_way day6 反射,正则 模块(进度条,hash)
python_way day6 反射 正则 模块 sys,os,hashlib 一.模块: 1.sys & os: 我们在写项目的时候,经常遇到模块互相调用的情况,但是在不同的模块下我们通过什 ...
- Python中的re模块--正则表达式
Python中的re模块--正则表达式 使用match从字符串开头匹配 以匹配国内手机号为例,通常手机号为11位,以1开头.大概是这样13509094747,(这个号码是我随便写的,请不要拨打),我们 ...
- mybatis源码- 反射模块一(跟着MyBatis学反射):类级别信息的封装
目录 1 JavaBean 规范 2 Reflector和ReflectorFactory 2.1 Reflector 属性 2.1.1 属性 2.1.2 Invoker 接口 2.2 Reflect ...
- MyBatis源码分析-基础支持层反射模块Reflector/ReflectorFactory
本文主要介绍MyBatis的反射模块是如何实现的. MyBatis 反射的核心类Reflector,下面我先说明它的构造函数和成员变量.具体方法下面详解. org.apache.ibatis.refl ...
- 【Python开发】Python之re模块 —— 正则表达式操作
Python之re模块 -- 正则表达式操作 这个模块提供了与 Perl 相似l的正则表达式匹配操作.Unicode字符串也同样适用. 正则表达式使用反斜杠" \ "来代表特殊形式 ...
- Python模块之常用模块,反射以及正则表达式
常用模块 1. OS模块 用于提供系统级别的操作,系统目录,文件,路径,环境变量等 os.getcwd() 获取当前工作目录,即当前python脚本工作的目录路径 os.chdir("di ...
- Python_Day6_反射_正则表达式之篇
一.反射 定义:利用字符串形式去对象(模块)中操作(寻找/检查/删除/设置)成员 #getattr:获取模块中属性 #hasattr:检查模块中是否存在某个成员(函数) #delattr:删除模块中成 ...
- day16 python模块 正则表达式
day16 python 一.模块 1.什么是模块 是一组功能的集合 2.模块的类型 内置模块; python提供的, 解释器自带的 ...
- sys,os,模块-正则表达式
# *__conding:utf-8__* """"我是注释""" sys,os模块 import sysimport os pr ...
随机推荐
- Codeforces Round #263 (Div. 1) C. Appleman and a Sheet of Paper 树状数组暴力更新
C. Appleman and a Sheet of Paper Appleman has a very big sheet of paper. This sheet has a form of ...
- 动态规划——数位dp
通过先前在<动态规划——背包问题>中关于动态规划的初探,我们其实可以看到,动态规划其实不是像凸包.扩展欧几里得等是具体的算法,而是一种在解决问题中决策的思想.在不同的题目中,我们都需要根据 ...
- sql server常用查询
最近在做一些练习,觉得数据的查询是一个很有意思的,在做的过程中一些好的查询方法也使自己感觉到数据库的强大,于是乎就会想到要把这些方法记下来,以后就懒得再去想了 1.查询是整百的倍数 SELECT 实缴 ...
- jsp判断手机访问和电脑访问
<% //取用户操作系统信息 String agent = request.getHeader("User-Agent") == null ? "": r ...
- Android数据存储(1)少量数据保存之SharedPreferences接口实例
SharedPreferences数据保存主要是通过键值的方式存储在xml文件中 xml文件在data/此程序的包名/XX.xml 格式 <?xml version='1.0' encoding ...
- C++中模板类使用友元模板函数
在类模板中可以出现三种友元声明:(1)普通非模板类或函数的友元声明,将友元关系授予明确指定的类或函数.(2)类模板或函数模板的友元声明,授予对友元所有实例的访问权.(3)只授予对类模板或函数模板的特定 ...
- C#操作INI配置文件示例
源文件地址:http://pan.baidu.com/share/link?shareid=2536126078&uk=1761850335创建如图所示的控件: 源代码: using Syst ...
- 《Android开发艺术探索》读书笔记 (10) 第10章 Android的消息机制
第10章 Android的消息机制 10.1 Android消息机制概述 (1)Android的消息机制主要是指Handler的运行机制,其底层需要MessageQueue和Looper的支撑.Mes ...
- Android 获取系统或SDCARD剩余空间信息(转)
android.os下的StatFs类主要用来获取文件系统的状态,能够获取sd卡的大小和剩余空间,获取系统内部空间也就是/system的大小和剩余空间等等. 看下读取sd卡的:Java代码 ...
- 使用downloadmanager调用系统的下载
/** * 文件名 UpdateDownload.java * 包含类名列表 com.issmobile.numlibrary.tool * 版本信息 版本号 * 创建日期 2014年7月14日 ...