# -*- coding:utf-8 -*-
"""Turn an ``EDED<year>.txt`` listing into ``tred<year>.csv``.

Adapted for the new version.

Running the module executes three passes in order:

1. ``tred_nonote`` - code, name, type letter, Desc and Repr of each entry.
2. ``tred_note``   - the Note text of each entry, one output line per entry.
3. ``join``        - pairs the two intermediate files line by line.
"""
import re

year = '17a'  # user-defined
ss = './data/'  # root directory
filename = ss + 'EDED%s.txt' % year  # input file name

# Entry header line.  The three variants match the same kind of line and
# capture the 4-digit code (e.g. 1001), the name, and the bracketed letter.
_HEADER_CODE = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})(\d\d\d\d)\s\s[A-Z].+\]$")
_HEADER_NAME = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})\d\d\d\d\s\s([A-Z].+)\s+\[[A-Z]\]$")
_HEADER_TYPE = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})\d\d\d\d\s\s[A-Z].+\s+\[([A-Z])\]$")
_DESC_COMPLETE = re.compile(r"^\s{5}Desc:\s(.+\w\w\.)\n")
_DESC_OPEN = re.compile(r"^\s{5}Desc:\s(.+[^\.]|.+\.g\.)\n")  # Desc not ending in "."
_DESC_TAIL = re.compile(r"^\s{11}(.+\.)\n")  # second line of such a Desc
_REPR = re.compile(r"^\s{5}Repr:\s(.+)\n")  # Repr
_NOTE_START = re.compile(r"^\s{5}Note:\s\n")  # Note
_NOTE_TEXT = re.compile(r"^\s{11}([^ ].+)\n")  # Note content
# NOTE(review): the non-ASCII alternative looks like a mis-decoded rule
# character; it is kept exactly as found - confirm against the input file.
_RULE = re.compile(r"^(?:-|컴)+\n")


def tred_nonote(src=None, dest=None):
    """Write code, name, type letter, Desc and Repr of every entry.

    Each header line starts a new output line (a newline is written *before*
    the code, so the output begins with an empty line and has no trailing
    newline).  The fields follow in the order code, ``,`` name, type letter
    (no separator before it), ``,"Desc"``, ``,Repr``.

    A small state machine makes sure a field is only written when it follows
    the expected previous field of the same entry:
    1 code -> 2 name -> 3 type -> 4 one-line Desc | 5 open Desc -> 6 Desc
    tail -> 7 Repr.

    :param src: input path; defaults to the module-level ``filename``.
    :param dest: output path; defaults to ``ss + 'tred_nonote<year>.txt'``.
        The file is opened in append mode, so repeated runs accumulate.
    """
    if src is None:
        src = filename
    if dest is None:
        dest = ss + 'tred_nonote%s.txt' % year

    state = 0
    # 'a' appends, 'w' would overwrite.
    with open(src) as reader, open(dest, 'a') as out:
        for line in reader:
            # The checks are deliberately sequential (not elif): one header
            # line satisfies the first three in a single iteration.
            found = _HEADER_CODE.match(line)
            if found:
                state = 1
                out.write("\n" + found.group(1))

            found = _HEADER_NAME.match(line)
            if found and state == 1:
                state = 2
                out.write("," + found.group(1))

            found = _HEADER_TYPE.match(line)
            if found and state == 2:
                state = 3
                out.write(found.group(1))

            found = _DESC_COMPLETE.match(line)
            if found and state == 3:
                state = 4
                out.write(',"' + found.group(1) + '"')

            found = _DESC_OPEN.match(line)
            # Was ``flag == 3 or 5``, which is always true.
            if found and state in (3, 5):
                state = 5
                out.write(',"' + found.group(1))

            found = _DESC_TAIL.match(line)
            if found and state == 5:
                state = 6
                out.write(" " + found.group(1) + '"')

            found = _REPR.match(line)
            # Was ``flag == 4 or 6``, which is always true.
            if found and state in (4, 6):
                state = 7
                out.write("," + found.group(1))


def tred_note(src=None, dest=None):
    """Write the Note text of every entry, one output line per entry.

    A newline is written for every header line; the content lines of a
    ``Note:`` section are appended to the current output line, each preceded
    by a single space.  A rule line after note content ends the section.

    :param src: input path; defaults to the module-level ``filename``.
    :param dest: output path; defaults to ``ss + 'tred_note<year>.txt'``.
        The file is opened in append mode, so repeated runs accumulate.
    """
    if src is None:
        src = filename
    if dest is None:
        dest = ss + 'tred_note%s.txt' % year

    state = 0
    in_note = False
    # 'a' appends, 'w' would overwrite.
    with open(src) as reader, open(dest, 'a') as out:
        for line in reader:
            if _HEADER_CODE.match(line):
                state = 1
                out.write("\n")

            if _NOTE_START.match(line) and state == 1:
                state = 2
                in_note = True

            if in_note:
                found = _NOTE_TEXT.match(line)
                # Was ``flag == 2 or 3``, which is always true and let
                # indented lines of a following entry leak into the notes
                # when the section had not been closed by a rule line.
                if found and state in (2, 3):
                    state = 3
                    out.write(" " + found.group(1))

                if _RULE.match(line) and state == 3:
                    state = 0
                    in_note = False

        # NOTE(review): the original indentation was lost.  This newline is
        # placed after the loop so the file holds exactly one line per header
        # (plus the leading empty one), which join() pairs by position -
        # confirm against a known-good output.
        out.write("\n")


def join(note_path=None, nonote_path=None, out_path=None):
    """Append ``,"<note>"`` to every line of the no-note file.

    Line *i* of the no-note file is paired with line *i* of the note file.
    Whitespace-only note lines count as an empty note.  If the note file is
    shorter than the no-note file the missing notes are treated as empty
    (this used to raise ``IndexError``); surplus note lines are ignored.

    :param note_path: defaults to ``ss + 'tred_note<year>.txt'``.
    :param nonote_path: defaults to ``ss + 'tred_nonote<year>.txt'``.
    :param out_path: defaults to ``ss + 'tred<year>.csv'``; opened in append
        mode.
    """
    if note_path is None:
        note_path = ss + 'tred_note%s.txt' % year
    if nonote_path is None:
        nonote_path = ss + 'tred_nonote%s.txt' % year
    if out_path is None:
        out_path = ss + 'tred%s.csv' % year

    with open(note_path) as note_file:
        notes = ['' if raw.isspace() else raw.strip('\n') for raw in note_file]

    with open(nonote_path) as rows, open(out_path, 'a') as out:
        for index, row in enumerate(rows):
            note = notes[index] if index < len(notes) else ''
            out.write('%s,"%s"\n' % (row.strip('\n'), note))


if __name__ == '__main__':
    tred_nonote()
    tred_note()
    join()

tred_extract_EDED_new的更多相关文章

随机推荐

  1. XSS学习小结

    一.什么是XSS? XSS全称是Cross Site Scripting即跨站脚本,当目标网站目标用户浏览器渲染HTML文档的过程中,出现了不被预期的脚本指令并执行时,XSS就发生了. 这里我们主要注 ...

  2. JSP复习

    3.2.2 JSP指令元素: JSP指令 (1) page指令:定义整个页面的全局属性 (2)include指令:用于包含一个文件或代码的文件 (3)taglib指令:用来引用自定义的标签或第三方标签 ...

  3. win64+anaconda+xgboost(转)

    Windows下安装python版的XGBoost(Anaconda)          XGBoost是近年来很受追捧的机器学习算法,由华盛顿大学的陈天奇提出,在国内外的很多大赛中取得很不错的名次, ...

  4. BIGDECIMAL 四舍五入等取舍问题

    我输入的是1.35,但是电脑不可能取到整数,他的值如下:初始化数据:1.350000000000000088817841970012523233890533447265625ROUND_DOWN); ...

  5. Typora快捷键的使用

    无序列表:输入-之后输入空格 有序列表:输入数字+“.”之后输入空格 任务列表:-[空格]空格 文字 标题:ctrl+数字 表格:ctrl+t 生成目录:[TOC]按回车 选中一整行:ctrl+l 选 ...

  6. 批量删除进程清理 minerd

    发现顽固minerd 进程与ntp一起启动,所以一起杀掉 yum remove ntp kill -9 `ps -ef | grep ntp|awk '{print $2}'` kill -9 `ps ...

  7. Winsock版本的“hello world!”

    1.基于TCP协议的“hello world!” 1)服务器端:WSAStartup()->socket()->bind()->listen()->accept()->s ...

  8. Python frozenset() 函数

    Python frozenset() 函数  Python 内置函数 描述 frozenset() 返回一个冻结的集合,冻结后集合不能再添加或删除任何元素. 语法 frozenset() 函数语法: ...

  9. 关于元表,self,元方法__index

    这是需要仔细分辨的几个概念. 元表:相当于table的隐藏属性. 只有固定的一些方法,如__index,__tostring,__add等,称为元方法. 虽然是固定的,但是任何table都可以作为任何 ...

  10. onsubmit return false仍提交表单

    博主之前遇到这样的问题,是因为代码有错,改正之后就正常了. 但今天确定代码没错,仍然return false提交表单. 总结网上各路大神的解释: 1.onsubmit的作用是防止form只有一个inp ...