Python 字串处理

#!/usr/bin/python

# -*- coding: utf-8 -*-

import os

import sys

import re

import shutil

import xlrd

import xlwt

import getopt

import math

from xlutils.copy import copy

'''

脚本使用：

    设置strUiPorject ui项目名称，取值如下 "mstar"/"mstar_atv"/"formal"/"formal_grey"/"haier"/"videocon"/"bbk"/"atv_project"

    删除无用字串：             ./genstr -d

    特殊标记的字串优先排序：    ./genstr -p

    给已整理好优先级高的字串添加strMark：  ./genstr -a

    读取 优先级字串整理.h 中的字串进行比对添加标记（未整理）./genstr -c 

脚本功能：

    1、根据strUiPorject设置的UI名称，迭代过滤UI目录的所有源文件和头文件，获取项目使用字串总数，并删除UIL多余字串

    2、对某一种语言做特殊标记strMark，标记的字串会放在翻译的最后面

    3、mstar优先级字串整理在mstar优先字串整理.h，已使用字串整理在mstar已使用字串整理.h中,

    formal优先级字串整理在formal优先字串整理.h

执行过程：

    1、设置UI项目名称

    2、根据UI名称，配置过滤路径和UIL删除路径(filterPath/strUilPath)

    3、再根据配置的路径执行过滤和删除动作

注意：

    SourceCode中，有一些是 TV_IDS_String_ID+Offset方式获取新字串的，这些字串要手动加到脚本，以防误删

        如：TV_IDS_String_GMT_0 字串

'''

# ---- Select the UI project here ---------------------------------------------
# One of: "mstar" / "formal" / "formal_grey" / "mstar_atv" / "haier" /
#         "videocon" / "bbk" / "atv_project"
strUiPorject = "formal_grey"
# ------------------------------------------------------------------------------

# Mode flags; fun_parse_InputParam() sets them to 1 for -d / -p / -a / -c.
g_deleteMode = 0
g_priorityMode = 0
g_AddmarkMode = 0
g_CompareMode = 0

# Ids collected by filterUsefulString() and consulted by deleteString().
setStr = set()
tupleStr = ()
# Marker text looked for / inserted in the Value attribute of English.uil.
strMark = "aaaa"

# ---- Nothing below needs to be edited ----------------------------------------
# Projects whose directories differ from the default layout:
#   project name -> (source tree to scan, directory holding the UIL files)
_projectPaths = {
    "bbk": (
        "aps/application/radisson/formal",
        "aps/application/radisson/formal/UI_Project/TV_UIProject/Languages",
    ),
    "formal": (
        "aps/application/radisson/formal",
        "aps/application/radisson/formal/UI_Project/TV_UIProject_new/Languages",
    ),
    "formal_grey": (
        "aps/application/radisson/formal",
        "aps/application/radisson/formal/UI_Project/TV_UIProject_grey/Languages",
    ),
    "mstar_atv": (
        "aps/application/radisson/mstar",
        "aps/application/radisson/mstar/UI_Project/TV_UIProject_atv/Languages",
    ),
}

if strUiPorject in _projectPaths:
    filterPath, strUilPath = _projectPaths[strUiPorject]
else:
    # Default layout: both paths are derived from the project name.
    filterPath = "aps/application/radisson/%s" % strUiPorject
    strUilPath = "aps/application/radisson/%s/UI_Project/TV_UIProject/Languages" % strUiPorject

# The game sources are always scanned in addition to the project tree.
gamePath = "aps/game"
listPath = [filterPath, gamePath]

def filterUsefulString():
    """Collect every string id referenced by the project sources.

    Walks each directory in the module-level ``listPath`` and scans every
    file whose name contains ``.c`` or ``.h`` (``TV_strid.h`` is excluded).
    Each ``TV_IDS_String...`` token found is added to the module-level
    ``setStr`` without its ``TV_`` prefix.  A line that mentions
    ``TV_IDS_Game_Menu_OSD_String`` adds ``IDS_Game_Menu_OSD_String``.

    The collected ids are then written, sorted and one per line, to
    ``<strUiPorject>已使用字串整理.h`` in the current directory, and a summary
    is printed.

    Unlike the earlier implementation, a line may hold any number of ids:
    lines with more than two ids used to be skipped, which left those ids
    out of ``setStr``.
    """
    idPattern = re.compile(r"TV_(IDS_String\w*)")

    for path in listPath:
        for dirPath, _dirNames, fileNames in os.walk(path):
            for sourceFile in fileNames:
                if sourceFile == "TV_strid.h":
                    continue
                # Same loose filter as before: any name containing ".c" or ".h".
                if ".c" not in sourceFile and ".h" not in sourceFile:
                    continue
                # "with" closes each handle promptly while walking a large tree.
                with open(os.path.join(dirPath, sourceFile), "r") as source:
                    for line in source:
                        setStr.update(idPattern.findall(line))
                        if "TV_IDS_Game_Menu_OSD_String" in line:
                            setStr.add("IDS_Game_Menu_OSD_String")

    UsedStrfilename = strUiPorject + "已使用字串整理.h"
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\n\n程序中共使用 %d 个字串。\n保存在当前目录 %s-文件中\n\n" % (len(setStr), UsedStrfilename))

    # Sorted so that the report is stable from run to run.
    with open(UsedStrfilename, "w") as usedFile:
        usedFile.writelines(strId + "\n" for strId in sorted(setStr))

'''

    #读EXCEL到映射表

def excelSetting()

    setElStr = set()

    mapStr = {}

    listFirst = []

    setDiff = set()

    book = xlrd.open_workbook(r'Languages.xls')

    sheet = book.sheet_by_index(0)

    listFirst = sheet.row_values(0)

    for row in range(sheet.nrows):

        cellStr = str(sheet.cell(row,0).value)

        cellStr.rstrip()

        if cellStr in setStr:

            mapStr[cellStr] = sheet.row_values(row)

    #setElStr = set(mapStr.keys())

    #setDiff = setElStr - setStr

    #写EXCEL

    wboot = xlwt.Workbook()

    sheet = wboot.add_sheet("Language")

    #操作第一行，抬头

    for col in range(len(listFirst)):

        sheet.write(0,col,listFirst[col])

    #其它行

    row = 1

    for (k,v) in mapStr.items():

        for col in range(len(v)):

            sheet.write(row,col,v[col])

        row = row + 1

    wboot.save(r'Language_.xls')

'''

# Process the UIL files: compare against setStr and drop unused strings.
def deleteString():
    """Remove unused ``<String ID=...>`` entries from every file under strUilPath.

    For each file found by walking the module-level ``strUilPath``:

    * a line containing ``IDS_String_GMT_`` or ``IDS_String_LNB``, or
      ``IDS_String_`` followed by one or two digits and a closing quote, is
      always kept (the original note says such strings are addressed by
      offset in the code and must not be deleted);
    * any other line containing ``<String ID=`` is kept only when its id is
      in the module-level ``setStr``;
    * every other line is kept.

    A ``<String ID=`` line whose id cannot be parsed is kept instead of
    raising.  A file is rewritten only when at least one line was removed.
    The number of removed lines is printed per file.
    """
    idPattern = re.compile(r'<String ID="(\w*)"')
    numberedPattern = re.compile(r'IDS_String_\d{1,2}"')

    for dirPath, _dirNames, fileNames in os.walk(strUilPath):
        for sourceFile in fileNames:
            filePath = os.path.join(dirPath, sourceFile)
            with open(filePath, "r") as uilFile:
                originalLines = uilFile.readlines()

            keptLines = []
            for line in originalLines:
                # Strings used via an offset in the code must never be removed.
                if ("IDS_String_GMT_" in line
                        or "IDS_String_LNB" in line
                        or numberedPattern.search(line)):
                    keptLines.append(line)
                    continue
                if "<String ID=" in line:
                    found = idPattern.search(line)
                    # Unparseable ids are kept: deleting on a guess is worse.
                    if found is not None and found.group(1) not in setStr:
                        continue
                keptLines.append(line)

            delCount = len(originalLines) - len(keptLines)
            if delCount:
                with open(filePath, "w") as uilFile:
                    uilFile.writelines(keptLines)
            print(sourceFile + "删除 %s" % delCount)

# Process the UIL files: move the marked (priority) strings to the end of each file.
def priorityString():
    """Move the strings marked in English.uil to the end of every UIL file.

    1. Reads ``<strUilPath>/English.uil`` and collects the id of every line
       that contains ``strMark`` and a quoted ``IDS_String...`` id.
    2. Appends those ids, one per line, to ``<strUiPorject>优先字串整理.h`` in
       the current directory (append mode, so repeated runs add to the
       file) and prints them.
    3. For every file under ``strUilPath`` the lines are split into priority
       lines (id collected in step 1, with ``strMark`` removed), separator
       lines (containing ``IDS_String_spliteLine``) and all other lines.
       When a file has at least one priority line and at least three other
       lines, it is rewritten as: the other lines except the last, the
       separator lines, the priority lines, then the last other line.

    All three buckets are rebuilt for each file.  Previously the separator
    bucket was only cleared after a rewrite, so separator lines from a file
    that was not rewritten leaked into the next one.
    """
    # Greedy prefix kept on purpose: it picks the last quoted id on a line.
    pat = re.compile(r".*\"(IDS_String\w*)\".*")

    lanFist = []
    with open(strUilPath + "/English.uil", "r") as englishFile:
        for line in englishFile:
            if strMark not in line:
                continue
            found = pat.search(line)
            if found:
                lanFist.append(found.group(1))

    PrStrfilename = strUiPorject + "优先字串整理.h"
    with open(PrStrfilename, "a") as priorityFile:
        priorityFile.writelines(strId + "\n" for strId in lanFist)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("优先级字串共%d，如下：" % len(lanFist))
    print(lanFist)
    print("\n\n优先字串共 %d 个。\n保存在当前目录 %s-文件中\n\n" % (len(lanFist), PrStrfilename))

    priorityIds = set(lanFist)  # constant-time membership test per line
    for dirPath, _dirNames, fileNames in os.walk(strUilPath):
        for sourceFile in fileNames:
            filePath = os.path.join(dirPath, sourceFile)
            priorityLines = []
            separatorLines = []
            otherLines = []
            with open(filePath, "r") as uilFile:
                for line in uilFile:
                    found = pat.search(line)
                    if found and found.group(1) in priorityIds:
                        priorityLines.append(line.replace(strMark, ""))
                    elif "IDS_String_spliteLine" in line:
                        separatorLines.append(line)
                    else:
                        otherLines.append(line)

            if priorityLines and len(otherLines) >= 3:
                # Keep the final line (the file's last non-string line) at the end.
                reordered = (otherLines[:-1] + separatorLines
                             + priorityLines + otherLines[-1:])
                with open(filePath, "w") as uilFile:
                    uilFile.writelines(reordered)

# Add strMark to the strings that have already been sorted as high priority.
def AddMark():
    """Prefix the value of every string after the separator with ``strMark``.

    Reads ``<strUilPath>/English.uil``.  Once a ``<String ID=`` line
    containing ``IDS_String_spliteLine`` has been seen, every later
    ``<String ID=`` line gets ``Value="`` replaced by ``Value="<strMark>``.
    The separator line itself and all lines before it are left unchanged.
    The file is then rewritten in place and a completion message is printed.

    A line whose value already starts with the mark is left alone, so
    running the command twice does not stack marks.
    """
    englishPath = strUilPath + "/English.uil"
    markedValue = 'Value="' + strMark

    with open(englishPath, "r") as englishFile:
        originalLines = englishFile.readlines()

    StringList = []
    afterSeparator = False
    for line in originalLines:
        if "<String ID=" in line:
            if "IDS_String_spliteLine" in line:
                afterSeparator = True
            elif afterSeparator and markedValue not in line:
                line = line.replace('Value="', markedValue)
        StringList.append(line)

    with open(englishPath, "w") as englishFile:
        englishFile.writelines(StringList)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\n添加StrMark完成\n")

# Read the ids listed in the priority-string file and mark the matching
# strings in English.uil (marked "unfinished" by the original author).
def CompareAddMark():
    """Mark the English.uil strings whose ids are listed in the priority file.

    Reads ``formal优先字串整理.h`` from the current directory and collects the
    ``IDS_String_...`` id on each line; lines without such an id (for
    example blank lines) are skipped instead of raising.  The collected set
    is printed.

    Then, in ``<strUilPath>/English.uil``, every ``<String ID=`` line whose
    id is in that set gets ``Value="`` replaced by ``Value="<strMark>``.
    Lines already carrying the mark, and lines whose id cannot be parsed,
    are left unchanged.  The file is rewritten in place.
    """
    # NOTE(review): the list file name is hard-coded to the "formal" project,
    # while priorityString() writes "<strUiPorject>优先字串整理.h" - confirm
    # whether this should follow strUiPorject as well.
    priorityFileName = 'formal优先字串整理.h'
    englishPath = strUilPath + "/English.uil"
    markedValue = 'Value="' + strMark

    # \w* instead of .* so trailing blanks or "\r" do not become part of the id;
    # the greedy prefix still selects the last id on the line, as before.
    listedId = re.compile(r".*(IDS_String_\w*)")
    uilId = re.compile(r'<String ID="(\w*)"')

    strSet = set()
    with open(priorityFileName, "r") as priorityFile:
        for line in priorityFile:
            found = listedId.search(line)
            if found:
                strSet.add(found.group(1))
    print(strSet)

    with open(englishPath, "r") as englishFile:
        originalLines = englishFile.readlines()

    strList = []
    for line in originalLines:
        if "<String ID=" in line and markedValue not in line:
            found = uilId.search(line)
            if found is not None and found.group(1) in strSet:
                line = line.replace('Value="', markedValue)
        strList.append(line)

    with open(englishPath, "w") as englishFile:
        englishFile.writelines(strList)

def fun_parse_InputParam():
    """Parse ``sys.argv`` and set the module-level mode flags.

    Recognised options (each sets its flag to 1):

    * ``-d`` -> ``g_deleteMode``
    * ``-p`` -> ``g_priorityMode``
    * ``-a`` -> ``g_AddmarkMode``
    * ``-c`` -> ``g_CompareMode``

    Raises:
        SystemExit: with status 2 when the command line contains an option
            that getopt rejects.  The getopt message and a usage line are
            written to stderr first; previously the script exited silently
            with status 0.
    """
    global g_deleteMode
    global g_priorityMode
    global g_AddmarkMode
    global g_CompareMode

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'dpac')
    except getopt.GetoptError as err:  # "as" form works on Python 2.6+ and 3
        sys.stderr.write("%s\nusage: genstr [-d] [-p] [-a] [-c]\n" % err)
        sys.exit(2)

    for op, value in opts:
        if op == "-d":
            g_deleteMode = 1
        elif op == "-p":
            g_priorityMode = 1
        elif op == "-a":
            g_AddmarkMode = 1
        elif op == "-c":
            g_CompareMode = 1
        else:
            # Defensive only: getopt rejects options outside 'dpac' itself.
            print("unhandled option")
            sys.exit(2)

if __name__ == "__main__":
    fun_parse_InputParam()

    # Each enabled mode runs its steps; modes are visited in the fixed order
    # -d, -p, -a, -c regardless of the order given on the command line.
    for modeEnabled, modeSteps in (
        (g_deleteMode, (filterUsefulString, deleteString)),
        (g_priorityMode, (priorityString,)),
        (g_AddmarkMode, (AddMark,)),
        (g_CompareMode, (CompareAddMark,)),
    ):
        if not modeEnabled:
            continue
        for runStep in modeSteps:
            runStep()

Python 字串处理的更多相关文章

python之字串
python字串声明: 单引('), 双引("), 三引(''' 或 """"). python字串前缀: r表示原生字串, 字串内容: (1)不能包 ...
如何使用 Python 進行字串格式化
前言: Python有几种方法可以显示程序的输出:数据可以以人类可读的形式打印出来,或者写入文件以供将来使用. 在开发应用程式时我们往往会需要把变数进行字串格式化,也就是说把字串中的变数替换成变量值. ...
动态规划--求最大连续子数组的和（Python实现）&求解最大连续乘积字串（Python实现）
def MaxSum(self,array,n): sum=array[0] result=array[0] for i in range(0,n): if sum<0: sum=a[i] el ...
c#调用dll接口传递utf-8字串方法
1. 起源: VCU10之视频下载模块,采用纯python编码实现,c++代码调用pythonrun.h配置python运行环境启动python模块,编译为dll给c#调用,以使界面UI能够使用其中功 ...
最大公共字串LCS问题（阿里巴巴）
给定两个串,均由最小字母组成.求这两个串的最大公共字串LCS(Longest Common Substring). 使用动态规划解决. #include <iostream> #inclu ...
编程：使用递归方式判断某个字串是否回文（Palindrome）
Answer: import java.util.Scanner; public class Palindrome { private static int len;//全局变量整型数据 privat ...
NOIP2002字串变换[BFS]
题目描述已知有两个字串 A$, B$ 及一组字串变换的规则(至多6个规则): A1$ -> B1$ A2$ -> B2$ 规则的含义为:在 A＄中的子串 A1$ 可以变换为 B1$.A2 ...
字串符相关 split() 字串符分隔 substring() 提取字符串 substr()提取指定数目的字符 parseInt() 函数可解析一个字符串，并返回一个整数。
split() 方法将字符串分割为字符串数组,并返回此数组. stringObject.split(separator,limit) 我们将按照不同的方式来分割字符串: 使用指定符号分割字符串,代码如 ...
mormot 数据集转换为JSON字串
mormot 数据集转换为JSON字串 unit Unit1; interface uses Windows, Messages, SysUtils, Variants, Classes, Graph ...

随机推荐

Spring IOC 源码简单分析 02 - Bean Reference
### 准备 ## 目标了解 bean reference 装配的流程 ##测试代码 gordon.study.spring.ioc.IOC02_BeanReference.java ioc02 ...
TinyURL
2018-03-09 15:19:04 TinyURL,短地址,或者叫短链接,指的是一种互联网上的技术与服务.此服务可以提供一个非常短小的URL以代替原来的可能较长的URL,将长的URL地址缩短. 用 ...
《图解Http》 2-6章：基础，报文，状态码，首部。
HTTP协议和Cookie 是stateless协议,自身不对请求和响应之间的通信状态进行保存.但随着技术发展,为了实现保存状态的功能,引入了Cookie技术. Cookie在请求和响应报文中写入信息 ...
[转]PowerDesigner 把Comment写到name中和把name写到Comment中 pd7以后版本可用
http://www.cnblogs.com/cxd4321/archive/2009/03/07/1405475.html 在使用PowerDesigner对数据库进行概念模型和物理模型设计时,一般 ...
SCWS中文分词PHP扩展详细安装说明
因最近写的一段代码,需要用到中文分词,在网上找了一下,发现了scws这个不错的插件,故根据文档安装使用,下面记录下安装的全过程系统:centos 安装scws wget http://www.xun ...
OSI七层与tcp/ip四层
1)OSI七层模型 OSI中的层功能 TCP/IP协议族应用层文件传输,电子邮件,文件服务,虚拟终端 TFTP,HTTP,SNMP,FTP,SMTP,DNS,Telnet 表示层数据格式化,代 ...
HDU 2577 分情况多维DP
How to Type Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)Total ...
UVA-10779 Collectors Problem （网络流建模）
题目大意:有n个人,已知每人有ki个糖纸,并且知道每张糖纸的颜色.其中,Bob希望能和同伴交换使得手上的糖纸数尽量多.他的同伴只会用手上的重复的交换手上没有的,并且他的同伴们之间不会产生交换.求出Bo ...
修改oracle表空间数值
alter database datafile 'D:\oracle\dbfile\DATA.DBF' autoextend on next 100m maxsize 2000M;
快速切题CF 158B taxi 构造 && 82A double cola 数学观察难度:0
实在太冷了今天 taxi :错误原因1 忽略了 1 1 1 1 和 1 2 1 这种情况,直接认为最多两组一车了 2 语句顺序错 double cola: 忘了减去n的序号1,即n-- B. Taxi ...

Python 字串处理

Python 字串处理的更多相关文章

随机推荐

热门专题