0 引言




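1 - 下载 colors_new.cp(https://github.com/JarveeLee/SynthText_Chinese_version/tree/master/data/models/colors_new.cp),放到脚本的运行目录下;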

2 - 将下面两份代码分别保存为 get_color.py 和 pygame_main.py;

3 - 执行 python pygame_main.py 即可。

# get_color.py
import cv2
import pickle as cp
import numpy as np


class ColorSample(object):
    """Pick a foreground/background colour pair that suits a background image.

    Each row of the colour table is read as two colours, each stored as three
    means followed by three standard deviations (columns 0-5 and 6-11).
    """

    def __init__(self):
        """Load ``colors_new.cp`` from the working directory and index it in Lab.

        colors_new.cp comes from
        https://github.com/JarveeLee/SynthText_Chinese_version/tree/master/data/models/colors_new.cp
        """
        # NOTE(review): pickle.load can execute arbitrary code; only use a
        # colors_new.cp obtained from a trusted source.
        with open('colors_new.cp', 'rb') as f:
            self.colorsRGB = cp.load(f, encoding='latin-1')
        self.ncol = self.colorsRGB.shape[0]  # 4941 (original author's note)

        # Convert the colour means from RGB to Lab for better nearest-neighbour
        # computations. The means of the first colours are stacked on top of
        # the means of the second colours, giving 2 * ncol rows.
        means = np.r_[self.colorsRGB[:, 0:3], self.colorsRGB[:, 6:9]].astype('uint8')
        self.colorsLAB = np.squeeze(cv2.cvtColor(means[None, :, :], cv2.COLOR_RGB2Lab))

    def sample_normal(self, col_mean, col_std):
        """Sample a colour from a normal distribution.

        The sample is centred on ``col_mean`` with standard deviation
        ``col_std``; a single random draw is shared by all channels. The
        result is clipped to [0, 255] and returned as uint8.
        """
        col_sample = col_mean + col_std * np.random.randn()
        return np.clip(col_sample, 0, 255).astype('uint8')

    def sample_from_data(self, bg_mat):
        """Sample a colour pair for the given background.

        bg_mat : an n x m x 3 RGB image.

        Returns a tuple (RGB_foreground, RGB_background); each is a 3-vector.
        """
        bg_lab = cv2.cvtColor(bg_mat, cv2.COLOR_RGB2Lab)
        bg_lab = np.reshape(bg_lab, (np.prod(bg_lab.shape[:2]), 3))
        bg_mean = np.mean(bg_lab, axis=0)

        # Nearest table colour to the mean background colour (in Lab).
        norms = np.linalg.norm(self.colorsLAB - bg_mean[None, :], axis=1)
        nn = np.argmin(norms)

        # nn indexes the stacked 2 * ncol means; fold it back to a table row.
        data_col = self.colorsRGB[np.mod(nn, self.ncol), :]

        col1 = self.sample_normal(data_col[:3], data_col[3:6])
        col2 = self.sample_normal(data_col[6:9], data_col[9:12])

        if nn < self.ncol:
            # The first colour matched the background, so it goes second.
            return (col2, col1)
        else:
            # The second colour matched the background; keep it second.
            return (col1, col2)
if __name__ == '__main__':
    # Smoke test: sample a colour pair for a flat mid-grey background.
    # sample_from_data is a method, so it needs a ColorSample instance and a
    # real image array (the original referenced undefined names here).
    sampler = ColorSample()
    bgi = np.full((32, 32, 3), 128, dtype='uint8')
    fg_col, bg_col = sampler.sample_from_data(bgi)
    print(fg_col, bg_col)
# -*- coding: utf-8 -*-
# pygame_main.py -- pygame replaces PIL's Image for rendering the text
import os  # needed by handle() for os.getpid(); was swallowed by the comment above
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import secrets
import pygame
from pygame.locals import *
from pygame import freetype

import get_color

resultImgsDir = '/home/result_imgs'  # where the generated images are stored
resultXmlDir = '/home/result_xmls'  # where the generated XML files are stored
bgiDir = '/home/background_images'  # directory of background images
gTtf = '/home/ttfs'  # directory of fonts
# Text to render: one sentence (or one word) per line.
totalFile = '/home/zzc/data/synth_recepit_text/result_200.txt'

FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format=FORMAT)

gBlockSize = 20  # number of sentences handed to a worker at a time
ttfSize = [28, 30, 35, 40, 45, 50, 55, 60, 65]

#====test
#charset = [line.strip().split('\t')[1] for line in open('text/chars_gb2312').readlines()[:-1]]
def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to ``block`` in place and return it.

    One integer in ``[0, level]`` is drawn per element; elements that draw 0
    are set to 0 and elements that draw ``level`` are set to 255.
    """
    # np.random.randint excludes its upper bound, so the bound is level + 1;
    # with ``level`` as the bound the 255 branch could never trigger.
    ran = np.random.randint(0, level + 1, block.shape)
    salt = ran == 0
    pepper = ran == level
    block[salt] = 0
    block[pepper] = 255
    return block


def _addNoise(block, below=4, high=20):
    """Return a uint8 copy of ``block`` with Gaussian noise added.

    The noise standard deviation is a random integer in ``[below, high)``;
    the result is clipped to [0, 255].
    """
    randValue = np.random.randn(*block.shape) * np.random.randint(below, high)
    return np.clip(block + randValue, 0, 255).astype('uint8')


def _feather(block, height):
    """Feather (Gaussian-blur) ``block``; taller text gets a stronger blur."""
    # determine the gaussian-blur std and kernel size:
    if height <= 30:
        bsz = 0.25
        ksz = 1  # was left undefined for small text
    elif height < 50:
        bsz = max(0.30, 0.5 + 0.1 * np.random.randn())
        ksz = 3
    else:
        bsz = max(0.5, 1.5 + 0.5 * np.random.randn())
        ksz = 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend ``obj`` into ``dst`` around ``center`` with cv2.MIXED_CLONE."""
    mask = 255 * np.ones(obj.shape, obj.dtype)
    return cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)


def _rander(bgiGame, string, rowStart, font, get_color):
    """Render ``string`` onto the surface at row ``rowStart`` and degrade it.

    Args:
        bgiGame: pygame surface to draw on.
        string: text to render.
        rowStart: y coordinate of the top of the text.
        font: pygame.freetype font; its ``oblique`` and ``rotation``
            attributes are randomised here.
        get_color: object providing ``sample_from_data(rgb_array)``.

    Returns:
        ``(surface, rowStart, colStart, txtwidth, txtheight)``. When the text
        does not fit, the surface is returned unchanged and the last three
        values are 0.
    """
    width, height = bgiGame.get_size()

    # sample the colour
    bgiNp = pygame.surfarray.array3d(bgiGame)
    fg_col, bg_col = get_color.sample_from_data(bgiNp)
    # Plain ints so pygame accepts the colour regardless of numpy dtype.
    fg_col = tuple(int(c) for c in fg_col.squeeze())

    # change the properties of the font
    font.oblique = secrets.choice([False, True])
    font.rotation = secrets.choice(range(-5, 5))

    # Trial render, only to measure the text.
    txtwidth, txtheight = font.render(string)[1].size
    if txtwidth > width or rowStart + txtheight > height:
        return bgiGame, rowStart, 0, 0, 0
    # + 1 so the text may also sit flush against the right edge.
    colStart = secrets.randbelow(width - txtwidth + 1)

    # render the text
    font.render_to(bgiGame, (colStart, rowStart), string, fg_col)

    # surface -> numpy (rows first, BGR)
    bgiNp = pygame.surfarray.array3d(bgiGame)
    bgiNp = cv2.cvtColor(bgiNp.transpose([1, 0, 2]), cv2.COLOR_RGB2BGR)

    # add noise and blur to the text region only
    rows = slice(rowStart, rowStart + txtheight)
    cols = slice(colStart, colStart + txtwidth)
    block = bgiNp[rows, cols, :]
    block = _addNoise(block, 4, 20)
    if secrets.randbelow(4) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    block = _feather(block, txtheight)
    block = _addNoise(block, 2, 20)
    if secrets.randbelow(4) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    bgiNp[rows, cols, :] = block

    # numpy -> surface
    bgiNp = cv2.cvtColor(bgiNp.transpose([1, 0, 2]), cv2.COLOR_BGR2RGB)
    bgiGame = pygame.surfarray.make_surface(bgiNp)

    return bgiGame, rowStart, colStart, txtwidth, txtheight
def _paste(bgiGame, ttf, size, rowStart, curText, cols, get_color):
    """Choose the final string for one line and render it if it is usable.

    With probability 1/10 the text is replaced by 10 random digits, and with
    probability 1/8 one of those digits is then replaced by a dot. Only
    strings of exactly 10 characters are rendered.

    Args:
        bgiGame: pygame surface to draw on.
        ttf: path of the font file.
        size: font size.
        rowStart: y coordinate of the top of the text.
        curText: candidate text.
        cols: unused; kept for interface compatibility.
        get_color: colour sampler forwarded to ``_rander``.

    Returns:
        ``(surface, string, rowStart, colStart, txtwidth, txtheight)``.
        ``string`` is ``''`` when the text was skipped; the surface is then
        returned unchanged so the caller can always unpack six values.
    """
    curText = curText.strip()

    # random digits
    numberLength = 10
    digits = '0123456789'
    if secrets.randbelow(numberLength) == 0:
        curText = ''.join(secrets.choice(digits) for _ in range(numberLength))
        string = curText
        # random dot
        if secrets.randbelow(numberLength - 2) == 0:
            dotInd = random.randint(1, numberLength - 2)
            string = curText[:dotInd] + '.' + curText[dotInd + 1:]
    else:
        string = curText

    # skip anything that is not exactly numberLength characters long
    if len(string) != numberLength:
        return bgiGame, '', rowStart, 0, 0, 0

    # Load the font only once we know something will be rendered.
    ttfont = freetype.Font(ttf, size)
    bgiGame, rowStart, colStart, txtwidth, txtheight = _rander(
        bgiGame, string, rowStart, ttfont, get_color)
    return bgiGame, string, rowStart, colStart, txtwidth, txtheight


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append one ``<object>`` annotation for ``string`` to ``anno``.

    The object holds ``<name>text</name>``, ``<textContent>`` with the
    string, and a ``<bndbox>`` with xmin/ymin/xmax/ymax. Nothing is added
    for an empty string.
    """
    if not string:
        return
    body = doc.createElement('object')
    anno.appendChild(body)

    name = doc.createElement('name')
    name.appendChild(doc.createTextNode('text'))
    body.appendChild(name)

    content = doc.createElement('textContent')
    content.appendChild(doc.createTextNode(string))
    body.appendChild(content)

    bndbox = doc.createElement('bndbox')
    for tag, value in (('xmin', xminT), ('ymin', yminT),
                       ('xmax', xmaxT), ('ymax', ymaxT)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        bndbox.appendChild(node)
    body.appendChild(bndbox)


def paste(imgname, bgi, text, ttf, get_color):
    """Fill one background image with text lines, top to bottom.

    Args:
        imgname: image file name recorded in the annotation.
        bgi: path of the background image.
        text: list of candidate strings.
        ttf: path of the font file.
        get_color: colour sampler forwarded to the renderer.

    Returns:
        ``(image, doc, numTextDone)``: the BGR image array, the minidom
        annotation document, and the number of lines actually drawn.
    """
    pygame.init()
    bgiGame = pygame.image.load(bgi)
    width, height = bgiGame.get_size()
    depth = bgiGame.get_bitsize() // 8

    # spacing before the first line
    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    # random font size
    curTtfSize = random.choice(ttfSize)

    # XML header: image name and size
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)
    imgNameNode = doc.createElement('imgName')
    imgNameNode.appendChild(doc.createTextNode(imgname))
    anno.appendChild(imgNameNode)
    sizeNode = doc.createElement('size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        sizeNode.appendChild(node)
    anno.appendChild(sizeNode)

    # Paste the text line by line. Rows are limited by the image height
    # (the original compared against the width).
    numTextDone = 0
    while text and curRow + curTtfSize <= height:
        curText = secrets.choice(text)
        bgiGame, string, curRow, colStart, txtwidth, txtheight = _paste(
            bgiGame, ttf, curTtfSize, curRow, curText, width, get_color)
        if not string:
            # Unusable text: try another one on the same row.
            continue
        if txtwidth > 0 and txtheight > 0:
            # Only annotate text that was really drawn.
            numTextDone += 1
            _xml(doc, anno, string,
                 xminT=colStart, yminT=curRow,
                 xmaxT=colStart + txtwidth, ymaxT=curRow + txtheight)
        curRow += txtheight
        curRow += curRowInter
        # next spacing and font size
        curRowInter = random.randint(3, 6)
        curTtfSize = random.choice(ttfSize)

    bgiNp = pygame.surfarray.array3d(bgiGame).transpose([1, 0, 2])
    bgiNp = cv2.cvtColor(bgiNp, cv2.COLOR_RGB2BGR)
    return bgiNp, doc, numTextDone


def handle(indTexts):
    """Worker: build one image and its XML annotation from a text group.

    ``indTexts`` is an ``(index, texts)`` pair as produced by ``enumerate``.
    """
    ind, texts = indTexts
    # process id, used in the output name
    pid = os.getpid()
    # colour sampler
    getcolor = get_color.ColorSample()

    # pick a random background image
    bgis = glob.glob(osp.join(bgiDir, '*.jpg'))
    if not bgis:
        raise FileNotFoundError('no *.jpg background images in {}'.format(bgiDir))
    bgipath = random.choice(bgis)

    # pick a random font (``ttfs`` was never defined in the original)
    ttfs = glob.glob(osp.join(gTtf, '*.ttf'))
    if not ttfs:
        raise FileNotFoundError('no *.ttf fonts in {}'.format(gTtf))
    ttf = random.choice(ttfs)

    imgname = 'bgi{}_ind{}_pid{}_ttf{}.jpg'.format(
        osp.basename(bgipath), ind, pid, osp.basename(ttf))
    try:
        bgiNp, doc, numTextDone = paste(imgname, bgipath, texts, ttf, getcolor)
    finally:
        pygame.quit()

    imgnamep = 'bgi{}_ind{}_{}Of{}_ttf{}.jpg'.format(
        osp.basename(bgipath), ind, numTextDone, len(texts), osp.basename(ttf))
    logging.warning(imgnamep)

    # write the image and the XML to their output directories
    cv2.imwrite(osp.join(resultImgsDir, imgname), bgiNp)
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    # toprettyxml(encoding=...) returns bytes, so write them as-is.
    with open(xmlFileName, 'wb') as fxml:
        fxml.write(doc.toprettyxml(indent=" ", newl="\n", encoding="utf-8"))
    return


if __name__ == '__main__':
    # 1 - read the text lines and split them into groups of about gBlockSize
    with open(totalFile) as ftotal:
        total = [line.strip() for line in ftotal]
    numP = 30
    inter = math.ceil(len(total) / gBlockSize)
    totalSP = [total[i::inter] for i in range(inter)]

    # 2 - process the groups in a pool of worker processes
    with mp.Pool(numP) as p:
        p.map(handle, enumerate(totalSP))





# -*- coding: utf-8 -*-

import os
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import pygame
from pygame.locals import *
from pygame import freetype

import get_color

resultImgsDir = 'crnn_result_imgs1'
resultXmlDir = 'crnn_result_xmls1'
bgiDir = 'bgi'
gTtf = 'ttfs'
totalFile = 'texts.txt'

FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format=FORMAT)

gBlockSize = 20  # num of each process's sentences
ttfSize = [28, 30, 35, 40, 45, 50, 55, 60, 65]


def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to ``block`` in place and return it.

    One integer in ``[0, level]`` is drawn per element; elements that draw 0
    are set to 0 and elements that draw ``level`` are set to 255.
    """
    # np.random.randint excludes its upper bound, so the bound is level + 1;
    # with ``level`` as the bound the 255 branch could never trigger.
    ran = np.random.randint(0, level + 1, block.shape)
    salt = ran == 0
    pepper = ran == level
    block[salt] = 0
    block[pepper] = 255
    return block


def _addNoise(block):
    """Return a uint8 copy of ``block`` with Gaussian noise added.

    The noise standard deviation is a random integer in ``[2, 20)``; the
    result is clipped to [0, 255].
    """
    randValue = np.random.randn(*block.shape) * np.random.randint(2, 20)
    return np.clip(block + randValue, 0, 255).astype('uint8')


def _feather(block, height):
    """Gaussian-blur ``block``; taller text gets a stronger blur."""
    # determine the gaussian-blur std and kernel size:
    if height <= 30:
        bsz = 0.25
        ksz = 1  # was left undefined for small text
    elif height < 50:
        bsz = max(0.30, 0.5 + 0.1 * np.random.randn())
        ksz = 3
    else:
        bsz = max(0.5, 1.5 + 0.5 * np.random.randn())
        ksz = 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend ``obj`` into ``dst`` around ``center`` with cv2.MIXED_CLONE."""
    mask = 255 * np.ones(obj.shape, obj.dtype)
    return cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)


def _textSize(font, string):
    """Return the (width, height) extent of ``string`` drawn with ``font``."""
    if not string:
        return 0, 0
    # ImageFont.getsize was removed in Pillow 10; the right/bottom edges of
    # getbbox give the same extent measured from the drawing origin.
    if hasattr(font, 'getbbox'):
        right, bottom = font.getbbox(string)[2:]
        return right, bottom
    return font.getsize(string)


def _rander(rawbgi, string, bgr, point, font, get_color):
    """Draw ``string`` on ``rawbgi`` at ``point`` with PIL and degrade it.

    Args:
        rawbgi: image array to draw on.
        string: text to draw.
        bgr: unused; kept for interface compatibility.
        point: ``(column, row)`` of the top-left corner of the text.
        font: PIL ImageFont used for drawing.
        get_color: object providing ``sample_from_data(image)``.

    Returns:
        The image array with the noisy text region cloned back in.
    """
    bgi = Image.fromarray(rawbgi)
    draw = ImageDraw.Draw(bgi)
    curCol, curRow = point
    # NOTE(review): the caller passes a cv2.imread array (BGR) while the
    # sampler's parameter is documented as RGB -- confirm channel order.
    fg_col, bg_col = get_color.sample_from_data(rawbgi)
    fg_col = fg_col + np.random.randint(-3, 3, [1, 3])
    # The jitter can leave 0..255; clip and hand PIL plain ints.
    fill = tuple(int(c) for c in np.clip(fg_col, 0, 255).squeeze())
    draw.text((curCol, curRow), string, fill, font=font)
    width, height = _textSize(font, string)

    bgi = np.array(bgi)
    block = bgi[curRow:curRow + height, curCol:curCol + width, :]
    block = _addNoise(block)
    block = _feather(block, height)
    block = _addNoise(block)
    block = _addSaltNoise(block, 50)

    center = (curCol + curCol + width) // 2, (curRow + curRow + height) // 2
    return _seamlessClone(block, bgi, center)


def _paste(bgi, ttf, size, curRow, curCol, curText, cols, get_color):
    """Choose the final string for one line and render it if it is usable.

    With probability 1/10 (and room for at least 4 characters) the text is
    replaced by 10 random digits, and with probability 1/8 one of those
    digits is then replaced by a dot. Only 10-character strings with room
    for 10 characters of ``size`` pixels each are rendered.

    Returns:
        ``(image, string, width, height)``; ``string`` is ``''`` and the
        size is ``(0, 0)`` when nothing was drawn.
    """
    ttfont = ImageFont.truetype(ttf, size)
    maxNumText = math.floor((cols - curCol) / size)
    curText = curText.strip()

    # random digits
    shouldMaxNumTxt = 10
    if random.randint(0, 9) == 9 and maxNumText >= 4:
        curText = ''.join(str(random.randint(0, 9)) for _ in range(shouldMaxNumTxt))
        string = curText
        # random dot
        if random.randint(0, 7) == 7:
            dotInd = random.randint(1, shouldMaxNumTxt - 2)
            string = curText[:dotInd] + '.' + curText[dotInd + 1:]
    else:
        # The original also computed a 10-character slice here and then
        # overwrote it with the full text; only the final value is kept.
        string = curText

    # skip when there is no room for 10 characters or the text is too short
    if maxNumText < 10 or len(curText) < 10:
        string = ''

    if len(string) == 10:
        bgr = [random.randint(100, 254) for i in range(3)]
        bgi = _rander(bgi, string, bgr, (curCol, curRow), ttfont, get_color)
    else:
        string = ''

    # printed width and height
    width, height = _textSize(ttfont, string)
    return bgi, string, width, height


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append one ``<object>`` annotation for ``string`` to ``anno``.

    The object holds ``<name>text</name>``, ``<textContent>`` with the
    string, and a ``<bndbox>`` with xmin/ymin/xmax/ymax. Nothing is added
    for an empty string.
    """
    if not string:
        return
    body = doc.createElement('object')
    anno.appendChild(body)

    name = doc.createElement('name')
    name.appendChild(doc.createTextNode('text'))
    body.appendChild(name)

    content = doc.createElement('textContent')
    content.appendChild(doc.createTextNode(string))
    body.appendChild(content)

    bndbox = doc.createElement('bndbox')
    for tag, value in (('xmin', xminT), ('ymin', yminT),
                       ('xmax', xmaxT), ('ymax', ymaxT)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        bndbox.appendChild(node)
    body.appendChild(bndbox)


def paste(imgname, bgi, text, ttf, ttfRandom, get_color):
    """Fill one background image with text lines, top to bottom.

    Args:
        imgname: image file name recorded in the annotation.
        bgi: path of the background image.
        text: list of candidate strings.
        ttf: path of the font file.
        ttfRandom: font sizes to choose from.
        get_color: colour sampler forwarded to the renderer.

    Returns:
        ``(image, doc)``: the image array and the minidom annotation document.
    """
    path = bgi
    bgi = cv2.imread(path)
    if bgi is None:
        # cv2.imread signals failure by returning None.
        raise FileNotFoundError('cannot read background image {}'.format(path))
    rows, cols, depth = bgi.shape

    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    curTtfSize = random.randint(0, len(ttfRandom) - 1)

    # create the xml head: image name and size
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)
    imgNameNode = doc.createElement('imgName')
    imgNameNode.appendChild(doc.createTextNode(imgname))
    anno.appendChild(imgNameNode)
    sizeNode = doc.createElement('size')
    for tag, value in (('width', cols), ('height', rows), ('depth', depth)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        sizeNode.appendChild(node)
    anno.appendChild(sizeNode)

    # Cap the retries per row so an image too narrow for the chosen font
    # size cannot make the loop spin forever.
    maxTries = 50
    tries = 0
    while text and curRow + ttfRandom[curTtfSize] <= rows:
        # cur col point
        curCol = random.randint(0, cols - 1)
        # if curCol is beyond 0.9*cols, do not paste on this line
        if curCol < cols * 0.9:
            curText = random.choice(text)
            bgi, string, width, height = _paste(
                bgi, ttf, ttfRandom[curTtfSize], curRow, curCol, curText, cols, get_color)
            if not string and tries < maxTries:
                tries += 1
                continue
            _xml(doc, anno, string,
                 xminT=curCol, yminT=curRow,
                 xmaxT=curCol + width, ymaxT=curRow + height)
        tries = 0
        curRow += curRowInter
        curRow += ttfRandom[curTtfSize]
        # next interval and font size
        curRowInter = random.randint(3, 7)
        curTtfSize = random.randint(0, len(ttfRandom) - 1)
    return np.array(bgi), doc


def handle(text):
    """Worker: build one image and its XML annotation from a text group.

    ``text`` is an ``(index, texts)`` pair as produced by ``enumerate``.
    """
    ind, text = text
    pid = os.getpid()
    getcolor = get_color.ColorSample()

    # select one background image
    bgis = glob.glob(osp.join(bgiDir, '*.jpg'))
    if not bgis:
        raise FileNotFoundError('no *.jpg background images in {}'.format(bgiDir))
    bgi = random.choice(bgis)

    # select one font; its index is part of the output name
    ttfs = glob.glob(osp.join(gTtf, '*.ttf'))
    if not ttfs:
        raise FileNotFoundError('no *.ttf fonts in {}'.format(gTtf))
    curTtf = random.randint(0, len(ttfs) - 1)
    ttf = ttfs[curTtf]

    # random subset of the font sizes; the first size is always kept
    keep = [1] + [random.randint(0, 1) for i in range(len(ttfSize) - 1)]
    ttfRandom = [size for flag, size in zip(keep, ttfSize) if flag]

    imgname = '{}_{}_{}.jpg'.format(ind, pid, curTtf)
    bgi, doc = paste(imgname, bgi, text, ttf, ttfRandom, getcolor)

    # write the image and the XML to their output directories
    cv2.imwrite(osp.join(resultImgsDir, imgname), bgi)
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    # toprettyxml(encoding=...) returns bytes, so write them as-is.
    with open(xmlFileName, 'wb') as fxml:
        fxml.write(doc.toprettyxml(indent=" ", newl="\n", encoding="utf-8"))
    return


if __name__ == '__main__':
    with open(totalFile) as ftotal:
        total = [line.strip() for line in ftotal]
    numP = 30
    inter = math.ceil(len(total) / gBlockSize)
    totalSP = [total[i::inter] for i in range(inter)]

    print('begin')
    with mp.Pool(numP) as p:
        p.map(handle, enumerate(totalSP[:1000]))


