VOC数据集生成代码使用说明

# split.py：输入为图像（images）及其对应的标签 txt 文件；txt 中每行包含 8 个以逗号分隔的坐标值（四个顶点的 x、y）。
 
import os
import numpy as np
import math
import cv2 as cv
import imageio
 
# ---------------------------------------------------------------------------
# split.py
#
# For every *.jpg / *.png image in `path`:
#   1. resize it so the short side is 600 px, unless that would make the long
#      side exceed 1200 px, in which case the long side is set to 1200 px;
#   2. rescale the matching ground-truth quadrilaterals (8 comma-separated
#      values per line: x0,y0,x1,y1,x2,y2,x3,y3) to the resized image;
#   3. write the resized image to  re_image/<stem>.jpg  and the rescaled
#      boxes to  label_tmp/<stem>.txt  as tab-separated lines
#      "tianchi<TAB>x0<TAB>y0<TAB>...<TAB>x3<TAB>y3", with the four corners
#      ordered by ascending x.
# ---------------------------------------------------------------------------

# Directory holding the source images.
path = '/home/chendali1/Gsj/prepare_training_data/ICDAR/images_train/'
# Directory holding one <stem>.txt ground-truth file per image.
gt_path = '/home/chendali1/Gsj/prepare_training_data/ICDAR/result_train/'
# Output directory for resized images.
out_path = 're_image'
# Output directory for rescaled label files.
label_path = 'label_tmp'

# Create both output directories once, up front, instead of re-checking
# inside the per-box loop.
for _out_dir in (out_path, label_path):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

files = os.listdir(path)
files.sort()
for file in files:
    _, basename = os.path.split(file)
    if basename.lower().split('.')[-1] not in ['jpg', 'png']:
        continue
    stem, ext = os.path.splitext(basename)

    gt_file = os.path.join(gt_path, stem + '.txt')
    img_path = os.path.join(path, file)
    print(img_path)
    img = cv.imread(img_path)
    if img is None:
        print('****************************')
        print('Image ' + img_path + ' may be a bad picture!')
        print('****************************')
        # Fallback: treat the file as a GIF and read it with imageio.
        # NOTE(review): this renames the file inside the source dataset, and
        # because '.gif' is not in the extension filter above, a later re-run
        # will skip this image. Confirm whether the rename is still required
        # by the installed imageio version.
        newname = os.path.join(path, stem + '.gif')
        os.rename(img_path, newname)
        img_path = newname
        print(img_path)
        print('Try read with imageio.')
        try:
            gif = imageio.mimread(img_path)
        except (OSError, ValueError, RuntimeError):
            # imageio signals an unreadable file by raising, not by
            # returning None.
            gif = None
        if not gif:
            print('****************************')
            print("Image " + img_path + " can't be read!")
            print('****************************')
            # Nothing usable was decoded; skip this image instead of
            # crashing on gif[0] below.
            continue

        print('Read success!')
        # Only the first frame is used; imageio yields RGB, OpenCV wants BGR.
        img = cv.cvtColor(gif[0], cv.COLOR_RGB2BGR)

    img_size = img.shape
    im_size_min = np.min(img_size[0:2])
    im_size_max = np.max(img_size[0:2])

    # Scale the short side to 600 px, but never let the long side exceed
    # 1200 px.
    im_scale = float(600) / float(im_size_min)
    if np.round(im_scale * im_size_max) > 1200:
        im_scale = float(1200) / float(im_size_max)
    re_im = cv.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv.INTER_LINEAR)
    re_size = re_im.shape
    cv.imwrite(os.path.join(out_path, stem) + '.jpg', re_im)

    with open(gt_file, 'r') as f:
        lines = f.readlines()

    records = []
    for line in lines:
        splitted_line = line.strip().lower().split(',')
        if len(splitted_line) < 8:
            # Blank lines (e.g. a trailing newline) are ignored silently;
            # anything else that is too short is reported and skipped.
            if line.strip():
                print('Skip malformed label line in ' + gt_file + ': ' + line.strip())
            continue

        # Rescale each coordinate by the actual resized/original size ratio
        # and truncate to an integer pixel position.
        pt_x = np.array([int(float(v) / img_size[1] * re_size[1])
                         for v in splitted_line[0:8:2]], dtype=float)
        pt_y = np.array([int(float(v) / img_size[0] * re_size[0])
                         for v in splitted_line[1:8:2]], dtype=float)

        # Order the four corners by ascending x, keeping each y paired with
        # its x. 1-D arrays keep the result 1-D, so int() below always
        # receives a scalar.
        order = np.argsort(pt_x)
        pt_x = pt_x[order]
        pt_y = pt_y[order]

        fields = ['tianchi']
        for x, y in zip(pt_x, pt_y):
            fields.append(str(int(x)))
            fields.append(str(int(y)))
        records.append('\t'.join(fields) + '\n')

    # Write the label file once per image in 'w' mode so re-running the
    # script does not append duplicate boxes. As before, no file is created
    # for an image that has no boxes.
    if records:
        with open(os.path.join(label_path, stem) + '.txt', 'w') as f:
            f.writelines(records)

# ToVoc.py：split.py 执行完成后，直接运行本脚本即可生成 VOC 格式的数据集文件。
from xml.dom.minidom import Document
import cv2
import os
import glob
import shutil
import numpy as np
 
def generate_xml(name, lines, img_size, class_sets, doncateothers=True):
    """Build a VOC-style annotation document for one image.

    Unlike standard VOC (xmin/ymin/xmax/ymax), each object's ``bndbox``
    holds the four quadrilateral corners as ``x0,y0 ... x3,y3``.

    Args:
        name: Image stem; the ``filename`` element is ``name + '.jpg'``.
        lines: Label lines, whitespace-separated:
            ``<class> x0 y0 x1 y1 x2 y2 x3 y3``. Blank lines are ignored.
        img_size: Image shape as ``(height, width[, channels])``. When the
            channel entry is missing, a depth of 1 is written.
        class_sets: Collection of accepted (lower-case) class names.
        doncateothers: Whether classes outside ``class_sets`` are mapped to
            ``'dontcare'`` rather than dropped outright. Since ``'dontcare'``
            objects are never written, both settings currently yield the
            same output.

    Returns:
        ``(doc, objs)``: the ``xml.dom.minidom.Document`` and a list with one
        dict per written object, with keys ``'class'``, ``'box'`` (float
        array of the 8 coordinates), ``'truncation'``, ``'difficult'`` and
        ``'occlusion'``.

    Raises:
        IndexError: If a non-blank line of an accepted class has fewer than
            8 coordinate fields.
        ValueError: If a coordinate field is not numeric.
    """
    doc = Document()

    def append_xml_node_attr(child, parent=None, text=None):
        # Create <child>, optionally with a text node, and attach it to
        # `parent` (or to the document root when no parent is given).
        ele = doc.createElement(child)
        if text is not None:
            ele.appendChild(doc.createTextNode(text))
        (doc if parent is None else parent).appendChild(ele)
        return ele

    img_name = name + '.jpg'
    # A 2-D shape (single-channel image) has no channel entry.
    depth = img_size[2] if len(img_size) > 2 else 1

    # create header
    annotation = append_xml_node_attr('annotation')
    append_xml_node_attr('folder', parent=annotation, text='tianchi')
    append_xml_node_attr('filename', parent=annotation, text=img_name)
    source = append_xml_node_attr('source', parent=annotation)
    append_xml_node_attr('database', parent=source, text='coco_text_database')
    append_xml_node_attr('annotation', parent=source, text='tianchi')
    append_xml_node_attr('image', parent=source, text='tianchi')
    append_xml_node_attr('flickrid', parent=source, text='')
    owner = append_xml_node_attr('owner', parent=annotation)
    append_xml_node_attr('name', parent=owner, text='ms')
    size = append_xml_node_attr('size', annotation)
    append_xml_node_attr('width', size, str(img_size[1]))
    append_xml_node_attr('height', size, str(img_size[0]))
    append_xml_node_attr('depth', size, str(depth))
    append_xml_node_attr('segmented', parent=annotation, text='')

    # create objects
    objs = []
    for line in lines:
        splitted_line = line.strip().lower().split()
        if not splitted_line:
            # Blank line: nothing to annotate.
            continue
        cls = splitted_line[0]
        if not doncateothers and cls not in class_sets:
            continue
        cls = 'dontcare' if cls not in class_sets else cls
        if cls == 'dontcare':
            continue

        # The label coordinates are shifted by +1 before being written.
        x0, y0, x1, y1, x2, y2, x3, y3 = [
            int(float(splitted_line[i]) + 1) for i in range(1, 9)
        ]

        # No occlusion/truncation information exists in the labels.
        occlusion = int(0)
        truncation = float(0)
        difficult = 1 if _is_hard(cls, truncation, occlusion, x1, y1, x2, y2) else 0
        truncted = 0 if truncation < 0.5 else 1

        obj = append_xml_node_attr('object', parent=annotation)
        append_xml_node_attr('name', parent=obj, text=cls)
        append_xml_node_attr('pose', parent=obj, text='none')
        append_xml_node_attr('truncated', parent=obj, text=str(truncted))
        append_xml_node_attr('difficult', parent=obj, text=str(int(difficult)))
        bb = append_xml_node_attr('bndbox', parent=obj)
        # One uniquely named tag per corner coordinate; the third and fourth
        # corners use x2/y2 and x3/y3 so they do not collide with x1/y1.
        corner_values = (('x0', x0), ('y0', y0), ('x1', x1), ('y1', y1),
                         ('x2', x2), ('y2', y2), ('x3', x3), ('y3', y3))
        for tag, value in corner_values:
            append_xml_node_attr(tag, parent=bb, text=str(value))

        objs.append({
            'class': cls,
            'box': np.asarray([x0, y0, x1, y1, x2, y2, x3, y3], dtype=float),
            'truncation': truncation,
            'difficult': difficult,
            'occlusion': occlusion,
        })

    return doc, objs
 
def _is_hard(cls, truncation, occlusion, x1, y1, x2, y2):
    hard = False
    if y2 - y1 < 25 and occlusion >= 2:
        hard = True
        return hard
    if occlusion >= 3:
        hard = True
        return hard
    if truncation > 0.8:
        hard = True
        return hard
    return hard
 
def build_voc_dirs(outdir):
    """Create the VOC directory skeleton under ``outdir``.

    Directories that already exist are left untouched.

    Returns:
        A tuple ``(annotations_dir, jpeg_images_dir, imagesets_main_dir)``.
    """
    annotations_dir = os.path.join(outdir, 'Annotations')
    imagesets_dir = os.path.join(outdir, 'ImageSets')
    main_dir = os.path.join(outdir, 'ImageSets', 'Main')
    jpeg_dir = os.path.join(outdir, 'JPEGImages')

    # Same creation order as the layout listing: root first, then children.
    wanted = [
        outdir,
        annotations_dir,
        imagesets_dir,
        os.path.join(outdir, 'ImageSets', 'Layout'),
        main_dir,
        os.path.join(outdir, 'ImageSets', 'Segmentation'),
        jpeg_dir,
        os.path.join(outdir, 'SegmentationClass'),
        os.path.join(outdir, 'SegmentationObject'),
    ]
    for folder in wanted:
        if not os.path.exists(folder):
            os.makedirs(folder)

    return annotations_dir, jpeg_dir, main_dir
 
# Script entry point: convert label_tmp/*.txt + re_image/*.jpg into a
# VOC2007-style tree (Annotations, JPEGImages, ImageSets/Main) under
# TEXTVOC/VOC2007.
if __name__ == '__main__':
    _outdir = 'TEXTVOC/VOC2007'
    _draw = bool(0)
    _dest_label_dir, _dest_img_dir, _dest_set_dir = build_voc_dirs(_outdir)
    _doncateothers = bool(1)
    for dset in ['train']:
        _labeldir = 'label_tmp'
        _imagedir = 're_image'
        class_sets = ('tianchi', 'dontcare')
        class_sets_dict = dict((k, i) for i, k in enumerate(class_sets))
        # Number of written objects per class, printed as a summary below.
        allclasses = {}
        # One "<class>_<dset>.txt" file per class plus the overall image list.
        fs = [open(os.path.join(_dest_set_dir, cls + '_' + dset + '.txt'), 'w') for cls in class_sets]
        ftrain = open(os.path.join(_dest_set_dir, dset + '.txt'), 'w')

        try:
            files = glob.glob(os.path.join(_labeldir, '*.txt'))
            files.sort()
            for file in files:
                _, basename = os.path.split(file)
                stem, ext = os.path.splitext(basename)
                with open(file, 'r') as label_f:
                    lines = label_f.readlines()
                img_file = os.path.join(_imagedir, stem + '.jpg')

                print(img_file)
                img = cv2.imread(img_file)
                if img is None:
                    # cv2.imread returns None for a missing or unreadable
                    # file; skip it so it is not listed in the image sets.
                    print('Image ' + img_file + ' cannot be read, skipped.')
                    continue
                img_size = img.shape

                doc, objs = generate_xml(stem, lines, img_size, class_sets=class_sets, doncateothers=_doncateothers)

                cv2.imwrite(os.path.join(_dest_img_dir, stem + '.jpg'), img)
                xmlfile = os.path.join(_dest_label_dir, stem + '.xml')
                with open(xmlfile, 'w') as xml_f:
                    xml_f.write(doc.toprettyxml(indent='    '))

                ftrain.writelines(stem + '\n')

                cls_in_image = set([o['class'] for o in objs])

                # Count every object, starting at 1 for the first occurrence.
                for obj in objs:
                    cls = obj['class']
                    allclasses[cls] = allclasses.get(cls, 0) + 1

                # VOC convention: "<stem> 1" when the class is present in the
                # image, "<stem> -1" when it is not.
                for cls in cls_in_image:
                    if cls in class_sets:
                        fs[class_sets_dict[cls]].writelines(stem + ' 1\n')
                for cls in class_sets:
                    if cls not in cls_in_image:
                        fs[class_sets_dict[cls]].writelines(stem + ' -1\n')
        finally:
            # Close (and thereby flush) every list file before they are copied
            # below. A bare generator expression would never run the close()
            # calls.
            for set_f in fs:
                set_f.close()
            ftrain.close()

        print('~~~~~~~~~~~~~~~~~~~')
        print(allclasses)
        print('~~~~~~~~~~~~~~~~~~~')
        # The same image list is reused for the val and trainval splits.
        shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'val.txt'))
        shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'trainval.txt'))
        for cls in class_sets:
            shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                            os.path.join(_dest_set_dir, cls + '_trainval.txt'))
            shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                            os.path.join(_dest_set_dir, cls + '_val.txt'))

VOC数据集生成代码使用说明的更多相关文章

【Detection】物体识别-制作PASCAL VOC数据集
PASCAL VOC数据集 PASCAL VOC为图像识别和分类提供了一整套标准化的优秀的数据集,从2005年到2012年每年都会举行一场图像识别challenge 默认为20类物体 1 数据集结构 ...
搭建 MobileNet-SSD 开发环境并使用 VOC 数据集训练 TensorFlow 模型
原文地址:搭建 MobileNet-SSD 开发环境并使用 VOC 数据集训练 TensorFlow 模型 0x00 环境 OS: Ubuntu 1810 x64 Anaconda: 4.6.12 P ...
在Ubuntu内制作自己的VOC数据集
一.VOC数据集的简介 PASCAL VOC为图像的识别和分类提供了一整套标准化的优秀数据集,基本上就是目标检测数据集的模板.现在有VOC2007,VOC2012.主要有20个类.而现在主要的模型评估 ...
【工具引入】uiautomatorviewer 查找元素后自动生成代码
缘起公司部门调整PC部门和无线部门合并,原本负责主站PC端自动化的同事需要马上上手安卓,IOS自动化.对于初次接触移动端的测试者来说,跨度还是有点大的.加之人员有些变动,不得不搞个工具降低学习成本, ...
PASCAL VOC数据集分析（转）
PASCAL VOC数据集分析 PASCAL VOC为图像识别和分类提供了一整套标准化的优秀的数据集,从2005年到2012年每年都会举行一场图像识别challenge. 本文主要分析PASCAL V ...
自动化工具制作PASCAL VOC 数据集
自动化工具制作PASCAL VOC 数据集 1. VOC的格式 VOC主要有三个重要的文件夹:Annotations.ImageSets和JPEGImages JPEGImages 文件夹该文件 ...
Eclipse 使用mybatis generator插件自动生成代码
Eclipse 使用mybatis generator插件自动生成代码标签: mybatis 2016-12-07 15:10 5247人阅读评论(0) 收藏举报 .embody{ paddin ...
目标检测：keras-yolo3之制作VOC数据集训练指南
制作VOC数据集指南 Github:https://github.com/hyhouyong/keras-yolo3 LabelImg标注工具(windows环境下):https://github.c ...
VOC数据集目标检测
最近在做与目标检测模型相关的工作,很多都要求VOC格式的数据集. PASCAL VOC挑战赛 (The PASCAL Visual Object Classes )是一个世界级的计算机视觉挑战赛, P ...

随机推荐

python编程之 json包
1,json是什么? JSON(JavaScript Object Notation) 是一种轻量级的数据交换格式,易于人阅读和编写. 我的理解就是:json是一种统一的格式化的文件,比如,一个jso ...
ovs常用操作
1.添加网桥:ovs-vsctl add-br 交换机名 2.删除网桥:ovs-vsctl del-br 交换机名 3.添加端口:ovs-vsctl add-port 交换机名端口名(网卡名) 4. ...
Error "Client wants topic A to have B, but our version has C. Dropping connection."
ROS problem 出现这个问题的原因是话题上的消息类型和订阅节点指定的消息类型不匹配.
CF1110D Jongmah
题目地址:CF1110D Jongmah 约定:称形如 \([a-1,a,a+1]\) 这样的三元组为关于 \(a\) 的顺子,形如 \([a,a,a]\) 这样的三元组为关于 \(a\) 的对子. ...
Python3-操作系统发展史
操作系统发展史手工操作 —— 穿孔卡片批处理 —— 磁带存储多道程序系统操作系统的作用手工操作 —— 穿孔卡片 1946年第一台计算机诞生--20世纪50年代中期,计算机工作还在采用手工操作 ...
qunee 开发清新、高效的拓扑图组件 http://www.qunee.com/
qunee 开发清新.高效的拓扑图组件 http://www.qunee.com/ RoadFlow: http://www.cqroad.cn/ 村暖花开
DHCP Server (推荐使用Windows)
一些小的服务 windows做的比linux好 DHCP服务概述: 名称:DHCP (Dynamic Host Configuration Protocol --动态主机配置协议) 功能:是一个局域网 ...
CANopen--实现双电机速度同步
图1 将上图图中左边的电机和右边的电机进行速度同步,右边的电机同步左边的电机速度.这里需要知道Copley的驱动中的速度环的输入输出情况.如下图所示,速度环限制器接收速度命令信号,经限制后,产生一限制 ...
模拟电路学习之NMOS开关电路1
elasticsearch中的java.io.IOException: 远程主机强迫关闭了一个现有的连接
[2018-07-31T14:29:41,289][WARN ][o.e.x.s.t.n.SecurityNetty4HttpServerTransport] [9rTGh-y] caught exc ...

VOC数据集生成代码使用说明

VOC数据集生成代码使用说明的更多相关文章

随机推荐

热门专题