生成TFRecord文件完整代码实例

import os

import json

def get_annotation_dict(input_folder_path, word2number_dict):
    """Map every entry inside the class sub-folders to its integer label.

    ``input_folder_path`` is expected to contain one sub-folder per class.
    Each entry found inside a class folder is recorded under the label that
    ``word2number_dict`` assigns to that folder's name.

    Args:
        input_folder_path: Directory whose sub-folders are named after classes.
        word2number_dict: Mapping from class (folder) name to integer label.

    Returns:
        A dict mapping ``os.path.join(class_folder, entry_name)`` to the label.

    Raises:
        KeyError: If a non-empty sub-folder's name is not a key of
            ``word2number_dict``.
    """
    label_dict = {}
    # os.listdir returns entries in arbitrary order; sort so the result is
    # reproducible from run to run.
    for father_file in sorted(os.listdir(input_folder_path)):
        full_father_file = os.path.join(input_folder_path, father_file)
        # A stray file next to the class folders would make the inner
        # os.listdir raise, so only descend into real directories.
        if not os.path.isdir(full_father_file):
            continue
        for image_name in sorted(os.listdir(full_father_file)):
            label_dict[os.path.join(full_father_file, image_name)] = word2number_dict[father_file]
    return label_dict

def save_json(label_dict, json_path):
    """Serialize ``label_dict`` as JSON into the file at ``json_path``.

    The file is opened in write mode, so existing content is replaced.
    A confirmation message is printed once the file has been written.
    """
    # Use a separate name for the handle so the path argument stays intact.
    with open(json_path, 'w') as json_file:
        json.dump(label_dict, fp=json_file)
    print("label json file has been generated successfully!")

generate_annotation_json.py

总共有七种分类图片，类别的名称就是每个文件夹名称

generate_annotation_json.py用于生成图片标注字典label_dict：key是图片的完整路径，value是图片类别对应的数字编号（由图片所在文件夹的名称经word2number_dict映射得到），具体实例如下：

{

"/images/hangs/862e67a8-5bd9-41f1-8c6d-876a3cb270df.JPG": 6,

"/images/tags/adc264af-a76b-4477-9573-ac6c435decab.JPG": 3,

"/images/tags/fd231f5a-b42c-43ba-9e9d-4abfbaf38853.JPG": 3,

"/images/hangs/2e47d877-1954-40d6-bfa2-1b8e3952ebf9.jpg": 6,

"/images/tileds/a07beddc-4b39-4865-8ee2-017e6c257e92.png": 5,

 "/images/models/642015c8-f29d-4930-b1a9-564f858c40e5.png": 4

}

generate_tfrecord.py

import os
import tensorflow as tf
import io
from PIL import Image
from generate_annotation_json import get_annotation_dict

# Command-line flags for this script. Each DEFINE_string call is given the
# flag name, its default value, and a help string.
flags = tf.app.flags
# Directory of images; main() passes it to get_annotation_dict().
flags.DEFINE_string('images_dir',
                    '/data2/raycloud/jingxiong_datasets/six_classes/images',
                    'Path to image(directory)')
# Path of an annotation JSON file. main() in this file does not read this flag.
flags.DEFINE_string('annotation_path',
                     '/data1/humaoc_file/classify/data/annotations/annotations.json',
                    'Path to annotation')
# Output path of the TFRecord file; main() passes it to generate_tfrecord().
flags.DEFINE_string('record_path',
                    '/data1/humaoc_file/classify/data/train_tfrecord/train.record',
                    'Path to TFRecord')
FLAGS = flags.FLAGS

def int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def process_image_channels(image):
    """Return ``image`` as an RGB image plus a flag telling whether it changed.

    Args:
        image: Image object exposing ``mode``, ``split()`` and ``convert()``.

    Returns:
        A tuple ``(rgb_image, process_flag)``. ``process_flag`` is ``False``
        when the input was already in RGB mode and is returned untouched,
        ``True`` when a new RGB image had to be produced.
    """
    if image.mode == 'RGBA':
        # Four-channel image (e.g. PNG): keep the colour bands, drop alpha.
        red, green, blue, _alpha = image.split()
        return Image.merge("RGB", (red, green, blue)), True
    if image.mode != 'RGB':
        # Any other non-RGB mode goes through the generic conversion.
        return image.convert("RGB"), True
    # Already RGB: nothing to do.
    return image, False

def process_image_reshape(image, resize):
    """Resize ``image`` so its shorter side equals ``resize``, keeping the aspect ratio.

    Args:
        image: Image object exposing ``size`` (width, height) and ``resize()``.
        resize: Target length in pixels for the shorter side, or ``None`` to
            leave the image unchanged.

    Returns:
        The resized image, or the original ``image`` when ``resize`` is None.
    """
    if resize is None:
        return image
    width, height = image.size
    # Compute both target dimensions from the ORIGINAL width/height. Overwriting
    # one of them before scaling the other would make the scale factor 1 and
    # distort the aspect ratio.
    if width > height:
        new_size = (int(width * resize / height), resize)
    else:
        new_size = (resize, int(height * resize / width))
    # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias,
    # which newer Pillow releases no longer provide.
    return image.resize(new_size, Image.LANCZOS)

def create_tf_example(image_path, label, resize=None):
    """Build a ``tf.train.Example`` for one labelled image file.

    The image is read as raw bytes, converted to RGB and optionally resized.
    Whenever the stored pixels differ from the file on disk, or the source
    file is not a JPEG, the image is re-encoded as JPEG. This keeps the
    ``image/encoded`` payload consistent with the ``image/format``,
    ``image/height`` and ``image/width`` features.

    Args:
        image_path: Path of the image file to read.
        label: Integer class label stored under ``image/class/label``.
        resize: Optional target length of the shorter side, forwarded to
            ``process_image_reshape``; ``None`` keeps the original size.

    Returns:
        A ``tf.train.Example`` with the features ``image/encoded``,
        ``image/format``, ``image/class/label``, ``image/height`` and
        ``image/width``.
    """
    # Read the file in binary mode.
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encode_jpg = fid.read()
    image = Image.open(io.BytesIO(encode_jpg))
    # Capture the source format now: images produced by later conversions do
    # not carry the format of the original file.
    source_format = image.format
    # Convert four-channel PNGs and other non-RGB modes to RGB.
    image, process_flag = process_image_channels(image)
    # Optionally resize.
    image = process_image_reshape(image, resize)
    if process_flag or resize is not None or source_format != 'JPEG':
        bytes_io = io.BytesIO()
        image.save(bytes_io, format='JPEG')
        # Replace the raw file bytes so the processed image is what gets stored.
        encode_jpg = bytes_io.getvalue()
    width, height = image.size
    tf_example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': bytes_feature(encode_jpg),
                'image/format': bytes_feature(b'jpg'),
                'image/class/label': int64_feature(label),
                'image/height': int64_feature(height),
                'image/width': int64_feature(width)
            }
        ))
    return tf_example

def generate_tfrecord(annotation_dict, record_path, resize=None):
    """Write one serialized ``tf.train.Example`` per image into a TFRecord file.

    Args:
        annotation_dict: Mapping from image path to integer label.
        record_path: Path of the TFRecord file to create.
        resize: Optional target length of the shorter side, forwarded to
            ``create_tf_example``; ``None`` keeps the original size.

    Image paths that do not exist are reported and skipped. Progress is
    printed every 100 written examples, followed by a final summary.
    """
    num_tf_example = 0
    # The context manager closes the writer even if an image fails to process.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for image_path, label in annotation_dict.items():
            # Check the file system itself; constructing a GFile handle does
            # not tell whether the file exists.
            if not tf.gfile.Exists(image_path):
                print("{} does not exist".format(image_path))
                continue
            tf_example = create_tf_example(image_path, label, resize)
            # SerializeToString() turns the Example proto into the bytes
            # stored as one record.
            writer.write(tf_example.SerializeToString())
            num_tf_example += 1
            if num_tf_example % 100 == 0:
                print("Create %d TF_Example" % num_tf_example)
    print("{} tf_examples has been created successfully, which are saved in {}".format(num_tf_example, record_path))

def main(_):
    """Label every image under ``FLAGS.images_dir`` and write the TFRecord.

    The positional argument is ignored. The image directory and the output
    path are taken from the command-line flags.
    """
    # Class (folder) names listed in label order: the position of each name
    # is its integer label.
    class_names = [
        "combinations",
        "details",
        "sizes",
        "tags",
        "models",
        "tileds",
        "hangs",
    ]
    word2number_dict = {name: index for index, name in enumerate(class_names)}
    # Directory holding the images.
    source_dir = FLAGS.images_dir
    # Destination of the generated TFRecord file.
    output_path = FLAGS.record_path
    generate_tfrecord(get_annotation_dict(source_dir, word2number_dict), output_path)

# When executed as a script, hand control to TensorFlow's app runner
# (presumably it parses the flags defined above and then calls main()).
if __name__ == '__main__':
    tf.app.run()

　总结：1.制作数据（图片路径和标签）

　　　　2.利用tf.python_io.TFRecordWriter创建一个writer，就是我们TFrecord生成器

　　　　3.遍历数据集，以二进制形式打开图片

　　　　4.利用tf.train.Example将图片，图片格式，标签和长宽进行保存

　　　　5.最后利用writer.write(tf_example.SerializeToString())将tf.train.Example序列化后写入TFRecord文件即可

参考链接：https://www.jianshu.com/p/b480e5fcb638

生成TFRecord文件完整代码实例的更多相关文章

Java生成MD5加密字符串代码实例
这篇文章主要介绍了Java生成MD5加密字符串代码实例,本文对MD5的作用作了一些介绍,然后给出了Java下生成MD5加密字符串的代码示例,需要的朋友可以参考下 (1)一般使用的数据库中都会保存用 ...
Extjs的GridPanel分页前后台完整代码实例
第一次写文章啊,有些冲动.最近在公司学习Extjs,做了一个分页的小实例和大家分享. 1.首先编写paging-grid.js文件,这是我在网上参考的例子改写的,大同小异. Ext.onReady(f ...
python_文件操作代码实例
"""提示:代码中的内容均被注释,请参考,切勿照搬""" 1 #文件的打开和关闭 ''' 文件对象 = open('文件名','使用方式') ...
C#生成漂亮验证码完整代码类
using System;using System.Web;using System.Drawing;using System.Security.Cryptography; namespace Dot ...
django 生成csv文件重要代码
import csv from django.http import HttpResponse # Number of unruly passengers each year 1995 - 2005. ...
运行pyqt4生成py文件增加代码
if __name__ == "__main__": import sys app = QtGui.QApplication(sys.argv) Form = QtGui.QWid ...
TFRecord文件的读写
前言在跑通了官网的mnist和cifar10数据之后,笔者尝试着制作自己的数据集,并保存,读入,显示. TensorFlow可以支持cifar10的数据格式, 也提供了标准的TFRecord 格式,而 ...
Keil提示premature end of file错误无法生成HEX文件
今天舍友在使用Keil UV4的时候遇到一个问题:Keil提示premature end of file,无法生成hex文件. 代码是没有错误的.那么问题就出在设置上面了. 百度了一圈,发现很少人解答 ...
java自动生成entity文件
网上关于自动生成entity文件的代码很多,看了很多代码后,在先辈们的基础上再完善一些功能(指定多个表,全部表). 为了使用方便所以把两个类写在一个java文件中,所以大家可以直接拿这个java文件, ...

随机推荐

Python--day48--ORM框架SQLAlchemy之子查询
一定要把第一次查询的结果作为一个结果再进行查询:代码后面加.subquery()标明是子查询 1,简单的子查询 #select * from (select * from tb) as B q1 = ...
Python--day47--内容回顾
1.什么是数据库
laravel post提交数据时显示异常
post提交数据时候显示如下: The page has expired due to inactivity. Please refresh and try again 这是由于在laravel框架中 ...
带你认识“货真价实”的P2P网贷风控
文/杨帆说起P2P,多数金融圈内人士已经并不陌生.国内现有近千家的P2P网贷平台,动辄打出高息诱人的收益率宣传口号以及眼花缭乱的安全承诺.但是在这些浮华表面的背后,关于P2P的风控很多人仍然是一 ...
python基础六之编码
python中编码的特点: 1,各个编码之间的二进制是不能互相识别的,会产生乱码 2,文件的储存和传输是不能用Unicode的 python3的编码在python3中字符串在内存中是用Unicode ...
前端小白-----ES6之字符串模板
前言:只要坚持就会胜利--Coldfront-小白菜既是总结也是一种分享分享内容:ES6 字符串模板案例1:var Musics=[{music:"六月的雨",singer: ...
5.29 SD省队培训D1
5.29 SD省队培训D1 自闭的一天 T1 梦批糼先咕一咕(两天之内一定补上) T2 等你哈苏德继续咕(一星期之内补上) T3喜欢最最痛四十分做法: 首先,我们发现同一个点加两条额外边是一件非 ...
dotnet 启动 JIT 多核心编译提升启动性能
用2分钟提升十分之一的启动性能,通过在桌面程序启动 JIT 多核心编译提升启动性能在 dotnet 可以通过让 JIT 进行多核心编译提升软件的启动性能,在默认托管的 ASP.NET 程序是开启的, ...
dotnet 动态代理魔法书
看到标题的小伙伴是不是想知道什么是魔法书,如果你需要写一段代码,这段代码是在做神奇的业务,只有你查询到了魔法书你才能找到这个对象,同时你还需要实现自己的接口,通过自己实现的接口调用才能用到有趣的方法 ...
CodeForces - 617E XOR and Favorite Number （莫队+前缀和）
Bob has a favorite number k and ai of length n. Now he asks you to answer m queries. Each query is g ...

生成TFRecord文件完整代码实例

生成TFRecord文件完整代码实例的更多相关文章

随机推荐

热门专题