densenet tensorflow 中文汉字手写识别
densenet 中文汉字手写识别,代码如下:
import tensorflow as tf
import os
import random
import math
import tensorflow.contrib.slim as slim
import time
import logging
import numpy as np
import pickle
from PIL import Image import tensorflow as tf
#from tflearn.layers.conv import global_avg_pool
from tensorflow.contrib.layers import batch_norm, flatten
from tensorflow.contrib.framework import arg_scope
import numpy as np # Hyperparameter
growth_k = 12
nb_block = 2 # how many (dense block + Transition Layer) ?
init_learning_rate = 1e-4
epsilon = 1e-8 # AdamOptimizer epsilon
dropout_rate = 0.2 # Momentum Optimizer will use
nesterov_momentum = 0.9
weight_decay = 1e-4 # Label & batch_size
class_num = 3755
batch_size = 128 total_epochs = 50 def conv_layer(input, filter, kernel, stride=1, layer_name="conv"):
with tf.name_scope(layer_name):
network = tf.layers.conv2d(inputs=input, filters=filter, kernel_size=kernel, strides=stride, padding='SAME')
return network def Global_Average_Pooling(x, stride=1):
#It is global average pooling without tflearn
width = np.shape(x)[1]
height = np.shape(x)[2]
pool_size = [width, height]
return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride) # The stride value does not matter
"""
return global_avg_pool(x, name='Global_avg_pooling')
# But maybe you need to install h5py and curses or not
""" def Batch_Normalization(x, training, scope):
with arg_scope([batch_norm],
scope=scope,
updates_collections=None,
decay=0.9,
center=True,
scale=True,
zero_debias_moving_mean=True) :
return tf.cond(training,
lambda : batch_norm(inputs=x, is_training=training, reuse=None),
lambda : batch_norm(inputs=x, is_training=training, reuse=True)) def Drop_out(x, rate, training) :
return tf.layers.dropout(inputs=x, rate=rate, training=training) def Relu(x):
return tf.nn.relu(x) def Average_pooling(x, pool_size=[2,2], stride=2, padding='VALID'):
return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) def Max_Pooling(x, pool_size=[3,3], stride=2, padding='VALID'):
return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) def Concatenation(layers) :
return tf.concat(layers, axis=3) def Linear(x) :
return tf.layers.dense(inputs=x, units=class_num, name='linear') class DenseNet():
def __init__(self, x, nb_blocks, filters, training):
self.nb_blocks = nb_blocks
self.filters = filters
self.training = training
self.model = self.Dense_net(x) def bottleneck_layer(self, x, scope):
# print(x)
with tf.name_scope(scope):
x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
x = Relu(x)
x = conv_layer(x, filter=4 * self.filters, kernel=[1,1], layer_name=scope+'_conv1')
x = Drop_out(x, rate=dropout_rate, training=self.training) x = Batch_Normalization(x, training=self.training, scope=scope+'_batch2')
x = Relu(x)
x = conv_layer(x, filter=self.filters, kernel=[3,3], layer_name=scope+'_conv2')
x = Drop_out(x, rate=dropout_rate, training=self.training) # print(x) return x def transition_layer(self, x, scope):
with tf.name_scope(scope):
x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
x = Relu(x)
x = conv_layer(x, filter=self.filters, kernel=[1,1], layer_name=scope+'_conv1')
x = Drop_out(x, rate=dropout_rate, training=self.training)
x = Average_pooling(x, pool_size=[2,2], stride=2) return x def dense_block(self, input_x, nb_layers, layer_name):
with tf.name_scope(layer_name):
layers_concat = list()
layers_concat.append(input_x) x = self.bottleneck_layer(input_x, scope=layer_name + '_bottleN_' + str(0)) layers_concat.append(x) for i in range(nb_layers - 1):
x = Concatenation(layers_concat)
x = self.bottleneck_layer(x, scope=layer_name + '_bottleN_' + str(i + 1))
layers_concat.append(x) x = Concatenation(layers_concat) return x def Dense_net(self, input_x):
x = conv_layer(input_x, filter=2 * self.filters, kernel=[7,7], stride=2, layer_name='conv0')
x = Max_Pooling(x, pool_size=[3,3], stride=2) for i in range(self.nb_blocks) :
# 6 -> 12 -> 48
x = self.dense_block(input_x=x, nb_layers=4, layer_name='dense_'+str(i))
x = self.transition_layer(x, scope='trans_'+str(i)) """
x = self.dense_block(input_x=x, nb_layers=6, layer_name='dense_1')
x = self.transition_layer(x, scope='trans_1') x = self.dense_block(input_x=x, nb_layers=12, layer_name='dense_2')
x = self.transition_layer(x, scope='trans_2') x = self.dense_block(input_x=x, nb_layers=48, layer_name='dense_3')
x = self.transition_layer(x, scope='trans_3')
""" x = self.dense_block(input_x=x, nb_layers=32, layer_name='dense_final') # 100 Layer
x = Batch_Normalization(x, training=self.training, scope='linear_batch')
x = Relu(x)
x = Global_Average_Pooling(x)
x = flatten(x)
x = Linear(x) # x = tf.reshape(x, [-1, 10])
return x def build_graph(top_k):
# with tf.device('/cpu:0'):
# keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
# label = tf.placeholder(tf.float32, shape=[None, 10])
labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
training_flag = tf.placeholder(tf.bool)
logits = DenseNet(x=images, nb_blocks=nb_block, filters=growth_k, training=training_flag).model
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
# loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)) """
l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=nesterov_momentum, use_nesterov=True)
train = optimizer.minimize(cost + l2_loss * weight_decay)
In paper, use MomentumOptimizer
init_learning_rate = 0.1
but, I'll use AdamOptimizer
""" global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False)
rate = tf.train.exponential_decay(2e-4, global_step, decay_steps=2000, decay_rate=0.97, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=rate, epsilon=epsilon)
train_op = optimizer.minimize(loss, global_step=global_step) accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32)) probabilities = logits
tf.summary.scalar('loss', loss)
tf.summary.scalar('accuracy', accuracy)
merged_summary_op = tf.summary.merge_all()
predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
accuracy_in_top_k = tf.reduce_mean(tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32)) return {'images': images,
'labels': labels,
'training_flag': training_flag,
'top_k': top_k,
'global_step': global_step,
'train_op': train_op,
'loss': loss,
'accuracy': accuracy,
'accuracy_top_k': accuracy_in_top_k,
'merged_summary_op': merged_summary_op,
'predicted_distribution': probabilities,
'predicted_index_top_k': predicted_index_top_k,
'predicted_val_top_k': predicted_val_top_k} logger = logging.getLogger('Training a chinese write char recognition')
logger.setLevel(logging.INFO)
# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
logger.addHandler(ch) run_mode = "train"
charset_size = class_num
max_steps = 122002
save_steps = 1000
cur_test_acc = 0 """
# for online 3755 words training
checkpoint_dir = '/aiml/dfs/checkpoint_888/'
train_data_dir = '/aiml/data/train/'
test_data_dir = '/aiml/data/test/'
log_dir = '/aiml/dfs/'
""" checkpoint_dir = './checkpoint_densenet/'
train_data_dir = './data/train/'
test_data_dir = './data/test/'
log_dir = './' tf.app.flags.DEFINE_string('mode', run_mode, 'Running mode. One of {"train", "valid", "test"}')
tf.app.flags.DEFINE_boolean('random_flip_up_down', True, "Whether to random flip up down")
tf.app.flags.DEFINE_boolean('random_brightness', True, "whether to adjust brightness")
tf.app.flags.DEFINE_boolean('random_contrast', True, "whether to random constrast") tf.app.flags.DEFINE_integer('charset_size', charset_size, "Choose the first `charset_size` character to conduct our experiment.")
tf.app.flags.DEFINE_integer('image_size', 64, "Needs to provide same value as in training.")
tf.app.flags.DEFINE_boolean('gray', True, "whether to change the rbg to gray")
tf.app.flags.DEFINE_integer('max_steps', max_steps, 'the max training steps ')
tf.app.flags.DEFINE_integer('eval_steps', 50, "the step num to eval")
tf.app.flags.DEFINE_integer('save_steps', save_steps, "the steps to save") tf.app.flags.DEFINE_string('checkpoint_dir', checkpoint_dir, 'the checkpoint dir')
tf.app.flags.DEFINE_string('train_data_dir', train_data_dir, 'the train dataset dir')
tf.app.flags.DEFINE_string('test_data_dir', test_data_dir, 'the test dataset dir')
tf.app.flags.DEFINE_string('log_dir', log_dir, 'the logging dir') ##############################
# resume training
tf.app.flags.DEFINE_boolean('restore', True, 'whether to restore from checkpoint')
############################## tf.app.flags.DEFINE_boolean('epoch', 10, 'Number of epoches')
tf.app.flags.DEFINE_boolean('batch_size', 128, 'Validation batch size')
FLAGS = tf.app.flags.FLAGS class DataIterator:
def __init__(self, data_dir):
# Set FLAGS.charset_size to a small value if available computation power is limited.
truncate_path = data_dir + ('%05d' % FLAGS.charset_size)
print(truncate_path)
self.image_names = []
for root, sub_folder, file_list in os.walk(data_dir):
if root < truncate_path:
self.image_names += [os.path.join(root, file_path) for file_path in file_list]
random.shuffle(self.image_names)
self.labels = [int(file_name[len(data_dir):].split(os.sep)[0]) for file_name in self.image_names] @property
def size(self):
return len(self.labels) @staticmethod
def data_augmentation(images):
if FLAGS.random_flip_up_down:
# images = tf.image.random_flip_up_down(images)
images = tf.contrib.image.rotate(images, random.randint(0, 15) * math.pi / 180, interpolation='BILINEAR')
if FLAGS.random_brightness:
images = tf.image.random_brightness(images, max_delta=0.3)
if FLAGS.random_contrast:
images = tf.image.random_contrast(images, 0.8, 1.2)
return images def input_pipeline(self, batch_size, num_epochs=None, aug=False):
images_tensor = tf.convert_to_tensor(self.image_names, dtype=tf.string)
labels_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int64)
input_queue = tf.train.slice_input_producer([images_tensor, labels_tensor], num_epochs=num_epochs) labels = input_queue[1]
images_content = tf.read_file(input_queue[0])
images = tf.image.convert_image_dtype(tf.image.decode_png(images_content, channels=1), tf.float32)
if aug:
images = self.data_augmentation(images)
new_size = tf.constant([FLAGS.image_size, FLAGS.image_size], dtype=tf.int32)
images = tf.image.resize_images(images, new_size)
image_batch, label_batch = tf.train.shuffle_batch([images, labels], batch_size=batch_size, capacity=50000,
min_after_dequeue=10000)
return image_batch, label_batch def train():
print('Begin training')
train_feeder = DataIterator(FLAGS.train_data_dir)
test_feeder = DataIterator(FLAGS.test_data_dir)
with tf.Session() as sess:
train_images, train_labels = train_feeder.input_pipeline(batch_size=FLAGS.batch_size, aug=True)
test_images, test_labels = test_feeder.input_pipeline(batch_size=FLAGS.batch_size)
graph = build_graph(top_k=1)
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
saver = tf.train.Saver() train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/val')
start_step = 0
if FLAGS.restore:
ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
if ckpt:
saver.restore(sess, ckpt)
print("restore from the checkpoint {0}".format(ckpt))
start_step += int(ckpt.split('-')[-1]) logger.info(':::Training Start:::')
try:
while not coord.should_stop():
start_time = time.time()
train_images_batch, train_labels_batch = sess.run([train_images, train_labels])
feed_dict = {graph['images']: train_images_batch,
graph['labels']: train_labels_batch,
graph['training_flag']: True}
_, loss_val, train_summary, step = sess.run(
[graph['train_op'], graph['loss'], graph['merged_summary_op'], graph['global_step']],
feed_dict=feed_dict)
train_writer.add_summary(train_summary, step)
end_time = time.time()
logger.info("the step {0} takes {1} loss {2}".format(step, end_time - start_time, loss_val))
if step > FLAGS.max_steps:
break
accuracy_test = 0
if step % FLAGS.eval_steps == 1:
test_images_batch, test_labels_batch = sess.run([test_images, test_labels])
feed_dict = {graph['images']: test_images_batch,
graph['labels']: test_labels_batch,
graph['training_flag']: False}
accuracy_test, test_summary = sess.run(
[graph['accuracy'], graph['merged_summary_op']],
feed_dict=feed_dict)
test_writer.add_summary(test_summary, step)
logger.info('===============Eval a batch=======================')
logger.info('the step {0} test accuracy: {1}'
.format(step, accuracy_test))
logger.info('===============Eval a batch=======================')
if step % FLAGS.save_steps == 1:
logger.info('Save the ckpt of {0}'.format(step))
saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'my-model'),
global_step=graph['global_step'])
global cur_test_acc
cur_test_acc = accuracy_test
except tf.errors.OutOfRangeError:
logger.info('==================Train Finished================')
saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'my-model'), global_step=graph['global_step'])
finally:
coord.request_stop()
coord.join(threads) def validation():
print('validation')
test_feeder = DataIterator(FLAGS.test_data_dir) final_predict_val = []
final_predict_index = []
groundtruth = [] with tf.Session() as sess:
test_images, test_labels = test_feeder.input_pipeline(batch_size=FLAGS.batch_size, num_epochs=1)
graph = build_graph(top_k=3) sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer()) # initialize test_feeder's inside state coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord) saver = tf.train.Saver()
ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
if ckpt:
saver.restore(sess, ckpt)
print("restore from the checkpoint {0}".format(ckpt)) print(':::Start validation:::')
try:
i = 0
acc_top_1, acc_top_k = 0.0, 0.0
while not coord.should_stop():
i += 1
start_time = time.time()
test_images_batch, test_labels_batch = sess.run([test_images, test_labels])
feed_dict = {graph['images']: test_images_batch,
graph['labels']: test_labels_batch,
graph['training_flag']: False}
batch_labels, probs, indices, acc_1, acc_k = sess.run([graph['labels'],
graph['predicted_val_top_k'],
graph['predicted_index_top_k'],
graph['accuracy'],
graph['accuracy_top_k']], feed_dict=feed_dict)
final_predict_val += probs.tolist()
final_predict_index += indices.tolist()
groundtruth += batch_labels.tolist()
acc_top_1 += acc_1
acc_top_k += acc_k
end_time = time.time()
logger.info("the batch {0} takes {1} seconds, accuracy = {2}(top_1) {3}(top_k)"
.format(i, end_time - start_time, acc_1, acc_k)) except tf.errors.OutOfRangeError:
logger.info('==================Validation Finished================')
acc_top_1 = acc_top_1 * FLAGS.batch_size / test_feeder.size
acc_top_k = acc_top_k * FLAGS.batch_size / test_feeder.size
logger.info('top 1 accuracy {0} top k accuracy {1}'.format(acc_top_1, acc_top_k))
finally:
coord.request_stop()
coord.join(threads)
return {'prob': final_predict_val, 'indices': final_predict_index, 'groundtruth': groundtruth} def inference(image):
print('inference')
temp_image = Image.open(image).convert('L')
temp_image = temp_image.resize((FLAGS.image_size, FLAGS.image_size), Image.ANTIALIAS)
temp_image = np.asarray(temp_image) / 255.0
temp_image = temp_image.reshape([-1, 64, 64, 1])
with tf.Session() as sess:
logger.info('========start inference============')
# images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1])
# Pass a shadow label 0. This label will not affect the computation graph.
graph = build_graph(top_k=3)
saver = tf.train.Saver()
ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
if ckpt:
saver.restore(sess, ckpt)
predict_val, predict_index = sess.run([graph['predicted_val_top_k'], graph['predicted_index_top_k']],
feed_dict={graph['images']: temp_image, graph['training_flag']: False})
return predict_val, predict_index def main(_):
print(FLAGS.mode)
if FLAGS.mode == "train":
train()
elif FLAGS.mode == 'validation':
dct = validation()
result_file = 'result.dict'
logger.info('Write result into {0}'.format(result_file))
with open(result_file, 'wb') as f:
pickle.dump(dct, f)
logger.info('Write file ends')
elif FLAGS.mode == 'inference':
image_path = './data/00098/102544.png'
final_predict_val, final_predict_index = inference(image_path)
logger.info('the result info label {0} predict index {1} predict_val {2}'.format(190, final_predict_index,
final_predict_val)) if __name__ == "__main__":
tf.app.run()
densenet模型参考:https://github.com/taki0112/Densenet-Tensorflow
效果:
===============Eval a batch=======================
the step 34001.0 test accuracy: 0.765625
===============Eval a batch=======================
Compare Structure (CNN, ResNet, DenseNet)
densenet tensorflow 中文汉字手写识别的更多相关文章
- TensorFlow MNIST(手写识别 softmax)实例运行
TensorFlow MNIST(手写识别 softmax)实例运行 首先要有编译环境,并且已经正确的编译安装,关于环境配置参考:http://www.cnblogs.com/dyufei/p/802 ...
- TensorFlow 入门之手写识别(MNIST) softmax算法
TensorFlow 入门之手写识别(MNIST) softmax算法 MNIST flyu6 softmax回归 softmax回归算法 TensorFlow实现softmax softmax回归算 ...
- TensorFlow 入门之手写识别CNN 三
TensorFlow 入门之手写识别CNN 三 MNIST 卷积神经网络 Fly 多层卷积网络 多层卷积网络的基本理论 构建一个多层卷积网络 权值初始化 卷积和池化 第一层卷积 第二层卷积 密集层连接 ...
- TensorFlow 入门之手写识别(MNIST) softmax算法 二
TensorFlow 入门之手写识别(MNIST) softmax算法 二 MNIST Fly softmax回归 softmax回归算法 TensorFlow实现softmax softmax回归算 ...
- TensorFlow 入门之手写识别(MNIST) 数据处理 一
TensorFlow 入门之手写识别(MNIST) 数据处理 一 MNIST Fly softmax回归 准备数据 解压 与 重构 手写识别入门 MNIST手写数据集 图片以及标签的数据格式处理 准备 ...
- 基于tensorflow的MNIST手写识别
这个例子,是学习tensorflow的人员通常会用到的,也是基本的学习曲线中的一环.我也是! 这个例子很简单,这里,就是简单的说下,不同的tensorflow版本,相关的接口函数,可能会有不一样哟.在 ...
- 使用tensorflow实现mnist手写识别(单层神经网络实现)
import tensorflow as tf import tensorflow.examples.tutorials.mnist.input_data as input_data import n ...
- 基于tensorflow实现mnist手写识别 (多层神经网络)
标题党其实也不多,一个输入层,三个隐藏层,一个输出层 老样子先上代码 导入mnist的路径很长,现在还记不住 import tensorflow as tf import tensorflow.exa ...
- Tensorflow快餐教程(1) - 30行代码搞定手写识别
版权声明:本文为博主原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明. 本文链接:https://blog.csdn.net/lusing/article/details ...
随机推荐
- mysql主从只同步部分库或表
同步部分数据有两个思路,1.master只发送需要的:2.slave只接收想要的. master端: binlog-do-db 二进制日志记录的数据库(多数据库用逗号,隔开)binlog-i ...
- 36:字符串排序SortString
题目描述:编写一个程序,将输入字符串中的字符按如下规则排序. 规则1:英文字母从A到Z排列,不区分大小写. 如,输入:Type 输出:epTy 规则2:同一个英文字母的大小写同时存在时,按照输入顺序排 ...
- java ConcurrentHashMap 初识
“ConcurrentHashMap是一个线程安全的哈希表“,但是不允许key和value为空: HashTable和ConcurrentHashMap都是线程安全的,但是HashTable是同步容器 ...
- Xenomai 3 migration
Xenomai 3 的rtdm驱动更像一般的Linux驱动,named device会在/dev/rtdm/xxx创建一个设备文件.而用户空间使用时,写得来也和Linux的一般char设备相似,ope ...
- Erlang 在erlang项目中使用protobuf
protobuf是google的一个序列化框架,类似XML,JSON,其特点是基于二进制,比XML表示同样一段内容要短小得多,还可以定义一些可选字段,广泛用于服务端与客户端通信.文章将着重介绍在erl ...
- 图像处理之全景拼接---基于sift的全景图像拼接
http://blog.csdn.net/masibuaa/article/details/9246493#comments
- Windows 驱动入门(二)代码结构
windows驱动程序基础.转载标明出处:http://blog.csdn.net/ikerpeng/article/details/38777641 windows驱动程序结构: 我想说的是wind ...
- ScrollView分析
本文转载至 http://blog.sina.com.cn/s/blog_a843a8850101dsg5.html Properties alwaysBounceHorizontal ...
- python中装饰器你真的理解吗?
def w1(func): print('装饰器1....') def w1_in(): print('w1_in.....') func() return w1_in def w2(func): p ...
- vs2013工程技巧
1 vs工程输出了dll和lib,分别是什么,有什么用? 当设置工程property的Project Defaults的Configuration Type为dll时,不光会生成该动态链接库的dll文 ...