TensorFlow: Clustering imagenet2012 with DASC and Inception-v3
I. Objective
Using imagenet2012 as the dataset, extract image features with Inception-v3 and use them as input to train an autoencoder.
This serves as the pretrained model. A self-expression layer is then inserted in the middle of the autoencoder, and the learned self-expression coefficients are used as a similarity matrix to cluster the 1000 classes of imagenet2012.
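For reference, the objective optimized in Section III takes the standard deep-subspace-clustering form, written here from the loss terms in the code (up to the mean-vs-sum normalization of tf.losses.mean_squared_error; the weights correspond to the flags self_express_loss_weight and regularizer_loss_weight below):

$$\min_{\Theta,\,C}\ \lVert X-\hat{X}\rVert_F^2 + \lambda_1\lVert Z - CZ\rVert_F^2 + \lambda_2\lVert C\rVert_F^2$$

where X is the batch of Inception-v3 features, Z the encoder output, X̂ the decoder reconstruction, and C the batch_size × batch_size self-expression coefficient matrix.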
II. Pretraining
1. Principle
Inception-v3 + autoencoder: the 2048-d pool_3 feature of each image is reshaped into a 32×64 single-channel map and fed to a convolutional autoencoder trained with reconstruction MSE.
2. Code
```python
import tensorflow as tf
import os
import numpy as np
import random
import tensorflow.contrib.slim as slim
import shutil

tf.app.flags.DEFINE_string('model_dir', 'model/inception', 'Inception-v3 pretrain model dir')
tf.app.flags.DEFINE_string('class_list', 'imagenet12/train_class_list.txt', 'ILSVRC2012 image class list')
tf.app.flags.DEFINE_string('img_path', '/media/gpu/bdc7606d-0e3c-4870-9a5d-4926fd9961c0/gpu/Works/imagenet/others/ILSVRC2012_img_train', 'ILSVRC2012 image train path')
tf.app.flags.DEFINE_integer('max_train_steps_pre', 200000, 'max train steps')
tf.app.flags.DEFINE_boolean('restore', True, 'whether to restore model and variables from a previous save')
tf.app.flags.DEFINE_string('checkpoint_path', 'model/pre/', 'model save path')
tf.app.flags.DEFINE_string('feature_train_path', 'feature_train', 'ILSVRC2012 train feature save path')
tf.app.flags.DEFINE_integer('large_multi', 100, 'factor used to enlarge the feature data')
tf.app.flags.DEFINE_integer('width', 32, 'the width of the feature input')
tf.app.flags.DEFINE_integer('inception_out_size', 2048, 'dim of the feature input (Inception output dim)')
tf.app.flags.DEFINE_integer('train_num_of_every_batch', 2000, 'resample the training batch every 2000 steps')
FLAGS = tf.app.flags.FLAGS

kernel_num_list = [16, 32, 64]               # channel counts
kernel_size_list = [[3, 3], [3, 3], [3, 3]]  # kernel sizes
kernel_stride_list = [2, 2, 2]               # strides
batch_size = 500


def get_inception_graph():
    '''
    Load the Inception-v3 graph so get_inception_output can
    extract features from Inception-v3.
    '''
    with tf.gfile.FastGFile(os.path.join(FLAGS.model_dir, 'inception-v3.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        inception_out = tf.import_graph_def(graph_def, name='', return_elements=['pool_3/_reshape:0'])
    return inception_out


def create_graph_pre():
    '''
    Create the autoencoder graph and its reconstruction loss.
    '''
    # the 2048-d Inception feature is viewed as a 32x64 single-channel image
    inception_input = tf.placeholder(
        tf.float32,
        [None, FLAGS.width, FLAGS.inception_out_size // FLAGS.width, 1],
        name='inception_holder')
    with tf.variable_scope('DSC'):
        with tf.variable_scope('encoder'):
            net = slim.conv2d(inception_input, kernel_num_list[0], kernel_size_list[0], stride=kernel_stride_list[0], scope='conv_0')
            net = slim.conv2d(net, kernel_num_list[1], kernel_size_list[1], stride=kernel_stride_list[1], scope='conv_1')
            net = slim.conv2d(net, kernel_num_list[2], kernel_size_list[2], stride=kernel_stride_list[2], scope='conv_2')
        with tf.variable_scope('decoder'):
            net = slim.conv2d_transpose(net, kernel_num_list[1], kernel_size_list[2], stride=kernel_stride_list[2], scope='deconv_2')
            net = slim.conv2d_transpose(net, kernel_num_list[0], kernel_size_list[1], stride=kernel_stride_list[1], scope='deconv_1')
            net = slim.conv2d_transpose(net, 1, kernel_size_list[0], stride=kernel_stride_list[0], scope='deconv_0')
    restruct_loss = tf.losses.mean_squared_error(net, inception_input)
    return restruct_loss, inception_input, net


def get_inception_output(sess, img, txt_name, inception_out, save):
    '''
    Extract the Inception-v3 feature for img and optionally save it to txt_name.
    '''
    image_data = tf.gfile.FastGFile(img, 'rb').read()
    output = sess.run(inception_out, feed_dict={'DecodeJpeg/contents:0': image_data})
    output = np.squeeze(output)
    output = output.reshape(FLAGS.width, -1)
    if save:
        np.savetxt(txt_name, output, fmt='%.6f')
    return output


def get_inception_batch(sess, inception_out, save=True):
    '''
    Extract Inception-v3 features for one batch, used as input to the new
    graph (create_graph_pre). Previously extracted features are reloaded from disk.
    '''
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)[0:batch_size]
    batch = []
    for i, item in enumerate(class_list):
        class_img_path = os.path.join(FLAGS.img_path, item)
        class_img_list = os.listdir(class_img_path)
        img_name = random.choice(class_img_list)
        # assumes a 3-character extension such as .jpg
        txt_name = os.path.join(FLAGS.feature_train_path, item, img_name[:-4] + 'txt')
        img = os.path.join(class_img_path, img_name)
        if os.path.exists(txt_name):
            print('%s Found!' % os.path.join(item, img_name[:-4] + 'txt'))
            batch_i = np.loadtxt(txt_name)
        else:
            dir_name = os.path.join(FLAGS.feature_train_path, item)
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            batch_i = get_inception_output(sess, img, txt_name, inception_out, save=save)
        batch.append(batch_i)
    large_batch = np.array(batch) * FLAGS.large_multi
    return large_batch


def reconstruct(sess, net, img_inception):
    '''
    Run the autoencoder on a single feature (img_inception) and return the
    mean squared reconstruction error, to verify reconstruction quality.
    '''
    output = sess.run([net], feed_dict={'inception_holder:0': img_inception})
    img_inception = np.squeeze(img_inception)
    output = np.squeeze(np.array(output))
    test_loss = pow(img_inception - output, 2)
    return output, sum(sum(test_loss)) / (32 * 64)


def interface_pre():
    total_loss, inception_input, net = create_graph_pre()
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(1e-3, global_step, decay_steps=100, decay_rate=0.98, staircase=True)
    # pass global_step to minimize() so the exponential decay actually advances
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss, global_step=global_step)
    saver = tf.train.Saver(max_to_keep=3)
    with tf.Session() as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            pre_step = int(ckpt.replace(FLAGS.checkpoint_path + '-', ''))
            saver.restore(sess, ckpt)
        else:
            # remove the previous model
            if os.path.exists(FLAGS.checkpoint_path):
                shutil.rmtree(FLAGS.checkpoint_path)
            os.makedirs(FLAGS.checkpoint_path)
            sess.run(tf.global_variables_initializer())
            pre_step = 0
        inception_out = get_inception_graph()
        for step in range(FLAGS.max_train_steps_pre):
            if step % FLAGS.train_num_of_every_batch == 0:
                # resample a fresh batch of features and shuffle it
                inception_output = get_inception_batch(sess, inception_out, save=False)
                inception_output = inception_output.reshape(-1, inception_output.shape[1], inception_output.shape[2], 1)
                perm = np.arange(batch_size)
                np.random.shuffle(perm)
                inception_output = inception_output[perm]
            _, loss_value = sess.run([train_op, total_loss], feed_dict={'inception_holder:0': inception_output})
            if step % 100 == 0:
                print("step %d : total_loss= %f" % (step, loss_value))
            if step % 500 == 0 and step > 0:
                # save the model; write the meta graph only on the first save
                write_meta_graph = (step == 500)
                all_step = pre_step + step
                saver.save(sess, FLAGS.checkpoint_path, global_step=all_step, write_meta_graph=write_meta_graph)
                # reconstruct one sample image's feature to check the test loss
                img_inception = get_inception_output(sess, 'cropped_panda.jpg', 'cropped_panda.txt', inception_out, False)
                img_out, test_loss = reconstruct(sess, net, FLAGS.large_multi * img_inception.reshape(-1, 32, 64, 1))
                print("test loss= %.5f" % test_loss)


if __name__ == '__main__':
    interface_pre()
```
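As a quick sanity check on the architecture above (a sketch, not part of the original script): three stride-2 SAME-padded convolutions shrink the 32×64 input to 4×8 with 64 channels, so the flattened code per sample has 4·8·64 = 2048 values, the size the self-expression layer in Section III operates on.

```python
# Sketch: verify the encoder's output shape (assumes TF 1.x with contrib.slim).
import tensorflow as tf
import tensorflow.contrib.slim as slim

x = tf.placeholder(tf.float32, [None, 32, 64, 1])
net = slim.conv2d(x, 16, [3, 3], stride=2)    # -> (None, 16, 32, 16)
net = slim.conv2d(net, 32, [3, 3], stride=2)  # -> (None, 8, 16, 32)
net = slim.conv2d(net, 64, [3, 3], stride=2)  # -> (None, 4, 8, 64)
print(net.get_shape().as_list())              # [None, 4, 8, 64]; 4*8*64 == 2048
```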
III. Training
1. Principle
The per-class average Inception-v3 feature vectors of imagenet2012 are used as input to train the model. The learned self-expression coefficients serve as the similarity matrix for spectral clustering, and the clustering result is then visualized.
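The training script below assumes one averaged feature file per class already exists under avg_train_vector/. The original post does not show how these are produced; a minimal sketch, assuming the feature_train/ layout written during pretraining (paths and the element-wise-mean choice are assumptions):

```python
# Sketch (not in the original post): build per-class average feature files
# from the per-image 32x64 feature txts saved under feature_train/.
import os
import numpy as np

feature_train_path = 'feature_train'  # one subdirectory of feature txts per class
avg_path = 'avg_train_vector'         # output: one averaged txt per class

os.makedirs(avg_path, exist_ok=True)
for class_name in os.listdir(feature_train_path):
    class_dir = os.path.join(feature_train_path, class_name)
    feats = [np.loadtxt(os.path.join(class_dir, f)) for f in os.listdir(class_dir)]
    avg = np.mean(feats, axis=0)      # element-wise mean over the class's images
    np.savetxt(os.path.join(avg_path, class_name + '.txt'), avg, fmt='%.6f')
```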
2. Code
```python
import tensorflow as tf
import os
import numpy as np
import random
import tensorflow.contrib.slim as slim
import shutil
from scipy.sparse import coo_matrix
from sklearn.cluster import spectral_clustering

tf.app.flags.DEFINE_string('class_list', '../imagenet12/train_class_list.txt', 'ILSVRC2012 image class list')
tf.app.flags.DEFINE_string('img_path', '/media/gpu/bdc7606d-0e3c-4870-9a5d-4926fd9961c0/gpu/Works/imagenet/others/ILSVRC2012_img_train', 'ILSVRC2012 image train path')
tf.app.flags.DEFINE_integer('max_train_steps', 200000, 'max train steps')
tf.app.flags.DEFINE_boolean('restore', False, 'whether to restore model and variables from a previous save')
tf.app.flags.DEFINE_string('pretrain_path', '../model/pre/', 'pretrain model path')
tf.app.flags.DEFINE_string('train_path', 'model/train/', 'train model path')
tf.app.flags.DEFINE_string('Coef_path', 'Coef/', 'save path of the self-expression coefficients')
tf.app.flags.DEFINE_integer('large_multi', 100, 'factor used to enlarge the feature data')
tf.app.flags.DEFINE_integer('width', 32, 'the width of the feature input')
tf.app.flags.DEFINE_integer('inception_out_size', 2048, 'dim of the feature input (Inception output dim)')
tf.app.flags.DEFINE_float('self_express_loss_weight', 1, 'weight of the self-expression loss')
tf.app.flags.DEFINE_float('regularizer_loss_weight', 0.01, 'weight of the Coef regularizer')
tf.app.flags.DEFINE_string('cluster_path', 'cluster', 'cluster result path')
tf.app.flags.DEFINE_string('data_path', 'avg_train_vector', 'imagenet2012 average feature path')
FLAGS = tf.app.flags.FLAGS

kernel_num_list = [16, 32, 64]
kernel_size_list = [[3, 3], [3, 3], [3, 3]]
kernel_stride_list = [2, 2, 2]
batch_size = 1000


def create_graph_pre():
    inception_input = tf.placeholder(
        tf.float32,
        [None, FLAGS.width, FLAGS.inception_out_size // FLAGS.width, 1],
        name='inception_holder')
    with tf.variable_scope('DSC'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(0.0005)):
            with tf.variable_scope('encoder'):
                net = slim.conv2d(inception_input, kernel_num_list[0], kernel_size_list[0], stride=kernel_stride_list[0], scope='conv_0')
                net = slim.conv2d(net, kernel_num_list[1], kernel_size_list[1], stride=kernel_stride_list[1], scope='conv_1')
                net = slim.conv2d(net, kernel_num_list[2], kernel_size_list[2], stride=kernel_stride_list[2], scope='conv_2')
            self_express_x = net
            net = tf.reshape(net, [batch_size, -1], name='reshape_to_flat')
            # self-expression layer: a trainable batch_size x batch_size coefficient matrix
            Coef = slim.model_variable('Coef',
                                       shape=[batch_size, batch_size],
                                       initializer=tf.truncated_normal_initializer(stddev=0.1),
                                       regularizer=slim.l2_regularizer(0.0005), trainable=True)
            net = tf.matmul(Coef, net, name='matmul')
            with tf.variable_scope('decoder'):
                net = tf.reshape(net, [batch_size, FLAGS.width // 8, FLAGS.inception_out_size // FLAGS.width // 8, kernel_num_list[2]], name='reshape_to_normal')
                self_express_x_c = net
                net = slim.conv2d_transpose(net, kernel_num_list[1], kernel_size_list[2], stride=kernel_stride_list[2], scope='deconv_2')
                net = slim.conv2d_transpose(net, kernel_num_list[0], kernel_size_list[1], stride=kernel_stride_list[1], scope='deconv_1')
                net = slim.conv2d_transpose(net, 1, kernel_size_list[0], stride=kernel_stride_list[0], scope='deconv_0')
    reconstruct_loss = tf.losses.mean_squared_error(net, inception_input)
    self_express_loss = FLAGS.self_express_loss_weight * tf.losses.mean_squared_error(self_express_x, self_express_x_c)
    regularizer_loss = FLAGS.regularizer_loss_weight * tf.reduce_sum(tf.pow(Coef, 2.0))
    # regularizer_loss = tf.add_n(tf.losses.get_regularization_losses())
    loss = reconstruct_loss + self_express_loss + regularizer_loss
    # loss = self_express_loss
    return net, loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss


def get_inception_batch_avg():
    '''
    Load the precomputed per-class average Inception-v3 feature
    for each of the batch_size classes from FLAGS.data_path.
    '''
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)[0:batch_size]
    res = []
    for i in range(len(class_list)):
        data_path = os.path.join(FLAGS.data_path, class_list[i] + '.txt')
        data = np.loadtxt(data_path)
        data = data.reshape(32, 64)
        res.append(data * 100)  # same scaling as FLAGS.large_multi
    return np.array(res)


def interface():
    net, total_loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss = create_graph_pre()
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(1e-4, global_step, decay_steps=100, decay_rate=0.98, staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss, global_step=global_step)
    saver = tf.train.Saver(max_to_keep=3)
    with tf.Session() as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.train_path)
            pre_step = int(ckpt.replace(FLAGS.train_path + '-', ''))
            saver.restore(sess, ckpt)
        else:
            # remove the previous model and Coef
            if os.path.exists(FLAGS.train_path):
                shutil.rmtree(FLAGS.train_path)
            if os.path.exists(FLAGS.Coef_path):
                shutil.rmtree(FLAGS.Coef_path)
            os.makedirs(FLAGS.train_path)
            os.makedirs(FLAGS.Coef_path)
            # initialize, then restore the encoder/decoder from the pretrained model
            sess.run(tf.global_variables_initializer())
            pre_step = 0
            ckpt = tf.train.latest_checkpoint(FLAGS.pretrain_path)
            variable_restore_op = slim.assign_from_checkpoint_fn(ckpt, slim.get_variables_to_restore(), ignore_missing_vars=True)
            variable_restore_op(sess)
        # the Inception graph is not needed here: the averaged features are precomputed
        inception_output = get_inception_batch_avg()
        inception_output = inception_output.reshape(-1, inception_output.shape[1], inception_output.shape[2], 1)
        for step in range(FLAGS.max_train_steps):
            _, loss_value, Coef_val, rec_val, see_val, reg_val = \
                sess.run([train_op, total_loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss],
                         feed_dict={'inception_holder:0': inception_output})
            if step % 100 == 0:
                print("step %d : total_loss= %f, rec_loss= %f, see_val= %f, reg_val= %f"
                      % (step, loss_value, rec_val, see_val, reg_val))
            if step % 1000 == 0 and step > 0:
                # write the meta graph only on the first save
                write_meta_graph = (step == 1000)
                all_step = pre_step + step
                saver.save(sess, FLAGS.train_path, global_step=all_step, write_meta_graph=write_meta_graph)
                np.savetxt(FLAGS.Coef_path + str(all_step) + '.txt', Coef_val, fmt='%.6f')


def thrC(C):
    # keep the magnitude of each coefficient
    return np.abs(C)


def post_proC(C, N):
    # C: coefficient matrix; symmetrize it so it can serve as an affinity matrix
    C = 0.5 * (C + C.T)
    if not os.path.exists(FLAGS.cluster_path):
        os.makedirs(FLAGS.cluster_path)
    np.savetxt(os.path.join(FLAGS.cluster_path, 'C_abs.txt'), C, fmt='%.6f')
    graph = coo_matrix(C)
    labels = spectral_clustering(graph, n_clusters=N)
    return labels


def vis(N, labels, copy_examples=True):
    '''
    Save the class list of each cluster and optionally copy one random
    example image per class into the cluster's directory.
    '''
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)
    for i in range(N):
        print(i)
        index = [j for j in range(len(labels)) if labels[j] == i]
        sub_class_list = class_list[index]
        np.savetxt(os.path.join(FLAGS.cluster_path, str(i) + '.txt'), sub_class_list, fmt='%s')
        if copy_examples:  # the original tested `if vis:`, which is always true
            dir_path = os.path.join(FLAGS.cluster_path, str(i))
            if os.path.exists(dir_path):
                shutil.rmtree(dir_path)
            os.makedirs(dir_path)
            # copy one example image per class into dir_path
            for sub_class_item in sub_class_list:
                img_path = os.path.join(FLAGS.img_path, sub_class_item)
                random_img = random.choice(os.listdir(img_path))
                src = os.path.join(img_path, random_img)
                dst = os.path.join(dir_path, random_img)
                shutil.copyfile(src, dst)


if __name__ == '__main__':
    interface()
    C = np.loadtxt('Coef/199000.txt')  # learned coefficients, used as the similarity matrix
    C = thrC(C)
    N = 32
    grp = post_proC(C, N)
    vis(N, grp)
```
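A note on the post-processing: sklearn's spectral_clustering treats its input as a symmetric, non-negative affinity matrix, which is why thrC takes |C| and post_proC symmetrizes with 0.5 * (C + C.T). A toy run of the same steps on a random stand-in matrix (a sketch, not in the original):

```python
# Toy example of the post-processing applied to a random coefficient matrix.
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.cluster import spectral_clustering

C = np.random.randn(10, 10)   # stand-in for a learned Coef matrix
A = np.abs(C)                 # thrC: keep magnitudes
A = 0.5 * (A + A.T)           # post_proC: symmetrize
labels = spectral_clustering(coo_matrix(A), n_clusters=2)
print(labels)                 # one cluster id per sample
```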