Deep Dream 模型

本节的代码参考了TensorFlow 源码中的示例程序https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/tutorials/deepdream，并做了适当修改。

4.2.1 导入Inception 模型

在chapter_4_data/中或者网址https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip 下载解压得到模型文件tensorflow_inception_graph.pb，将该文件拷贝到当前文件夹中（即chapter_4/中）。

使用下面的命令加载模型并打印一些基础信息：

python load_inception.py

# coding:utf-8

# 导入要用到的基本模块。

from __future__ import print_function

import numpy as np

import tensorflow as tf

# Create the graph and a session bound to it.

graph = tf.Graph()

sess = tf.InteractiveSession(graph=graph)

# tensorflow_inception_graph.pb stores both the Inception network structure

# and the corresponding data; the statements below load it into a GraphDef.

model_fn = 'tensorflow_inception_graph.pb'

with tf.gfile.FastGFile(model_fn, 'rb') as f:

    graph_def = tf.GraphDef()

    graph_def.ParseFromString(f.read())

# t_input is the placeholder through which the input image is fed.

t_input = tf.placeholder(np.float32, name='input')

imagenet_mean = 117.0

# The input image has to be preprocessed before it can enter the network:

# expand_dims adds one axis, [height, width, channel] -> [1, height, width, channel],

# and t_input - imagenet_mean subtracts a mean value.

t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)

tf.import_graph_def(graph_def, {'input': t_preprocessed})

# Collect the names of all convolution ops that came from the imported graph.

layers = [op.name for op in graph.get_operations() if op.type == 'Conv2D' and 'import/' in op.name]

# Print how many convolution layers were found.

print('Number of layers', len(layers))

# In particular, print the shape of mixed4d_3x3_bottleneck_pre_relu.

name = 'mixed4d_3x3_bottleneck_pre_relu'

print('shape of %s: %s' % (name, str(graph.get_tensor_by_name('import/' + name + ':0').get_shape())))

4.2.2 生成原始的Deep Dream 图像

python gen_naive.py

# coding: utf-8

from __future__ import print_function

import os

from io import BytesIO

import numpy as np

from functools import partial

import PIL.Image

import scipy.misc

import tensorflow as tf

# Build the graph and session, then parse the serialized Inception GraphDef.
graph = tf.Graph()

model_fn = 'tensorflow_inception_graph.pb'

sess = tf.InteractiveSession(graph=graph)

with tf.gfile.FastGFile(model_fn, 'rb') as f:

    graph_def = tf.GraphDef()

    graph_def.ParseFromString(f.read())

# Placeholder through which the image is fed.
t_input = tf.placeholder(np.float32, name='input')

imagenet_mean = 117.0

# Subtract the mean value and add a leading axis before wiring the result
# to the imported graph's 'input'.
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)

tf.import_graph_def(graph_def, {'input': t_preprocessed})

def savearray(img_array, img_name):
    """Save an image array to disk and print the path that was written.

    Args:
        img_array: Array handed to ``scipy.misc.toimage`` for conversion.
        img_name: Destination path passed to the converted image's ``save``.
    """
    # NOTE(review): scipy.misc.toimage is deprecated and missing from newer
    # SciPy releases -- confirm the SciPy version this script is run with.
    converted = scipy.misc.toimage(img_array)
    converted.save(img_name)
    print('img saved: %s' % img_name)

def render_naive(t_obj, img0, iter_n=20, step=1.0):
    """Run plain gradient ascent on an image and save it as 'naive.jpg'.

    Args:
        t_obj: Tensor whose mean is the optimisation objective.
        img0: Starting image array; it is copied, not modified.
        iter_n: Number of gradient-ascent iterations.
        step: Multiplier applied to the normalised gradient (acts like a
            learning rate).
    """
    # The objective is the mean of t_obj; its gradient is taken with respect
    # to the module-level input placeholder.
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]

    # Work on a fresh copy of the starting image.
    img = img0.copy()
    for _ in range(iter_n):
        # Evaluate the gradient and the current score for this image.
        feed = {t_input: img}
        grad, score = sess.run([t_grad, t_score], feed)
        # Scale the gradient by its standard deviation before applying it;
        # the small constant guards against a zero standard deviation.
        grad /= grad.std() + 1e-8
        img += grad * step
        print('score(mean)=%f' % (score))

    # Write the result to disk.
    savearray(img, 'naive.jpg')

# Choose the convolution layer and channel, and fetch the matching tensor.

name = 'mixed4d_3x3_bottleneck_pre_relu'

channel = 139

layer_output = graph.get_tensor_by_name("import/%s:0" % name)

# Starting image: uniform noise in [0, 1) shifted up by 100.

img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0

# Render using the selected channel of the layer output as the objective.

render_naive(layer_output[:, :, :, channel], img_noise, iter_n=20)

4.2.3 生成更大尺寸的Deep Dream 图像

python gen_multiscale.py

# coding:utf-8

from __future__ import print_function

import os

from io import BytesIO

import numpy as np

from functools import partial

import PIL.Image

import scipy.misc

import tensorflow as tf

# Build the graph and session, then parse the serialized Inception GraphDef.
graph = tf.Graph()

model_fn = 'tensorflow_inception_graph.pb'

sess = tf.InteractiveSession(graph=graph)

with tf.gfile.FastGFile(model_fn, 'rb') as f:

    graph_def = tf.GraphDef()

    graph_def.ParseFromString(f.read())

# Placeholder through which the image is fed.
t_input = tf.placeholder(np.float32, name='input')

imagenet_mean = 117.0

# Subtract the mean value and add a leading axis before wiring the result
# to the imported graph's 'input'.
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)

tf.import_graph_def(graph_def, {'input': t_preprocessed})

def savearray(img_array, img_name):
    """Save an image array to disk and print the path that was written.

    Args:
        img_array: Array handed to ``scipy.misc.toimage`` for conversion.
        img_name: Destination path passed to the converted image's ``save``.
    """
    # NOTE(review): scipy.misc.toimage is deprecated and missing from newer
    # SciPy releases -- confirm the SciPy version this script is run with.
    converted = scipy.misc.toimage(img_array)
    converted.save(img_name)
    print('img saved: %s' % img_name)

def resize_ratio(img, ratio):
    """Rescale an image array by a multiplicative factor.

    The previous implementation squeezed the image into the 0-255 range and
    went through ``scipy.misc.imresize``. That divided by zero for a constant
    image, quantised the values to 8 bits, treated an integer ``ratio`` as a
    percentage, and depends on a function that newer SciPy releases no longer
    provide. Linear interpolation on the float data avoids all of that.

    Args:
        img: Array whose first two axes (height, width) are resized. Any
            further axes (for example channels) keep their length.
        ratio: Scale factor applied to both height and width.

    Returns:
        A float32 array whose first two dimensions are
        ``int(height * ratio)`` and ``int(width * ratio)``.
    """
    # Local import: the file only imports scipy.misc, which does not
    # guarantee that the scipy.ndimage submodule is loaded.
    from scipy import ndimage

    img = np.asarray(img, dtype=np.float32)
    height, width = img.shape[:2]
    # Truncate the target size, as the imresize-based code did.
    new_h = int(height * ratio)
    new_w = int(width * ratio)
    # float() keeps the division exact under Python 2 as well.
    factors = (new_h / float(height), new_w / float(width))
    factors += (1,) * (img.ndim - 2)
    return ndimage.zoom(img, factors, order=1, mode='nearest')

def calc_grad_tiled(img, t_grad, tile_size=512):
    """Evaluate ``t_grad`` for an image of any size, one tile at a time.

    Only a tile of at most tile_size x tile_size pixels is fed per run, to
    avoid memory problems. The image is rolled by a random offset first so
    that tile borders fall on different pixels on every call, which prevents
    edge effects at the tile boundaries.

    Args:
        img: Image array; its first two axes are tiled.
        t_grad: Gradient tensor evaluated with the module-level ``sess``,
            feeding each tile to the module-level ``t_input``.
        tile_size: Edge length of a tile in pixels.

    Returns:
        Array shaped like ``img`` holding the gradient, rolled back to the
        original pixel positions. A trailing strip that the tile loop does
        not reach keeps a gradient of zero.
    """
    height, width = img.shape[:2]
    # One random offset per axis: roll along axis 1 first, then along axis 0.
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted = np.roll(img, shift_x, 1)
    shifted = np.roll(shifted, shift_y, 0)

    grad = np.zeros_like(img)
    # Start pixels of the tiles along each axis.
    row_starts = range(0, max(height - tile_size // 2, tile_size), tile_size)
    col_starts = range(0, max(width - tile_size // 2, tile_size), tile_size)
    for top in row_starts:
        bottom = top + tile_size
        for left in col_starts:
            right = left + tile_size
            # Gradient of this tile only.
            tile = shifted[top:bottom, left:right]
            grad[top:bottom, left:right] = sess.run(t_grad, {t_input: tile})

    # Undo the random roll so the gradient lines up with the input image.
    grad = np.roll(grad, -shift_x, 1)
    return np.roll(grad, -shift_y, 0)

def render_multiscale(t_obj, img0, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4):
    """Run gradient ascent at several image scales and save 'multiscale.jpg'.

    Args:
        t_obj: Tensor whose mean is the optimisation objective.
        img0: Starting image array; it is copied, not modified.
        iter_n: Gradient-ascent iterations per octave.
        step: Multiplier applied to the normalised gradient.
        octave_n: Number of scales; the image is enlarged octave_n - 1 times.
        octave_scale: Enlargement factor between consecutive octaves.
    """
    # Objective and gradient, defined the same way as in the naive renderer.
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]

    canvas = img0.copy()
    for octave in range(octave_n):
        # Every octave except the first starts by enlarging the image.
        if octave != 0:
            canvas = resize_ratio(canvas, octave_scale)
        for _ in range(iter_n):
            # Tiled evaluation lets the gradient be computed for any size.
            grad = calc_grad_tiled(canvas, t_grad)
            grad /= grad.std() + 1e-8
            canvas += grad * step
            print('.', end=' ')

    savearray(canvas, 'multiscale.jpg')

# Script entry point: render one channel of a chosen layer from noise.
if __name__ == '__main__':

    name = 'mixed4d_3x3_bottleneck_pre_relu'

    channel = 139

    # Starting image: uniform noise in [0, 1) shifted up by 100.
    img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0

    layer_output = graph.get_tensor_by_name("import/%s:0" % name)

    render_multiscale(layer_output[:, :, :, channel], img_noise, iter_n=20)

4.2.4 生成更高质量的Deep Dream 图像

python gen_lapnorm.py

# coding:utf-8

from __future__ import print_function

import os

from io import BytesIO

import numpy as np

from functools import partial

import PIL.Image

import scipy.misc

import tensorflow as tf

# Build the graph and session, then parse the serialized Inception GraphDef.
graph = tf.Graph()

model_fn = 'tensorflow_inception_graph.pb'

sess = tf.InteractiveSession(graph=graph)

with tf.gfile.FastGFile(model_fn, 'rb') as f:

    graph_def = tf.GraphDef()

    graph_def.ParseFromString(f.read())

# Placeholder through which the image is fed.
t_input = tf.placeholder(np.float32, name='input')

imagenet_mean = 117.0

# Subtract the mean value and add a leading axis before wiring the result
# to the imported graph's 'input'.
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)

tf.import_graph_def(graph_def, {'input': t_preprocessed})

def savearray(img_array, img_name):
    """Save an image array to disk and print the path that was written.

    Args:
        img_array: Array handed to ``scipy.misc.toimage`` for conversion.
        img_name: Destination path passed to the converted image's ``save``.
    """
    # NOTE(review): scipy.misc.toimage is deprecated and missing from newer
    # SciPy releases -- confirm the SciPy version this script is run with.
    converted = scipy.misc.toimage(img_array)
    converted.save(img_name)
    print('img saved: %s' % img_name)

def resize_ratio(img, ratio):
    """Rescale an image array by a multiplicative factor.

    The previous implementation squeezed the image into the 0-255 range and
    went through ``scipy.misc.imresize``. That divided by zero for a constant
    image, quantised the values to 8 bits, treated an integer ``ratio`` as a
    percentage, and depends on a function that newer SciPy releases no longer
    provide. Linear interpolation on the float data avoids all of that.

    Args:
        img: Array whose first two axes (height, width) are resized. Any
            further axes (for example channels) keep their length.
        ratio: Scale factor applied to both height and width.

    Returns:
        A float32 array whose first two dimensions are
        ``int(height * ratio)`` and ``int(width * ratio)``.
    """
    # Local import: the file only imports scipy.misc, which does not
    # guarantee that the scipy.ndimage submodule is loaded.
    from scipy import ndimage

    img = np.asarray(img, dtype=np.float32)
    height, width = img.shape[:2]
    # Truncate the target size, as the imresize-based code did.
    new_h = int(height * ratio)
    new_w = int(width * ratio)
    # float() keeps the division exact under Python 2 as well.
    factors = (new_h / float(height), new_w / float(width))
    factors += (1,) * (img.ndim - 2)
    return ndimage.zoom(img, factors, order=1, mode='nearest')

def calc_grad_tiled(img, t_grad, tile_size=512):
    """Evaluate ``t_grad`` for an image of any size, one tile at a time.

    The image is rolled by a random offset before tiling, so tile borders
    fall on different pixels on every call; the result is rolled back.

    Args:
        img: Image array; its first two axes are tiled.
        t_grad: Gradient tensor evaluated with the module-level ``sess``,
            feeding each tile to the module-level ``t_input``.
        tile_size: Edge length of a tile in pixels.

    Returns:
        Array shaped like ``img`` holding the gradient at the original pixel
        positions. A trailing strip that the tile loop does not reach keeps
        a gradient of zero.
    """
    height, width = img.shape[:2]
    # One random offset per axis: roll along axis 1 first, then along axis 0.
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted = np.roll(img, shift_x, 1)
    shifted = np.roll(shifted, shift_y, 0)

    grad = np.zeros_like(img)
    row_starts = range(0, max(height - tile_size // 2, tile_size), tile_size)
    col_starts = range(0, max(width - tile_size // 2, tile_size), tile_size)
    for top in row_starts:
        bottom = top + tile_size
        for left in col_starts:
            right = left + tile_size
            tile = shifted[top:bottom, left:right]
            grad[top:bottom, left:right] = sess.run(t_grad, {t_input: tile})

    # Undo the random roll so the gradient lines up with the input image.
    grad = np.roll(grad, -shift_x, 1)
    return np.roll(grad, -shift_y, 0)

# 5x5 smoothing kernel: outer product of the 1-4-6-4-1 weights with itself.
k = np.float32([1, 4, 6, 4, 1])
k = k[:, None] * k[None, :]

# Normalise the kernel to sum to one and expand it to shape (5, 5, 3, 3);
# the identity matrix keeps the three channels from mixing with each other.
k5x5 = (k / k.sum())[:, :, None, None] * np.eye(3, dtype=np.float32)

def lap_split(img):
    """Split an image tensor into a low-frequency and a high-frequency part.

    Args:
        img: Image tensor passed to ``tf.nn.conv2d`` with the module-level
            ``k5x5`` kernel.

    Returns:
        A ``(lo, hi)`` pair: ``lo`` is the smoothed, stride-2 convolution of
        the image; ``hi`` is the image minus ``lo`` scaled back up to the
        image's shape.
    """
    strides = [1, 2, 2, 1]
    with tf.name_scope('split'):
        # One convolution acts as one round of smoothing, so the result is
        # the low-frequency component.
        lo = tf.nn.conv2d(img, k5x5, strides, 'SAME')
        # Bring the low-frequency part back to the original size; whatever
        # the original image has beyond that is the high-frequency part.
        lo_upsampled = tf.nn.conv2d_transpose(lo, k5x5 * 4, tf.shape(img), strides)
        hi = img - lo_upsampled
    return lo, hi

def lap_split_n(img, n):
    """Decompose an image tensor into an n-level Laplacian pyramid.

    Args:
        img: Image tensor to decompose.
        n: Number of times ``lap_split`` is applied.

    Returns:
        A list of n + 1 tensors: the final low-frequency residual first,
        followed by the high-frequency parts in the reverse of the order in
        which they were split off.
    """
    high_parts = []
    low = img
    for _ in range(n):
        # Keep the high-frequency part and continue splitting the
        # low-frequency part.
        low, high = lap_split(low)
        high_parts.append(high)
    pyramid = high_parts + [low]
    pyramid.reverse()
    return pyramid

def lap_merge(levels):
    """Rebuild an image tensor from a Laplacian pyramid.

    Args:
        levels: Pyramid as returned by ``lap_split_n``: the low-frequency
            residual first, then the high-frequency levels.

    Returns:
        The tensor obtained by repeatedly upsampling the running result to
        the shape of the next level and adding that level.
    """
    merged = levels[0]
    for detail in levels[1:]:
        with tf.name_scope('merge'):
            upsampled = tf.nn.conv2d_transpose(
                merged, k5x5 * 4, tf.shape(detail), [1, 2, 2, 1])
            merged = upsampled + detail
    return merged

def normalize_std(img, eps=1e-10):
    """Normalise a tensor by its root-mean-square value.

    Args:
        img: Tensor to normalise.
        eps: Lower bound for the divisor, so a near-zero tensor does not
            cause a division by zero.

    Returns:
        ``img`` divided by ``max(sqrt(mean(img ** 2)), eps)``. The mean is
        not subtracted first.
    """
    with tf.name_scope('normalize'):
        mean_square = tf.reduce_mean(tf.square(img))
        rms = tf.sqrt(mean_square)
        divisor = tf.maximum(rms, eps)
        return img / divisor

def lap_normalize(img, scale_n=4):
    """Apply Laplacian-pyramid normalisation to an image tensor.

    Args:
        img: Image tensor; a leading axis is added before it is split.
        scale_n: Number of pyramid levels passed to ``lap_split_n``.

    Returns:
        The merged tensor with the added leading axis removed again.
    """
    batched = tf.expand_dims(img, 0)
    pyramid = lap_split_n(batched, scale_n)
    # Every level is normalised on its own before the pyramid is merged.
    normalized = [normalize_std(level) for level in pyramid]
    merged = lap_merge(normalized)
    return merged[0, :, :, :]

def tffunc(*argtypes):
    """Turn a graph-building function into an ordinary callable.

    One placeholder is created per entry of ``argtypes``. The returned
    decorator calls the wrapped function once with those placeholders and
    produces a callable that feeds its positional arguments to them and
    evaluates the resulting tensor.

    Args:
        *argtypes: Dtypes, one per argument of the function to wrap.

    Returns:
        A decorator. The callable it produces accepts the positional values
        plus an optional ``session`` keyword forwarded to ``eval``.
    """
    placeholders = [tf.placeholder(argtype) for argtype in argtypes]

    def wrap(f):
        # Build the graph once, at decoration time.
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = dict(zip(placeholders, args))
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap

def render_lapnorm(t_obj, img0,
                   iter_n=10, step=1.0, octave_n=3, octave_scale=1.4, lap_n=4):
    """Run multi-scale gradient ascent with Laplacian-normalised gradients.

    The result is saved as 'lapnorm.jpg'.

    Args:
        t_obj: Tensor whose mean is the optimisation objective.
        img0: Starting image array; it is copied, not modified.
        iter_n: Gradient-ascent iterations per octave.
        step: Multiplier applied to the normalised gradient.
        octave_n: Number of scales; the image is enlarged octave_n - 1 times.
        octave_scale: Enlargement factor between consecutive octaves.
        lap_n: Number of pyramid levels used by the normalisation.
    """
    # Objective and gradient, defined as in the other renderers.
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]

    # Wrap lap_normalize so it can be called on arrays like a plain function.
    normalize = partial(lap_normalize, scale_n=lap_n)
    lap_norm_func = tffunc(np.float32)(normalize)

    canvas = img0.copy()
    for octave in range(octave_n):
        if octave != 0:
            canvas = resize_ratio(canvas, octave_scale)
        for _ in range(iter_n):
            grad = calc_grad_tiled(canvas, t_grad)
            # The gradient is normalised with lap_norm_func here, rather
            # than by its standard deviation.
            grad = lap_norm_func(grad)
            canvas += grad * step
            print('.', end=' ')

    savearray(canvas, 'lapnorm.jpg')

# Script entry point: render one channel of a chosen layer from noise.
if __name__ == '__main__':

    name = 'mixed4d_3x3_bottleneck_pre_relu'

    channel = 139

    # Starting image: uniform noise in [0, 1) shifted up by 100.
    img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0

    layer_output = graph.get_tensor_by_name("import/%s:0" % name)

    render_lapnorm(layer_output[:, :, :, channel], img_noise, iter_n=20)

4.2.5 最终的Deep Dream 模型

python gen_deepdream.py

# coding:utf-8

from __future__ import print_function

import os

from io import BytesIO

import numpy as np

from functools import partial

import PIL.Image

import scipy.misc

import tensorflow as tf

# Build the graph and session, then parse the serialized Inception GraphDef.
graph = tf.Graph()

model_fn = 'tensorflow_inception_graph.pb'

sess = tf.InteractiveSession(graph=graph)

with tf.gfile.FastGFile(model_fn, 'rb') as f:

    graph_def = tf.GraphDef()

    graph_def.ParseFromString(f.read())

t_input = tf.placeholder(np.float32, name='input')  # define the input tensor

imagenet_mean = 117.0

# Subtract the mean value and add a leading axis before wiring the result
# to the imported graph's 'input'.
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)

tf.import_graph_def(graph_def, {'input': t_preprocessed})

def savearray(img_array, img_name):
    """Save an image array to disk and print the path that was written.

    Args:
        img_array: Array handed to ``scipy.misc.toimage`` for conversion.
        img_name: Destination path passed to the converted image's ``save``.
    """
    # NOTE(review): scipy.misc.toimage is deprecated and missing from newer
    # SciPy releases -- confirm the SciPy version this script is run with.
    converted = scipy.misc.toimage(img_array)
    converted.save(img_name)
    print('img saved: %s' % img_name)

def visstd(a, s=0.1):
    """Centre an array on 0.5 with a standard deviation of about ``s``.

    Args:
        a: Array exposing ``mean()`` and ``std()``.
        s: Target spread of the result.

    Returns:
        ``(a - mean) / max(std, 1e-4) * s + 0.5``; the lower bound on the
        divisor keeps a constant array from dividing by zero.
    """
    centered = a - a.mean()
    spread = max(a.std(), 1e-4)
    return centered / spread * s + 0.5

def resize_ratio(img, ratio):
    """Rescale an image array by a multiplicative factor.

    The previous implementation squeezed the image into the 0-255 range and
    went through ``scipy.misc.imresize``. That divided by zero for a constant
    image, quantised the values to 8 bits, treated an integer ``ratio`` as a
    percentage, and depends on a function that newer SciPy releases no longer
    provide. Linear interpolation on the float data avoids all of that.

    Args:
        img: Array whose first two axes (height, width) are resized. Any
            further axes (for example channels) keep their length.
        ratio: Scale factor applied to both height and width.

    Returns:
        A float32 array whose first two dimensions are
        ``int(height * ratio)`` and ``int(width * ratio)``.
    """
    # Local import: the file only imports scipy.misc, which does not
    # guarantee that the scipy.ndimage submodule is loaded.
    from scipy import ndimage

    img = np.asarray(img, dtype=np.float32)
    height, width = img.shape[:2]
    # Truncate the target size, as the imresize-based code did.
    new_h = int(height * ratio)
    new_w = int(width * ratio)
    # float() keeps the division exact under Python 2 as well.
    factors = (new_h / float(height), new_w / float(width))
    factors += (1,) * (img.ndim - 2)
    return ndimage.zoom(img, factors, order=1, mode='nearest')

def resize(img, hw):
    """Resize an image array to an explicit (height, width).

    The previous implementation squeezed the image into the 0-255 range and
    went through ``scipy.misc.imresize``. That divided by zero for a constant
    image, quantised the values to 8 bits, and depends on a function that
    newer SciPy releases no longer provide. Linear interpolation on the float
    data avoids all of that.

    Args:
        img: Array whose first two axes (height, width) are resized. Any
            further axes (for example channels) keep their length.
        hw: Two-element sequence or array giving the target height and
            width, in that order.

    Returns:
        A float32 array whose first two dimensions equal ``hw``.
    """
    # Local import: the file only imports scipy.misc, which does not
    # guarantee that the scipy.ndimage submodule is loaded.
    from scipy import ndimage

    img = np.asarray(img, dtype=np.float32)
    height, width = img.shape[:2]
    new_h, new_w = int(hw[0]), int(hw[1])
    # float() keeps the division exact under Python 2 as well.
    factors = (new_h / float(height), new_w / float(width))
    factors += (1,) * (img.ndim - 2)
    return ndimage.zoom(img, factors, order=1, mode='nearest')

def calc_grad_tiled(img, t_grad, tile_size=512):
    """Evaluate ``t_grad`` for an image of any size, one tile at a time.

    The image is rolled by a random offset before tiling, so tile borders
    fall on different pixels on every call; the result is rolled back.

    Args:
        img: Image array; its first two axes are tiled.
        t_grad: Gradient tensor evaluated with the module-level ``sess``,
            feeding each tile to the module-level ``t_input``.
        tile_size: Edge length of a tile in pixels.

    Returns:
        Array shaped like ``img`` holding the gradient at the original pixel
        positions. A trailing strip that the tile loop does not reach keeps
        a gradient of zero.
    """
    height, width = img.shape[:2]
    # One random offset per axis: roll along axis 1 first, then along axis 0.
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted = np.roll(img, shift_x, 1)
    shifted = np.roll(shifted, shift_y, 0)

    grad = np.zeros_like(img)
    row_starts = range(0, max(height - tile_size // 2, tile_size), tile_size)
    col_starts = range(0, max(width - tile_size // 2, tile_size), tile_size)
    for top in row_starts:
        bottom = top + tile_size
        for left in col_starts:
            right = left + tile_size
            tile = shifted[top:bottom, left:right]
            grad[top:bottom, left:right] = sess.run(t_grad, {t_input: tile})

    # Undo the random roll so the gradient lines up with the input image.
    grad = np.roll(grad, -shift_x, 1)
    return np.roll(grad, -shift_y, 0)

def tffunc(*argtypes):
    """Turn a graph-building function into an ordinary callable.

    One placeholder is created per entry of ``argtypes``. The returned
    decorator calls the wrapped function once with those placeholders and
    produces a callable that feeds its positional arguments to them and
    evaluates the resulting tensor.

    Args:
        *argtypes: Dtypes, one per argument of the function to wrap.

    Returns:
        A decorator. The callable it produces accepts the positional values
        plus an optional ``session`` keyword forwarded to ``eval``.
    """
    placeholders = [tf.placeholder(argtype) for argtype in argtypes]

    def wrap(f):
        # Build the graph once, at decoration time.
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = dict(zip(placeholders, args))
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap

def render_deepdream(t_obj, img0,
                     iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    """Render a Deep Dream image from an existing picture.

    The picture is split into octaves by repeated downscaling. Gradient
    ascent starts on the smallest version, and after each octave the image
    is enlarged and the detail removed by the downscaling is added back.
    The result is clipped to [0, 255] and saved as 'deepdream.jpg'.

    Args:
        t_obj: Tensor whose mean is the optimisation objective.
        img0: Starting image as a float array. It is copied, so the caller's
            array is left unchanged.
        iter_n: Gradient-ascent iterations per octave.
        step: Step size, applied after dividing the gradient by its mean
            absolute value.
        octave_n: Number of octaves.
        octave_scale: Size ratio between consecutive octaves.
    """
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]

    # Copy the input, as the other render_* functions do: with octave_n == 1
    # the loop below never rebinds img, so the in-place updates further down
    # would otherwise write into the caller's array.
    img = img0.copy()

    # Pyramid decomposition. Here the low- and high-frequency parts are
    # obtained by plain resizing.
    octaves = []
    for _ in range(octave_n - 1):
        hw = img.shape[:2]
        lo = resize(img, np.int32(np.float32(hw) / octave_scale))
        hi = img - resize(lo, hw)
        img = lo
        octaves.append(hi)

    # Generate the low-frequency image first, then enlarge it step by step
    # and add the high-frequency detail back.
    for octave in range(octave_n):
        if octave > 0:
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2]) + hi
        for _ in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            # The small constant guards against an all-zero gradient.
            img += g * (step / (np.abs(g).mean() + 1e-7))
            print('.', end=' ')

    img = img.clip(0, 255)
    savearray(img, 'deepdream.jpg')

# Script entry point: dream on test.jpg using one channel of a chosen layer.
if __name__ == '__main__':

    # Load the picture and convert it to a float32 array.
    img0 = PIL.Image.open('test.jpg')

    img0 = np.float32(img0)

    name = 'mixed4d_3x3_bottleneck_pre_relu'

    channel = 139

    layer_output = graph.get_tensor_by_name("import/%s:0" % name)

    render_deepdream(layer_output[:, :, :, channel], img0)

    # Alternative objective, left disabled: the squared output of the whole
    # 'mixed4c' layer instead of a single channel.

    # name = 'mixed4c'

    # layer_output = graph.get_tensor_by_name("import/%s:0" % name)

    # render_deepdream(tf.square(layer_output), img0)

Deep Dream 模型的更多相关文章

Deep Dream模型与实现
Deep Dream是谷歌公司在2015年公布的一项有趣的技术.在训练好的卷积神经网络中,只需要设定几个参数,就可以通过这项技术生成一张图像. 本文章的代码和图片都放在我的github上,想实现本文代 ...
Deep Learning模型之：CNN卷积神经网络（一）深度解析CNN
http://m.blog.csdn.net/blog/wu010555688/24487301 本文整理了网上几位大牛的博客,详细地讲解了CNN的基础结构与核心思想,欢迎交流. [1]Deep le ...
深度学习在美团点评推荐平台排序中的应用&& wide&&deep推荐系统模型--学习笔记
写在前面:据说下周就要xxxxxxxx, 吓得本宝宝赶紧找些广告的东西看看 gbdt+lr的模型之前是知道怎么搞的,dnn+lr的模型也是知道的,但是都没有试验过深度学习在美团点评推荐平台排序中的运 ...
Top Deep Learning Projects in github
Top Deep Learning Projects A list of popular github projects related to deep learning (ranked by sta ...
Unity3d地图制作之模型高光
由于颇受暗黑破坏神美工的影响,最近都在研究怎么制作场景地图之类的. 那么今日讲的模型高光虽然和地图无关,但是也涉及到一些美工的知识,尤其是shader. 按照国际惯例,先贴一张图饱饱眼福. 大家可以看 ...
#Deep Learning回顾#之LeNet、AlexNet、GoogLeNet、VGG、ResNet
CNN的发展史上一篇回顾讲的是2006年Hinton他们的Science Paper,当时提到,2006年虽然Deep Learning的概念被提出来了,但是学术界的大家还是表示不服.当时有流传的段 ...
Deep Learning（深度学习）学习笔记整理(二）
本文整理了网上几位大牛的博客,详细地讲解了CNN的基础结构与核心思想,欢迎交流. [1]Deep learning简介 [2]Deep Learning训练过程 [3]Deep Learning模型之 ...
Applied Deep Learning Resources
Applied Deep Learning Resources A collection of research articles, blog posts, slides and code snipp ...
Deep Learning for Information Retrieval
最近关注了一些Deep Learning在Information Retrieval领域的应用,得益于Deep Model在对文本的表达上展现的优势(比如RNN和CNN),我相信在IR的领域引入Dee ...

随机推荐

map里面的set方法
let a=[1,2,3,4,1,2,3,4,1,4];let b= new Set(a);console.log(b) 还有add方法介绍下: let a=new Set();let b=[1,1, ...
UI常用接口使用规范
//////////////////////////////////////////////////////////////////////////////////////////////// /// ...
jquery easyui教程[申明：来源于网络]
jquery easyui教程[申明:来源于网络] 地址:http://wenku.baidu.com/view/570e4d4533687e21af45a941.html
悬线法 || BZOJ3039: 玉蟾宫 || Luogu P4147 玉蟾宫
题面: P4147 玉蟾宫题解:过于板子举报了 #include<cstdio> #include<cstring> #include<iostream> #de ...
POJ 2187 - Beauty Contest - [凸包+旋转卡壳法][凸包的直径]
题目链接:http://poj.org/problem?id=2187 Time Limit: 3000MS Memory Limit: 65536K Description Bessie, Farm ...
[daily][fedora][netctl][nmcli] 设置笔记本为台式机网关
TAG:将一个网卡动态增减到网桥里的配置场景是这样的. 我的笔记本无线网卡用来访问互联网.OS里面有一个birdge用来链接所有的虚拟机帮助虚拟机上网. 现在有了一台台式机.台式机用来做hyperv ...
可视化&地图__公司收集
原文地址:https://github.com/zhongcaiwei/Data-visualization-technology-sharing 一.数据可视化企业(部分) 数字冰雹光启元-腾讯 ...
mybatis（二）--相关属性及相关细节
mapper接口动态代理动态代理dao开发规则 1.namespace必需是接口的全路径名 2.接口的方法名必需与映射文件的sql id一致 3.接口的输入参数必需与映射文件的parameter ...
Oracle实用操作
查询用户下所有表:select * from tab; 删除表: drop table 表名; 但是删除表后还是会查询到BIN开头的垃圾表,drop后的表存在于回收站: 清空回收站所有表: purg ...
python基础之初识函数&函数进阶
函数基础部分 1.什么是函数? 函数是组织好的,可重复使用的,用来实现单一,或相关联功能的代码段.函数能提高应用的模块性,和代码的重复利用率. 2.定义函数定义:def 关键词开头,空格之后接函数名 ...

Deep Dream 模型

Deep Dream 模型的更多相关文章

随机推荐

热门专题