faster-rcnn 笔记

2019-02-18,15点00

'''

下面是别人写的原始的笔记,我在上面自己补充了一些.

'''

#https://www.cnblogs.com/the-home-of-123/p/9747963.html

 #  以voc数据集为例，按照imdb的命名，利用pascal_voc()函数生成不同的imdb

'''

for year in ['2007', '2012']:

  for split in ['train', 'val', 'trainval', 'test']:

    name = 'voc_{}_{}'.format(year, split)  #year='2007', split='trainval'

    __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))

def get_imdb(name):

  """Get an imdb (image database) by name."""

  if name not in __sets:

    raise KeyError('Unknown dataset: {}'.format(name))

  return __sets[name]()

'''

# self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)  #数据库路径

# self._classes = ('__background__',  # always index 0， 训练类别标签，包含背景类

#                   'person')

#  # Default to roidb handler

# self._roidb_handler = self.gt_roidb #感兴趣区域（ROI）数据库

# self._salt = str(uuid.uuid4()) #？？

# self._comp_id = 'comp4' # ？？

def _build_network(self, is_training=True):
    """Assemble the full detection graph: backbone, RPN, RoI pooling, head.

    The stages are wired in this order: ``_image_to_head`` ->
    ``_anchor_component`` -> ``_region_proposal`` -> ``_crop_pool_layer`` ->
    ``_head_to_tail`` -> ``_region_classification``. Once the graph is built,
    everything stored in ``self._predictions`` is merged into
    ``self._score_summaries``.

    Args:
        is_training: forwarded unchanged to each sub-network builder.

    Returns:
        The tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: if ``cfg.POOLING_MODE`` is anything but 'crop'.
    """
    # Pick the initializer family once; the two initializers differ only in
    # stddev (0.01 for ordinary weights, 0.001 for the box-regression layer).
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    # Backbone: first-pass feature extraction.
    net_conv = self._image_to_head(is_training)

    with tf.variable_scope(self._scope, self._scope):
        # Build the anchors for the image.
        self._anchor_component()

        # Region proposal network: produces the proposal boxes.
        # Author's note: this call is the core of the whole pipeline; it is
        # analysed in _region_proposal.
        # NOTE(review): _crop_pool_layer normalises these boxes by
        # (feature size - 1) * feat_stride, which suggests they are expressed
        # at input-image scale rather than in feature-map cells.
        rois = self._region_proposal(net_conv, is_training, initializer)

        # Region-of-interest pooling: bring every proposal to one fixed size.
        # 'crop' is the only pooling mode implemented.
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    fc7 = self._head_to_tail(pool5, is_training)

    with tf.variable_scope(self._scope, self._scope):
        # Region classification: the head classifies each pooled region and
        # regresses its box; cls_prob and bbox_pred are the final predictions.
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred

'''

下面是上面说的核心代码的分析

'''

def _region_proposal(self, net_conv, is_training, initializer):
    """Build the region proposal network (RPN) on top of ``net_conv``.

    A 3x3 convolution feeds two parallel 1x1 convolutions: one with
    ``self._num_anchors * 2`` output channels (scores) and one with
    ``self._num_anchors * 4`` output channels (box regression). The scores
    and box predictions are then handed to the proposal layers to obtain the
    RoIs. The intermediate tensors and the RoIs are recorded in
    ``self._predictions``.

    Args:
        net_conv: feature map produced by the backbone.
        is_training: selects the training or inference branch and is passed
            as ``trainable`` to the convolution layers.
        initializer: weights initializer used by all three convolutions.

    Returns:
        The ``rois`` tensor.

    Raises:
        NotImplementedError: when not training and ``cfg.TEST.MODE`` is
            neither 'nms' nor 'top'.
    """
    # One 3x3 convolution, after which the network splits into two branches.
    rpn_features = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3],
                               trainable=is_training,
                               weights_initializer=initializer,
                               scope="rpn_conv/3x3")
    self._act_summaries.append(rpn_features)

    # Keyword arguments common to both 1x1 branch convolutions.
    branch_kwargs = dict(trainable=is_training,
                         weights_initializer=initializer,
                         padding='VALID',
                         activation_fn=None)

    # Branch 1: object-vs-background scores, two numbers per anchor.
    # Author's note: this is the most elegant line. Anchors are purely
    # conceptual here; nothing states which pair of output channels belongs
    # to which anchor, that association is learned end-to-end without manual
    # intervention. With a 1x1 kernel, every feature-map position gets its
    # own set of anchor scores (9 anchors in the author's setup).
    rpn_cls_score = slim.conv2d(rpn_features, self._num_anchors * 2, [1, 1],
                                scope='rpn_cls_score', **branch_kwargs)

    # Author's note: rpn_cls_score (presumably (1, height, width, 18) with
    # 9 anchors; TODO confirm) scores the anchor boxes only, not the object
    # classes, so the NMS that follows ranks boxes purely on
    # background-vs-foreground.

    # Change it so that the score has 2 as its channel size.
    rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
    rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
    rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name="rpn_cls_pred")
    rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")

    # Branch 2: box-coordinate regression, four numbers per anchor.
    rpn_bbox_pred = slim.conv2d(rpn_features, self._num_anchors * 4, [1, 1],
                                scope='rpn_bbox_pred', **branch_kwargs)

    if is_training:
        # Author's note: the proposal layer filters the proposals using the
        # predicted scores and boxes and runs NMS on the top N, keeping boxes
        # with a high non-background score and dropping likely background.
        proposals, proposal_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for
        # reproducibility.
        with tf.control_dependencies([rpn_labels]):
            rois, _ = self._proposal_target_layer(proposals, proposal_scores, "rpn_rois")
    elif cfg.TEST.MODE == 'nms':
        rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
    elif cfg.TEST.MODE == 'top':
        rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
    else:
        raise NotImplementedError

    # Record the tensors built above in self._predictions.
    for key, tensor in (
            ("rpn_cls_score", rpn_cls_score),
            ("rpn_cls_score_reshape", rpn_cls_score_reshape),
            ("rpn_cls_prob", rpn_cls_prob),
            ("rpn_cls_pred", rpn_cls_pred),
            ("rpn_bbox_pred", rpn_bbox_pred),
            ("rois", rois),
    ):
        self._predictions[key] = tensor

    return rois

def _crop_pool_layer(self, bottom, rois, name):
    """Crop every RoI out of ``bottom``, resize it, then 2x2 max-pool it.

    Serves as the RoI-pooling step: each box in ``rois`` is cut out of the
    feature map with ``tf.image.crop_and_resize`` at twice
    ``cfg.POOLING_SIZE`` per side, and the crops are then max-pooled with a
    2x2 window.

    Args:
        bottom: feature map to crop from (the convolutional output).
        rois: 2-D tensor whose column 0 is used as the batch index and whose
            columns 1-4 are read as x1, y1, x2, y2. The coordinates are
            divided by ``(feature size - 1) * self._feat_stride[0]``, which
            suggests input-image scale rather than feature-map cells (TODO
            confirm against the proposal layer).
        name: variable scope under which the cropping ops are created.

    Returns:
        The max-pooled crops.
    """
    with tf.variable_scope(name):
        batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])

        # Get the normalized coordinates of bounding boxes: the extent along
        # each axis is (feature size - 1) * stride. The same stride entry is
        # used for both axes.
        feature_dims = tf.shape(bottom)
        stride = np.float32(self._feat_stride[0])
        height = (tf.to_float(feature_dims[1]) - 1.) * stride
        width = (tf.to_float(feature_dims[2]) - 1.) * stride

        def _relative(column, label, extent):
            # One RoI coordinate column, expressed as a fraction of `extent`.
            return tf.slice(rois, [0, column], [-1, 1], name=label) / extent

        x1 = _relative(1, "x1", width)
        y1 = _relative(2, "y1", height)
        x2 = _relative(3, "x2", width)
        y2 = _relative(4, "y2", height)

        # The boxes are passed as four relative positions in
        # (y1, x1, y2, x2) order. Gradients won't be back-propagated to rois
        # anyway, but stopping them here saves time.
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))

        # Crop at twice the target size; the max-pool below halves it again.
        crop_size = [cfg.POOLING_SIZE * 2] * 2
        box_indices = tf.to_int32(batch_ids)
        crops = tf.image.crop_and_resize(bottom, bboxes, box_indices, crop_size, name="crops")

    return slim.max_pool2d(crops, [2, 2], padding='SAME')

import tensorflow as tf

# help(tf.image.crop_and_resize)

def _region_classification(self, fc7, is_training, initializer, initializer_bbox):
    """Attach the classification and box-regression output layers to ``fc7``.

    Two fully connected layers without activation are built on ``fc7``:
    'cls_score' with ``self._num_classes`` outputs and 'bbox_pred' with
    ``self._num_classes * 4`` outputs. The class scores are additionally
    passed through ``self._softmax_layer`` and ``tf.argmax``. All four
    tensors are recorded in ``self._predictions``.

    Args:
        fc7: input features for both output layers.
        is_training: passed as ``trainable`` to both layers.
        initializer: weights initializer for the 'cls_score' layer.
        initializer_bbox: weights initializer for the 'bbox_pred' layer.

    Returns:
        The tuple ``(cls_prob, bbox_pred)``.
    """
    # Settings shared by the two output layers.
    layer_kwargs = dict(trainable=is_training, activation_fn=None)

    cls_score = slim.fully_connected(fc7, self._num_classes,
                                     weights_initializer=initializer,
                                     scope='cls_score', **layer_kwargs)
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    cls_pred = tf.argmax(cls_score, axis=1, name="cls_pred")

    bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                     weights_initializer=initializer_bbox,
                                     scope='bbox_pred', **layer_kwargs)

    # Record the outputs in self._predictions.
    for key, tensor in (
            ("cls_score", cls_score),
            ("cls_pred", cls_pred),
            ("cls_prob", cls_prob),
            ("bbox_pred", bbox_pred),
    ):
        self._predictions[key] = tensor

    return cls_prob, bbox_pred

faster-rcnn 笔记的更多相关文章

Faster RCNN 学习笔记
下面的介绍都是基于VGG16 的Faster RCNN网络,各网络的差异在于Conv layers层提取特征时有细微差异,至于后续的RPN层.Pooling层及全连接的分类和目标定位基本相同. 一). ...
faster rcnn源码阅读笔记1
自己保存的源码阅读笔记哈 faster rcnn 的主要识别过程(粗略) (开始填坑了): 一张3通道,1600*1600图像输入中,经过特征提取网络,得到100*100*512的feature ma ...
论文笔记：目标检测算法（R-CNN，Fast R-CNN，Faster R-CNN，FPN，YOLOv1-v3）
R-CNN(Region-based CNN) motivation:之前的视觉任务大多数考虑使用SIFT和HOG特征,而近年来CNN和ImageNet的出现使得图像分类问题取得重大突破,那么这方面的 ...
论文阅读笔记二十七：Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks（CVPR 2016）
论文源址:https://arxiv.org/abs/1506.01497 tensorflow代码:https://github.com/endernewton/tf-faster-rcnn 室友对 ...
深度学习笔记之目标检测算法系列（包括RCNN、Fast RCNN、Faster RCNN和SSD）
不多说,直接上干货! 本文一系列目标检测算法:RCNN, Fast RCNN, Faster RCNN代表当下目标检测的前沿水平,在github都给出了基于Caffe的源码. • RCNN RCN ...
Faster RCNN学习笔记
感谢知乎大神的分享 https://zhuanlan.zhihu.com/p/31426458 Ross B. Girshick在2016年提出了新的Faster RCNN,在结构上,Faster R ...
目标检测（四）Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
作者:Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun SPPnet.Fast R-CNN等目标检测算法已经大幅降低了目标检测网络的运行时间. ...
Faster RCNN代码理解（Python）
转自http://www.infocool.net/kb/Python/201611/209696.html#原文地址第一步,准备从train_faster_rcnn_alt_opt.py入: 初 ...
Faster RCNN 学习与实现
论文论文翻译 Faster R-CNN 主要分为两个部分: RPN(Region Proposal Network)生成高质量的 region proposal: Fast R-CNN 利用 reg ...
faster rcnn算法及源码及论文解析相关博客
1. 通过代码理解faster-RCNN中的RPN http://blog.csdn.net/happyflyy/article/details/54917514 2. faster rcnn详解 R ...

随机推荐

WebService连接winfrom简单实例
C# 创建.部署和调用WebService的简单示例 webservice 可以用于分布式应用程序之间的交互,和不同程序之间的交互. 具体详细用法可去查询资料.下面开始创建一个简单的webservic ...
转载：深入浅出Zookeeper
ZAB协议 ZAB协议是专门为zookeeper实现分布式协调功能而设计.zookeeper主要是根据ZAB协议是实现分布式系统数据一致性. zookeeper根据ZAB协议建立了主备模型完成zook ...
Could not read document: Can not deserialize instance of java.lang.String out of START_ARRAY
线上问题: { "timestamp": "1544510665", "status": 400, "error": & ...
1024程序员节宅男节日快乐 -- JAVA快速开发平台，JEECG 3.8宅男优化版本发布
JEECG 3.8 版本发布,系统全面升级,重构上传组件.优化代码生成器机制! 导读 ⊙平台性能优化,系统更稳定,速度闪电般提升 ⊙系统上传组件全面重构,使用plupload组件,解决flash的 ...
1.Sed | Awk | Grep | Find
1.Sed | Awk | Grep | Find 可以参考的文档链接 CentOS7 查看当前机器已经启动的端口的Shell命令: netstat -lntup | awk -F' ' {'pr ...
CentOS编译安装软件过程中遇到zlib.h: No such file or directory
使用命令:yum install zlib-devel 解决问题.
java获取当前日期所在的周的周一，并以周一为一周开始
public String getMonday(String date) { if (date == null || date.equals("")) { System.out.p ...
OGRE Tutorials 1
[Guide to building OGRE] 1.Preparing the build environment You should now create a build directory f ...
HTML基本入门完成
四. (一)丶下拉框select元素:一般可以创建单选或多选菜单.<select>与<option>一般同时使用,select代表下拉框,option代表他的每一项. 1.基本 ...
find 递归/不递归查找子目录的方法
1.递归查找(find 命令是递归遍历文件夹的) 命令:find . -name “*.txt” //当前路径下递归查找以.txt结尾的文件夹 2.不递归查找 find . -name “*.txt ...

faster-rcnn 笔记

faster-rcnn 笔记的更多相关文章

随机推荐

热门专题