这段代码包括由输入图片随机生成相应的RoIs,并生成相应的blobs,由roidb得到相应的

minibatch。其代码如下。

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# -------------------------------------------------------- """Compute minibatch blobs for training a Fast R-CNN network.""" import numpy as np
import numpy.random as npr
import cv2
from fast_rcnn.config import cfg
from utils.blob import prep_im_for_blob, im_list_to_blob def get_minibatch(roidb, num_classes):
"""Given a roidb, construct a minibatch sampled from it."""
num_images = len(roidb)
# Sample random scales to use for each image in this batch
random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
size=num_images)#随机索引组成的numpy,大小是roidb的长度
assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
'num_images ({}) must divide BATCH_SIZE ({})'. \
format(num_images, cfg.TRAIN.BATCH_SIZE)
rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images #每张图的rois
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) #目标rois # Get the input image blob, formatted for caffe
im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) blobs = {'data': im_blob} if cfg.TRAIN.HAS_RPN: #每个blobs包含图片中相应的box、gt_box信息
assert len(im_scales) == 1, "Single batch only"
assert len(roidb) == 1, "Single batch only"
# gt boxes: (x1, y1, x2, y2, cls)
gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
blobs['gt_boxes'] = gt_boxes
blobs['im_info'] = np.array(
[[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
dtype=np.float32)
else: # not using RPN
# Now, build the region of interest and label blobs
rois_blob = np.zeros((0, 5), dtype=np.float32)
labels_blob = np.zeros((0), dtype=np.float32)
bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
# all_overlaps = []
for im_i in xrange(num_images):
labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
= _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
num_classes) # Add to RoIs blob
rois = _project_im_rois(im_rois, im_scales[im_i])
batch_ind = im_i * np.ones((rois.shape[0], 1))
rois_blob_this_image = np.hstack((batch_ind, rois))
rois_blob = np.vstack((rois_blob, rois_blob_this_image)) # Add to labels, bbox targets, and bbox loss blobs
labels_blob = np.hstack((labels_blob, labels))
bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets))
bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights))
# all_overlaps = np.hstack((all_overlaps, overlaps)) # For debug visualizations
# _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) blobs['rois'] = rois_blob
blobs['labels'] = labels_blob if cfg.TRAIN.BBOX_REG:
blobs['bbox_targets'] = bbox_targets_blob
blobs['bbox_inside_weights'] = bbox_inside_blob
blobs['bbox_outside_weights'] = \
np.array(bbox_inside_blob > 0).astype(np.float32) return blobs
#随机生成前景和背景的RoIs
def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
# label = class RoI has max overlap with
labels = roidb['max_classes']
overlaps = roidb['max_overlaps']
rois = roidb['boxes'] # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(
fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
(overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
bg_inds.size)
# Sample foreground regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(
bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
overlaps = overlaps[keep_inds]
rois = rois[keep_inds] bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
roidb['bbox_targets'][keep_inds, :], num_classes) return labels, overlaps, rois, bbox_targets, bbox_inside_weights
#由相应尺度的roidb生成相应的blob
def _get_image_blob(roidb, scale_inds):
"""Builds an input blob from the images in the roidb at the specified
scales.
"""
num_images = len(roidb)
processed_ims = []
im_scales = []
for i in xrange(num_images):
im = cv2.imread(roidb[i]['image'])
if roidb[i]['flipped']:
im = im[:, ::-1, :]
target_size = cfg.TRAIN.SCALES[scale_inds[i]]
im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
cfg.TRAIN.MAX_SIZE)prep_im_for_blob: util的blob.py中;用于将图片平均后缩放。#im_scales: 每张图片的缩放率
# cfg.PIXEL_MEANS: 原始图片会集体减去该值达到mean
im_scales.append(im_scale)
processed_ims.append(im) # Create a blob to hold the input images
blob = im_list_to_blob(processed_ims)#将以list形式存放的图片数据处理成(batch elem, channel, height, width)的im_blob形式,height,width用的是此次计算所有图片的最大值 return blob, im_scales#blob是一个字典,与name_to_top对应,方便把blob数据放进top def _project_im_rois(im_rois, im_scale_factor): #图片缩放时,相应的rois也进行缩放
"""Project image RoIs into the rescaled training image."""
rois = im_rois * im_scale_factor
return rois
#由roidb返回相应的box及inside_weights
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets are stored in a compact form in the
roidb. This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). The loss weights
are similarly expanded. Returns:
bbox_target_data (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
return bbox_targets, bbox_inside_weights def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
"""Visualize a mini-batch for debugging."""
import matplotlib.pyplot as plt
for i in xrange(rois_blob.shape[0]):
rois = rois_blob[i, :]
im_ind = rois[0]
roi = rois[1:]
im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
im += cfg.PIXEL_MEANS
im = im[:, :, (2, 1, 0)]
im = im.astype(np.uint8)
cls = labels_blob[i]
plt.imshow(im)
print 'class: ', cls, ' overlap: ', overlaps[i]
plt.gca().add_patch(
plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
roi[3] - roi[1], fill=False,
edgecolor='r', linewidth=3)
)
plt.show()

r-cnn学习(八):minibatch的更多相关文章

  1. Python Tutorial 学习(八)--Errors and Exceptions

    Python Tutorial 学习(八)--Errors and Exceptions恢复 Errors and Exceptions 错误与异常 此前,我们还没有开始着眼于错误信息.不过如果你是一 ...

  2. CNN学习笔记:批标准化

    CNN学习笔记:批标准化 Batch Normalization Batch Normalization, 批标准化, 是将分散的数据统一的一种做法, 也是优化神经网络的一种方法. 在神经网络的训练过 ...

  3. R基础学习

    R基础学习 The Art of R Programming 1.seq 产生等差数列:seq(from,to,by) seq(from,to,length) for(i in 1:length(x) ...

  4. 卷积神经网络(CNN)学习笔记1:基础入门

    卷积神经网络(CNN)学习笔记1:基础入门 Posted on 2016-03-01   |   In Machine Learning  |   9 Comments  |   14935  Vie ...

  5. SVG 学习<八> SVG的路径——path(2)贝塞尔曲线命令、光滑贝塞尔曲线命令

    目录 SVG 学习<一>基础图形及线段 SVG 学习<二>进阶 SVG世界,视野,视窗 stroke属性 svg分组 SVG 学习<三>渐变 SVG 学习<四 ...

  6. R语言学习 第四篇:函数和流程控制

    变量用于临时存储数据,而函数用于操作数据,实现代码的重复使用.在R中,函数只是另一种数据类型的变量,可以被分配,操作,甚至把函数作为参数传递给其他函数.分支控制和循环控制,和通用编程语言的风格很相似, ...

  7. CNN学习笔记:目标函数

    CNN学习笔记:目标函数 分类任务中的目标函数 目标函数,亦称损失函数或代价函数,是整个网络模型的指挥棒,通过样本的预测结果与真实标记产生的误差来反向传播指导网络参数学习和表示学习. 假设某分类任务共 ...

  8. CNN学习笔记:卷积神经网络

    CNN学习笔记:卷积神经网络 卷积神经网络 基本结构 卷积神经网络是一种层次模型,其输入是原始数据,如RGB图像.音频等.卷积神经网络通过卷积(convolution)操作.汇合(pooling)操作 ...

  9. CNN学习笔记:全连接层

    CNN学习笔记:全连接层 全连接层 全连接层在整个网络卷积神经网络中起到“分类器”的作用.如果说卷积层.池化层和激活函数等操作是将原始数据映射到隐层特征空间的话,全连接层则起到将学到的特征表示映射到样 ...

  10. CNN学习笔记:池化层

    CNN学习笔记:池化层 池化 池化(Pooling)是卷积神经网络中另一个重要的概念,它实际上是一种形式的降采样.有多种不同形式的非线性池化函数,而其中“最大池化(Max pooling)”是最为常见 ...

随机推荐

  1. Winform菜单和工具栏控件

    1.ContextMenuStrip--右键菜单 可以绑定在任何一个控件上,添加操作快捷键,并可以设置多层 每行相当于一个按钮,输入-可添加分割线 2.MenuStrip--菜单 优先级最高,一定会出 ...

  2. reveal

    链接 界面调试工具Reveal Reveal使用教程 iOS分析UI利器——Reveal及简单破解方法 Reveal使用步骤和 破解Revealapp的试用时间限制 end

  3. play with make

    1) the VPATH variable In make world, the VPATH variable guide you where to find the .c/.o file 2) th ...

  4. ajax实现下拉菜单无刷新加载更多

    $(function() { var page = 1; var discount = $('#discount'); var innerHeight = window.innerHeight; va ...

  5. 上海闪酷成为京东商城第一批独立软件开发商(ISV)

    闪酷信息技术(上海)有限公司一直致力于为品牌企业提供电子商务软件及其服务,为其拓展电商渠道保驾护航.上海闪酷依据多年的行业经验和技术积累,与中国 最大的B2C商城达成战略合作,为其2万多家品牌供应商提 ...

  6. Javascript最简单的把html字符串编码的方法

    html字符串是指’<div id=”a”>aklsdjfklsjdfl</div>’这样的带html特殊符号的字符串,我们通常要对他进行处理再输出以免输出成了真正的html元 ...

  7. .NET和JAVA同等加密方法,MD5和DES对称加密记录

    C#版: using System; using System.Security.Cryptography; using System.Text; namespace ConsoleApplicati ...

  8. .Net开源微型ORM框架测评

    什么是ORM? 对象关系映射(英语:Object Relation Mapping,简称ORM,或O/RM,或O/R mapping),是一种程序技术,用于实现面向对象编程语言里不同类型系统的数据之间 ...

  9. PlayMaker的应用

    PlayMaker属于一个可视化的状态机编辑工具,集成到了Unity的IDE里,在Unity的市场上很受欢迎,本人看见后第一感觉是跟CryEngine的那个状态机特别相似.CE的那个状态机编辑器其实是 ...

  10. sublime text3下BracketHighlighter的配置方法

    st3的配置方法和st2是有区别的,所以网上搜索到的方法大多不能用,我google之后总结了一下. 一. 1.在st3中按preferences-->package settings--> ...