Faster R-CNN for recognition
Faster R-CNN consists of four main parts:
1. Convolutional layers: feature extraction; backbones such as VGG or ResNet can be used.
2. Region Proposal Network (RPN): generates region proposals. A softmax classifies each anchor as background or object, and bounding-box regression refines the anchor positions (see the sketch after this list).
3. RoI pooling: extracts each proposal's feature map at a fixed size, which serves as input to the fully connected layers for classification.
4. Classification: classifies the pooled feature maps, with a second bounding-box regression producing the final precise locations.
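To make the bounding-box regression in steps 2 and 4 concrete, here is a minimal sketch of the standard box parameterization from the Faster R-CNN paper. Box, EncodeBox, and DecodeBox are illustrative names, not identifiers from the op code below.
// Encode a ground-truth box as regression targets relative to an anchor,
// and decode predicted deltas back into a box. Boxes are (cx, cy, w, h).
#include <cmath>
struct Box { float cx, cy, w, h; };
// tx = (x - xa) / wa, ty = (y - ya) / ha, tw = log(w / wa), th = log(h / ha)
Box EncodeBox(const Box& gt, const Box& anchor) {
  Box t;
  t.cx = (gt.cx - anchor.cx) / anchor.w;
  t.cy = (gt.cy - anchor.cy) / anchor.h;
  t.w = std::log(gt.w / anchor.w);
  t.h = std::log(gt.h / anchor.h);
  return t;
}
// Apply predicted deltas t to an anchor to recover the refined box.
Box DecodeBox(const Box& t, const Box& anchor) {
  Box out;
  out.cx = t.cx * anchor.w + anchor.cx;
  out.cy = t.cy * anchor.h + anchor.cy;
  out.w = std::exp(t.w) * anchor.w;
  out.h = std::exp(t.h) * anchor.h;
  return out;
}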
Region Proposal Network:
Traditional pipelines generate candidate regions with sliding windows or selective search, which is expensive; in Faster R-CNN the proposals are generated by the network itself (the RPN).
The rois tensor has shape (num_rois, 5): 4 values locate the box, and 1 value indicates which image in the batch the roi belongs to.
The feature-map input has shape (N, H/16, W/16, channels), where N is the batch size and (W, H) is the original image size; VGG's four downsampling layers divide each spatial dimension by 16.
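As a quick worked example of the spatial_scale mapping that the op below performs, take spatial_scale = 1/16 and a hypothetical roi row (0, 32, 48, 191, 271) pooled to a 7x7 grid; the roi values and pooled size here are illustrative assumptions.
// Map one roi row (batch_ind, x1, y1, x2, y2) from image coordinates onto
// the feature map, using the same arithmetic as the Compute() below.
#include <algorithm>
#include <cmath>
#include <cstdio>
int main() {
  const float spatial_scale = 1.0f / 16.0f;  // VGG16: four 2x downsamplings
  const int pooled_h = 7, pooled_w = 7;
  const float roi[5] = {0, 32, 48, 191, 271};
  int x1 = std::round(roi[1] * spatial_scale);  // 2
  int y1 = std::round(roi[2] * spatial_scale);  // 3
  int x2 = std::round(roi[3] * spatial_scale);  // 12
  int y2 = std::round(roi[4] * spatial_scale);  // 17
  int roi_w = std::max(x2 - x1 + 1, 1);         // 11 feature-map columns
  int roi_h = std::max(y2 - y1 + 1, 1);         // 15 feature-map rows
  // Each pooled output bin covers roughly (roi_h / pooled_h) x (roi_w / pooled_w) cells.
  printf("bin size: %.2f x %.2f\n", float(roi_h) / pooled_h, float(roi_w) / pooled_w);
  return 0;
}
The full RoI pooling forward pass, implemented as a TensorFlow CPU kernel, follows: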
void Compute(OpKernelContext* context) override
{
// Grab the input tensors
// The op takes two inputs: the feature map and the rois
const Tensor& bottom_data = context->input(0);
const Tensor& bottom_rois = context->input(1);
auto bottom_data_flat = bottom_data.flat<T>();
auto bottom_rois_flat = bottom_rois.flat<T>();
// data should have 4 dimensions: batch, height, width, channels
OP_REQUIRES(context, bottom_data.dims() == 4,
errors::InvalidArgument("data must be 4-dimensional"));
// rois should have 2 dimensions: (num_rois, 5)
OP_REQUIRES(context, bottom_rois.dims() == 2,
errors::InvalidArgument("rois must be 2-dimensional"));
// Number of ROIs
int num_rois = bottom_rois.dim_size(0);
// batch size
int batch_size = bottom_data.dim_size(0);
// data height
int data_height = bottom_data.dim_size(1);
// data width
int data_width = bottom_data.dim_size(2);
// Number of channels
int num_channels = bottom_data.dim_size(3);
// Construct the output shape: 4-D, (num_rois, pooled_height, pooled_width,
// channels); the pooled size is passed in through the op attributes
int dims[4];
dims[0] = num_rois;
dims[1] = pooled_height_;
dims[2] = pooled_width_;
dims[3] = num_channels;
TensorShape output_shape;
TensorShapeUtils::MakeShape(dims, 4, &output_shape);
// Create output tensors
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor));
auto output = output_tensor->template flat<T>();
Tensor* argmax_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(1, output_shape, &argmax_tensor));
auto argmax = argmax_tensor->template flat<int>();
int pooled_height = pooled_height_;
int pooled_width = pooled_width_;
// Spatial scale that maps rois from image coordinates to the feature map
float spatial_scale = spatial_scale_;
auto shard = [pooled_height, pooled_width, spatial_scale,
num_rois, batch_size, data_height, data_width, num_channels,
&bottom_data_flat, &bottom_rois_flat, &output, &argmax]
(int64 start, int64 limit) {
for (int64 b = start; b < limit; ++b)
{
// (n, ph, pw, c) is an element in the pooled output
int n = b;
int c = n % num_channels;
n /= num_channels;
int pw = n % pooled_width;
n /= pooled_width;
int ph = n % pooled_height;
n /= pooled_height;
// Map the roi coordinates onto the feature map via spatial_scale
const float* bottom_rois = bottom_rois_flat.data() + n * 5;
int roi_batch_ind = bottom_rois[0];
int roi_start_w = round(bottom_rois[1] * spatial_scale);
int roi_start_h = round(bottom_rois[2] * spatial_scale);
int roi_end_w = round(bottom_rois[3] * spatial_scale);
int roi_end_h = round(bottom_rois[4] * spatial_scale);
// Force malformed ROIs to be 1x1
int roi_width = std::max(roi_end_w - roi_start_w + 1, 1);
int roi_height = std::max(roi_end_h - roi_start_h + 1, 1);
const T bin_size_h = static_cast<T>(roi_height)
/ static_cast<T>(pooled_height);
const T bin_size_w = static_cast<T>(roi_width)
/ static_cast<T>(pooled_width);
int hstart = static_cast<int>(floor(ph * bin_size_h));
int wstart = static_cast<int>(floor(pw * bin_size_w));
int hend = static_cast<int>(ceil((ph + 1) * bin_size_h));
int wend = static_cast<int>(ceil((pw + 1) * bin_size_w));
// Add roi offsets and clip to input boundaries
hstart = std::min(std::max(hstart + roi_start_h, 0), data_height);
hend = std::min(std::max(hend + roi_start_h, 0), data_height);
wstart = std::min(std::max(wstart + roi_start_w, 0), data_width);
wend = std::min(std::max(wend + roi_start_w, 0), data_width);
bool is_empty = (hend <= hstart) || (wend <= wstart);
// Define an empty pooling region to be zero
float maxval = is_empty ? 0 : -FLT_MAX;
// If nothing is pooled, argmax = -1 causes nothing to be backprop'd
int maxidx = -1;
const float* bottom_data = bottom_data_flat.data() + roi_batch_ind * num_channels * data_height * data_width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
int bottom_index = (h * data_width + w) * num_channels + c;
if (bottom_data[bottom_index] > maxval) {
maxval = bottom_data[bottom_index];
maxidx = bottom_index;
}
}
}
output(b) = maxval;
argmax(b) = maxidx;
}
};
const DeviceBase::CpuWorkerThreads& worker_threads =
*(context->device()->tensorflow_cpu_worker_threads());
const int64 shard_cost =
num_rois * num_channels * pooled_height * pooled_width * spatial_scale;
Shard(worker_threads.num_threads, worker_threads.workers,
output.size(), shard_cost, shard);
}
private:
int pooled_height_;
int pooled_width_;
float spatial_scale_;
};
bool ROIPoolForwardLaucher(
const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
const int width, const int channels, const int pooled_height,
const int pooled_width, const float* bottom_rois,
float* top_data, int* argmax_data, const Eigen::GpuDevice& d);
static void RoiPoolingKernel(
OpKernelContext* context, const Tensor* bottom_data, const Tensor* bottom_rois,
const float spatial_scale, const int num_rois, const int height,
const int width, const int channels, const int pooled_height,
const int pooled_width, const TensorShape& tensor_output_shape)
{
Tensor* output = nullptr;
Tensor* argmax = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, tensor_output_shape, &output));
OP_REQUIRES_OK(context, context->allocate_output(1, tensor_output_shape, &argmax));
if (!context->status().ok()) {
return;
}
ROIPoolForwardLaucher(
bottom_data->flat<float>().data(), spatial_scale, num_rois, height,
width, channels, pooled_height, pooled_width, bottom_rois->flat<float>().data(),
output->flat<float>().data(), argmax->flat<int>().data(), context->eigen_device<Eigen::GpuDevice>());
}
template <class T>
class RoiPoolOp<Eigen::GpuDevice, T> : public OpKernel {
public:
typedef Eigen::GpuDevice Device;
explicit RoiPoolOp(OpKernelConstruction* context) : OpKernel(context) {
// Get the pool height
OP_REQUIRES_OK(context,
context->GetAttr("pooled_height", &pooled_height_));
// Check that pooled_height is positive
OP_REQUIRES(context, pooled_height_ >= 0,
errors::InvalidArgument("Need pooled_height >= 0, got ",
pooled_height_));
// Get the pool width
OP_REQUIRES_OK(context,
context->GetAttr("pooled_width", &pooled_width_));
// Check that pooled_width is positive
OP_REQUIRES(context, pooled_width_ >= 0,
errors::InvalidArgument("Need pooled_width >= 0, got ",
pooled_width_));
// Get the spatial scale
OP_REQUIRES_OK(context,
context->GetAttr("spatial_scale", &spatial_scale_));
}
void Compute(OpKernelContext* context) override
{
// Grab the input tensor
const Tensor& bottom_data = context->input(0);
const Tensor& bottom_rois = context->input(1);
// data should have 4 dimensions.
OP_REQUIRES(context, bottom_data.dims() == 4,
errors::InvalidArgument("data must be 4-dimensional"));
// rois should have 2 dimensions.
OP_REQUIRES(context, bottom_rois.dims() == 2,
errors::InvalidArgument("rois must be 2-dimensional"));
// Number of ROIs
int num_rois = bottom_rois.dim_size(0);
// batch size
int batch_size = bottom_data.dim_size(0);
// data height
int data_height = bottom_data.dim_size(1);
// data width
int data_width = bottom_data.dim_size(2);
// Number of channels
int num_channels = bottom_data.dim_size(3);
// construct the output shape
int dims[4];
dims[0] = num_rois;
dims[1] = pooled_height_;
dims[2] = pooled_width_;
dims[3] = num_channels;
TensorShape output_shape;
TensorShapeUtils::MakeShape(dims, 4, &output_shape);
RoiPoolingKernel(context, &bottom_data, &bottom_rois, spatial_scale_, num_rois, data_height,
data_width, num_channels, pooled_height_, pooled_width_, output_shape);
}
private:
int pooled_height_;
int pooled_width_;
float spatial_scale_;
};
// compute gradient
template <class Device, class T>
class RoiPoolGradOp : public OpKernel {
public:
explicit RoiPoolGradOp(OpKernelConstruction* context) : OpKernel(context) {
// Get the pool height
OP_REQUIRES_OK(context,
context->GetAttr("pooled_height", &pooled_height_));
// Check that pooled_height is positive
OP_REQUIRES(context, pooled_height_ >= 0,
errors::InvalidArgument("Need pooled_height >= 0, got ",
pooled_height_));
// Get the pool width
OP_REQUIRES_OK(context,
context->GetAttr("pooled_width", &pooled_width_));
// Check that pooled_width is positive
OP_REQUIRES(context, pooled_width_ >= 0,
errors::InvalidArgument("Need pooled_width >= 0, got ",
pooled_width_));
// Get the spatial scale
OP_REQUIRES_OK(context,
context->GetAttr("spatial_scale", &spatial_scale_));
}
void Compute(OpKernelContext* context) override
{
// Grab the input tensor
const Tensor& bottom_data = context->input(0);
const Tensor& bottom_rois = context->input(1);
const Tensor& argmax_data = context->input(2);
const Tensor& out_backprop = context->input(3);
auto bottom_data_flat = bottom_data.flat<T>();
auto bottom_rois_flat = bottom_rois.flat<T>();
auto argmax_data_flat = argmax_data.flat<int32>();
auto out_backprop_flat = out_backprop.flat<T>();
// data should have 4 dimensions.
OP_REQUIRES(context, bottom_data.dims() == 4,
errors::InvalidArgument("data must be 4-dimensional"));
// rois should have 2 dimensions.
OP_REQUIRES(context, bottom_rois.dims() == 2,
errors::InvalidArgument("rois must be 2-dimensional"));
OP_REQUIRES(context, argmax_data.dims() == 4,
errors::InvalidArgument("argmax_data must be 4-dimensional"));
OP_REQUIRES(context, out_backprop.dims() == 4,
errors::InvalidArgument("out_backprop must be 4-dimensional"));
// Number of ROIs
int num_rois = bottom_rois.dim_size(0);
// batch size
int batch_size = bottom_data.dim_size(0);
// data height
int data_height = bottom_data.dim_size(1);
// data width
int data_width = bottom_data.dim_size(2);
// Number of channels
int num_channels = bottom_data.dim_size(3);
// construct the output shape
TensorShape output_shape = bottom_data.shape();
// Create output tensors
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor));
auto output = output_tensor->template flat<T>();
int pooled_height = pooled_height_;
int pooled_width = pooled_width_;
float spatial_scale = spatial_scale_;
auto shard = [pooled_height, pooled_width, spatial_scale,
num_rois, batch_size, data_height, data_width, num_channels,
&bottom_data_flat, &bottom_rois_flat, &argmax_data_flat,
&out_backprop_flat, &output](int64 start, int64 limit) {
for (int64 b = start; b < limit; ++b)
{
// (n, h, w, c) coords in bottom data
int n = b;
int c = n % num_channels;
n /= num_channels;
int w = n % data_width;
n /= data_width;
int h = n % data_height;
n /= data_height;
float gradient = 0.0;
// Accumulate gradient over all ROIs that pooled this element
for (int roi_n = 0; roi_n < num_rois; ++roi_n)
{
const float* offset_bottom_rois = bottom_rois_flat.data() + roi_n * 5;
int roi_batch_ind = offset_bottom_rois[0];
// Skip if ROI's batch index doesn't match n
if (n != roi_batch_ind) {
continue;
}
int roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
int roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
int roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
int roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
// Skip if ROI doesn't include (h, w)
const bool in_roi = (w >= roi_start_w && w <= roi_end_w &&
h >= roi_start_h && h <= roi_end_h);
if (!in_roi) {
continue;
}
int offset = roi_n * pooled_height * pooled_width * num_channels;
const float* offset_top_diff = out_backprop_flat.data() + offset;
const int* offset_argmax_data = argmax_data_flat.data() + offset;
// Compute the feasible set of pooled units that could have pooled
// this bottom unit
// Force malformed ROIs to be 1x1
int roi_width = std::max(roi_end_w - roi_start_w + 1, 1);
int roi_height = std::max(roi_end_h - roi_start_h + 1, 1);
const T bin_size_h = static_cast<T>(roi_height)
/ static_cast<T>(pooled_height);
const T bin_size_w = static_cast<T>(roi_width)
/ static_cast<T>(pooled_width);
int phstart = floor(static_cast<int>(h - roi_start_h) / bin_size_h);
int phend = ceil(static_cast<int>(h - roi_start_h + 1) / bin_size_h);
int pwstart = floor(static_cast<int>(w - roi_start_w) / bin_size_w);
int pwend = ceil(static_cast<int>(w - roi_start_w + 1) / bin_size_w);
phstart = std::min(std::max(phstart, 0), pooled_height);
phend = std::min(std::max(phend, 0), pooled_height);
pwstart = std::min(std::max(pwstart, 0), pooled_width);
pwend = std::min(std::max(pwend, 0), pooled_width);
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
if (offset_argmax_data[(ph * pooled_width + pw) * num_channels + c] == (h * data_width + w) * num_channels + c)
{
gradient += offset_top_diff[(ph * pooled_width + pw) * num_channels + c];
}
}
}
}
output(b) = gradient;
}
};
const DeviceBase::CpuWorkerThreads& worker_threads =
*(context->device()->tensorflow_cpu_worker_threads());
const int64 shard_cost =
num_rois * num_channels * pooled_height * pooled_width * spatial_scale;
Shard(worker_threads.num_threads, worker_threads.workers,
output.size(), shard_cost, shard);
}
private:
int pooled_height_;
int pooled_width_;
float spatial_scale_;
};
bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
const int height, const int width, const int channels, const int pooled_height,
const int pooled_width, const float* bottom_rois,
float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d);
static void RoiPoolingGradKernel(
OpKernelContext* context, const Tensor* bottom_data, const Tensor* bottom_rois, const Tensor* argmax_data, const Tensor* out_backprop,
const float spatial_scale, const int batch_size, const int num_rois, const int height,
const int width, const int channels, const int pooled_height,
const int pooled_width, const TensorShape& tensor_output_shape)
{
Tensor* output = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, tensor_output_shape, &output));
if (!context->status().ok()) {
return;
}
ROIPoolBackwardLaucher(
out_backprop->flat<float>().data(), spatial_scale, batch_size, num_rois, height,
width, channels, pooled_height, pooled_width, bottom_rois->flat<float>().data(),
output->flat<float>().data(), argmax_data->flat<int>().data(), context->eigen_device<Eigen::GpuDevice>());
}
template <class T>
class RoiPoolGradOp<Eigen::GpuDevice, T> : public OpKernel {
public:
explicit RoiPoolGradOp(OpKernelConstruction* context) : OpKernel(context) {
// Get the pool height
OP_REQUIRES_OK(context,
context->GetAttr("pooled_height", &pooled_height_));
// Check that pooled_height is positive
OP_REQUIRES(context, pooled_height_ >= 0,
errors::InvalidArgument("Need pooled_height >= 0, got ",
pooled_height_));
// Get the pool width
OP_REQUIRES_OK(context,
context->GetAttr("pooled_width", &pooled_width_));
// Check that pooled_width is positive
OP_REQUIRES(context, pooled_width_ >= 0,
errors::InvalidArgument("Need pooled_width >= 0, got ",
pooled_width_));
// Get the spatial scale
OP_REQUIRES_OK(context,
context->GetAttr("spatial_scale", &spatial_scale_));
}
void Compute(OpKernelContext* context) override
{
// Grab the input tensor
const Tensor& bottom_data = context->input(0);
const Tensor& bottom_rois = context->input(1);
const Tensor& argmax_data = context->input(2);
const Tensor& out_backprop = context->input(3);
// data should have 4 dimensions.
OP_REQUIRES(context, bottom_data.dims() == 4,
errors::InvalidArgument("data must be 4-dimensional")); // rois should have 2 dimensions.
OP_REQUIRES(context, bottom_rois.dims() == 2,
errors::InvalidArgument("rois must be 2-dimensional")); OP_REQUIRES(context, argmax_data.dims() == 4,
errors::InvalidArgument("argmax_data must be 4-dimensional")); OP_REQUIRES(context, out_backprop.dims() == 4,
errors::InvalidArgument("out_backprop must be 4-dimensional")); // Number of ROIs
int num_rois = bottom_rois.dim_size(0);
// batch size
int batch_size = bottom_data.dim_size(0);
// data height
int height = bottom_data.dim_size(1);
// data width
int width = bottom_data.dim_size(2);
// Number of channels
int channels = bottom_data.dim_size(3);
// construct the output shape
TensorShape output_shape = bottom_data.shape();
RoiPoolingGradKernel(
context, &bottom_data, &bottom_rois, &argmax_data, &out_backprop,
spatial_scale_, batch_size, num_rois, height, width, channels, pooled_height_,
pooled_width_, output_shape);
}
private:
int pooled_height_;
int pooled_width_;
float spatial_scale_;
}; REGISTER_KERNEL_BUILDER(Name("RoiPool").Device(DEVICE_CPU).TypeConstraint<float>("T"), RoiPoolOp<CPUDevice, float>);
REGISTER_KERNEL_BUILDER(Name("RoiPoolGrad").Device(DEVICE_CPU).TypeConstraint<float>("T"), RoiPoolGradOp<CPUDevice, float>);
#if GOOGLE_CUDA
REGISTER_KERNEL_BUILDER(Name("RoiPool").Device(DEVICE_GPU).TypeConstraint<float>("T"), RoiPoolOp<Eigen::GpuDevice, float>);
REGISTER_KERNEL_BUILDER(Name("RoiPoolGrad").Device(DEVICE_GPU).TypeConstraint<float>("T"), RoiPoolGradOp<Eigen::GpuDevice, float>);
#endif
Logo recognition:
When adapting Faster R-CNN to logo detection, choose the anchor ratios to match the typical aspect ratios of logos, and use relatively small anchor scales, since logos usually occupy a small part of the image.
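A minimal sketch of generating anchors with logo-oriented ratios and smaller scales is below; the concrete ratio and scale values are illustrative assumptions, not tuned numbers from this post.
// Generate anchor boxes centered at one feature-map cell. For a ratio
// r = h / w, width and height are chosen so that h / w = r while the
// area stays (base_size * scale)^2.
#include <cmath>
#include <vector>
struct Anchor { float x1, y1, x2, y2; };
std::vector<Anchor> MakeAnchors(float cx, float cy, float base_size) {
  const float ratios[] = {0.5f, 1.0f, 2.0f};  // wide, square, and tall logos
  const float scales[] = {4.0f, 8.0f};        // smaller than the usual {8, 16, 32}
  std::vector<Anchor> anchors;
  for (float r : ratios) {
    for (float s : scales) {
      float w = base_size * s / std::sqrt(r);
      float h = base_size * s * std::sqrt(r);
      anchors.push_back({cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2});
    }
  }
  return anchors;
}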