[net]

batch=64                           每batch个样本更新一次参数。

subdivisions=8                     如果内存不够大，将batch分割为subdivisions个子batch，每个子batch的大小为batch/subdivisions。

                                   在darknet代码中，会将batch/subdivisions命名为batch。

height=416                         input图像的高

width=416                          Input图像的宽

channels=3                         Input图像的通道数

momentum=0.9                       动量

decay=0.0005                       权重衰减正则项，防止过拟合

angle=0                            通过旋转角度来生成更多训练样本

saturation = 1.5                   通过调整饱和度来生成更多训练样本

exposure = 1.5                     通过调整曝光量来生成更多训练样本

hue=.1                             通过调整色调来生成更多训练样本

learning_rate=0.0001               初始学习率

max_batches = 45000                训练达到max_batches后停止学习

policy=steps                       调整学习率的policy，有如下policy：CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM（在cfg文件中分别写作constant, step, exp, poly, steps, sigmoid, random）

steps=100,25000,35000              根据batch_num调整学习率

scales=10,.1,.1                    学习率变化的比例，累计相乘

[convolutional]

batch_normalize=1                  是否做BN

filters=32                         输出多少个特征图

size=3                             卷积核的尺寸

stride=1                           做卷积运算的步长

pad=1                              如果pad为0,padding由 padding参数指定。如果pad为1，padding大小为size/2

activation=leaky                   激活函数：

                                   logistic，loggy，relu，elu，relie，plse，hardtan，lhtan，linear，ramp，leaky，tanh，stair

[maxpool]

size=2                             池化层尺寸

stride=2                           池化步进

[convolutional]

batch_normalize=1

filters=64

size=3

stride=1

pad=1

activation=leaky

[maxpool]

size=2

stride=2

......

......

#######

[convolutional]

batch_normalize=1

size=3

stride=1

pad=1

filters=1024

activation=leaky

[convolutional]

batch_normalize=1

size=3

stride=1

pad=1

filters=1024

activation=leaky

[route]                            the route layer is to bring finer grained features in from earlier in the network

layers=-9

[reorg]                            the reorg layer is to make these features match the feature map size at the later layer.

                                   The end feature map is 13x13, the feature map from earlier is 26x26x512.

                                   The reorg layer maps the 26x26x512 feature map onto a 13x13x2048 feature map

                                   so that it can be concatenated with the feature maps at 13x13 resolution.

stride=2

[route]

layers=-1,-3

[convolutional]

batch_normalize=1

size=3

stride=1

pad=1

filters=1024

activation=leaky

[convolutional]

size=1

stride=1

pad=1

filters=125                        region前最后一个卷积层的filters数是特定的，计算公式为filters=num*(classes+5)，此处即5*(20+5)=125

                                   5的意义是4个坐标加1个置信度，即论文中的tx,ty,tw,th,to

activation=linear

[region]

anchors = 1.08,1.19,  3.42,4.41,  6.63,11.38,  9.42,5.11,  16.62,10.52          预选框，可以手工挑选，

                                                                                也可以通过k means 从训练样本中学出

bias_match=1

classes=20                         网络需要识别的物体种类数

coords=4                           每个box的4个坐标tx,ty,tw,th

num=5                              每个grid cell预测几个box

softmax=1                          使用softmax做激活函数

jitter=.2                          通过抖动增加噪声来抑制过拟合

rescore=1                          暂理解为一个开关，非0时通过重打分来调整l.delta（预测值与真实值的差）

object_scale=5                     暂理解为计算损失时预测框中有物体时的权重

noobject_scale=1                   暂理解为计算损失时预测框中无物体时的权重

class_scale=1                      暂理解为计算类别损失时的权重

coord_scale=1                      暂理解为计算损失时坐标偏差的权重

absolute=1

thresh = .6

random=0                           是否开启多尺度训练：为1时训练过程中每隔若干个batch随机改变网络的输入尺寸，为0时输入尺寸固定不变

darknet对应代码

找到cfg文件解析的代码，选择detector demo 作为入口

darknet.c文件 main 函数开始

    } else if (0 == strcmp(argv[1], "detector")){

    run_detector(argc, argv);

Detector.c文件 run_detector函数

char *prefix = find_char_arg(argc, argv, "-prefix", 0);

float thresh = find_float_arg(argc, argv, "-thresh", .24);

float hier_thresh = find_float_arg(argc, argv, "-hier", .5);

int cam_index = find_int_arg(argc, argv, "-c", 0);

int frame_skip = find_int_arg(argc, argv, "-s", 0);

if(argc < 4){

    fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);

    return;

}

char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);

char *outfile = find_char_arg(argc, argv, "-out", 0);

......

......

else if(0==strcmp(argv[2], "demo")) {

    list *options = read_data_cfg(datacfg);

    int classes = option_find_int(options, "classes", 20);

    char *name_list = option_find_str(options, "names", "data/names.list");

    char **names = get_labels(name_list);

    demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh);

}

read_data_cfg函数解析配置文件，保存到options指针。

classes

int classes = option_find_int(options, "classes", 20);

classes为YOLO可识别的种类数

batch、learning_rate、momentum、decay和 subdivisions

demo.c文件demo函数

net = parse_network_cfg(cfgfile);

Parser.c文件 parse_network_cfg函数

list *sections = read_cfg(filename);

node *n = sections->front;

if(!n) error("Config file has no sections");

network net = make_network(sections->size - 1);

net.gpu_index = gpu_index;

size_params params;

section *s = (section *)n->val;

list *options = s->options;

if(!is_network(s)) error("First section must be [net] or [network]");

parse_net_options(options, &net);

parse_net_options函数

net->batch = option_find_int(options, "batch",1);

net->learning_rate = option_find_float(options, "learning_rate", .001);

net->momentum = option_find_float(options, "momentum", .9);

net->decay = option_find_float(options, "decay", .0001);

int subdivs = option_find_int(options, "subdivisions",1);

net->time_steps = option_find_int_quiet(options, "time_steps",1);

net->batch /= subdivs;

net->batch *= net->time_steps;

net->subdivisions = subdivs;

learning_rate为初始学习率，训练时的真正学习率和学习率的策略及初始学习率有关。

momentum为动量，在训练时加入动量可以帮助走出local minima 以及saddle point。

decay是权重衰减正则项，用来防止过拟合。

batch的值等于cfg文件中的batch/subdivisions 再乘以time_steps。
time_steps在yolo默认的cfg中是没有配置的，所以是默认值1。
因此batch可以认为就是cfg文件中的batch/subdivisions。

前面有提到batch的意义是每batch个样本更新一次参数。

而subdivisions的意义在于降低对GPU memory的要求。
darknet将batch分割为subdivisions个子batch，每个子batch的大小为batch/subdivisions，并将子batch命名为batch。

我们看下训练时和batch有关的代码

Detector.c文件的train_detector函数

#ifdef GPU

    if(ngpus == 1){

        loss = train_network(net, train);

    } else {

        loss = train_networks(nets, ngpus, train, 4);

    }

#else

    loss = train_network(net, train);

#endif

Network.c文件的train_network函数

int batch = net.batch;

int n = d.X.rows / batch;

float *X = calloc(batch*d.X.cols, sizeof(float));

float *y = calloc(batch*d.y.cols, sizeof(float));

int i;

float sum = 0;

for(i = 0; i < n; ++i){

    get_next_batch(d, batch, i*batch, X, y);

    float err = train_network_datum(net, X, y);

    sum += err;

}

train_network_datum函数

*net.seen += net.batch;

......

......

forward_network(net, state);

backward_network(net, state);

float error = get_network_cost(net);

if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);

我们看到，只有((*net.seen)/net.batch)%net.subdivisions == 0时才会更新网络参数。
*net.seen是已经训练过的样本数（每训练完一个子batch就增加net.batch），因此(*net.seen)/net.batch是已经训练过的子batch数；它对net.subdivisions取余为0，说明刚好累积满了一个真正的batch，此时才更新一次网络参数。

policy、steps和scales

Parser.c文件 parse_net_options函数

char *policy_s = option_find_str(options, "policy", "constant");

net->policy = get_policy(policy_s);

net->burn_in = option_find_int_quiet(options, "burn_in", 0);

if(net->policy == STEP){

    net->step = option_find_int(options, "step", 1);

    net->scale = option_find_float(options, "scale", 1);

} else if (net->policy == STEPS){

    char *l = option_find(options, "steps");

    char *p = option_find(options, "scales");

    if(!l || !p) error("STEPS policy must have steps and scales in cfg file");

    int len = strlen(l);

    int n = 1;

    int i;

    for(i = 0; i < len; ++i){

        if (l[i] == ',') ++n;

    }

    int *steps = calloc(n, sizeof(int));

    float *scales = calloc(n, sizeof(float));

    for(i = 0; i < n; ++i){

        int step    = atoi(l);

        float scale = atof(p);

        l = strchr(l, ',')+1;

        p = strchr(p, ',')+1;

        steps[i] = step;

        scales[i] = scale;

    }

    net->scales = scales;

    net->steps = steps;

    net->num_steps = n;

} else if (net->policy == EXP){

    net->gamma = option_find_float(options, "gamma", 1);

} else if (net->policy == SIG){

    net->gamma = option_find_float(options, "gamma", 1);

    net->step = option_find_int(options, "step", 1);

} else if (net->policy == POLY || net->policy == RANDOM){

    net->power = option_find_float(options, "power", 1);

}

get_policy函数

if (strcmp(s, "random")==0) return RANDOM;

if (strcmp(s, "poly")==0) return POLY;

if (strcmp(s, "constant")==0) return CONSTANT;

if (strcmp(s, "step")==0) return STEP;

if (strcmp(s, "exp")==0) return EXP;

if (strcmp(s, "sigmoid")==0) return SIG;

if (strcmp(s, "steps")==0) return STEPS;

fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);

return CONSTANT;

学习率动态调整的策略有多种，YOLO默认使用的是steps。

yolo-voc.cfg文件：

steps=100,25000,35000

scales=10,.1,.1

Network.c文件get_current_rate函数

int batch_num = get_current_batch(net);

int i;

float rate;

switch (net.policy) {

    case CONSTANT:

        return net.learning_rate;

    case STEP:

        return net.learning_rate * pow(net.scale, batch_num/net.step);

    case STEPS:

        rate = net.learning_rate;

        for(i = 0; i < net.num_steps; ++i){

            if(net.steps[i] > batch_num) return rate;

            rate *= net.scales[i];

            //if(net.steps[i] > batch_num - 1 && net.scales[i] > 1) reset_momentum(net);

        }

        return rate;

get_current_batch获取的是(*net.seen)/(net.batch*net.subdivisions)，即真正的batch。

steps的每个阶段是根据batch_num划分的，根据配置文件，学习率会在batch_num达到100、25000、35000时发生改变。

当前的学习率是初始学习率与当前阶段及之前所有阶段对应的scale的总乘积。

convolutional超参数加载

Parser.c文件parse_network_cfg函数

LAYER_TYPE lt = string_to_layer_type(s->type);

        if(lt == CONVOLUTIONAL){

            l = parse_convolutional(options, params);

parse_convolutional函数

int n = option_find_int(options, "filters",1);

int size = option_find_int(options, "size",1);

int stride = option_find_int(options, "stride",1);

int pad = option_find_int_quiet(options, "pad",0);

int padding = option_find_int_quiet(options, "padding",0);

if(pad) padding = size/2;

char *activation_s = option_find_str(options, "activation", "logistic");

ACTIVATION activation = get_activation(activation_s);

int batch,h,w,c;

h = params.h;

w = params.w;

c = params.c;

batch=params.batch;

if(!(h && w && c)) error("Layer before convolutional layer must output image.");

int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

需要注意的是如果enable了pad，cfg文件中的padding不会生效，实际的padding值为size/2。

YOLO配置文件理解的更多相关文章

(转载)YOLO配置文件理解
YOLO配置文件理解转载自 [net] batch=64 每batch个样本更新一次参数. subdivisions=8 如果内存不够大,将batch分割为subdivisions个子batch,每 ...
2-2-sshd服务安装管理及配置文件理解和安全调优
大纲: 1. 培养独自解决问题的能力 2. 学习第二阶段Linux服务管理的方法 3. 安装sshd服务 4. sshd服务的使用 5. sshd服务调优 6. 初步介绍sshd配置文件 ###### ...
开源服务专题之------sshd服务安装管理及配置文件理解和安全调优
本专题我将讨论一下开源服务,随着开源社区的日趋丰富,开源软件.开源服务,已经成为人类的一种公共资源,发展势头可谓一日千里,所以不可不知.SSHD服务,在我们的linux服务器上经常用到,很重要,涉及到 ...
Yolo V3理解bbox和label的关系
假如一个bbox坐标为:[35 220 62 293 3] 第一步:将bbox转换为中心坐标和宽高形式(3种缩放比例进行缩放) 那么onehot:[0 0 0 1 0 0 0 0 0 0 ...... ...
y7000笔记本 darknet-yolo安装与测试（Ubuntu18.04+Cuda9.0+Cudnn7.1）
环境配置看上一贴 https://www.cnblogs.com/clemente/p/10386479.html 1 安装darknet 1-1 克隆darknet repo git clone h ...
maven本地仓库配置文件
背景:在使用maven的过程中,感觉本地的jar包位置飘忽不定,归根结底是因为对maven的配置文件理解不清楚造成的. 在maven的安装包下面D:\apache-maven-3.6.1\conf有s ...
log4j使用快速入门【转】
1.引言在应用程序中添加日志记录总的来说基于三个目的: .监视代码中变量的变化情况,周期性的记录到文件中供其他应用进行统计分析工作 .跟踪代码运行时轨迹,作为日后审计的依据 .担当集成开发环境中的调 ...
MVC5笔记【一】
一.global.asax文件的作用:全局性配置文件理解什么是路由? 有什么作用: 路由主要提供一个路由表请求的时候被加载,请求url要去路由表当中去对照规则解析规则控制器/动作放方法,转移 ...
Centos7 关于防火墙的一些简单配置
近期安装了linux系统Centos7,接触下来发现了与原来的Centos6.5有一些差别,这里主要记录下来我的一些关于Centos7防火墙的了解. 一.firewall简介 CentOS 7中防火墙 ...

随机推荐

ASP.NET Identity实现分布式Session，Docker+Nginx+Redis+ASP.NET CORE Identity
零.背景介绍在学习ASP.NET CORE开发的过程中,身份认证是必须考虑的一项必要的组件.ASP.NET CORE Identity是由微软官方开发的一整套身份认证组件,兼具完整性和自由度.Doc ...
java课程之团队开发冲刺阶段2.8
昨日总结: 1.具体情况已经写在了昨天的当日总结当中遇到的问题: 1.toolbar的返回键与菜单键冲突,导致无法同时使用今天的任务: 1.完整实现课程查询任务当日总结: 1.完整实现,唯一的遗 ...
Java中多态的实例
public class cf { /** * 实际上这里涉及方法调用的优先问题, * 优先级由高到低依次为:this.show(O).super.show(O).this.show((super)O ...
掌握这三点，轻松搞定Essay写作
英文essay写作涉及的范围很广,任何文字形式的材料都涉及写作.所以,不单单是专业的文字工作者要在写作上下功夫,一般人在从小到大的学校教育里要应对的作文.读书报告.美国高中及大学里的论文.英文演讲以及 ...
送票啦~ | 京东云邀您参加AI顶级盛会GTC CHINA 2019
本年度不可错过的AI顶级盛会 GTC CHINA2019 即将于12月16–19日在苏州举行京东云重量级技术专家将携 AI前沿热议话题亮相京东云相关AI最新动态,也会一并为您带上小小剧透,快来看 ...
Python基本数据类型之字符串
Python转义字符在需要在字符中使用特殊字符时,python用反斜杠(\)转义字符.如下表: 转义字符描述 \(在行尾时) 续行符 \\ 反斜杠符号 \' 单引号 \" 双引号 \a ...
（简单模拟）P1003 铺地毯
题解: 从最后一个输入的数据开始排查,如果说你找到了这个点上面有地毯,那么就直接输出这个值,如果没找到就按照题干的意思输出-1. #include<iostream>#include< ...
faster rcnn报错：TypeError: slice indices must be integers or None or have an __index__ method
https://blog.csdn.net/qq_27637315/article/details/78849756 https://blog.csdn.net/qq_21089969/article ...
.NETCore部署步骤
1.下载.NET CORE运行时下载地址:https://dotnet.microsoft.com/download 2.windows安装下载的运行时 3.检查.是否安装成功 ,dotnet -- ...
LCA--P3379 【模板】最近公共祖先（LCA）
题目描述如题,给定一棵有根多叉树,请求出指定两个点直接最近的公共祖先. 输入格式第一行包含三个正整数 N,M,S,分别表示树的结点个数.询问的个数和树根结点的序号. 接下来 N−1 行每行包含两个 ...

YOLO配置文件理解