perf-perf stat用户层代码分析
perf_event 源码分析
前言
简单来说,perf是一种性能监测工具。它首先对通用处理器提供的performance counter进行编程,设定计数器阈值和事件;此后性能计数器会在设定的事件发生时递增,直至计数值达到阈值。不同的体系结构提取计数器数值的方式不同:例如MIPS上会注册一个硬件中断,计数器溢出时触发该中断,并在中断处理函数中记录数值;x86中则是利用通知链机制,将溢出处理函数注册到die_chain通知链上,借助任何一个硬件中断发生的时机检测性能计数器是否溢出,若溢出则记录这个数值。这种实现方式避免了单独为性能计数器溢出注册一个硬件中断。
perf源码分为用户层和内核层。用户层代码为用户提供命令行接口,用于指定事件与采样方式;perf的一大特点就体现在丰富的用户层工具上,可以说,内核部分代码只是为perf提供采样引擎,用户层才是perf的精华。用户层代码位于src/tools/perf目录下,C代码有13000行左右,此外还有大量的脚本程序。内核层代码分为体系结构无关代码(位于src/kernel/core/目录)和体系结构相关代码(位于src/arch/x86/cpu/**)。
这里先列个框架:首先从系统启动初始化开始,介绍perf-init的相关工作;之后介绍用户层指定事件,通过系统调用转入内核、执行采样,采样数据通过内存映射返回给用户层,最后由用户层工具进行上层分析并显示。
perf_event源码分析(一)——cmd_record
perf's main entry
tools/perf/perf.c
/*
 * Table of perf subcommands. Each entry pairs the subcommand name with the
 * cmd_* handler for it; the third field is 0 for every entry listed here.
 * The "probe" entry is only compiled in when HAVE_LIBELF_SUPPORT is defined,
 * and the "trace" entry only when HAVE_LIBAUDIT_SUPPORT is defined.
 */
static struct cmd_struct commands[] = {
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },
{ "list", cmd_list, 0 },
{ "record", cmd_record, 0 },
{ "report", cmd_report, 0 },
{ "bench", cmd_bench, 0 },
{ "stat", cmd_stat, 0 },
{ "timechart", cmd_timechart, 0 },
{ "top", cmd_top, 0 },
{ "annotate", cmd_annotate, 0 },
{ "version", cmd_version, 0 },
{ "script", cmd_script, 0 },
{ "sched", cmd_sched, 0 },
#ifdef HAVE_LIBELF_SUPPORT
{ "probe", cmd_probe, 0 },
#endif
{ "kmem", cmd_kmem, 0 },
{ "lock", cmd_lock, 0 },
{ "kvm", cmd_kvm, 0 },
{ "test", cmd_test, 0 },
#ifdef HAVE_LIBAUDIT_SUPPORT
{ "trace", cmd_trace, 0 },
#endif
{ "inject", cmd_inject, 0 },
{ "mem", cmd_mem, 0 },
{ "data", cmd_data, 0 },
};
perf record's CALL CHAIN:
cmd_record
;; new a struct "record" rec, and a struct "evlist" in rec->evlist;
perf_evlist__new
perf_config
__cmd_record(&record, argc, argv); // fill out "struct record"
perf_session__new(file, false, tool); // Create a new session for this rec (rec->session). Note: file is "struct perf_data_file *file", i.e. &rec->file;
machines__init(&session->machines);
ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
perf_data_file__open(file)
check_pipe(file)
file->path = "perf.data" // If not specified name, fill out file->path
open_file(file);
fd = perf_data_file__is_read(file) ? open_file_read(file) : open_file_write(file);
file->fd = fd;
perf_session__create_kernel_maps(session) //
fd = perf_data_file__fd(file); // Get rec's fd, rec->file->fd
record__init_features(rec);
perf_header__set_feat // Fill out session's header of this rec, rec->session->header
record__open(rec)
perf_evlist__config(evlist, opts); // perf_evlist
perf_evsel__config(evsel, opts); // perf_evsel
perf_header__clear_feat
perf_header__write_pipe / perf_session__write_header
perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, machine);
perf_event__synthesize_modules(tool, process_synthesized_event, machine);
machines__process_guests(&session->machines,perf_event__synthesize_guest_os, tool);
__machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,process_synthesized_event, opts->sample_address);
tools/perf/builtin-record.c
/*
 * Handler for the "record" subcommand.
 *
 * Allocates the event list for the file-scope "record" object, loads the
 * perf configuration, parses the command-line options and validates them.
 *
 * @argc/@argv: command-line arguments; argc is replaced by the value
 *              returned from parse_options().
 * @prefix:     unused.
 *
 * Returns -ENOMEM if the event list cannot be allocated.
 *
 * NOTE(review): this listing has no final return statement and never uses
 * err or errbuf; it appears to be an abridged excerpt of the full function.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
int err = -ENOMEM;
struct record *rec = &record;
char errbuf[BUFSIZ];
/* Create the evlist stored in rec->evlist; bail out if allocation fails. */
rec->evlist = perf_evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
perf_config(perf_record_config, rec); // parse the configuration, see tools/perf/util/config.c
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
/* No remaining arguments and no target selected: show the usage text. */
if (!argc && target__none(&rec->opts.target))
usage_with_options(record_usage, record_options);
/* cgroup monitoring is rejected unless the target is system-wide. */
if (nr_cgroups && !rec->opts.target.system_wide) {
ui__error("cgroup monitoring only available in"
" system-wide mode\n");
usage_with_options(record_usage, record_options);
}
}
tools/perf/util/parse-events.c
setup_events // tools/perf/builtin-stat.c
parse_events // tools/perf/util/parse-events.c
parse_events // tools/perf/util/parse-events.c
/*
 * Parse the event string @str and add the resulting events to @evlist.
 *
 * @evlist: event list that receives the parsed events; its nr_entries is
 *          used as the starting index and its nr_groups is increased by
 *          the number of groups found.
 * @str:    event string handed to parse_events__scanner().
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * parse_events__scanner(); in that case @evlist is not modified here.
 */
int parse_events(struct perf_evlist *evlist, const char *str)
{
/* Temporary list for the scanner; idx starts at the current event count. */
struct parse_events_evlist data = {
.list = LIST_HEAD_INIT(data.list),
.idx = evlist->nr_entries,
};
int ret;
ret = parse_events__scanner(str, &data, PE_START_EVENTS);
/* PMU parser cleanup runs whether or not the scan succeeded. */
perf_pmu__parse_cleanup();
if (!ret) {
/* Difference between the final and initial index = events to splice in. */
int entries = data.idx - evlist->nr_entries;
perf_evlist__splice_list_tail(evlist, &data.list, entries);
evlist->nr_groups += data.nr_groups;
return 0;
}
/*
 * There are 2 users - builtin-record and builtin-test objects.
 * Both call perf_evlist__delete in case of error, so we don't
 * need to bother.
 */
return ret;
}
struct introduction
tools/perf/util/target.h
/*
 * Describes what is to be monitored. pid, tid and uid are handed to
 * thread_map__new_str(), and cpu_list to cpu_map__new(), when the event
 * list's thread and cpu maps are created.
 */
struct target {
const char *pid;
const char *tid;
const char *cpu_list;
const char *uid_str;
uid_t uid;
bool system_wide; /* cmd_record requires this when cgroups are monitored */
bool uses_mmap;
bool default_per_cpu;
bool per_thread;
};
===
tools/perf/util/data.h
/*
 * The data file used by a perf session (referenced from
 * struct perf_session and embedded in struct record).
 */
struct perf_data_file {
const char *path; /* set to "perf.data" on open when no name was specified */
int fd; /* descriptor obtained from open_file_read()/open_file_write() */
bool is_pipe; /* presumably determined by check_pipe() - TODO confirm */
bool force;
unsigned long size;
enum perf_data_mode mode; /* NOTE(review): likely what perf_data_file__is_read() tests - confirm */
};
===
tools/perf/util/session.h
/*
 * State of one perf session, as created by
 * perf_session__new(file, false, tool) in the record path.
 */
struct perf_session {
struct perf_header header; /* feature bits are set/cleared via perf_header__set_feat()/__clear_feat() */
struct machines machines; /* initialised by machines__init() */
struct perf_evlist *evlist;
struct trace_event tevent;
bool repipe;
bool one_mmap;
void *one_mmap_addr;
u64 one_mmap_offset;
struct ordered_events ordered_events; /* initialised by ordered_events__init() with ordered_events__deliver_event */
struct perf_data_file *file; /* presumably the file passed to perf_session__new() - confirm */
struct perf_tool *tool; /* presumably the tool passed to perf_session__new() - confirm */
};
===
tools/perf/util/evlist.h
/*
 * A list of events (struct perf_evsel) together with the thread and cpu
 * maps they are opened on.
 */
struct perf_evlist {
struct list_head entries; /* list head handed to __perf_evlist__set_leader() */
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
int nr_entries; /* parse_events() uses this as the starting event index */
int nr_groups; /* increased by parse_events() with the number of parsed groups */
int nr_mmaps;
size_t mmap_len;
int id_pos;
int is_pos;
u64 combined_sample_type;
struct {
int cork_fd;
pid_t pid;
} workload;
bool overwrite;
struct fdarray pollfd;
struct perf_mmap *mmap;
struct thread_map *threads; // thread map, built by thread_map__new_str() from the target's pid/tid/uid
struct cpu_map *cpus; // cpu map, from cpu_map__dummy_new() or cpu_map__new(target->cpu_list)
struct perf_evsel *selected;
struct events_stats stats;
};
===
/** struct perf_evsel - event selector **/
Each event passed in by the user maps to one perf_evsel struct.
/*
 * One selected event. This listing is abridged: the "..." lines stand for
 * members that are not shown.
 */
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr; /* filled in by create_perf_stat_counter() and passed to sys_perf_event_open() */
char *filter; /* passed to perf_evsel__set_filter() */
struct xyarray *fd; /* xyarray__new(ncpus, nthreads, sizeof(int)): one int per (cpu, thread) */
struct xyarray *sample_id;
u64 *id;
struct perf_counts *counts; /* allocated by perf_evsel__alloc_counts() */
struct perf_counts *prev_raw_counts; /* allocated by perf_evsel__alloc_prev_raw_counts() */
int idx;
u32 ids;
char *name;
double scale;
const char *unit;
bool snapshot;
struct event_format *tp_format;
...
...
struct perf_evsel *leader; /* presumably the group leader, cf. perf_evsel__is_group_leader() - confirm */
}
===
tools/perf/builtin-record.c
/*
 * All state of one "perf record" run; cmd_record() works on a single
 * file-scope instance of this struct.
 */
struct record {
struct perf_tool tool;
struct record_opts opts; /* opts.target is validated in cmd_record() */
u64 bytes_written;
struct perf_data_file file; /* its address is the file passed to perf_session__new() */
struct perf_evlist *evlist; /* allocated with perf_evlist__new() in cmd_record() */
struct perf_session *session; /* session created for this record run */
const char *progname;
int realtime_prio;
bool no_buildid;
bool no_buildid_cache;
long samples;
};
===
Important: "struct perf_stat" contains an array of three "struct stats" (res_stats[3]),
and each element is initialized with init_stats():
for (i = 0; i < 3; i++)
init_stats(&ps->res_stats[i]);
/*
 * Per-event statistics, stored in evsel->priv. Holds three struct stats
 * accumulators, each reset with init_stats().
 */
struct perf_stat {
struct stats res_stats[3];
};
tools/perf/util/stat.h
/*
 * Accumulator initialised by init_stats() and fed by update_stats().
 * NOTE(review): n/mean/M2 look like a running count, mean and sum of
 * squared deviations - confirm against the stat implementation.
 */
struct stats
{
double n, mean, M2;
u64 max, min;
};
====
tools/perf/util/evsel.h
/*
 * One counter reading. The anonymous struct and values[3] share storage,
 * so val/ena/run can also be accessed as values[0]/[1]/[2].
 * NOTE(review): ena/run presumably are the enabled and running times
 * reported with the count - confirm.
 */
struct perf_counts_values {
union {
struct {
u64 val;
u64 ena;
u64 run;
};
u64 values[3];
};
};
/*
 * Counter values of one event: an aggregate reading plus a trailing
 * flexible array of per-entry readings (sized at allocation time by
 * perf_evsel__alloc_counts(evsel, ncpus)).
 */
struct perf_counts {
s8 scaled;
struct perf_counts_values aggr; /* zeroed at the start of read_counter_aggr() */
struct perf_counts_values cpu[];
};
perf stat's CALL CHAIN
CALL CHAIN:
commands // tools/perf/perf.c
cmd_stat // tools/perf/builtin-stat.c
parse_events_option // With "perf stat -e xxx", the event name given on the command line is parsed and checked here
parse_events
parse_events__scanner // check events
parse_events_lex_init_extra
parse_events__scan_string
parse_events_parse
parse_events__flush_buffer
parse_events__delete_buffer
parse_events_lex_destroy
perf_pmu__parse_cleanup:
perf_evlist__new();
perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) // evlist->cpus, evlist->threads
perf_evlist__set_maps ///
parse_options
parse_options_usage
add_default_attributes()
target__validate(&target);
perf_evlist__create_maps(evsel_list, &target) // fill out evlist->threads(thread_map)
evlist->threads = thread_map__new_str(target->pid, target->tid,target->uid); // evlist->threads
evlist->threads(thread_map) = [tid,tid,tid,tid,...]
target__uses_dummy_map(target)
evlist->cpus = cpu_map__dummy_new() // evlist->cpus
evlist->cpus = cpu_map__new(target->cpu_list)
perf_evlist__alloc_stats(evsel_list, interval) // Traverse all evsel
evlist__for_each(evlist, evsel) {
perf_evsel__alloc_stat_priv(evsel) // Alloc memory for each evsel->priv = zalloc(sizeof(struct perf_stat));
perf_evsel__reset_stat_priv(evsel)
init_stats // Fill out "struct perf_stat", perf_stat include 3 elements of "struct stats{}"
perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) // Alloc evsel->counts
alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) // Alloc evsel->prev_raw_counts = addr;
}
perf_stat_init_aggr_mode()
cpu_map__build_socket_map
cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
cpu_map__get_socket
cpu_map__build_core_map
cpu_map__build_map(cpus, corep, cpu_map__get_core);
cpu_map__get_core
cpu_map__get_socket
run_perf_stat(argc, argv);
__run_perf_stat(argc, argv);
perf_evlist__prepare_workload(evsel_list, &target, argv, false, workload_exec_failed_signal)
perf_evlist__set_leader(evsel_list); // evlist->nr_groups is set to 1 if evlist->nr_entries > 1, otherwise to 0
__perf_evlist__set_leader(&evlist->entries);
evlist__for_each(evsel_list, evsel) { // Traverse all evsel
create_perf_stat_counter(evsel)
struct perf_event_attr *attr = &evsel->attr;
attr->xxx = xxx
perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel))
perf_evsel__is_group_leader(evsel)
perf_evsel__open_per_thread(evsel, evsel_list->threads)
// important: __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads)
__perf_evsel__open(evsel, &empty_cpu_map.map, threads)
// perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads), if system_wide: nthreads = 1
perf_evsel__alloc_fd(evsel, cpus->nr, nthreads)
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
for (cpu = 0; cpu < cpus->nr; cpu++) {
for (thread = 0; thread < nthreads; thread++) {
group_fd = get_group_fd(evsel, cpu, thread);
sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], group_fd, flags);
}
}
}
perf_evlist__apply_filters(evsel_list, &counter)
evlist__for_each(evlist, evsel) {
perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter);
}
t0 = rdclock();
clock_gettime(CLOCK_MONOTONIC, &ref_time);
if (forks) {
perf_evlist__start_workload(evsel_list);
handle_initial_delay();
if (interval) {
print_interval();
}
} else {
handle_initial_delay();
print_interval();
}
t1 = rdclock();
update_stats(&walltime_nsecs_stats, t1 - t0);
// 开始为每个evsel读
if (aggr_mode == AGGR_GLOBAL) {
evlist__for_each(evsel_list, counter) {
// 读到struct: "struct perf_counts_values", 保存在evsel的 &counter->counts->aggr , (这里evsel 就是counter)
// 还有“struct perf_stat” , counter->priv
read_counter_aggr(counter);
aggr->val = aggr->ena = aggr->run = 0; // 这里, 把 perf_counts_values aggr 全部初始化为0
read_counter(counter) // 如何读此event?遍历每个thread和cpu
int nthreads = thread_map__nr(evsel_list->threads);
int ncpus = perf_evsel__nr_cpus(counter);
int cpu, thread;
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
// 按 thread + cpu 二维数组的方式读, 读到 "struct perf_counts_values count"
process_per_cpu(struct perf_evsel *evsel, int cpu, int thread)
perf_evsel__read_cb(evsel, cpu, thread, &count)
memset(count, 0, sizeof(*count));
FD(evsel, cpu, thread)
readn(FD(evsel, cpu, thread), count, sizeof(*count))
ion(true, fd, buf, n);
read(fd, buf, left)
read_cb(evsel, cpu, thread, tmp);
switch (aggr_mode) {
case AGGR_CORE:
case AGGR_SOCKET:
case AGGR_NONE:
perf_evsel__compute_deltas(evsel, cpu, count);
perf_counts_values__scale(count, scale, NULL);
update_shadow_stats(evsel, count->values, cpu);
}
}
}
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), thread_map__nr(evsel_list->threads));
}
} else {
evlist__for_each(evsel_list, counter) {
read_counter(counter);
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
}
}
print_stat
print_aggr // AGGR_CORE AGGR_SOCKET
print_counter_aggr(evsel, NULL); // AGGR_GLOBAL
print_counter(evsel, NULL) // AGGR_NONE
tools/perf/util/evsel.h
struct perf_evsel {
}
perf-perf stat用户层代码分析的更多相关文章
- Express实例代码分析1——简单的用户验证登录文件
/** * Module dependencies. */ var express = require('../..');// ../..是上级目录的上级目录 var hash = require(' ...
- 完整全面的Java资源库(包括构建、操作、代码分析、编译器、数据库、社区等等)
构建 这里搜集了用来构建应用程序的工具. Apache Maven:Maven使用声明进行构建并进行依赖管理,偏向于使用约定而不是配置进行构建.Maven优于Apache Ant.后者采用了一种过程化 ...
- wifi display代码 分析
转自:http://blog.csdn.net/lilian0118/article/details/23168531 这一章中我们来看Wifi Display连接过程的建立,包含P2P的部分和RTS ...
- Linux从用户层到内核层系列 - GNU系列之glibc介绍
题记:本系列文章的目的是抛开书本从源代码和使用的角度分析Linux内核和相关源代码,byhankswang和你一起玩转linux开发 轻松搞定TCP/IP协议栈,原创文章欢迎交流, byhankswa ...
- 虚拟机创建流程中neutron代码分析(三)
前言: 当neutron-server创建了port信息,将port信息写入数据库中.流程返回到nova服务端,接着nova创建的流程继续走.在计算节点中neutron-agent同样要完成很多的工作 ...
- Android Hal层简要分析
Android Hal层简要分析 Android Hal层(即 Hardware Abstraction Layer)是Google开发的Android系统里上层应用对底层硬件操作屏蔽的一个软件层次, ...
- 【转载】word2vec原理推导与代码分析
本文的理论部分大量参考<word2vec中的数学原理详解>,按照我这种初学者方便理解的顺序重新编排.重新叙述.题图来自siegfang的博客.我提出的Java方案基于kojisekig,我 ...
- OVS 内核KEY值提取及匹配流表代码分析
原文链接:http://ry0117.com/2016/12/24/OVS内核KEY值提取及匹配流表代码分析/ 当开启OVS后,创建datapath类型为system的网桥并他添加相关接口,OVS网桥 ...
- Https与Http,SSL,DevOps, 静态代码分析工具,RFID, SSH, 非对称加密算法(使用最广泛的一种是RSA), 数字签名, 数字证书
在URL前加https://前缀表明是用SSL加密的. 你的电脑与服务器之间收发的信息传输将更加安全. Web服务器启用SSL需要获得一个服务器证书并将该证书与要使用SSL的服务器绑定. http和h ...
随机推荐
- JavaWeb开发环境搭建
Tomcat 的主要配置 Tomcat:tomcat是实现了一个JavaEE标准的最小的Webserver,是Apche组织开发的,免费的server,能够在网络中直接下载. 最新的版本号应该是8的版 ...
- C++学习之可变参数的函数与模板
所谓可变参数指的是函数的参数个数可变,参数类型不定的函数.为了编写能处理不同数量实参的函数,C++11提供了两种主要的方法:如果所有的实参类型相同,可以传递一个名为initializer_list的标 ...
- Android实现一个自己定义相机的界面
我们先实现拍照button的圆形效果哈.Android开发中,当然能够找美工人员设计图片,然后直接拿进来.只是我们能够自己写代码实现这个效果哈.最经常使用的的是用layout-list实现图片的叠加, ...
- Android开发之利用SQLite进行数据存储
Android开发之利用SQLite进行数据存储 Android开发之利用SQLite进行数据存储 SQLite数据库简单介绍 Android中怎样使用SQLite 1 创建SQLiteOpenHel ...
- ios MetalPerformanceShaders 使用总结
MPSCNNConvolution 1.初始化时传人 UnsafePointer<Float> 时要传入w,不能是 &(w[0]).否则w其实传入失败,卷积的结果是nan或inf. ...
- nodejs下cannot post错误
我写了一段CoffeeScript代码,主要是流程为: 1.当客户端请求方式为GET时,输出页面,页面上有个form,form 里有个submit按钮.form的 action="" ...
- 浅析Netty原理
- 【Hibernate总结系列】使用举例
本节讲述如何使用Hibernate实现记录的增.删.改和查功能. 1 查询 在Hibernate中使用查询时,一般使用Hql查询语句. HQL(Hibernate Query Language),即H ...
- DStream 转换操作----无状态转换
DStream转换操作包括无状态转换和有状态转换. 无状态转换:每个批次的处理不依赖于之前批次的数据. 有状态转换:当前批次的处理需要使用之前批次的数据或者中间结果.有状态转换包括基于滑动窗口的转换和 ...
- PCB genesis 大孔扩孔(不用G84命令)实现方法
PCB钻孔时,当钻刀>6.3mm时,超出钻孔范围,钻孔工序是没有这么大的钻刀,当这种情况,工程CAM会都采用G84命令用小孔扩孔的方式制作, 在这里介绍一种如果不用G84命令,用程序实现将大孔生 ...