epoll oneshot
/* Epoll private bits inside the event mask */
#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
主要是看下:惊群源:
1、socket wake_up
2、epoll_wait 中wake_up
目前data ready的时候调用sk_data_ready 唤醒进程,此时唤醒进程选择了 只唤醒一个
/ nr_exclusive是1 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int wake_flags, void *key) { wait_queue_t *curr, *next; list_for_each_entry_safe(curr, next, &q->task_list, task_list) { unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) break; } }
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
传进来的nr_exclusive是1, 所以flags & WQ_FLAG_EXCLUSIVE为真的时候,执行一次,就会跳出循环。
Epoll_create()在fork子进程之前
所有进程都共享一个 epfd, 所以data ready 唤醒进程的时候即使加上 nr_exclusive = 1 只唤醒一个进程, 那么唤醒那个一个呢?
也就是当连接到来时,我们需要选择一个进程来accept,这个时候,任何一个accept都是可以的。当连接建立以后,后续的读写事件,却与进程有了关联。一个请求与a进程建立连接后,后续的读写也应该由a进程来做。
当读写事件发生时,应该通知哪个进程呢?Epoll并不知道,因此,事件有可能错误通知另一个进程处理
Epoll_create()在fork子进程之后
每个进程的读写事件,只注册在自己进程的epoll中。所以不会出现竞争
但是accept呢???
目前有的内核版本说是会出现有的不会!!!
这就需要看内核版本实现了 无非就是唤醒的时候加上一些标志。。。当然是用reuseport 一劳永逸!!
如果不是是用reuseport实现只唤醒一个进程,那么wake_up的时候就是唤醒等待队列的头一个。。那怎么做到负载均衡呢???
所以还是reuseport好!!!!
*/
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next; list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
} void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags; spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
} /*
* This is the callback that is passed to the wait queue wakeup
* machanism. It is called by the stored file descriptors when they
* have events to report.
*/
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep; spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are trasfering events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happens during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
}
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink))
list_add_tail(&epi->rdllink, &ep->rdllist); /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq))
wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); return 1;
} static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
int ewake = 0; if ((unsigned long)key & POLLFREE) {
ep_pwq_from_wait(wait)->whead = NULL;
/*
* whead = NULL above can race with ep_remove_wait_queue()
* which can do another remove_wait_queue() after us, so we
* can't use __remove_wait_queue(). whead->lock is held by
* the caller.
*/
list_del_init(&wait->task_list);
} spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are transferring events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
* deactivated at any time.
*/
__pm_stay_awake(ep->ws);
} }
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
} /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq)) {
if ((epi->event.events & EPOLLEXCLUSIVE) &&
!((unsigned long)key & POLLFREE)) {
switch ((unsigned long)key & EPOLLINOUT_BITS) {
case POLLIN:
if (epi->event.events & POLLIN)
ewake = 1;
break;
case POLLOUT:
if (epi->event.events & POLLOUT)
ewake = 1;
break;
case 0:
ewake = 1;
break;
}
}
wake_up_locked(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); if (epi->event.events & EPOLLEXCLUSIVE)
return ewake; return 1;
}
/*
* This is the callback that is used to add our wait queue to the
* target file wakeup lists.
*/
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
poll_table *pt)
{
struct epitem *epi = ep_item_from_epqueue(pt);
struct eppoll_entry *pwq; if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
if (epi->event.events & EPOLLEXCLUSIVE)
add_wait_queue_exclusive(whead, &pwq->wait);
else
add_wait_queue(whead, &pwq->wait);
list_add_tail(&pwq->llink, &epi->pwqlist);
epi->nwait++;
} else {
/* We have to signal that an error occurred */
epi->nwait = -1;
}
}
根据EPOLLEXCLUSIVE 加入到不同的唤醒 队列add_wait_queue_exclusive add_wait_queue
在wake_up的时候 通过nr-exclu 控制 但是 要想break还需要返回 ep_poll_callback返回 true;
对于epoll_oneshot
在epoll_wait后 send fd 到user时,
if (epi->event.events & EPOLLONESHOT) 不会重复添加进去 导致后续链式唤醒
epi->event.events &= EP_PRIVATE_BITS;
else if (!(epi->event.events & EPOLLET)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
}
同时在ep_call_back的时候也会 继续检查 EPOLLONESHOT ,房子 epoll_wait返回时,在处理data中,fd又有数据需要相应,此时多线程 中别的线程可以相应。。。。乱序了!!!
/*
* If the event mask does not contain any poll(2) event, we consider the descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock;
epoll oneshot的更多相关文章
- epoll中et+多线程模式中很重要的EPOLL_ONESHOT实验
因为et模式需要循环读取,但是在读取过程中,如果有新的事件到达,很可能触发了其他线程来处理这个socket,那就乱了. EPOLL_ONESHOT就是用来避免这种情况.注意在一个线程处理完一个sock ...
- Linux编程之epoll
现在有这么一个场景:我是一个很忙的大老板,我有100个手机,手机来信息了,我的秘书就会告诉我"老板,你的手机来信息了."我很生气,我的秘书就是这样子,每次手机来信息就只告诉我来信息 ...
- I/O多路复用之epoll实战
概念 IO多路复用是指内核一旦发现进程指定的一个或者多个IO条件准备读取,它就通知该进程 通俗理解(摘自网上一大神) 这些名词比较绕口,理解涵义就好.一个epoll场景:一个酒吧服务员(一个线程),前 ...
- Select、Poll、Epoll、 异步IO 介绍
一.概念相关介绍 同步IO和异步IO,阻塞IO和非阻塞IO分别是什么,到底有什么区别?不同的人在不同的上下文下给出的答案是不同的.所以先限定一下本文的上下文. 本文讨论的背景是Linux环境下的net ...
- epoll的LT和ET使用EPOLLONESHOT
epoll有两种触发的方式即LT(水平触发)和ET(边缘触发)两种,在前者,只要存在着事件就会不断的触发,直到处理完成,而后者只触发一次相同事件或者说只在从非触发到触发两个状态转换的时候儿才触发. 这 ...
- 基于epoll的简单的httpserver
该httpserver已经能够处理并发连接,支持多个client并发訪问,每一个连接能够持续读写数据.当然.这仅仅是一个简单的学习样例.还有非常多bug,发表出来仅仅是希望大家能够互相学习.我也在不断 ...
- python epoll实现异步socket
一.同步和异步: 在程序执行中,同步运行意味着等待调用的函数.线程.子进程等的返回结果后继续处理:异步指不等待当下的返回结果,直接运行主进程下面的程序,等到有返回结果时,通知主进程处理.有点高效. 二 ...
- Linux中epoll+线程池实现高并发
服务器并发模型通常可分为单线程和多线程模型,这里的线程通常是指“I/O线程”,即负责I/O操作,协调分配任务的“管理线程”,而实际的请求和任务通常交由所谓“工作者线程”处理.通常多线程模型下,每个线程 ...
- 第九章 用多线程来读取epoll模型下的client数据
#include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include &l ...
随机推荐
- day05 Pyhton学习总结
1.字符串str s1="asasd",字符串不能修改 修改以后只能赋值给另一个变量 ret1=s1 1.切片 s1[0], s1[-1], s1[2:4], s1[-1:-4:- ...
- JSON,数组根据字段分组
function GroupbyName(data, Name) { //data数据源,Name 根据什么字段分组 var map = {}, dest = []; for (var i = 0; ...
- C++冷知识(1)
func()等价于func(void) 也就是说在C++中,参数列表为空意味着不接受任何参数.之所以要注意这一点是因为在C语言中,参数列表为空意味着参数不确定.两者的语义是有巨大差别的,作为学了C再学 ...
- 用 C 语言游戏编程开发!果然最担心的事又发生了!
30了.我要怎么办,老了.人就像一头小毛驴,方向都是牵着的人定的. 这个项目从去年开始的,一个手机游戏,当时接这个项目的时候其实没有太多考虑,我一向都喜欢打肿脸充胖子的,好面子,人家找上门来,不能不给 ...
- 【循环矩阵乘优化DP】BZOJ 2510 弱题
题目大意 有 \(M\) 个球,一开始每个球均有一个初始标号,标号范围为 \(1\) - \(N\) 且为整数,标号为 \(i\) 的球有 \(a_i\) 个,并保证 \(\sum a_i = M\) ...
- 【最大匹配+二分答案】POJ 3057 Evacuation
题目大意 POJ链接 有一个\(X×Y\)的房间,X代表墙壁,D是门,.代表人.这个房间着火了,人要跑出去,但是每一个时间点只有一个人可以从门出去. 问最后一个人逃出去的最短时间,如果不能逃出去,输出 ...
- go正则贴吧
package main import ( "fmt" "io/ioutil" "net/http" "regexp" ...
- zabbix安装中文语言包及中文乱码的解决(zabbix5.0)
一,zabbix不能配置中文界面的问题: 1, zabbix5.0 系统安装后,web界面不能选择使用中文 系统提示: You are not able to choose some of the l ...
- centos8平台使用ulimit做系统资源限制
一,ulimit的用途 1, ulimit 用于shell启动进程所占用的资源,可用于修改系统资源限制 2, 使用ulimit -a 可以查看当前系统的所有限制值 使用ulimit -n <可以 ...
- Python之数据类型总结
1.字符串 2.数字 3.列表 4.元组 5.字典 可变 or 不可变 1:可变:列表.字典 2:不可变:字符串,数字,元组 访问顺序 1.直接访问:数字 2.顺序访问:字符串,列表,元组 3.映射访 ...