/* Epoll private bits inside the event mask */
#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)

主要是看下:惊群源:

1、socket wake_up 

2、epoll_wait 中wake_up 

目前data ready的时候调用sk_data_ready 唤醒进程,此时唤醒进程选择了  只唤醒一个

/ nr_exclusive是1  

static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,  

int nr_exclusive, int wake_flags, void *key)  

{  

wait_queue_t *curr, *next;  

list_for_each_entry_safe(curr, next, &q->task_list, task_list) {  

unsigned flags = curr->flags;  

if (curr->func(curr, mode, wake_flags, key) &&  

(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)  

break;  

}  

}  

(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)

传进来的nr_exclusive是1, 所以flags & WQ_FLAG_EXCLUSIVE为真的时候,执行一次,就会跳出循环。

Epoll_create()在fork子进程之前

所有进程都共享一个 epfd, 所以data ready 唤醒进程的时候即使加上 nr_exclusive = 1 只唤醒一个进程, 那么唤醒那个一个呢?

也就是当连接到来时,我们需要选择一个进程来accept,这个时候,任何一个accept都是可以的。当连接建立以后,后续的读写事件,却与进程有了关联。一个请求与a进程建立连接后,后续的读写也应该由a进程来做。

当读写事件发生时,应该通知哪个进程呢?Epoll并不知道,因此,事件有可能错误通知另一个进程处理

Epoll_create()在fork子进程之后

每个进程的读写事件,只注册在自己进程的epoll中。所以不会出现竞争

但是accept呢???

目前有的内核版本说是会出现有的不会!!!

这就需要看内核版本实现了 无非就是唤醒的时候加上一些标志。。。当然是用reuseport 一劳永逸!!

如果不是是用reuseport实现只唤醒一个进程,那么wake_up的时候就是唤醒等待队列的头一个。。那怎么做到负载均衡呢???

所以还是reuseport好!!!!

  */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next; list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
} void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags; spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
} /*
* This is the callback that is passed to the wait queue wakeup
* machanism. It is called by the stored file descriptors when they
* have events to report.
*/
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep; spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are trasfering events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happens during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
}
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink))
list_add_tail(&epi->rdllink, &ep->rdllist); /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq))
wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); return 1;
} static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
int ewake = 0; if ((unsigned long)key & POLLFREE) {
ep_pwq_from_wait(wait)->whead = NULL;
/*
* whead = NULL above can race with ep_remove_wait_queue()
* which can do another remove_wait_queue() after us, so we
* can't use __remove_wait_queue(). whead->lock is held by
* the caller.
*/
list_del_init(&wait->task_list);
} spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are transferring events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
* deactivated at any time.
*/
__pm_stay_awake(ep->ws);
} }
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
} /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq)) {
if ((epi->event.events & EPOLLEXCLUSIVE) &&
!((unsigned long)key & POLLFREE)) {
switch ((unsigned long)key & EPOLLINOUT_BITS) {
case POLLIN:
if (epi->event.events & POLLIN)
ewake = 1;
break;
case POLLOUT:
if (epi->event.events & POLLOUT)
ewake = 1;
break;
case 0:
ewake = 1;
break;
}
}
wake_up_locked(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); if (epi->event.events & EPOLLEXCLUSIVE)
return ewake; return 1;
}
/*
* This is the callback that is used to add our wait queue to the
* target file wakeup lists.
*/
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
poll_table *pt)
{
struct epitem *epi = ep_item_from_epqueue(pt);
struct eppoll_entry *pwq; if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
if (epi->event.events & EPOLLEXCLUSIVE)
add_wait_queue_exclusive(whead, &pwq->wait);
else
add_wait_queue(whead, &pwq->wait);
list_add_tail(&pwq->llink, &epi->pwqlist);
epi->nwait++;
} else {
/* We have to signal that an error occurred */
epi->nwait = -1;
}
}

根据EPOLLEXCLUSIVE 加入到不同的唤醒 队列add_wait_queue_exclusive  add_wait_queue

在wake_up的时候 通过nr-exclu  控制 但是 要想break还需要返回 ep_poll_callback返回 true;

对于epoll_oneshot

在epoll_wait后  send fd 到user时,

if (epi->event.events & EPOLLONESHOT)  不会重复添加进去 导致后续链式唤醒
                epi->event.events &= EP_PRIVATE_BITS;
  else if (!(epi->event.events & EPOLLET)) {
                list_add_tail(&epi->rdllink, &ep->rdllist);
                ep_pm_stay_awake(epi);
            }

同时在ep_call_back的时候也会 继续检查 EPOLLONESHOT ,房子 epoll_wait返回时,在处理data中,fd又有数据需要相应,此时多线程 中别的线程可以相应。。。。乱序了!!!

/*
     * If the event mask does not contain any poll(2) event, we consider the descriptor to be disabled. This condition is likely the effect of the
     * EPOLLONESHOT bit that disables the descriptor when an event is received,* until the next EPOLL_CTL_MOD will be issued.
     */
    if (!(epi->event.events & ~EP_PRIVATE_BITS))
        goto out_unlock;

epoll oneshot的更多相关文章

  1. epoll中et+多线程模式中很重要的EPOLL_ONESHOT实验

    因为et模式需要循环读取,但是在读取过程中,如果有新的事件到达,很可能触发了其他线程来处理这个socket,那就乱了. EPOLL_ONESHOT就是用来避免这种情况.注意在一个线程处理完一个sock ...

  2. Linux编程之epoll

    现在有这么一个场景:我是一个很忙的大老板,我有100个手机,手机来信息了,我的秘书就会告诉我"老板,你的手机来信息了."我很生气,我的秘书就是这样子,每次手机来信息就只告诉我来信息 ...

  3. I/O多路复用之epoll实战

    概念 IO多路复用是指内核一旦发现进程指定的一个或者多个IO条件准备读取,它就通知该进程 通俗理解(摘自网上一大神) 这些名词比较绕口,理解涵义就好.一个epoll场景:一个酒吧服务员(一个线程),前 ...

  4. Select、Poll、Epoll、 异步IO 介绍

    一.概念相关介绍 同步IO和异步IO,阻塞IO和非阻塞IO分别是什么,到底有什么区别?不同的人在不同的上下文下给出的答案是不同的.所以先限定一下本文的上下文. 本文讨论的背景是Linux环境下的net ...

  5. epoll的LT和ET使用EPOLLONESHOT

    epoll有两种触发的方式即LT(水平触发)和ET(边缘触发)两种,在前者,只要存在着事件就会不断的触发,直到处理完成,而后者只触发一次相同事件或者说只在从非触发到触发两个状态转换的时候儿才触发. 这 ...

  6. 基于epoll的简单的httpserver

    该httpserver已经能够处理并发连接,支持多个client并发訪问,每一个连接能够持续读写数据.当然.这仅仅是一个简单的学习样例.还有非常多bug,发表出来仅仅是希望大家能够互相学习.我也在不断 ...

  7. python epoll实现异步socket

    一.同步和异步: 在程序执行中,同步运行意味着等待调用的函数.线程.子进程等的返回结果后继续处理:异步指不等待当下的返回结果,直接运行主进程下面的程序,等到有返回结果时,通知主进程处理.有点高效. 二 ...

  8. Linux中epoll+线程池实现高并发

    服务器并发模型通常可分为单线程和多线程模型,这里的线程通常是指“I/O线程”,即负责I/O操作,协调分配任务的“管理线程”,而实际的请求和任务通常交由所谓“工作者线程”处理.通常多线程模型下,每个线程 ...

  9. 第九章 用多线程来读取epoll模型下的client数据

    #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include &l ...

随机推荐

  1. Linux系统常用API总结

    1.错误处理 - fprintf() - perror() 2.通用I/O模型 - fd = open(pathname, flags, mode) - numread = read(fd, buff ...

  2. pandas常用方法总结

    In [49]: frame2 Out[49]: year state pop debt one 2000 Ohio 1.5 NaN two 2001 Ohio 1.7 NaN three 2002 ...

  3. 解决/lib/ld-linux.so.2: bad ELF interpreter: No such file or directory报错 (转)

    解决/lib/ld-linux.so.2: bad ELF interpreter: No such file or directory报错 念淅 2020-01-03 15:02:25 3793 收 ...

  4. cmd/powershell常用命令 git常用命令

    cmd/powershell: 1. 新建文件夹: mkdir directoryName 2. 新建文件: cmd: type nul>fileName (空文件) powershell: n ...

  5. SonarQube 7.7 安装教程

    SonarQube 7.7 安装教程 一. CentOS设置 1. 更换阿里源 curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.ali ...

  6. 【Azure微服务 Service Fabric 】如何转移Service Fabric集群中的种子节点(Seed Node)

    注意:在对Service Fabric的节点做操作之前,请务必确认是否是种子节点(Seed Node)且当前节点的数量是否与SF的持久层要求的数量一致. 可靠性级别是 Service Fabric 群 ...

  7. EFS加密

    目录 EFS简介 EFS的特点 EFS的缺陷 EFS证书 证书的导出 证书的安装 EFS加密 方法一 方法二 EFS简介 EFS(Encrypting File System,加密文件系统)是Wind ...

  8. 抽空学学KVM(七):虚拟机快照和克隆

    前几天学写了KVM中qume-info命令的使用,今天学学在虚拟化里面用处广泛的快照和克隆功能,snapshot和virt-clone.对于snapshot命令的使用其实很简单.进入virsh界面以后 ...

  9. eclipse配置打开选中文件存储的目录快捷配置

    方便同时复制多个包的文件 https://jingyan.baidu.com/article/adc8151353a896f723bf73cd.html

  10. 布隆过滤器 Bloom Filter 2

    date: 2020-04-01 17:00:00 updated: 2020-04-01 17:00:00 Bloom Filter 布隆过滤器 之前的一版笔记 点此跳转 1. 什么是布隆过滤器 本 ...