/* Epoll private bits inside the event mask */
#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)

主要是看下:惊群源:

1、socket wake_up 

2、epoll_wait 中wake_up 

目前data ready的时候调用sk_data_ready 唤醒进程,此时唤醒进程选择了  只唤醒一个

/ nr_exclusive是1  

static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,  

int nr_exclusive, int wake_flags, void *key)  

{  

wait_queue_t *curr, *next;  

list_for_each_entry_safe(curr, next, &q->task_list, task_list) {  

unsigned flags = curr->flags;  

if (curr->func(curr, mode, wake_flags, key) &&  

(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)  

break;  

}  

}  

(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)

传进来的nr_exclusive是1, 所以flags & WQ_FLAG_EXCLUSIVE为真的时候,执行一次,就会跳出循环。

Epoll_create()在fork子进程之前

所有进程都共享一个 epfd, 所以data ready 唤醒进程的时候即使加上 nr_exclusive = 1 只唤醒一个进程, 那么唤醒那个一个呢?

也就是当连接到来时,我们需要选择一个进程来accept,这个时候,任何一个accept都是可以的。当连接建立以后,后续的读写事件,却与进程有了关联。一个请求与a进程建立连接后,后续的读写也应该由a进程来做。

当读写事件发生时,应该通知哪个进程呢?Epoll并不知道,因此,事件有可能错误通知另一个进程处理

Epoll_create()在fork子进程之后

每个进程的读写事件,只注册在自己进程的epoll中。所以不会出现竞争

但是accept呢???

目前有的内核版本说是会出现有的不会!!!

这就需要看内核版本实现了 无非就是唤醒的时候加上一些标志。。。当然是用reuseport 一劳永逸!!

如果不是是用reuseport实现只唤醒一个进程,那么wake_up的时候就是唤醒等待队列的头一个。。那怎么做到负载均衡呢???

所以还是reuseport好!!!!

  */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next; list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
} void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags; spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
} /*
* This is the callback that is passed to the wait queue wakeup
* machanism. It is called by the stored file descriptors when they
* have events to report.
*/
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep; spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are trasfering events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happens during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
}
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink))
list_add_tail(&epi->rdllink, &ep->rdllist); /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq))
wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); return 1;
} static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
int ewake = 0; if ((unsigned long)key & POLLFREE) {
ep_pwq_from_wait(wait)->whead = NULL;
/*
* whead = NULL above can race with ep_remove_wait_queue()
* which can do another remove_wait_queue() after us, so we
* can't use __remove_wait_queue(). whead->lock is held by
* the caller.
*/
list_del_init(&wait->task_list);
} spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are transferring events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
* deactivated at any time.
*/
__pm_stay_awake(ep->ws);
} }
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
} /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq)) {
if ((epi->event.events & EPOLLEXCLUSIVE) &&
!((unsigned long)key & POLLFREE)) {
switch ((unsigned long)key & EPOLLINOUT_BITS) {
case POLLIN:
if (epi->event.events & POLLIN)
ewake = 1;
break;
case POLLOUT:
if (epi->event.events & POLLOUT)
ewake = 1;
break;
case 0:
ewake = 1;
break;
}
}
wake_up_locked(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); if (epi->event.events & EPOLLEXCLUSIVE)
return ewake; return 1;
}
/*
* This is the callback that is used to add our wait queue to the
* target file wakeup lists.
*/
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
poll_table *pt)
{
struct epitem *epi = ep_item_from_epqueue(pt);
struct eppoll_entry *pwq; if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
if (epi->event.events & EPOLLEXCLUSIVE)
add_wait_queue_exclusive(whead, &pwq->wait);
else
add_wait_queue(whead, &pwq->wait);
list_add_tail(&pwq->llink, &epi->pwqlist);
epi->nwait++;
} else {
/* We have to signal that an error occurred */
epi->nwait = -1;
}
}

根据EPOLLEXCLUSIVE 加入到不同的唤醒 队列add_wait_queue_exclusive  add_wait_queue

在wake_up的时候 通过nr-exclu  控制 但是 要想break还需要返回 ep_poll_callback返回 true;

对于epoll_oneshot

在epoll_wait后  send fd 到user时,

if (epi->event.events & EPOLLONESHOT)  不会重复添加进去 导致后续链式唤醒
                epi->event.events &= EP_PRIVATE_BITS;
  else if (!(epi->event.events & EPOLLET)) {
                list_add_tail(&epi->rdllink, &ep->rdllist);
                ep_pm_stay_awake(epi);
            }

同时在ep_call_back的时候也会 继续检查 EPOLLONESHOT ,房子 epoll_wait返回时,在处理data中,fd又有数据需要相应,此时多线程 中别的线程可以相应。。。。乱序了!!!

/*
     * If the event mask does not contain any poll(2) event, we consider the descriptor to be disabled. This condition is likely the effect of the
     * EPOLLONESHOT bit that disables the descriptor when an event is received,* until the next EPOLL_CTL_MOD will be issued.
     */
    if (!(epi->event.events & ~EP_PRIVATE_BITS))
        goto out_unlock;

epoll oneshot的更多相关文章

  1. epoll中et+多线程模式中很重要的EPOLL_ONESHOT实验

    因为et模式需要循环读取,但是在读取过程中,如果有新的事件到达,很可能触发了其他线程来处理这个socket,那就乱了. EPOLL_ONESHOT就是用来避免这种情况.注意在一个线程处理完一个sock ...

  2. Linux编程之epoll

    现在有这么一个场景:我是一个很忙的大老板,我有100个手机,手机来信息了,我的秘书就会告诉我"老板,你的手机来信息了."我很生气,我的秘书就是这样子,每次手机来信息就只告诉我来信息 ...

  3. I/O多路复用之epoll实战

    概念 IO多路复用是指内核一旦发现进程指定的一个或者多个IO条件准备读取,它就通知该进程 通俗理解(摘自网上一大神) 这些名词比较绕口,理解涵义就好.一个epoll场景:一个酒吧服务员(一个线程),前 ...

  4. Select、Poll、Epoll、 异步IO 介绍

    一.概念相关介绍 同步IO和异步IO,阻塞IO和非阻塞IO分别是什么,到底有什么区别?不同的人在不同的上下文下给出的答案是不同的.所以先限定一下本文的上下文. 本文讨论的背景是Linux环境下的net ...

  5. epoll的LT和ET使用EPOLLONESHOT

    epoll有两种触发的方式即LT(水平触发)和ET(边缘触发)两种,在前者,只要存在着事件就会不断的触发,直到处理完成,而后者只触发一次相同事件或者说只在从非触发到触发两个状态转换的时候儿才触发. 这 ...

  6. 基于epoll的简单的httpserver

    该httpserver已经能够处理并发连接,支持多个client并发訪问,每一个连接能够持续读写数据.当然.这仅仅是一个简单的学习样例.还有非常多bug,发表出来仅仅是希望大家能够互相学习.我也在不断 ...

  7. python epoll实现异步socket

    一.同步和异步: 在程序执行中,同步运行意味着等待调用的函数.线程.子进程等的返回结果后继续处理:异步指不等待当下的返回结果,直接运行主进程下面的程序,等到有返回结果时,通知主进程处理.有点高效. 二 ...

  8. Linux中epoll+线程池实现高并发

    服务器并发模型通常可分为单线程和多线程模型,这里的线程通常是指“I/O线程”,即负责I/O操作,协调分配任务的“管理线程”,而实际的请求和任务通常交由所谓“工作者线程”处理.通常多线程模型下,每个线程 ...

  9. 第九章 用多线程来读取epoll模型下的client数据

    #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include &l ...

随机推荐

  1. ElementUI级联选择器动态加载Demo

    嗯,今天项目遇到,弄了一会,这里分享一下,不足之处请小伙伴指出来, 官网Demo: <el-cascader :props="props"></el-cascad ...

  2. flutter_bloc使用解析---骚年,你还在手搭bloc吗!

    前言 首先,有很多的文章在说flutter bloc模式的应用,但是百分之八九十的文章都是在说,使用StreamController+StreamBuilder搭建bloc,提升性能的会加上Inher ...

  3. 大白话聊OSI七层模型和TCP/IP四层模型

    前言 今天和大家聊的是一个比较基础的问题,OSI七层模型和TCP/IP四层模型. 小伙伴们可能有疑问,这个东西还用写文章吗,太基础了吧,网上文章多的是,随便一搜索就能找到. 确实是这样,网上资料确实很 ...

  4. list.add方法参数详解

  5. Mybatis---04Mybatis配置文件浅析(二)

    本文参考:https://www.cnblogs.com/yulinfeng/p/5991170.html 1.typeHandlers:类型处理器,设置预处理语句(PreparedStatement ...

  6. Excel基础—为什么学习Excel

    吾生也有涯,而知也无涯 点赞再看,养成习惯 自从个人计算机开始普及以后,Excel就得到了广泛的传播,工作学习生活中不处不存在Excel的影子,不论是考勤,工资还是其他的统计等等,都离不开Excel. ...

  7. Docker学习—DockerFile

    前言: 上一篇文章简单使用了docker 拉取镜像.启动容器.编译镜像:其中编译镜像时,使用到了Dockerfile,那么接下来我们就详细的来说说Dockerfile DockerFile是什么: D ...

  8. Android Choreographer 源码分析

    Choreographer 的作用主要是配合 Vsync ,给上层 App 的渲染提供一个稳定的 Message 处理的时机,也就是 Vsync 到来的时候 ,系统通过对 Vsync 信号周期的调整, ...

  9. 模拟量采集模块433Mhz LoRa无线自组网络介绍

    模拟量采集模块433Mhz LoRa无线自组网络是LPWAN(低功耗广域网Low Power Wide Area Nerwork)通信技术中的一种,是美国Semtech公司采用和推广的一种基于扩频技术 ...

  10. 05 . Go+Vue开发一个线上外卖应用(Session集成及修改用户头像到Fastdfs)

    用户头像上传 功能介绍 在用户中心中,允许用户更换自己的头像.因此,我们开发上传一张图片到服务器,并保存成为用户的头像. 接口解析 在用户模块的控制器MemberController中,解析头像上传的 ...