网络协议栈学习（二）创建 socket

　　下面通过学习linux 1.2.13源码进一步理解socket通信机制。对该版本源码的学习主要参考《Linux内核网络栈源代码情景分析》（曹桂平编著）。

　　要理解socket的本质，就要理解当调用socket函数时，该函数到底创建了什么？返回了什么？

int  socket(int family, int type, int protocol);

　　socket 函数为用户层函数，该函数对应的内核函数为 sock_socket（位于 socket.c 文件），源码如下：

/*
 *    Kernel-side implementation of socket(): create a socket of the
 *    requested family/type/protocol and hand back a file descriptor.
 *
 *    family   - protocol family, matched against pops[i]->family
 *    type     - SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET, SOCK_RAW or
 *               SOCK_PACKET; anything else is rejected
 *    protocol - must be >= 0; 0 lets the family choose its default
 *
 *    Returns the new file descriptor on success. On failure returns a
 *    negative errno: -EINVAL (unknown family, bad type/protocol, or
 *    get_fd() failed), -ENOSR (sock_alloc() failed), or the error
 *    returned by the family's create() hook.
 */
static int sock_socket(int family, int type, int protocol)
{
    int i, fd;
    struct socket *sock;
    struct proto_ops *ops;

    /* Locate the correct protocol family. */
    for (i = 0; i < NPROTO; ++i)
    {
        if (pops[i] == NULL) continue;
        if (pops[i]->family == family)
            break;
    }

    /* Loop ran to completion: no registered family matched. */
    if (i == NPROTO)
    {
          return -EINVAL;
    }

    ops = pops[i];

/*
 *    Check that this is a type that we know how to manipulate and
 *    the protocol makes sense here. The family can still reject the
 *    protocol later.
 */
    if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
        type != SOCK_SEQPACKET && type != SOCK_RAW &&
        type != SOCK_PACKET) || protocol < 0)
            return(-EINVAL);

/*
 *    Allocate the socket and allow the family to set things up. if
 *    the protocol is 0, the family is instructed to select an appropriate
 *    default.
 */
    if (!(sock = sock_alloc()))
    {
        printk("NET: sock_socket: no more sockets\n");
        return(-ENOSR);    /* Was: EAGAIN, but we are out of
                   system resources! */
    }

    sock->type = type;
    sock->ops = ops;

    /* Family-specific setup; on failure propagate its error code. */
    if ((i = sock->ops->create(sock, protocol)) < 0)
    {
        sock_release(sock);
        return(i);
    }

    /* Bind the socket's inode to a file descriptor for the caller. */
    if ((fd = get_fd(SOCK_INODE(sock))) < 0)
    {
        sock_release(sock);
        return(-EINVAL);
    }

    return(fd);
}

　　 sock_socket 函数完成如下工作：

（1）分配socket、sock结构，这两个结构在网络栈的不同层次表示一个套接字连接。

（2）分配inode、file结构用于普通文件操作。

（3）分配一个文件描述符并返回给应用程序作为以后的操作句柄。

　　sock_alloc 函数用于分配一个inode节点，并返回内嵌在该节点中的socket结构的指针。

/*
 *    Allocate a socket. The struct socket is not allocated separately:
 *    it is the socket_i member of the union inside a freshly obtained
 *    inode, so allocating a socket means allocating an inode.
 *
 *    Returns a pointer to the initialised, unconnected socket, or NULL
 *    if get_empty_inode() could not supply an inode.
 */
struct socket *sock_alloc(void)
{
    struct inode * inode;
    struct socket * sock;

    inode = get_empty_inode();
    if (!inode)
        return NULL;

    /* Mark the inode as a socket owned by the calling process. */
    inode->i_mode = S_IFSOCK;
    inode->i_sock = 1;
    inode->i_uid = current->uid;
    inode->i_gid = current->gid;

    /* The socket lives inside the inode's union. */
    sock = &inode->u.socket_i;

    sock->state = SS_UNCONNECTED;
    sock->flags = 0;
    sock->ops = NULL;
    sock->data = NULL;
    sock->conn = NULL;
    sock->iconn = NULL;
    sock->next = NULL;
    sock->wait = &inode->i_wait;    /* share the inode's wait queue */
    sock->inode = inode;        /* "backlink": we could use pointer arithmetic instead */
    sock->fasync_list = NULL;

    sockets_in_use++;
    return sock;
}

inode的定义如下

/* include/linux/fs.h */

/*
 * In-memory inode. For sockets, sock_alloc() obtains one of these via
 * get_empty_inode() and uses the socket_i member of the union `u` as
 * the struct socket itself. Only the fields touched by the socket code
 * shown in this article are annotated below.
 */
struct inode {

    dev_t        i_dev;

    unsigned long    i_ino;

    umode_t        i_mode;        /* sock_alloc() sets this to S_IFSOCK */

    nlink_t        i_nlink;

    uid_t        i_uid;        /* sock_alloc() copies current->uid here */

    gid_t        i_gid;        /* sock_alloc() copies current->gid here */

    dev_t        i_rdev;

    off_t        i_size;

    time_t        i_atime;

    time_t        i_mtime;

    time_t        i_ctime;

    unsigned long    i_blksize;

    unsigned long    i_blocks;

    unsigned long    i_version;

    struct semaphore i_sem;

    struct inode_operations * i_op;

    struct super_block * i_sb;

    struct wait_queue * i_wait;    /* a socket's `wait` pointer refers to this field */

    struct file_lock * i_flock;

    struct vm_area_struct * i_mmap;

    struct inode * i_next, * i_prev;

    struct inode * i_hash_next, * i_hash_prev;

    struct inode * i_bound_to, * i_bound_by;

    struct inode * i_mount;

    unsigned short i_count;        /* incremented by get_fd() when a file is attached */

    unsigned short i_wcount;

    unsigned short i_flags;

    unsigned char i_lock;

    unsigned char i_dirt;

    unsigned char i_pipe;

    unsigned char i_sock;        /* assigned by sock_alloc() for socket inodes */

    unsigned char i_seek;

    unsigned char i_update;

    /* Per-file-type data; exactly one member is meaningful for a given inode. */
    union {

        struct pipe_inode_info pipe_i;

        struct minix_inode_info minix_i;

        struct ext_inode_info ext_i;

        struct ext2_inode_info ext2_i;

        struct hpfs_inode_info hpfs_i;

        struct msdos_inode_info msdos_i;

        struct umsdos_inode_info umsdos_i;

        struct iso_inode_info isofs_i;

        struct nfs_inode_info nfs_i;

        struct xiafs_inode_info xiafs_i;

        struct sysv_inode_info sysv_i;

        struct socket socket_i;    /* used when the inode backs a socket; sock_alloc() returns its address */

        void * generic_ip;

    } u;

};

　　inode 结构是文件系统的一个结构体，其中的联合体成员u用于保存与具体文件类型相关的信息；当inode用于socket通信时，使用的就是u中的socket_i成员。sock_alloc 的作用就是获取一个inode结构体，然后返回其中socket_i的地址。至于具体如何分配inode涉及到文件系统方面的知识，这里暂不讨论。

　　当协议族为AF_INET时，ops->create 将调用inet_create(struct socket*sock, int protocol)函数。该函数将创建一个sock结构体并使得socket的data指针指向该sock结构体。

/*
 *    Create the INET-layer sock for a newly allocated socket and link
 *    the two together (sk->socket = sock, sock->data = sk).
 *
 *    sock     - the socket being set up; sock->type selects the
 *               transport (TCP, UDP, raw or packet)
 *    protocol - requested protocol; 0 means "default" for stream and
 *               datagram sockets, and is rejected for raw/packet sockets
 *
 *    Returns 0 on success, or a negative errno:
 *      -ENOBUFS          kmalloc() of the sock failed
 *      -EPROTONOSUPPORT  protocol does not fit the socket type
 *      -EPERM            raw/packet socket requested without suser()
 *      -ESOCKTNOSUPPORT  unknown socket type
 *    or the error returned by the protocol's init() hook.
 *
 *    This is an excerpt: two runs of field initialisation are elided
 *    and marked with comments below.
 */
static int inet_create(struct socket *sock, int protocol)
{
    struct sock *sk;
    struct proto *prot;
    int err;

    sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
    if (sk == NULL)
        return(-ENOBUFS);
    sk->num = 0;
    sk->reuse = 0;

    /* Every rejection path below must free sk before returning. */
    switch(sock->type)
    {
        case SOCK_STREAM:
        case SOCK_SEQPACKET:
            if (protocol && protocol != IPPROTO_TCP)
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            protocol = IPPROTO_TCP;
            sk->no_check = TCP_NO_CHECK;
            prot = &tcp_prot;
            break;

        case SOCK_DGRAM:
            if (protocol && protocol != IPPROTO_UDP)
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            protocol = IPPROTO_UDP;
            sk->no_check = UDP_NO_CHECK;
            prot=&udp_prot;
            break;

        case SOCK_RAW:
            if (!suser())
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPERM);
            }
            if (!protocol)
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            prot = &raw_prot;
            sk->reuse = 1;
            sk->no_check = 0;    /*
                         * Doesn't matter no checksum is
                         * performed anyway.
                         */
            sk->num = protocol;
            break;

        case SOCK_PACKET:
            if (!suser())
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPERM);
            }
            if (!protocol)
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            prot = &packet_prot;
            sk->reuse = 1;
            sk->no_check = 0;    /* Doesn't matter no checksum is
                         * performed anyway.
                         */
            sk->num = protocol;
            break;

        default:
            kfree_s((void *)sk, sizeof(*sk));
            return(-ESOCKTNOSUPPORT);
    }

    sk->socket = sock;    /* sock -> socket back pointer */
#ifdef CONFIG_TCP_NAGLE_OFF
    sk->nonagle = 1;
#else
    sk->nonagle = 0;
#endif
    sk->type = sock->type;
    sk->stamp.tv_sec=0;
    sk->protocol = protocol;

    /*
     * ...... (further sk field initialisation elided in this excerpt)
     * NOTE(review): `prot` is not stored anywhere in the visible code,
     * yet sk->prot is read below; the assignment is presumably in the
     * elided part. Confirm against the full source.
     */

    sk->timer.function = &net_timer;
    skb_queue_head_init(&sk->back_log);
    sk->blog = 0;
    sock->data =(void *) sk;    /* socket -> sock forward pointer */
    sk->dummy_th.doff = sizeof(sk->dummy_th)/4;    /* header length in 32-bit words */

    /* ...... (further initialisation elided in this excerpt) */

    /* Give the transport protocol a chance to do its own setup. */
    if (sk->prot->init)
    {
        err = sk->prot->init(sk);
        if (err != 0)
        {
            destroy_sock(sk);
            return(err);
        }
    }
    return(0);
}

　　最后调用get_fd 返回一个文件描述符给上层应用。

/* socket.c */

/*
 *    Attach an inode to a new file descriptor of the current process.
 *
 *    inode - inode to bind to the new file; may be NULL, in which case
 *            no reference count is taken
 *
 *    Returns the descriptor index on success, or -1 if no free file
 *    structure or no free descriptor slot is available.
 */
static int get_fd(struct inode *inode)
{
    int fd;
    struct file *file;

    /*
     *    Find a file descriptor suitable for return to the user.
     */
    file = get_empty_filp();    /* grab an unused file structure */
    if (!file)
        return(-1);

    /* First empty slot in this process's descriptor table. */
    for (fd = 0; fd < NR_OPEN; ++fd)
        if (!current->files->fd[fd])
            break;

    if (fd == NR_OPEN)
    {
        /* Table is full: drop the file structure we just took. */
        file->f_count = 0;
        return(-1);
    }

    FD_CLR(fd, &current->files->close_on_exec);
    current->files->fd[fd] = file;

    file->f_op = &socket_file_ops;    /* socket file operations */
    file->f_mode = 3;    /* NOTE(review): presumably read|write, matching O_RDWR below */
    file->f_flags = O_RDWR;
    file->f_count = 1;
    file->f_inode = inode;
    if (inode)
        inode->i_count++;    /* the file now holds a reference on the inode */
    file->f_pos = 0;
    return(fd);
}

　　get_fd 用于为网络套接字分配一个文件描述符，分配描述符的同时需要一个file结构，每个file结构都需要一个inode结构对应。内核维护一个file结构数组，get_empty_filp 函数即通过检查该数组，获取一个闲置的成员。f_op 字段的赋值实现了网络操作的普通文件接口。如果调用write、read函数进行操作就会调用相应的sock_write 和 sock_read 函数。

　　如何根据文件描述符fd找到相应的sock？

网络协议栈学习（二）创建 socket的更多相关文章

网络协议栈学习（一）socket通信实例
网络协议栈学习(一)socket通信实例该实例摘自<linux网络编程>(宋敬彬,孙海滨等著). 例子分为服务器端和客户端,客户端连接服务器后从标准输入读取输入的字符串,发送给服务器:服 ...
网络编程学习笔记-浅析socket
一.问题的引入——socket的引入是为了解决不同计算机间进程间通信的问题 .socket与进程的关系 ).socket与进程间的关系:socket 用来让一个进程和其他的进程互通信息(IPC),而S ...
网络编程学习笔记：Socket编程
文的主要内容如下: 1.网络中进程之间如何通信? 2.Socket是什么? 3.socket的基本操作 3.1.socket()函数 3.2.bind()函数 3.3.listen().connect ...
网络编程学习笔记--1.socket可读可写条件
转至 :http://blog.csdn.net/majianfei1023/article/details/45788591 socket可读可写条件,经常做为面试题被问,因为它考察被面试者对网络编 ...
Linux网络编程学习(二) ----- 进程控制(第三章)
1.进程和程序程序是一个可执行文件,而一个进程是一个执行中的程序实例.一个进程对应于一个程序的执行,进程是动态的,程序是静态的,多个进程可以并发执行同一个程序.比如几个用户可以同时运行一个编辑程序, ...
网络编程学习二（IP与端口）
InetAddress类封装计算机的ip地址,没有端口 // 使用getLocalHost方法创建InetAddress对象 InetAddress addr = InetAddress.getLo ...
pipelinewise 学习二创建一个简单的pipeline
pipelinewise 提供了方便的创建简单pipeline的命令,可以简化pipeline 的创建,同时也可以帮我们学习生成demo pipeline pipelinewise init --n ...
sublime text 2学习(二):创建可复用的代码片段
对于前端工程师来讲,写一个html页面的基本结构是体力活,每次去拷贝一个也麻烦,sublime text 2 提供了一个很好的复用代码片段.下面介绍一下创建一个html5的代码片段的过程. 在菜单上点 ...
micronaut 学习二创建一个简单的服务
micronaut 提供的cli 很方便,我们可以快速创建具有所需特性的应用,以下是一个简单的web server app 创建命令 mn create-app hello-world 效果 mn c ...

随机推荐

sublimeText3最新教程-自带插件汉化（sublime-text_build-3175_amd64）
一.可用注册码 1.更改dns 在linux下的目录是 /etc/hosts 在win7中,hosts文件的位置:C:\Windows\System32\drivers\etc 127.0.0 ...
redhat7 防火墙设置
查看防火墙的状态# firewall-cmd --staterunning # systemctl stop firewalld //关闭防火墙服务# systemctl start firewa ...
JSON类库Jackson与JSON-lib性能对比[转]
Jackson:http://jackson.codehaus.org/ JSON-lib:http://json-lib.sourceforge.net/ Gson:http://code.goog ...
Linux上Java的安装与配置
由于使用 yum 或者 apt-get 命令安装 openjdk 可能存在类库不全,从而导致用户在安装后运行相关工具时可能报错的问题,所以此处我们推荐采用手动解压安装的方式来安装 JDK.具体步骤如 ...
mybatis与hibernate常用的持久化类，及sqlsession和sqlsessionTemplate区别
首先, 通过翻阅源码,我们来整理一下mybatis进行持久化操作时重要的几个类:SqlSessionFactoryBuilder:build方法创建SqlSessionFactory实例.SqlSes ...
stm32 pwm 电调电机
先上代码 python 树莓派版本,通俗表现原理.stm32 C语言版本在后面 import RPi.GPIO as GPIO import time mode=2 IN1=11 def setup( ...
项目中的一个分页功能pagination
项目中的一个分页功能pagination <script> //总页数 ; ; //分页总数量 $(function () { // $("#pagination"). ...
clipboard异步复制文本（动态获取文本）
1.需求描述: 点击“分享”按钮的时候,发送 ajax 请求获得动态邀请连接,成功取得数据后复制到剪贴板 2.解决重点: > Clipboard 动态设置文本的使用 > Ajax请求设置为 ...
hdu 1788 最小公倍数（这题面。。。）
Chinese remainder theorem again Time Limit: 1000/1000 MS (Java/Others) Memory Limit: 32768/32768 ...
Kali Linux下常用软件安装及配置
0x00 Synaptic Synaptic(新立得)是一个高级软件包管理器,它可以管理系统内安装的每个软件及包组件,在图形界面内完成LINUX系统软件的搜寻.安装和删除. Synaptic安装简单, ...

网络协议栈学习（二）创建 socket

网络协议栈学习（二）创建 socket的更多相关文章

随机推荐

热门专题