TCP State Transition Flow

The Linux kernel represents TCP connection states with the enum below (include/net/tcp_states.h). The sections that follow walk through the kernel functions that perform each transition, covering the active and passive open and close paths.
enum {
	/*
	 * Description of States:
	 *
	 * TCP_SYN_SENT		sent a connection request, waiting for ack
	 *
	 * TCP_SYN_RECV		received a connection request, sent ack,
	 *			waiting for final ack in three-way handshake.
	 *
	 * TCP_ESTABLISHED	connection established
	 *
	 * TCP_FIN_WAIT1	our side has shutdown, waiting to complete
	 *			transmission of remaining buffered data
	 *
	 * TCP_FIN_WAIT2	all buffered data sent, waiting for remote
	 *			to shutdown
	 *
	 * TCP_CLOSING		both sides have shutdown but we still have
	 *			data we have to finish sending
	 *
	 * TCP_TIME_WAIT	timeout to catch resent junk before entering
	 *			closed, can only be entered from FIN_WAIT2
	 *			or CLOSING. Required because the other end
	 *			may not have gotten our last ACK causing it
	 *			to retransmit the data packet (which we ignore)
	 *
	 * TCP_CLOSE_WAIT	remote side has shutdown and is waiting for
	 *			us to finish writing our data and to shutdown
	 *			(we have to close() to move on to LAST_ACK)
	 *
	 * TCP_LAST_ACK		our side has shutdown after remote has
	 *			shutdown. There may still be data in our
	 *			buffer that we have to finish sending
	 *
	 * TCP_CLOSE		socket is finished
	 */

	/* connection established */
	TCP_ESTABLISHED = 1,
	/* SYN has been sent (active open) */
	TCP_SYN_SENT,
	/* a SYN has been received and ACKed */
	TCP_SYN_RECV,
	/* active close: our FIN has been sent */
	TCP_FIN_WAIT1,
	/* active close: the peer has ACKed our FIN */
	TCP_FIN_WAIT2,
	/* active close: the peer's FIN was received and ACKed */
	TCP_TIME_WAIT,
	/* initial state of a connection */
	TCP_CLOSE,
	/* passive close: the peer's FIN was received and ACKed */
	TCP_CLOSE_WAIT,
	/* passive close: our own FIN sent after receiving the peer's FIN */
	TCP_LAST_ACK,
	/* listening state */
	TCP_LISTEN,
	/* simultaneous close: the peer's FIN arrived after we sent our FIN */
	TCP_CLOSING,	/* Now a valid state */

	TCP_MAX_STATES	/* Leave at the end! */
};
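These numeric values are visible from userspace: getsockopt() with TCP_INFO reports the socket's current state in tcpi_state using the same constants (TCP_ESTABLISHED == 1, and so on). A minimal sketch, not kernel code, with error handling mostly elided:

#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct tcp_info info;
	socklen_t len = sizeof(info);
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	/* A freshly created TCP socket has not been connected yet,
	 * so tcpi_state reports 7 (TCP_CLOSE), the initial state. */
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) == 0)
		printf("tcpi_state = %u\n", info.tcpi_state);
	close(fd);
	return 0;
}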
I. Active open

1. TCP_CLOSE ----> TCP_SYN_SENT
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	inet_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
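From userspace this transition is driven by connect(): the system call reaches tcp_v4_connect(), which parks the socket in SYN-SENT before the SYN is even transmitted. A minimal sketch, assuming 192.0.2.1:80 (a TEST-NET placeholder) never answers, so the socket stays visible in SYN-SENT via `ss -tan` while the program sleeps:

#include <arpa/inet.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port = htons(80),
	};

	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
	fcntl(fd, F_SETFL, O_NONBLOCK);		/* connect() returns -1/EINPROGRESS */
	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
	sleep(10);	/* socket sits in TCP_SYN_SENT, retransmitting the SYN */
	close(fd);
	return 0;
}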
2. TCP_SYN_SENT ----> TCP_ESTABLISHED
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 struct tcphdr *th, unsigned len)
{
	u8 *hash_location;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_cookie_values *cvp = tp->cookie_values;
	int saved_clamp = tp->rx_opt.mss_clamp;

	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 *
		 *  We do not send data with SYN, so that RFC-correct
		 *  test reduces to:
		 */
		if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
			goto reset_and_undo;

		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}

		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */

		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 *
		 *    See note below!
		 *                                        --ANK(990513)
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */

		TCP_ECN_rcv_synack(tp, th);

		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);
		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

		if (!tp->rx_opt.wscale_ok) {
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok	   = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss	    -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized. */
		tp->copied_seq = tp->rcv_nxt;

		if (cvp != NULL &&
		    cvp->cookie_pair_size > 0 &&
		    tp->rx_opt.cookie_plus > 0) {
			int cookie_size = tp->rx_opt.cookie_plus
					- TCPOLEN_COOKIE_BASE;
			int cookie_pair_size = cookie_size
					     + cvp->cookie_desired;

			/* A cookie extension option was sent and returned.
			 * Note that each incoming SYNACK replaces the
			 * Responder cookie. The initial exchange is most
			 * fragile, as protection against spoofing relies
			 * entirely upon the sequence and timestamp (above).
			 * This replacement strategy allows the correct pair to
			 * pass through, while any others will be filtered via
			 * Responder verification later.
			 */
			if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
				memcpy(&cvp->cookie_pair[cvp->cookie_desired],
				       hash_location, cookie_size);
				cvp->cookie_pair_size = cookie_pair_size;
			}
		}

		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);

		security_inet_conn_established(sk, skb);

		/* Make sure socket is routed, for correct metrics.  */
		icsk->icsk_af_ops->rebuild_header(sk);

		tcp_init_metrics(sk);

		tcp_init_congestion_control(sk);

		/* Prevent spurious tcp_cwnd_restart() on first data
		 * packet.
		 */
		tp->lsndtime = tcp_time_stamp;

		tcp_init_buffer_space(sk);

		if (sock_flag(sk, SOCK_KEEPOPEN))
			inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

		if (!tp->rx_opt.snd_wscale)
			__tcp_fast_path_on(tp, tp->snd_wnd);
		else
			tp->pred_flags = 0;

		if (!sock_flag(sk, SOCK_DEAD)) {
			sk->sk_state_change(sk);
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
		}

		if (sk->sk_write_pending ||
		    icsk->icsk_accept_queue.rskq_defer_accept ||
		    icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);
			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
			icsk->icsk_ack.ato	 = TCP_ATO_MIN;
			tcp_incr_quickack(sk);
			tcp_enter_quickack_mode(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			__kfree_skb(skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment */

	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */

		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd    = ntohs(th->window);
		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		TCP_ECN_rcv_syn(tp, th);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		tcp_send_synack(sk);
#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this.
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
		 * is not flawless. So, discard packet for sanity.
		 * Uncomment this return to process the data.
		 */
		return -1;
#else
		goto discard;
#endif
	}
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}
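Userspace sees this SYN_SENT -> ESTABLISHED transition as a non-blocking connect() completing. A common sketch (fd is assumed to be a socket on which connect() just returned EINPROGRESS): wait for writability, then check SO_ERROR:

#include <poll.h>
#include <sys/socket.h>

static int wait_established(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
	int err = 0;
	socklen_t len = sizeof(err);

	if (poll(&pfd, 1, timeout_ms) != 1)
		return -1;	/* timed out: still SYN-SENT (or poll failed) */
	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) < 0 || err != 0)
		return -1;	/* handshake failed (e.g. RST -> ECONNREFUSED) */
	return 0;		/* kernel moved the socket to TCP_ESTABLISHED */
}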
II. Passive open

1. TCP_CLOSE ----> TCP_LISTEN
int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);

	if (rc != 0)
		return rc;

	sk->sk_max_ack_backlog = 0;
	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	/* There is race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters to hash table only
	 * after validation is complete.
	 */
	sk->sk_state = TCP_LISTEN;
	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		sk->sk_prot->hash(sk);

		return 0;
	}

	sk->sk_state = TCP_CLOSE;
	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
	return -EADDRINUSE;
}
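The userspace trigger is listen() on a bound socket, which reaches inet_csk_listen_start() through the protocol's listen handler. A minimal sketch (port 8080 is an arbitrary placeholder; error handling elided):

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in addr;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(8080);

	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	listen(fd, 128);	/* TCP_CLOSE -> TCP_LISTEN happens here */
	pause();		/* socket now shows as LISTEN in `ss -ltn` */
	return 0;
}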
2. TCP_LISTEN ----> TCP_SYN_RECV
For an ordinary passive open there is no tcp_set_state() call at this step: the incoming SYN is handled by the TCP_LISTEN branch of tcp_rcv_state_process() (listed in full in section 3 below), which calls icsk->icsk_af_ops->conn_request() to create a request sock, and the child socket is later created directly in the TCP_SYN_RECV state. The tcp_set_state(sk, TCP_SYN_RECV) call in tcp_rcv_synsent_state_process() (section I.2 above, in the branch handling a SYN without an ACK) covers only simultaneous open with crossed SYNs; the full listing is not repeated here.
3. TCP_SYN_RECV ----> TCP_ESTABLISHED
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int queued = 0;
	int res;

	tp->rx_opt.saw_tstamp = 0;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
				return 1;

			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed unless
			 * it's still in use.
			 */
			kfree_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:
		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	res = tcp_validate_incoming(sk, skb, th, 0);
	if (res <= 0)
		return -res;

	/* step 5: check the ACK field */
	if (th->ack) {
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;

		switch (sk->sk_state) {
		case TCP_SYN_RECV:
			if (acceptable) {
				tp->copied_seq = tp->rcv_nxt;
				smp_mb();
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->sk_state_change(sk);

				/* Note, that this wakeup is only for marginal
				 * crossed SYN case. Passively open sockets
				 * are not waked up, because sk->sk_sleep ==
				 * NULL and sk->sk_socket == NULL.
				 */
				if (sk->sk_socket)
					sk_wake_async(sk,
						      SOCK_WAKE_IO, POLL_OUT);

				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
				tp->snd_wnd = ntohs(th->window) <<
					      tp->rx_opt.snd_wscale;
				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

				/* tcp_ack considers this ACK as duplicate
				 * and does not calculate rtt.
				 * Force it here.
				 */
				tcp_ack_update_rtt(sk, 0, 0);

				if (tp->rx_opt.tstamp_ok)
					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

				/* Make sure socket is routed, for
				 * correct metrics.
				 */
				icsk->icsk_af_ops->rebuild_header(sk);

				tcp_init_metrics(sk);

				tcp_init_congestion_control(sk);

				/* Prevent spurious tcp_cwnd_restart() on
				 * first data packet.
				 */
				tp->lsndtime = tcp_time_stamp;

				tcp_mtup_init(sk);
				tcp_initialize_rcv_mss(sk);
				tcp_init_buffer_space(sk);
				tcp_fast_path_on(tp);
			} else {
				return 1;
			}
			break;

		case TCP_FIN_WAIT1:
			if (tp->snd_una == tp->write_seq) {
				tcp_set_state(sk, TCP_FIN_WAIT2);
				sk->sk_shutdown |= SEND_SHUTDOWN;
				dst_confirm(sk->sk_dst_cache);

				if (!sock_flag(sk, SOCK_DEAD))
					/* Wake up lingering close() */
					sk->sk_state_change(sk);
				else {
					int tmo;

					if (tp->linger2 < 0 ||
					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
						tcp_done(sk);
						NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
						return 1;
					}

					tmo = tcp_fin_time(sk);
					if (tmo > TCP_TIMEWAIT_LEN) {
						inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
					} else if (th->fin || sock_owned_by_user(sk)) {
						/* Bad case. We could lose such FIN otherwise.
						 * It is not a big problem, but it looks confusing
						 * and not so rare event. We still can lose it now,
						 * if it spins in bh_lock_sock(), but it is really
						 * marginal case.
						 */
						inet_csk_reset_keepalive_timer(sk, tmo);
					} else {
						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
						goto discard;
					}
				}
			}
			break;

		case TCP_CLOSING:
			if (tp->snd_una == tp->write_seq) {
				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
				goto discard;
			}
			break;

		case TCP_LAST_ACK:
			if (tp->snd_una == tp->write_seq) {
				tcp_update_metrics(sk);
				tcp_done(sk);
				goto discard;
			}
			break;
		}
	} else
		goto discard;

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		/* Fall through */
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data could move socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
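For a passive open, the handshake's final ACK lands in the TCP_SYN_RECV case above, the child socket becomes ESTABLISHED, and accept() then hands it to userspace. A minimal sketch of that consuming side (listen_fd is assumed to be a socket already in TCP_LISTEN):

#include <sys/socket.h>
#include <unistd.h>

static void serve(int listen_fd)
{
	for (;;) {
		int cfd = accept(listen_fd, NULL, NULL);

		if (cfd < 0)
			continue;
		/* cfd refers to a connection already in TCP_ESTABLISHED */
		close(cfd);	/* begins the active close for this connection */
	}
}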
III. Active close

1. TCP_ESTABLISHED ----> TCP_FIN_WAIT1
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/* We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			len--;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_mem_reclaim(sk);

	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
	if (sk->sk_state == TCP_CLOSE)
		goto adjudge_to_death;

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (unlikely(tcp_sk(sk)->repair)) {
		sk->sk_prot->disconnect(sk, 0);
	} else if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, sk->sk_allocation);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
		 * in a single packet! (May consider it later but will
		 * probably need API support or TCP_CORK SYN-ACK until
		 * data is written and socket is closed.)
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);

	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 1 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */

	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_mem_reclaim(sk);
		if (tcp_check_oom(sk, 0)) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	if (sk->sk_state == TCP_CLOSE) {
		struct request_sock *req = tcp_sk(sk)->fastopen_rsk;

		/* We could get here with a non-NULL req if the socket is
		 * aborted (e.g., closed with unread data) before 3WHS
		 * finishes.
		 */
		if (req)
			reqsk_fastopen_remove(sk, req, false);
		inet_csk_destroy_sock(sk);
	}
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);

static int tcp_close_state(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];
	int ns = next & TCP_STATE_MASK;

	tcp_set_state(sk, ns);

	return next & TCP_ACTION_FIN;
}

void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
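tcp_close_state() indexes the new_state[] table defined alongside these functions in net/ipv4/tcp.c, which encodes both the next state (low bits, masked by TCP_STATE_MASK) and whether a FIN must be sent (TCP_ACTION_FIN). For reference, the table looks like the following; this is paraphrased from that era's source, so consult your exact kernel version for the authoritative contents:

/* In include/net/tcp.h (same era):
 *   #define TCP_STATE_MASK	0xF
 *   #define TCP_ACTION_FIN	(1 << 7)
 */
static const unsigned char new_state[16] = {
  /* current state:        new state:      action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK  | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

Reading it: closing an ESTABLISHED socket yields FIN_WAIT1 plus a FIN, and closing a CLOSE_WAIT socket yields LAST_ACK plus a FIN — exactly the two legal transitions named in the RED-PEN comment inside tcp_close().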
2. TCP_FIN_WAIT1 ----> TCP_FIN_WAIT2
This is the TCP_FIN_WAIT1 case of tcp_rcv_state_process(), listed in full in section II.3 above: when the incoming ACK covers our FIN (tp->snd_una == tp->write_seq), the socket moves to FIN_WAIT2 via tcp_set_state(sk, TCP_FIN_WAIT2), and for an orphaned socket the FIN-WAIT-2 timer is armed. The listing is not repeated here.
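From userspace the whole FIN-WAIT sequence can be driven explicitly with a half-close. A sketch (fd is assumed to be a connected, ESTABLISHED socket):

#include <sys/socket.h>
#include <unistd.h>

static void active_close(int fd)
{
	char buf[4096];

	shutdown(fd, SHUT_WR);	/* send our FIN: ESTABLISHED -> FIN_WAIT1;
				 * the peer's ACK then yields FIN_WAIT2 */
	while (read(fd, buf, sizeof(buf)) > 0)
		;		/* drain whatever the peer still sends */
	/* read() == 0: the peer's FIN arrived; see tcp_fin() below for
	 * the FIN_WAIT2 -> TIME_WAIT step */
	close(fd);
}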
3. TCP_FIN_WAIT2 ----> TCP_TIME_WAIT
static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);

	inet_csk_schedule_ack(sk);

	sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(sk, SOCK_DONE);

	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_ESTABLISHED:
		/* Move to CLOSE_WAIT */
		tcp_set_state(sk, TCP_CLOSE_WAIT);
		inet_csk(sk)->icsk_ack.pingpong = 1;
		break;

	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
		/* Received a retransmission of the FIN, do
		 * nothing.
		 */
		break;
	case TCP_LAST_ACK:
		/* RFC793: Remain in the LAST-ACK state. */
		break;

	case TCP_FIN_WAIT1:
		/* This case occurs when a simultaneous close
		 * happens, we must ack the received FIN and
		 * enter the CLOSING state.
		 */
		tcp_send_ack(sk);
		tcp_set_state(sk, TCP_CLOSING);
		break;
	case TCP_FIN_WAIT2:
		/* Received a FIN -- send ACK and enter TIME_WAIT. */
		tcp_send_ack(sk);
		tcp_time_wait(sk, TCP_TIME_WAIT, 0);
		break;
	default:
		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
		 * cases we should never reach this piece of code.
		 */
		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
		       __func__, sk->sk_state);
		break;
	}

	/* It _is_ possible, that we have something out-of-order _after_ FIN.
	 * Probably, we should reset in this case. For now drop them.
	 */
	__skb_queue_purge(&tp->out_of_order_queue);
	if (tcp_is_sack(tp))
		tcp_sack_reset(&tp->rx_opt);
	sk_mem_reclaim(sk);

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);

		/* Do not send POLL_HUP for half duplex close. */
		if (sk->sk_shutdown == SHUTDOWN_MASK ||
		    sk->sk_state == TCP_CLOSE)
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
		else
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	}
}
4. TCP_TIME_WAIT ----> TCP_CLOSE

No tcp_set_state() call performs this final step: in TIME_WAIT the connection is represented by a small time-wait control block, which the time-wait timer simply destroys once TCP_TIMEWAIT_LEN (60 seconds, the kernel's stand-in for 2*MSL) expires.
IV. Passive close

1. TCP_ESTABLISHED ----> TCP_CLOSE_WAIT
This transition is performed by tcp_fin(), listed in full in section III.3 above: in the TCP_SYN_RECV and TCP_ESTABLISHED cases the received FIN moves the socket to TCP_CLOSE_WAIT and turns on delayed-ACK pingpong mode. The listing is not repeated here.
2. TCP_CLOSE_WAIT ----> TCP_LAST_ACK
close() or shutdown(SHUT_WR) on a CLOSE_WAIT socket runs the same tcp_shutdown()/tcp_close_state() pair listed in section III.1 above; the new_state[] table maps TCP_CLOSE_WAIT to TCP_LAST_ACK | TCP_ACTION_FIN, so a FIN is sent and the socket enters TCP_LAST_ACK.
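The passive side as a whole, sketched from userspace (fd is assumed to be a connected socket whose peer closes first):

#include <sys/socket.h>
#include <unistd.h>

static void passive_close(int fd)
{
	char buf[4096];

	while (read(fd, buf, sizeof(buf)) > 0)
		;		/* consume data until the peer's FIN */
	/* read() == 0: tcp_fin() has moved the socket to TCP_CLOSE_WAIT */
	close(fd);		/* tcp_close_state(): CLOSE_WAIT -> LAST_ACK,
				 * and our FIN is sent */
}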
3. TCP_LAST_ACK ----> TCP_CLOSE
Handled by the TCP_LAST_ACK case of tcp_rcv_state_process(), listed in full in section II.3 above: when the final ACK arrives (tp->snd_una == tp->write_seq), tcp_update_metrics() and tcp_done() run, and tcp_done() puts the socket into TCP_CLOSE. The listing is not repeated here.