TCP State Transition Flow

The Linux kernel represents TCP connection states with the enum below (include/net/tcp_states.h). The sections that follow walk through the kernel functions that perform each transition, covering the active and passive open and close paths.
enum {
	/*
	 * Description of States:
	 *
	 * TCP_SYN_SENT		sent a connection request, waiting for ack
	 *
	 * TCP_SYN_RECV		received a connection request, sent ack,
	 *			waiting for final ack in three-way handshake.
	 *
	 * TCP_ESTABLISHED	connection established
	 *
	 * TCP_FIN_WAIT1	our side has shutdown, waiting to complete
	 *			transmission of remaining buffered data
	 *
	 * TCP_FIN_WAIT2	all buffered data sent, waiting for remote
	 *			to shutdown
	 *
	 * TCP_CLOSING		both sides have shutdown but we still have
	 *			data we have to finish sending
	 *
	 * TCP_TIME_WAIT	timeout to catch resent junk before entering
	 *			closed, can only be entered from FIN_WAIT2
	 *			or CLOSING. Required because the other end
	 *			may not have gotten our last ACK causing it
	 *			to retransmit the data packet (which we ignore)
	 *
	 * TCP_CLOSE_WAIT	remote side has shutdown and is waiting for
	 *			us to finish writing our data and to shutdown
	 *			(we have to close() to move on to LAST_ACK)
	 *
	 * TCP_LAST_ACK		our side has shutdown after remote has
	 *			shutdown. There may still be data in our
	 *			buffer that we have to finish sending
	 *
	 * TCP_CLOSE		socket is finished
	 */

	/* connection established */
	TCP_ESTABLISHED = 1,
	/* SYN has been sent (active open) */
	TCP_SYN_SENT,
	/* a SYN has been received and ACKed */
	TCP_SYN_RECV,
	/* active close: our FIN has been sent */
	TCP_FIN_WAIT1,
	/* active close: the peer has ACKed our FIN */
	TCP_FIN_WAIT2,
	/* active close: the peer's FIN was received and ACKed */
	TCP_TIME_WAIT,
	/* initial state of a connection */
	TCP_CLOSE,
	/* passive close: the peer's FIN was received and ACKed */
	TCP_CLOSE_WAIT,
	/* passive close: our own FIN sent after receiving the peer's FIN */
	TCP_LAST_ACK,
	/* listening state */
	TCP_LISTEN,
	/* simultaneous close: the peer's FIN arrived after we sent our FIN */
	TCP_CLOSING,	/* Now a valid state */

	TCP_MAX_STATES	/* Leave at the end! */
};
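These numeric values are visible from userspace: getsockopt() with TCP_INFO reports the socket's current state in tcpi_state using the same constants (TCP_ESTABLISHED == 1, and so on). A minimal sketch, not kernel code, with error handling mostly elided:

#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct tcp_info info;
	socklen_t len = sizeof(info);
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	/* A freshly created TCP socket has not been connected yet,
	 * so tcpi_state reports 7 (TCP_CLOSE), the initial state. */
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) == 0)
		printf("tcpi_state = %u\n", info.tcpi_state);
	close(fd);
	return 0;
}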
I. Active open

1. TCP_CLOSE ----> TCP_SYN_SENT
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	inet_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
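From userspace this transition is driven by connect(): the system call reaches tcp_v4_connect(), which parks the socket in SYN-SENT before the SYN is even transmitted. A minimal sketch, assuming 192.0.2.1:80 (a TEST-NET placeholder) never answers, so the socket stays visible in SYN-SENT via `ss -tan` while the program sleeps:

#include <arpa/inet.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port = htons(80),
	};

	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
	fcntl(fd, F_SETFL, O_NONBLOCK);		/* connect() returns -1/EINPROGRESS */
	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
	sleep(10);	/* socket sits in TCP_SYN_SENT, retransmitting the SYN */
	close(fd);
	return 0;
}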
2. TCP_SYN_SENT ----> TCP_ESTABLISHED
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 struct tcphdr *th, unsigned len)
{
	u8 *hash_location;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_cookie_values *cvp = tp->cookie_values;
	int saved_clamp = tp->rx_opt.mss_clamp;

	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 *
		 *  We do not send data with SYN, so that RFC-correct
		 *  test reduces to:
		 */
		if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
			goto reset_and_undo;

		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp)) {
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}

		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */

		if (th->rst) {
			tcp_reset(sk);
			goto discard;
		}

		/* rfc793:
		 *   "fifth, if neither of the SYN or RST bits is set then
		 *    drop the segment and return."
		 *
		 *    See note below!
		 *                                        --ANK(990513)
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
		 *    (our SYN has been ACKed), change the connection
		 *    state to ESTABLISHED..."
		 */

		TCP_ECN_rcv_synack(tp, th);

		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tcp_ack(sk, skb, FLAG_SLOWPATH);

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);
		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

		if (!tp->rx_opt.wscale_ok) {
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok	   = 1;
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss	    -= TCPOLEN_TSTAMP_ALIGNED;
			tcp_store_ts_recent(tp);
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized. */
		tp->copied_seq = tp->rcv_nxt;

		if (cvp != NULL &&
		    cvp->cookie_pair_size > 0 &&
		    tp->rx_opt.cookie_plus > 0) {
			int cookie_size = tp->rx_opt.cookie_plus
					- TCPOLEN_COOKIE_BASE;
			int cookie_pair_size = cookie_size
					     + cvp->cookie_desired;

			/* A cookie extension option was sent and returned.
			 * Note that each incoming SYNACK replaces the
			 * Responder cookie. The initial exchange is most
			 * fragile, as protection against spoofing relies
			 * entirely upon the sequence and timestamp (above).
			 * This replacement strategy allows the correct pair to
			 * pass through, while any others will be filtered via
			 * Responder verification later.
			 */
			if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
				memcpy(&cvp->cookie_pair[cvp->cookie_desired],
				       hash_location, cookie_size);
				cvp->cookie_pair_size = cookie_pair_size;
			}
		}

		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);

		security_inet_conn_established(sk, skb);

		/* Make sure socket is routed, for correct metrics.  */
		icsk->icsk_af_ops->rebuild_header(sk);

		tcp_init_metrics(sk);

		tcp_init_congestion_control(sk);

		/* Prevent spurious tcp_cwnd_restart() on first data
		 * packet.
		 */
		tp->lsndtime = tcp_time_stamp;

		tcp_init_buffer_space(sk);

		if (sock_flag(sk, SOCK_KEEPOPEN))
			inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

		if (!tp->rx_opt.snd_wscale)
			__tcp_fast_path_on(tp, tp->snd_wnd);
		else
			tp->pred_flags = 0;

		if (!sock_flag(sk, SOCK_DEAD)) {
			sk->sk_state_change(sk);
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
		}

		if (sk->sk_write_pending ||
		    icsk->icsk_accept_queue.rskq_defer_accept ||
		    icsk->icsk_ack.pingpong) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);
			icsk->icsk_ack.lrcvtime = tcp_time_stamp;
			icsk->icsk_ack.ato	 = TCP_ATO_MIN;
			tcp_incr_quickack(sk);
			tcp_enter_quickack_mode(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			__kfree_skb(skb);
			return 0;
		} else {
			tcp_send_ack(sk);
		}
		return -1;
	}

	/* No ACK in the segment */

	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */

		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
		 * Particularly, it can be connect to self.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd    = ntohs(th->window);
		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
		tp->max_window = tp->snd_wnd;

		TCP_ECN_rcv_syn(tp, th);

		tcp_mtup_init(sk);
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		tcp_send_synack(sk);
#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this.
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
		 * is not flawless. So, discard packet for sanity.
		 * Uncomment this return to process the data.
		 */
		return -1;
#else
		goto discard;
#endif
	}
	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}
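Userspace sees this SYN_SENT -> ESTABLISHED transition as a non-blocking connect() completing. A common sketch (fd is assumed to be a socket on which connect() just returned EINPROGRESS): wait for writability, then check SO_ERROR:

#include <poll.h>
#include <sys/socket.h>

static int wait_established(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
	int err = 0;
	socklen_t len = sizeof(err);

	if (poll(&pfd, 1, timeout_ms) != 1)
		return -1;	/* timed out: still SYN-SENT (or poll failed) */
	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) < 0 || err != 0)
		return -1;	/* handshake failed (e.g. RST -> ECONNREFUSED) */
	return 0;		/* kernel moved the socket to TCP_ESTABLISHED */
}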
II. Passive open

1. TCP_CLOSE ----> TCP_LISTEN
int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);

	if (rc != 0)
		return rc;

	sk->sk_max_ack_backlog = 0;
	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	/* There is race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters to hash table only
	 * after validation is complete.
	 */
	sk->sk_state = TCP_LISTEN;
	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		sk->sk_prot->hash(sk);

		return 0;
	}

	sk->sk_state = TCP_CLOSE;
	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
	return -EADDRINUSE;
}
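The userspace trigger is listen() on a bound socket, which reaches inet_csk_listen_start() through the protocol's listen handler. A minimal sketch (port 8080 is an arbitrary placeholder; error handling elided):

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in addr;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(8080);

	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	listen(fd, 128);	/* TCP_CLOSE -> TCP_LISTEN happens here */
	pause();		/* socket now shows as LISTEN in `ss -ltn` */
	return 0;
}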
2. TCP_LISTEN ----> TCP_SYN_RECV
For an ordinary passive open there is no tcp_set_state() call at this step: the incoming SYN is handled by the TCP_LISTEN branch of tcp_rcv_state_process() (listed in full in section 3 below), which calls icsk->icsk_af_ops->conn_request() to create a request sock, and the child socket is later created directly in the TCP_SYN_RECV state. The tcp_set_state(sk, TCP_SYN_RECV) call in tcp_rcv_synsent_state_process() (section I.2 above, in the branch handling a SYN without an ACK) covers only simultaneous open with crossed SYNs; the full listing is not repeated here.
3. TCP_SYN_RECV ----> TCP_ESTABLISHED
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int queued = 0;
	int res;

	tp->rx_opt.saw_tstamp = 0;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
				return 1;

			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed unless
			 * it's still in use.
			 */
			kfree_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:
		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	res = tcp_validate_incoming(sk, skb, th, 0);
	if (res <= 0)
		return -res;

	/* step 5: check the ACK field */
	if (th->ack) {
		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;

		switch (sk->sk_state) {
		case TCP_SYN_RECV:
			if (acceptable) {
				tp->copied_seq = tp->rcv_nxt;
				smp_mb();
				tcp_set_state(sk, TCP_ESTABLISHED);
				sk->sk_state_change(sk);

				/* Note, that this wakeup is only for marginal
				 * crossed SYN case. Passively open sockets
				 * are not waked up, because sk->sk_sleep ==
				 * NULL and sk->sk_socket == NULL.
				 */
				if (sk->sk_socket)
					sk_wake_async(sk,
						      SOCK_WAKE_IO, POLL_OUT);

				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
				tp->snd_wnd = ntohs(th->window) <<
					      tp->rx_opt.snd_wscale;
				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

				/* tcp_ack considers this ACK as duplicate
				 * and does not calculate rtt.
				 * Force it here.
				 */
				tcp_ack_update_rtt(sk, 0, 0);

				if (tp->rx_opt.tstamp_ok)
					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

				/* Make sure socket is routed, for
				 * correct metrics.
				 */
				icsk->icsk_af_ops->rebuild_header(sk);

				tcp_init_metrics(sk);

				tcp_init_congestion_control(sk);

				/* Prevent spurious tcp_cwnd_restart() on
				 * first data packet.
				 */
				tp->lsndtime = tcp_time_stamp;

				tcp_mtup_init(sk);
				tcp_initialize_rcv_mss(sk);
				tcp_init_buffer_space(sk);
				tcp_fast_path_on(tp);
			} else {
				return 1;
			}
			break;

		case TCP_FIN_WAIT1:
			if (tp->snd_una == tp->write_seq) {
				tcp_set_state(sk, TCP_FIN_WAIT2);
				sk->sk_shutdown |= SEND_SHUTDOWN;
				dst_confirm(sk->sk_dst_cache);

				if (!sock_flag(sk, SOCK_DEAD))
					/* Wake up lingering close() */
					sk->sk_state_change(sk);
				else {
					int tmo;

					if (tp->linger2 < 0 ||
					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
						tcp_done(sk);
						NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
						return 1;
					}

					tmo = tcp_fin_time(sk);
					if (tmo > TCP_TIMEWAIT_LEN) {
						inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
					} else if (th->fin || sock_owned_by_user(sk)) {
						/* Bad case. We could lose such FIN otherwise.
						 * It is not a big problem, but it looks confusing
						 * and not so rare event. We still can lose it now,
						 * if it spins in bh_lock_sock(), but it is really
						 * marginal case.
						 */
						inet_csk_reset_keepalive_timer(sk, tmo);
					} else {
						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
						goto discard;
					}
				}
			}
			break;

		case TCP_CLOSING:
			if (tp->snd_una == tp->write_seq) {
				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
				goto discard;
			}
			break;

		case TCP_LAST_ACK:
			if (tp->snd_una == tp->write_seq) {
				tcp_update_metrics(sk);
				tcp_done(sk);
				goto discard;
			}
			break;
		}
	} else
		goto discard;

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		/* Fall through */
	case TCP_ESTABLISHED:
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data could move socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
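For a passive open, the handshake's final ACK lands in the TCP_SYN_RECV case above, the child socket becomes ESTABLISHED, and accept() then hands it to userspace. A minimal sketch of that consuming side (listen_fd is assumed to be a socket already in TCP_LISTEN):

#include <sys/socket.h>
#include <unistd.h>

static void serve(int listen_fd)
{
	for (;;) {
		int cfd = accept(listen_fd, NULL, NULL);

		if (cfd < 0)
			continue;
		/* cfd refers to a connection already in TCP_ESTABLISHED */
		close(cfd);	/* begins the active close for this connection */
	}
}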
III. Active close

1. TCP_ESTABLISHED ----> TCP_FIN_WAIT1
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/* We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			len--;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_mem_reclaim(sk);

	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
	if (sk->sk_state == TCP_CLOSE)
		goto adjudge_to_death;

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (unlikely(tcp_sk(sk)->repair)) {
		sk->sk_prot->disconnect(sk, 0);
	} else if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, sk->sk_allocation);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
		 * in a single packet! (May consider it later but will
		 * probably need API support or TCP_CORK SYN-ACK until
		 * data is written and socket is closed.)
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);

	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 1 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */

	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_mem_reclaim(sk);
		if (tcp_check_oom(sk, 0)) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	if (sk->sk_state == TCP_CLOSE) {
		struct request_sock *req = tcp_sk(sk)->fastopen_rsk;

		/* We could get here with a non-NULL req if the socket is
		 * aborted (e.g., closed with unread data) before 3WHS
		 * finishes.
		 */
		if (req)
			reqsk_fastopen_remove(sk, req, false);
		inet_csk_destroy_sock(sk);
	}
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);

static int tcp_close_state(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];
	int ns = next & TCP_STATE_MASK;

	tcp_set_state(sk, ns);

	return next & TCP_ACTION_FIN;
}

void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
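tcp_close_state() indexes the new_state[] table defined alongside these functions in net/ipv4/tcp.c, which encodes both the next state (low bits, masked by TCP_STATE_MASK) and whether a FIN must be sent (TCP_ACTION_FIN). For reference, the table looks like the following; this is paraphrased from that era's source, so consult your exact kernel version for the authoritative contents:

/* In include/net/tcp.h (same era):
 *   #define TCP_STATE_MASK	0xF
 *   #define TCP_ACTION_FIN	(1 << 7)
 */
static const unsigned char new_state[16] = {
  /* current state:        new state:      action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK  | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

Reading it: closing an ESTABLISHED socket yields FIN_WAIT1 plus a FIN, and closing a CLOSE_WAIT socket yields LAST_ACK plus a FIN — exactly the two legal transitions named in the RED-PEN comment inside tcp_close().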
2. TCP_FIN_WAIT1 ----> TCP_FIN_WAIT2
This is the TCP_FIN_WAIT1 case of tcp_rcv_state_process(), listed in full in section II.3 above: when the incoming ACK covers our FIN (tp->snd_una == tp->write_seq), the socket moves to FIN_WAIT2 via tcp_set_state(sk, TCP_FIN_WAIT2), and for an orphaned socket the FIN-WAIT-2 timer is armed. The listing is not repeated here.
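From userspace the whole FIN-WAIT sequence can be driven explicitly with a half-close. A sketch (fd is assumed to be a connected, ESTABLISHED socket):

#include <sys/socket.h>
#include <unistd.h>

static void active_close(int fd)
{
	char buf[4096];

	shutdown(fd, SHUT_WR);	/* send our FIN: ESTABLISHED -> FIN_WAIT1;
				 * the peer's ACK then yields FIN_WAIT2 */
	while (read(fd, buf, sizeof(buf)) > 0)
		;		/* drain whatever the peer still sends */
	/* read() == 0: the peer's FIN arrived; see tcp_fin() below for
	 * the FIN_WAIT2 -> TIME_WAIT step */
	close(fd);
}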
3. TCP_FIN_WAIT2 ----> TCP_TIME_WAIT
static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);

	inet_csk_schedule_ack(sk);

	sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(sk, SOCK_DONE);

	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_ESTABLISHED:
		/* Move to CLOSE_WAIT */
		tcp_set_state(sk, TCP_CLOSE_WAIT);
		inet_csk(sk)->icsk_ack.pingpong = 1;
		break;

	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
		/* Received a retransmission of the FIN, do
		 * nothing.
		 */
		break;
	case TCP_LAST_ACK:
		/* RFC793: Remain in the LAST-ACK state. */
		break;

	case TCP_FIN_WAIT1:
		/* This case occurs when a simultaneous close
		 * happens, we must ack the received FIN and
		 * enter the CLOSING state.
		 */
		tcp_send_ack(sk);
		tcp_set_state(sk, TCP_CLOSING);
		break;
	case TCP_FIN_WAIT2:
		/* Received a FIN -- send ACK and enter TIME_WAIT. */
		tcp_send_ack(sk);
		tcp_time_wait(sk, TCP_TIME_WAIT, 0);
		break;
	default:
		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
		 * cases we should never reach this piece of code.
		 */
		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
		       __func__, sk->sk_state);
		break;
	}

	/* It _is_ possible, that we have something out-of-order _after_ FIN.
	 * Probably, we should reset in this case. For now drop them.
	 */
	__skb_queue_purge(&tp->out_of_order_queue);
	if (tcp_is_sack(tp))
		tcp_sack_reset(&tp->rx_opt);
	sk_mem_reclaim(sk);

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);

		/* Do not send POLL_HUP for half duplex close. */
		if (sk->sk_shutdown == SHUTDOWN_MASK ||
		    sk->sk_state == TCP_CLOSE)
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
		else
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	}
}
4. TCP_TIME_WAIT ----> TCP_CLOSE

No tcp_set_state() call performs this final step: in TIME_WAIT the connection is represented by a small time-wait control block, which the time-wait timer simply destroys once TCP_TIMEWAIT_LEN (60 seconds, the kernel's stand-in for 2*MSL) expires.
IV. Passive close

1. TCP_ESTABLISHED ----> TCP_CLOSE_WAIT
This transition is performed by tcp_fin(), listed in full in section III.3 above: in the TCP_SYN_RECV and TCP_ESTABLISHED cases the received FIN moves the socket to TCP_CLOSE_WAIT and turns on delayed-ACK pingpong mode. The listing is not repeated here.
2. TCP_CLOSE_WAIT ----> TCP_LAST_ACK
close() or shutdown(SHUT_WR) on a CLOSE_WAIT socket runs the same tcp_shutdown()/tcp_close_state() pair listed in section III.1 above; the new_state[] table maps TCP_CLOSE_WAIT to TCP_LAST_ACK | TCP_ACTION_FIN, so a FIN is sent and the socket enters TCP_LAST_ACK.
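The passive side as a whole, sketched from userspace (fd is assumed to be a connected socket whose peer closes first):

#include <sys/socket.h>
#include <unistd.h>

static void passive_close(int fd)
{
	char buf[4096];

	while (read(fd, buf, sizeof(buf)) > 0)
		;		/* consume data until the peer's FIN */
	/* read() == 0: tcp_fin() has moved the socket to TCP_CLOSE_WAIT */
	close(fd);		/* tcp_close_state(): CLOSE_WAIT -> LAST_ACK,
				 * and our FIN is sent */
}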
3. TCP_LAST_ACK ----> TCP_CLOSE
Handled by the TCP_LAST_ACK case of tcp_rcv_state_process(), listed in full in section II.3 above: when the final ACK arrives (tp->snd_una == tp->write_seq), tcp_update_metrics() and tcp_done() run, and tcp_done() puts the socket into TCP_CLOSE. The listing is not repeated here.