/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 * Version:	$Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */
#include <linux/module.h>
#include <net/tcp.h>
int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 = TCP_RETR1;
int sysctl_tcp_retries2 = TCP_RETR2;
int sysctl_tcp_orphan_retries;
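/*
 * The defaults above come from include/net/tcp.h (for reference, roughly:
 * 5 SYN retries, 5 SYN-ACK retries, a 2 hour keepalive idle time, a 75 second
 * keepalive interval, 9 keepalive probes, retries1 = 3 and retries2 = 15).
 * At run time they are normally tuned through the matching
 * /proc/sys/net/ipv4/tcp_* sysctl entries rather than edited here.
 */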
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
static void tcp_keepalive_timer(unsigned long data);

const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
/*
 * Using different timers for retransmit, delayed acks and probes.
 * We may wish to use just one timer, maintaining a list of expiry jiffies.
 */
void tcp_init_xmit_timers(struct sock *sk)
    struct tcp_opt *tp = tcp_sk(sk);

    init_timer(&tp->retransmit_timer);
    tp->retransmit_timer.function = &tcp_write_timer;
    tp->retransmit_timer.data = (unsigned long)sk;

    init_timer(&tp->delack_timer);
    tp->delack_timer.function = &tcp_delack_timer;
    tp->delack_timer.data = (unsigned long)sk;

    init_timer(&sk->sk_timer);
    sk->sk_timer.function = &tcp_keepalive_timer;
    sk->sk_timer.data = (unsigned long)sk;
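/*
 * Summary of the three per-socket timers set up above: retransmit_timer runs
 * tcp_write_timer() (which dispatches to the retransmit or zero-window-probe
 * handler via tp->pending), delack_timer runs tcp_delack_timer(), and
 * sk_timer runs tcp_keepalive_timer(), which further below also doubles as
 * the SYN-ACK retransmit timer for listening sockets and the FIN_WAIT2
 * timeout.
 */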
void tcp_clear_xmit_timers(struct sock *sk)
    struct tcp_opt *tp = tcp_sk(sk);

    if (timer_pending(&tp->retransmit_timer) &&
        del_timer(&tp->retransmit_timer))
        __sock_put(sk);

    if (timer_pending(&tp->delack_timer) &&
        del_timer(&tp->delack_timer))
        __sock_put(sk);

    if (timer_pending(&sk->sk_timer) && del_timer(&sk->sk_timer))
        __sock_put(sk);
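/*
 * tcp_write_err() gives up on the connection.  Note that
 * "sk->sk_err_soft ? : ETIMEDOUT" uses the GNU "x ?: y" shorthand: report a
 * previously recorded soft error (e.g. from an ICMP message) if there is
 * one, otherwise plain ETIMEDOUT.
 */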
static void tcp_write_err(struct sock *sk)
    sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
    sk->sk_error_report(sk);

    NET_INC_STATS_BH(TCPAbortOnTimeout);
/* Do not allow orphaned sockets to eat all our resources.
 * This is a direct violation of the TCP specs, but it is required
 * to prevent DoS attacks.  It is called when a retransmission timeout
 * or zero probe timeout occurs on an orphaned socket.
 *
 * The criteria are still not confirmed experimentally and may change.
 * We kill the socket if:
 * 1. the number of orphaned sockets exceeds an administratively configured
 *    limit (sysctl_tcp_max_orphans), or
 * 2. we are under strong memory pressure.
 */
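/*
 * A nonzero return below means the socket has been aborted and the calling
 * timer handler must stop touching it; tcp_write_timeout() and
 * tcp_probe_timer() treat a nonzero return as fatal for the connection.
 */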
static int tcp_out_of_resources(struct sock *sk, int do_reset)
    struct tcp_opt *tp = tcp_sk(sk);
    int orphans = atomic_read(&tcp_orphan_count);

    /* If the peer does not open its window for a long time, or did not
     * transmit anything for a long time, penalize it. */
    if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
    /* If some dubious ICMP arrived, penalize even more. */

    if (orphans >= sysctl_tcp_max_orphans ||
        (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
         atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
        printk(KERN_INFO "Out of socket memory\n");

        /* Catch exceptional cases, when the connection requires a reset:
         *	1. the last segment was sent recently, or */
        if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
            /*	2. the window is closed. */
            (!tp->snd_wnd && !tp->packets_out))

        tcp_send_active_reset(sk, GFP_ATOMIC);

        NET_INC_STATS_BH(TCPAbortOnMemory);
/* Calculate the maximal number of retries on an orphaned socket. */
static int tcp_orphan_retries(struct sock *sk, int alive)
    int retries = sysctl_tcp_orphan_retries; /* May be zero. */

    /* We know from an ICMP that something is wrong. */
    if (sk->sk_err_soft && !alive)
        retries = 0;

    /* However, if the socket sent something recently, select some safe
     * number of retries.  8 corresponds to >100 seconds with the minimal
     * RTO of 200 msec. */
    if (retries == 0 && alive)
        retries = 8;
    return retries;
/* A write timeout has occurred.  Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
    struct tcp_opt *tp = tcp_sk(sk);

    if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
        dst_negative_advice(&sk->sk_dst_cache);
        retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;

    if (tp->retransmits >= sysctl_tcp_retries1) {
        /* NOTE: draft-ietf-tcpimpl-pmtud-01.txt requires PMTU black
           hole detection. :-(

           This is the place to implement it.  It is not implemented.
           I do not want to implement it.  It is disgusting.  It does
           not work in any case.  Let me cite the same draft, which
           requires us to implement this:
           "The one security concern raised by this memo is that ICMP black holes
           are often caused by over-zealous security administrators who block
           all ICMP messages.  It is vitally important that those who design and
           deploy security systems understand the impact of strict filtering on
           upper-layer protocols.  The safest web site in the world is worthless
           if most TCP implementations cannot transfer data from it.  It would
           be far nicer to have all of the black holes fixed rather than fixing
           all of the TCP implementations."
         */
            dst_negative_advice(&sk->sk_dst_cache);

        retry_until = sysctl_tcp_retries2;
        if (sock_flag(sk, SOCK_DEAD)) {
            int alive = (tp->rto < TCP_RTO_MAX);

            retry_until = tcp_orphan_retries(sk, alive);
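            /* "alive" here means the exponential backoff has not yet pushed
             * the RTO up to the TCP_RTO_MAX ceiling of 120 seconds; for such
             * orphaned sockets tcp_orphan_retries() may still grant a few
             * more retries before the connection is given up.
             */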
            if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
                return 1;

    if (tp->retransmits >= retry_until) {
        /* Has it gone just too far? */
        tcp_write_err(sk);
static void tcp_delack_timer(unsigned long data)
    struct sock *sk = (struct sock *)data;
    struct tcp_opt *tp = tcp_sk(sk);

    if (sock_owned_by_user(sk)) {
        /* Try again later. */
        NET_INC_STATS_BH(DelayedACKLocked);
        if (!mod_timer(&tp->delack_timer, jiffies + TCP_DELACK_MIN))
            sock_hold(sk);

    if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))

    if (time_after(tp->ack.timeout, jiffies)) {
        if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
            sock_hold(sk);

    tp->ack.pending &= ~TCP_ACK_TIMER;

    if (skb_queue_len(&tp->ucopy.prequeue)) {
        struct sk_buff *skb;

        NET_ADD_STATS_BH(TCPSchedulerFailed,
                         skb_queue_len(&tp->ucopy.prequeue));

        while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
            sk->sk_backlog_rcv(sk, skb);

        tp->ucopy.memory = 0;
    }
    if (tcp_ack_scheduled(tp)) {
        if (!tp->ack.pingpong) {
            /* Delayed ACK missed: inflate ATO. */
            tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
        } else {
            /* Delayed ACK missed: leave pingpong mode and
             * deflate the ATO.
             */
            tp->ack.pingpong = 0;
            tp->ack.ato = TCP_ATO_MIN;
        }
        NET_INC_STATS_BH(DelayedACKs);

    if (tcp_memory_pressure)
static void tcp_probe_timer(struct sock *sk)
    struct tcp_opt *tp = tcp_sk(sk);
    int max_probes;

    if (tp->packets_out || !tp->send_head) {
    /* *WARNING* RFC 1122 forbids this
     *
     * It doesn't AFAIK, because we kill the retransmit timer -AK
     *
     * FIXME: We ought not to do this; Solaris 2.5 actually lists fixing
     * this behaviour as a bug fix. [AC]
     *
     * Let me explain.  probes_out is zeroed by incoming ACKs even if they
     * advertise a zero window.  Hence, the connection is killed only if we
     * received no ACKs for the normal connection timeout.  It is not killed
     * just because the window stays zero for some time; the window may stay
     * zero until armageddon and even later.  We are in full accordance with
     * the RFCs; only the probe timer combines both the retransmission
     * timeout and the probe timeout in one bottle. --ANK
     */
    max_probes = sysctl_tcp_retries2;

    if (sock_flag(sk, SOCK_DEAD)) {
        int alive = ((tp->rto << tp->backoff) < TCP_RTO_MAX);

        max_probes = tcp_orphan_retries(sk, alive);
        if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))

    if (tp->probes_out > max_probes) {
        tcp_write_err(sk);
    } else {
        /* Only send another probe if we didn't close things up. */
        tcp_send_probe0(sk);
    }
/*
 * The TCP retransmit timer.
 */
static void tcp_retransmit_timer(struct sock *sk)
    struct tcp_opt *tp = tcp_sk(sk);

    if (tp->packets_out == 0)

    BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));

    if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
        !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
        /* Receiver dastardly shrinks window.  Our retransmits
         * become zero probes, but we should not time out this
         * connection.  If the socket is an orphan, time it out;
         * we cannot allow such beasts to hang infinitely.
         */
        if (net_ratelimit()) {
            struct inet_opt *inet = inet_sk(sk);
            printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
                   NIPQUAD(inet->daddr), htons(inet->dport),
                   inet->num, tp->snd_una, tp->snd_nxt);
        }
        if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {

        tcp_enter_loss(sk, 0);
        tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
        goto out_reset_timer;
    if (tcp_write_timeout(sk))

    if (tp->retransmits == 0) {
        if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
            if (tp->sack_ok) {
                if (tp->ca_state == TCP_CA_Recovery)
                    NET_INC_STATS_BH(TCPSackRecoveryFail);
                else
                    NET_INC_STATS_BH(TCPSackFailures);
            } else {
                if (tp->ca_state == TCP_CA_Recovery)
                    NET_INC_STATS_BH(TCPRenoRecoveryFail);
                else
                    NET_INC_STATS_BH(TCPRenoFailures);
            }
        } else if (tp->ca_state == TCP_CA_Loss) {
            NET_INC_STATS_BH(TCPLossFailures);
        } else {
            NET_INC_STATS_BH(TCPTimeouts);
        }
    }
    if (tcp_use_frto(sk)) {
        tcp_enter_frto(sk);
    } else {
        tcp_enter_loss(sk, 0);
    }

    if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
        /* Retransmission failed because of local congestion;
         * do not back off.
         */
        if (!tp->retransmits)
            tp->retransmits = 1;
        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
                             min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
    /* Increase the timeout each time we retransmit.  Note that
     * we do not increase the rtt estimate.  rto is initialized
     * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
     * that doubling rto each time is the least we can get away with.
     * In KA9Q, Karn uses this for the first few times, and then
     * goes to quadratic.  NetBSD doubles, but only goes up to *64,
     * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
     * defined in the protocol as the maximum possible RTT.  I guess
     * we'll have to use something other than TCP to talk to the
     * University of Mars.
     *
     * PAWS allows us longer timeouts and large windows, so once
     * implemented ftp to mars will work nicely.  We will have to fix
     * the 120 second clamps though!
     */
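    /* Worked example (values for illustration only): with a current rto of
     * 0.5 s, successive expiries of this timer reschedule the retransmit at
     * roughly 0.5, 1, 2, 4, 8, 16, 32, 64 and then 120 seconds, since
     * tp->rto is doubled on each timeout and clamped at TCP_RTO_MAX (120 s).
     */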
out_reset_timer:
    tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
    tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
    if (tp->retransmits > sysctl_tcp_retries1)
        __sk_dst_reset(sk);
static void tcp_write_timer(unsigned long data)
    struct sock *sk = (struct sock *)data;
    struct tcp_opt *tp = tcp_sk(sk);

    if (sock_owned_by_user(sk)) {
        /* Try again later. */
        if (!mod_timer(&tp->retransmit_timer, jiffies + (HZ/20)))
            sock_hold(sk);

    if (sk->sk_state == TCP_CLOSE || !tp->pending)

    if (time_after(tp->timeout, jiffies)) {
        if (!mod_timer(&tp->retransmit_timer, tp->timeout))
            sock_hold(sk);

    case TCP_TIME_RETRANS:
        tcp_retransmit_timer(sk);
        break;
    case TCP_TIME_PROBE0:
        tcp_probe_timer(sk);
        break;
/*
 * Timer for listening sockets.
 */
static void tcp_synack_timer(struct sock *sk)
    struct tcp_opt *tp = tcp_sk(sk);
    struct tcp_listen_opt *lopt = tp->listen_opt;
    int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
    int thresh = max_retries;
    unsigned long now = jiffies;
    struct open_request **reqp, *req;
    int i, budget;

    if (lopt == NULL || lopt->qlen == 0)
        return;
    /* Normally all the openreqs are young and become mature
     * (i.e. converted to an established socket) within the first timeout.
     * If a SYN-ACK was not acknowledged for 3 seconds, it means one of the
     * following: the SYN-ACK was lost, the ACK was lost, the RTT is high,
     * or nobody planned to ACK (i.e. a synflood).
     * When the server is a bit loaded, the queue is populated with old
     * open requests, reducing the effective size of the queue.
     * When the server is well loaded, the queue size can reduce to zero
     * after several minutes of work.  That is not a synflood, it is
     * normal operation.  The solution is to prune too-old entries,
     * overriding the normal timeout, when the situation becomes dangerous.
     *
     * Essentially, we reserve half of the room for young
     * embryos and abort old ones without pity, if the old
     * ones are about to clog our table.
     */
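    /* Note: lopt->max_qlen_log is the log2 of the maximum queue length, so
     * "qlen >> (max_qlen_log - 1)" below is simply a cheap test for "the
     * SYN backlog is at least half full"; only then do we start tightening
     * the retransmission threshold for old entries.
     */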
    if (lopt->qlen >> (lopt->max_qlen_log - 1)) {
        int young = (lopt->qlen_young << 1);

        if (lopt->qlen < young)

    if (tp->defer_accept)
        max_retries = tp->defer_accept;

    budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
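    /* With the usual constants (TCP_SYNQ_HSIZE = 512 buckets, an interval of
     * HZ/5 and TCP_TIMEOUT_INIT = 3*HZ) this budget works out to about 68
     * hash buckets per run, i.e. the whole SYN table is walked roughly twice
     * per initial SYN-ACK timeout.
     */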
    i = lopt->clock_hand;

    do {
        reqp = &lopt->syn_table[i];
        while ((req = *reqp) != NULL) {
            if (time_after_eq(now, req->expires)) {
                if ((req->retrans < thresh ||
                     (req->acked && req->retrans < max_retries))
                    && !req->class->rtx_syn_ack(sk, req, NULL)) {
                    unsigned long timeo;

                    if (req->retrans++ == 0)
                        lopt->qlen_young--;
                    timeo = min((TCP_TIMEOUT_INIT << req->retrans),
                                TCP_RTO_MAX);
                    req->expires = now + timeo;
                    reqp = &req->dl_next;
                    continue;
                }

                /* Drop this request. */
                write_lock(&tp->syn_wait_lock);
                *reqp = req->dl_next;
                write_unlock(&tp->syn_wait_lock);
                if (req->retrans == 0)
                    lopt->qlen_young--;
                tcp_openreq_free(req);
                continue;
            }
            reqp = &req->dl_next;
        }

        i = (i + 1) & (TCP_SYNQ_HSIZE - 1);

    } while (--budget > 0);

    lopt->clock_hand = i;

    tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
void tcp_delete_keepalive_timer(struct sock *sk)
    if (timer_pending(&sk->sk_timer) && del_timer(&sk->sk_timer))
        __sock_put(sk);

void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
    if (!mod_timer(&sk->sk_timer, jiffies + len))
        sock_hold(sk);
void tcp_set_keepalive(struct sock *sk, int val)
    if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
        return;

    if (val && !sock_flag(sk, SOCK_KEEPOPEN))
        tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
    else if (!val)
        tcp_delete_keepalive_timer(sk);
static void tcp_keepalive_timer(unsigned long data)
    struct sock *sk = (struct sock *)data;
    struct tcp_opt *tp = tcp_sk(sk);
    __u32 elapsed;

    /* Only process if the socket is not in use. */
    if (sock_owned_by_user(sk)) {
        /* Try again later. */
        tcp_reset_keepalive_timer(sk, HZ/20);

    if (sk->sk_state == TCP_LISTEN) {
        tcp_synack_timer(sk);

    if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
        if (tp->linger2 >= 0) {
            int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;

            tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);

        tcp_send_active_reset(sk, GFP_ATOMIC);

    if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)

    elapsed = keepalive_time_when(tp);
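    /* keepalive_time_when(tp) resolves to the per-socket tp->keepalive_time
     * (set via the TCP_KEEPIDLE socket option) when present, and otherwise
     * falls back to sysctl_tcp_keepalive_time (2 hours by default).
     */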
    /* It is alive without keepalive 8) */
    if (tp->packets_out || tp->send_head)

    elapsed = tcp_time_stamp - tp->rcv_tstamp;

    if (elapsed >= keepalive_time_when(tp)) {
        if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
            (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
            tcp_send_active_reset(sk, GFP_ATOMIC);

        if (tcp_write_wakeup(sk) <= 0) {
            tp->probes_out++;
            elapsed = keepalive_intvl_when(tp);
            /* If the keepalive probe was lost due to local congestion,
             * retry sooner.
             */
            elapsed = TCP_RESOURCE_PROBE_INTERVAL;

        /* The timer should fire at tp->rcv_tstamp + keepalive_time_when(tp). */
        elapsed = keepalive_time_when(tp) - elapsed;
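    /* Example with the default 2 hour keepalive time: if the connection has
     * been idle for only 30 minutes when this timer fires, "elapsed" above
     * becomes the remaining 90 minutes and the timer below is simply
     * re-armed for that long instead of a probe being sent.
     */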
    tcp_reset_keepalive_timer(sk, elapsed);
EXPORT_SYMBOL(tcp_clear_xmit_timers);
EXPORT_SYMBOL(tcp_delete_keepalive_timer);
EXPORT_SYMBOL(tcp_init_xmit_timers);
EXPORT_SYMBOL(tcp_reset_keepalive_timer);