2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * Implementation of the Transmission Control Protocol(TCP).
8 * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 * Corey Minyard <wf-rch!minyard@relay.EU.net>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
16 * Linus Torvalds, <torvalds@cs.helsinki.fi>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Matthew Dillon, <dillon@apollo.west.oic.com>
19 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
20 * Jorge Cwik, <jorge@laser.satlink.net>
23 #include <linux/module.h>
/*
 * Tunable TCP timer parameters, exposed via /proc/sys/net/ipv4/*.
 * Each is initialized from the corresponding TCP_* compile-time default.
 */
26 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
27 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
28 int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
29 int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
30 int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
31 int sysctl_tcp_retries1 = TCP_RETR1;
32 int sysctl_tcp_retries2 = TCP_RETR2;
/* Zero by default: 0 makes tcp_orphan_retries() pick a heuristic value. */
33 int sysctl_tcp_orphan_retries;
/*
 * Timer callbacks, installed on the socket by tcp_init_xmit_timers().
 * Each receives the struct sock * cast to unsigned long.
 */
35 static void tcp_write_timer(unsigned long);
36 static void tcp_delack_timer(unsigned long);
37 static void tcp_keepalive_timer (unsigned long data);
/* Diagnostic string printed if a timer fires with an unknown type tag. */
39 const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
42 * Using different timers for retransmit, delayed acks and probes
43 * We may wish use just one timer maintaining a list of expire jiffies
/*
 * Wire up the three per-socket TCP timers: retransmit, delayed-ACK,
 * and keepalive (the latter reuses the generic sk->sk_timer slot).
 * Each timer's data argument is the owning socket.
 */
47 void tcp_init_xmit_timers(struct sock *sk)
49 	struct tcp_opt *tp = tcp_sk(sk);
51 	init_timer(&tp->retransmit_timer);
52 	tp->retransmit_timer.function=&tcp_write_timer;
53 	tp->retransmit_timer.data = (unsigned long) sk;
56 	init_timer(&tp->delack_timer);
57 	tp->delack_timer.function=&tcp_delack_timer;
58 	tp->delack_timer.data = (unsigned long) sk;
61 	init_timer(&sk->sk_timer);
62 	sk->sk_timer.function = &tcp_keepalive_timer;
63 	sk->sk_timer.data = (unsigned long)sk;
/*
 * Stop all three TCP timers on a socket (retransmit, delayed-ACK,
 * keepalive).  sk_stop_timer() also drops the timer's socket reference.
 */
66 void tcp_clear_xmit_timers(struct sock *sk)
68 	struct tcp_opt *tp = tcp_sk(sk);
71 	sk_stop_timer(sk, &tp->retransmit_timer);
75 	sk_stop_timer(sk, &tp->delack_timer);
77 	sk_stop_timer(sk, &sk->sk_timer);
/*
 * Kill the connection after a fatal write timeout: report a pending
 * soft error if one was recorded, otherwise ETIMEDOUT, then wake the
 * error reporter and count the abort.
 */
80 static void tcp_write_err(struct sock *sk)
/* Prefer the ICMP-derived soft error over a generic timeout. */
82 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
83 	sk->sk_error_report(sk);
86 	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
89 /* Do not allow orphaned sockets to eat all our resources.
90 * This is direct violation of TCP specs, but it is required
91 * to prevent DoS attacks. It is called when a retransmission timeout
92 * or zero probe timeout occurs on orphaned socket.
94 * Criterion is still not confirmed experimentally and may change.
95 * We kill the socket, if:
96 * 1. If number of orphaned sockets exceeds an administratively configured
98 * 2. If we have strong memory pressure.
/*
 * Decide whether an orphaned socket must be killed to free resources.
 * Returns nonzero when the socket was destroyed (and, when appropriate,
 * a RST was sent).  @do_reset hints whether a reset should accompany
 * the kill; it is overridden for long-idle peers.
 */
100 static int tcp_out_of_resources(struct sock *sk, int do_reset)
102 	struct tcp_opt *tp = tcp_sk(sk);
103 	int orphans = atomic_read(&tcp_orphan_count);
105 	/* If peer does not open window for long time, or did not transmit
106 	 * anything for long time, penalize it. */
107 	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
110 	/* If some dubious ICMP arrived, penalize even more. */
/* Kill if the orphan limit is exceeded, or under memory pressure while
 * this socket still holds send buffer space.
 */
114 	if (orphans >= sysctl_tcp_max_orphans ||
115 	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
116 	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
118 			printk(KERN_INFO "Out of socket memory\n");
120 		/* Catch exceptional cases, when connection requires reset.
121 		 *      1. Last segment was sent recently. */
122 		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
123 		    /*  2. Window is closed. */
124 		    (!tp->snd_wnd && !tp->packets_out))
127 			tcp_send_active_reset(sk, GFP_ATOMIC);
129 		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
135 /* Calculate maximal number of retries on an orphaned socket.
 * @alive: nonzero while the backed-off RTO is still below TCP_RTO_MAX,
 * i.e. the connection may still be viable.
 */
136 static int tcp_orphan_retries(struct sock *sk, int alive)
138 	int retries = sysctl_tcp_orphan_retries; /* May be zero. */
140 	/* We know from an ICMP that something is wrong. */
141 	if (sk->sk_err_soft && !alive)
144 	/* However, if socket sent something recently, select some safe
145 	 * number of retries. 8 corresponds to >100 seconds with minimal
147 	if (retries == 0 && alive)
152 /* A write timeout has occurred. Process the after effects.
 * Returns nonzero when the connection has been aborted; the caller
 * (tcp_retransmit_timer) then stops retransmitting.
 */
153 static int tcp_write_timeout(struct sock *sk)
155 	struct tcp_opt *tp = tcp_sk(sk);
/* Connection-establishment states use the SYN retry limit. */
158 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
160 			dst_negative_advice(&sk->sk_dst_cache);
/* Per-socket TCP_SYNCNT setting overrides the sysctl default. */
161 		retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
163 		if (tp->retransmits >= sysctl_tcp_retries1) {
164 			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
167 			   It is place to make it. It is not made. I do not want
168 			   to make it. It is disgusting. It does not work in any
169 			   case. Let me cite the same draft, which requires for
170 			   us to implement this:
172    "The one security concern raised by this memo is that ICMP black holes
173    are often caused by over-zealous security administrators who block
174    all ICMP messages.  It is vitally important that those who design and
175    deploy security systems understand the impact of strict filtering on
176    upper-layer protocols.  The safest web site in the world is worthless
177    if most TCP implementations cannot transfer data from it.  It would
178    be far nicer to have all of the black holes fixed rather than fixing
179    all of the TCP implementations."
184 			dst_negative_advice(&sk->sk_dst_cache);
187 		retry_until = sysctl_tcp_retries2;
/* Orphaned (closed-by-user) sockets get a tighter retry budget and
 * are subject to the out-of-resources killer.
 */
188 		if (sock_flag(sk, SOCK_DEAD)) {
189 			int alive = (tp->rto < TCP_RTO_MAX);
191 			retry_until = tcp_orphan_retries(sk, alive);
193 			if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
198 	if (tp->retransmits >= retry_until) {
199 		/* Has it gone just too far? */
/*
 * Delayed-ACK timer callback.  If the socket is locked by the user the
 * work is deferred; otherwise any prequeued segments are processed and
 * a pending delayed ACK is sent, adjusting the ACK-timeout estimate.
 */
206 static void tcp_delack_timer(unsigned long data)
208 	struct sock *sk = (struct sock*)data;
209 	struct tcp_opt *tp = tcp_sk(sk);
212 	if (sock_owned_by_user(sk)) {
213 		/* Try again later. */
215 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
/* Retry shortly; TCP_DELACK_MIN is the minimum delayed-ACK interval. */
216 		sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
220 	sk_stream_mem_reclaim(sk);
/* Nothing to do if the connection is closed or no ACK timer is armed. */
222 	if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
/* Fired early: re-arm for the real deadline. */
225 	if (time_after(tp->ack.timeout, jiffies)) {
226 		sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
229 	tp->ack.pending &= ~TCP_ACK_TIMER;
/* Drain segments the user never picked up from the prequeue; they are
 * fed through the normal backlog receive path instead.
 */
231 	if (skb_queue_len(&tp->ucopy.prequeue)) {
234 		NET_ADD_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED,
235 				 skb_queue_len(&tp->ucopy.prequeue));
237 		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
238 			sk->sk_backlog_rcv(sk, skb);
240 		tp->ucopy.memory = 0;
243 	if (tcp_ack_scheduled(tp)) {
244 		if (!tp->ack.pingpong) {
245 			/* Delayed ACK missed: inflate ATO, capped at the RTO. */
246 			tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
248 			/* Delayed ACK missed: leave pingpong mode and
251 			tp->ack.pingpong = 0;
252 			tp->ack.ato = TCP_ATO_MIN;
255 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
260 	if (tcp_memory_pressure)
261 		sk_stream_mem_reclaim(sk);
/*
 * Zero-window probe timer.  Sends another window probe unless the
 * probe budget is exhausted, in which case the connection is killed
 * (orphans via the out-of-resources path).
 */
267 static void tcp_probe_timer(struct sock *sk)
269 	struct tcp_opt *tp = tcp_sk(sk);
/* Probing is only meaningful when nothing is in flight but data waits. */
272 	if (tp->packets_out || !sk->sk_send_head) {
277 	/* *WARNING* RFC 1122 forbids this
279 	 * It doesn't AFAIK, because we kill the retransmit timer -AK
281 	 * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
282 	 * this behaviour in Solaris down as a bug fix. [AC]
284 	 * Let me explain. probes_out is zeroed by incoming ACKs
285 	 * even if they advertise zero window. Hence, connection is killed only
286 	 * if we received no ACKs for normal connection timeout. It is not killed
287 	 * only because window stays zero for some time, window may be zero
288 	 * until armageddon and even later. We are in full accordance
289 	 * with RFCs, only probe timer combines both retransmission timeout
290 	 * and probe timeout in one bottle.				--ANK
292 	max_probes = sysctl_tcp_retries2;
/* Orphans: tighter budget, and may be reaped under resource pressure. */
294 	if (sock_flag(sk, SOCK_DEAD)) {
295 		int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
297 		max_probes = tcp_orphan_retries(sk, alive);
299 		if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
303 	if (tp->probes_out > max_probes) {
306 		/* Only send another probe if we didn't close things up. */
312 * The TCP retransmit timer.
/*
 * Retransmission timer: retransmit the head of the write queue, enter
 * loss recovery, back off the RTO exponentially (capped at TCP_RTO_MAX),
 * and give up via tcp_write_timeout() when the retry budget is spent.
 */
315 static void tcp_retransmit_timer(struct sock *sk)
317 	struct tcp_opt *tp = tcp_sk(sk);
/* Spurious fire: nothing in flight means nothing to retransmit. */
319 	if (tp->packets_out == 0)
322 	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
324 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
325 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
326 		/* Receiver dastardly shrinks window. Our retransmits
327 		 * become zero probes, but we should not timeout this
328 		 * connection. If the socket is an orphan, time it out,
329 		 * we cannot allow such beasts to hang infinitely.
332 		if (net_ratelimit()) {
333 			struct inet_opt *inet = inet_sk(sk);
334 			printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
335 			       NIPQUAD(inet->daddr), htons(inet->dport),
336 			       inet->num, tp->snd_una, tp->snd_nxt);
/* Window shrunk and no ACK for a full TCP_RTO_MAX: treat as timeout. */
339 		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
343 		tcp_enter_loss(sk, 0);
344 		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
346 		goto out_reset_timer;
/* Nonzero return means the connection was aborted; stop here. */
349 	if (tcp_write_timeout(sk))
/* First timeout of this episode: account which recovery mode failed. */
352 	if (tp->retransmits == 0) {
353 		if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
355 				if (tp->ca_state == TCP_CA_Recovery)
356 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
358 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
360 				if (tp->ca_state == TCP_CA_Recovery)
361 					NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
363 					NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
365 		} else if (tp->ca_state == TCP_CA_Loss) {
366 			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
368 			NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
372 	if (tcp_use_frto(sk)) {
375 		tcp_enter_loss(sk, 0);
378 	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
379 		/* Retransmission failed because of local congestion,
382 		if (!tp->retransmits)
/* Re-probe soon without backing off; the drop was local, not the path. */
384 		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
385 				     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
389 	/* Increase the timeout each time we retransmit.  Note that
390 	 * we do not increase the rtt estimate.  rto is initialized
391 	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
392 	 * that doubling rto each time is the least we can get away with.
393 	 * In KA9Q, Karn uses this for the first few times, and then
394 	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
395 	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
396 	 * defined in the protocol as the maximum possible RTT.  I guess
397 	 * we'll have to use something other than TCP to talk to the
398 	 * University of Mars.
400 	 * PAWS allows us longer timeouts and large windows, so once
401 	 * implemented ftp to mars will work nicely. We will have to fix
402 	 * the 120 second clamps though!
408 	tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
409 	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
410 	if (tp->retransmits > sysctl_tcp_retries1)
/*
 * Shared write-timer callback: dispatches on tp->pending to either the
 * retransmission handler or the zero-window probe handler.  Defers (with
 * a short HZ/20 retry) when the socket is locked by the user.
 */
416 static void tcp_write_timer(unsigned long data)
418 	struct sock *sk = (struct sock*)data;
419 	struct tcp_opt *tp = tcp_sk(sk);
423 	if (sock_owned_by_user(sk)) {
424 		/* Try again later */
425 		sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
429 	if (sk->sk_state == TCP_CLOSE || !tp->pending)
/* Fired before the deadline: re-arm for the real timeout. */
432 	if (time_after(tp->timeout, jiffies)) {
433 		sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
441 	case TCP_TIME_RETRANS:
442 		tcp_retransmit_timer(sk);
444 	case TCP_TIME_PROBE0:
451 	sk_stream_mem_reclaim(sk);
458 * Timer for listening sockets
/*
 * SYN-ACK retransmission timer for listening sockets.  Walks a budgeted
 * slice of the SYN hash table each tick: retransmits SYN-ACKs for young
 * requests, and prunes requests that exceeded their retry threshold
 * (tightened when the queue nears overflow).  Re-arms itself via the
 * keepalive timer at TCP_SYNQ_INTERVAL.
 */
461 static void tcp_synack_timer(struct sock *sk)
463 	struct tcp_opt *tp = tcp_sk(sk);
464 	struct tcp_listen_opt *lopt = tp->listen_opt;
/* TCP_DEFER_ACCEPT reuses syn_retries as its per-socket override. */
465 	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
466 	int thresh = max_retries;
467 	unsigned long now = jiffies;
468 	struct open_request **reqp, *req;
471 	if (lopt == NULL || lopt->qlen == 0)
474 	/* Normally all the openreqs are young and become mature
475 	 * (i.e. converted to established socket) for first timeout.
476 	 * If synack was not acknowledged for 3 seconds, it means
477 	 * one of the following things: synack was lost, ack was lost,
478 	 * rtt is high or nobody planned to ack (i.e. synflood).
479 	 * When server is a bit loaded, queue is populated with old
480 	 * open requests, reducing effective size of queue.
481 	 * When server is well loaded, queue size reduces to zero
482 	 * after several minutes of work. It is not synflood,
483 	 * it is normal operation. The solution is pruning
484 	 * too old entries overriding normal timeout, when
485 	 * situation becomes dangerous.
487 	 * Essentially, we reserve half of room for young
488 	 * embrions; and abort old ones without pity, if old
489 	 * ones are about to clog our table.
/* Queue at least half full: shrink the retry threshold for old entries. */
491 	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
492 #ifdef CONFIG_ACCEPT_QUEUES
495 		for(i=0; i < NUM_ACCEPT_QUEUES; i++)
496 			young += lopt->qlen_young[i];
500 		int young = (lopt->qlen_young<<1);
504 			if (lopt->qlen < young)
511 	if (tp->defer_accept)
512 		max_retries = tp->defer_accept;
/* Visit the whole hash table once per TCP_TIMEOUT_INIT, in slices. */
514 	budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
515 	i = lopt->clock_hand;
518 		reqp=&lopt->syn_table[i];
519 		while ((req = *reqp) != NULL) {
520 			if (time_after_eq(now, req->expires)) {
/* Still within budget (acked requests get the defer-accept budget):
 * retransmit the SYN-ACK and back off its expiry exponentially.
 */
521 				if ((req->retrans < thresh ||
522 				     (req->acked && req->retrans < max_retries))
523 				    && !req->class->rtx_syn_ack(sk, req, NULL)) {
526 					if (req->retrans++ == 0)
527 #ifdef CONFIG_ACCEPT_QUEUES
528 						lopt->qlen_young[req->acceptq_class]--;
532 					timeo = min((TCP_TIMEOUT_INIT << req->retrans), TCP_RTO_MAX);
533 					req->expires = now + timeo;
534 					reqp = &req->dl_next;
538 				/* Drop this request */
539 				write_lock(&tp->syn_wait_lock);
540 				*reqp = req->dl_next;
541 				write_unlock(&tp->syn_wait_lock);
543 				if (req->retrans == 0)
544 #ifdef CONFIG_ACCEPT_QUEUES
545 					lopt->qlen_young[req->acceptq_class]--;
549 				tcp_openreq_free(req);
552 			reqp = &req->dl_next;
555 		i = (i+1)&(TCP_SYNQ_HSIZE-1);
557 	} while (--budget > 0);
/* Remember where we stopped so the next tick resumes from here. */
559 	lopt->clock_hand = i;
562 		tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
/* Cancel the keepalive timer (sk->sk_timer) and drop its socket ref. */
565 void tcp_delete_keepalive_timer (struct sock *sk)
567 	sk_stop_timer(sk, &sk->sk_timer);
/* (Re)arm the keepalive timer to fire @len jiffies from now. */
570 void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
572 	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
/*
 * Handle SO_KEEPALIVE toggling: start the keepalive timer when the
 * option is switched on, stop it when switched off.  No-op in CLOSE
 * and LISTEN states (LISTEN uses sk_timer for the SYN-ACK timer).
 */
575 void tcp_set_keepalive(struct sock *sk, int val)
577 	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
580 	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
581 		tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
583 		tcp_delete_keepalive_timer(sk);
/*
 * Multiplexed sk_timer callback.  Depending on socket state it runs:
 * the listen-socket SYN-ACK timer, the FIN_WAIT2 orphan timeout, or
 * keepalive probing; then re-arms itself with the computed delay.
 */
587 static void tcp_keepalive_timer (unsigned long data)
589 	struct sock *sk = (struct sock *) data;
590 	struct tcp_opt *tp = tcp_sk(sk);
593 	/* Only process if socket is not in use. */
595 	if (sock_owned_by_user(sk)) {
596 		/* Try again later. */
597 		tcp_reset_keepalive_timer (sk, HZ/20);
601 	if (sk->sk_state == TCP_LISTEN) {
602 		tcp_synack_timer(sk);
/* Orphaned FIN_WAIT2: either move to timewait (linger2 >= 0) or reset. */
606 	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
607 		if (tp->linger2 >= 0) {
608 			int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
611 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
615 		tcp_send_active_reset(sk, GFP_ATOMIC);
619 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
622 	elapsed = keepalive_time_when(tp);
624 	/* It is alive without keepalive 8) */
625 	if (tp->packets_out || sk->sk_send_head)
628 	elapsed = tcp_time_stamp - tp->rcv_tstamp;
630 	if (elapsed >= keepalive_time_when(tp)) {
/* Probe budget exhausted (per-socket setting overrides the sysctl):
 * reset the connection.
 */
631 		if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
632 		     (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
633 			tcp_send_active_reset(sk, GFP_ATOMIC);
637 		if (tcp_write_wakeup(sk) <= 0) {
639 			elapsed = keepalive_intvl_when(tp);
641 			/* If keepalive was lost due to local congestion,
644 			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
647 		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
648 		elapsed = keepalive_time_when(tp) - elapsed;
652 	sk_stream_mem_reclaim(sk);
655 	tcp_reset_keepalive_timer (sk, elapsed);
/* Exported for other in-tree protocol code (e.g. IPv6 TCP). */
666 EXPORT_SYMBOL(tcp_clear_xmit_timers);
667 EXPORT_SYMBOL(tcp_delete_keepalive_timer);
668 EXPORT_SYMBOL(tcp_init_xmit_timers);
669 EXPORT_SYMBOL(tcp_reset_keepalive_timer);