net/ipv4/tcp_timer.c

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
   9  *
  10  * Authors:     Ross Biro
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  20  *              Jorge Cwik, <jorge@laser.satlink.net>
  21  */
  22
  23 #include <linux/module.h>
  24 #include <net/tcp.h>
  25
  26 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
  27 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
  28 int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
  29 int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
  30 int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
  31 int sysctl_tcp_retries1 = TCP_RETR1;
  32 int sysctl_tcp_retries2 = TCP_RETR2;
  33 int sysctl_tcp_orphan_retries;
  34
  35 static void tcp_write_timer(unsigned long);
  36 static void tcp_delack_timer(unsigned long);
  37 static void tcp_keepalive_timer (unsigned long data);
  38
  39 #ifdef TCP_DEBUG
  40 const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
  41 EXPORT_SYMBOL(tcp_timer_bug_msg);
  42 #endif
  43
  44 /*
  45  * Using different timers for retransmit, delayed acks and probes
  46  * We may wish use just one timer maintaining a list of expire jiffies
  47  * to optimize.
  48  */
  49
  50 void tcp_init_xmit_timers(struct sock *sk)
  51 {
  52         struct tcp_sock *tp = tcp_sk(sk);
  53
  54         init_timer(&tp->retransmit_timer);
  55         tp->retransmit_timer.function=&tcp_write_timer;
  56         tp->retransmit_timer.data = (unsigned long) sk;
  57         tp->pending = 0;
  58
  59         init_timer(&tp->delack_timer);
  60         tp->delack_timer.function=&tcp_delack_timer;
  61         tp->delack_timer.data = (unsigned long) sk;
  62         tp->ack.pending = 0;
  63
  64         init_timer(&sk->sk_timer);
  65         sk->sk_timer.function   = &tcp_keepalive_timer;
  66         sk->sk_timer.data       = (unsigned long)sk;
  67 }
  68
  69 void tcp_clear_xmit_timers(struct sock *sk)
  70 {
  71         struct tcp_sock *tp = tcp_sk(sk);
  72
  73         tp->pending = 0;
  74         sk_stop_timer(sk, &tp->retransmit_timer);
  75
  76         tp->ack.pending = 0;
  77         tp->ack.blocked = 0;
  78         sk_stop_timer(sk, &tp->delack_timer);
  79
  80         sk_stop_timer(sk, &sk->sk_timer);
  81 }
  82
  83 static void tcp_write_err(struct sock *sk)
  84 {
  85         sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
  86         sk->sk_error_report(sk);
  87
  88         tcp_done(sk);
  89         NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
  90 }
  91
  92 /* Do not allow orphaned sockets to eat all our resources.
  93  * This is direct violation of TCP specs, but it is required
  94  * to prevent DoS attacks. It is called when a retransmission timeout
  95  * or zero probe timeout occurs on orphaned socket.
  96  *
  97  * Criterium is still not confirmed experimentally and may change.
  98  * We kill the socket, if:
  99  * 1. If number of orphaned sockets exceeds an administratively configured
 100  *    limit.
 101  * 2. If we have strong memory pressure.
 102  */
 103 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 104 {
 105         struct tcp_sock *tp = tcp_sk(sk);
 106         int orphans = atomic_read(&tcp_orphan_count);
 107
 108         /* If peer does not open window for long time, or did not transmit
 109          * anything for long time, penalize it. */
 110         if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
 111                 orphans <<= 1;
 112
 113         /* If some dubious ICMP arrived, penalize even more. */
 114         if (sk->sk_err_soft)
 115                 orphans <<= 1;
 116
 117         if (orphans >= sysctl_tcp_max_orphans ||
 118             (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
 119              atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
 120                 if (net_ratelimit())
 121                         printk(KERN_INFO "Out of socket memory\n");
 122
 123                 /* Catch exceptional cases, when connection requires reset.
 124                  *      1. Last segment was sent recently. */
 125                 if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
 126                     /*  2. Window is closed. */
 127                     (!tp->snd_wnd && !tp->packets_out))
 128                         do_reset = 1;
 129                 if (do_reset)
 130                         tcp_send_active_reset(sk, GFP_ATOMIC);
 131                 tcp_done(sk);
 132                 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
 133                 return 1;
 134         }
 135         return 0;
 136 }
 137
 138 /* Calculate maximal number or retries on an orphaned socket. */
 139 static int tcp_orphan_retries(struct sock *sk, int alive)
 140 {
 141         int retries = sysctl_tcp_orphan_retries; /* May be zero. */
 142
 143         /* We know from an ICMP that something is wrong. */
 144         if (sk->sk_err_soft && !alive)
 145                 retries = 0;
 146
 147         /* However, if socket sent something recently, select some safe
 148          * number of retries. 8 corresponds to >100 seconds with minimal
 149          * RTO of 200msec. */
 150         if (retries == 0 && alive)
 151                 retries = 8;
 152         return retries;
 153 }
 154
 155 /* A write timeout has occurred. Process the after effects. */
 156 static int tcp_write_timeout(struct sock *sk)
 157 {
 158         struct tcp_sock *tp = tcp_sk(sk);
 159         int retry_until;
 160
 161         if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 162                 if (tp->retransmits)
 163                         dst_negative_advice(&sk->sk_dst_cache);
 164                 retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
 165         } else {
 166                 if (tp->retransmits >= sysctl_tcp_retries1) {
 167                         /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
 168                            hole detection. :-(
 169
 170                            It is place to make it. It is not made. I do not want
 171                            to make it. It is disguisting. It does not work in any
 172                            case. Let me to cite the same draft, which requires for
 173                            us to implement this:
 174
 175    "The one security concern raised by this memo is that ICMP black holes
 176    are often caused by over-zealous security administrators who block
 177    all ICMP messages.  It is vitally important that those who design and
 178    deploy security systems understand the impact of strict filtering on
 179    upper-layer protocols.  The safest web site in the world is worthless
 180    if most TCP implementations cannot transfer data from it.  It would
 181    be far nicer to have all of the black holes fixed rather than fixing
 182    all of the TCP implementations."
 183
 184                            Golden words :-).
 185                    */
 186
 187                         dst_negative_advice(&sk->sk_dst_cache);
 188                 }
 189
 190                 retry_until = sysctl_tcp_retries2;
 191                 if (sock_flag(sk, SOCK_DEAD)) {
 192                         int alive = (tp->rto < TCP_RTO_MAX);
 193
 194                         retry_until = tcp_orphan_retries(sk, alive);
 195
 196                         if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
 197                                 return 1;
 198                 }
 199         }
 200
 201         if (tp->retransmits >= retry_until) {
 202                 /* Has it gone just too far? */
 203                 tcp_write_err(sk);
 204                 return 1;
 205         }
 206         return 0;
 207 }
 208
 209 static void tcp_delack_timer(unsigned long data)
 210 {
 211         struct sock *sk = (struct sock*)data;
 212         struct tcp_sock *tp = tcp_sk(sk);
 213
 214         bh_lock_sock(sk);
 215         if (sock_owned_by_user(sk)) {
 216                 /* Try again later. */
 217                 tp->ack.blocked = 1;
 218                 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
 219                 sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
 220                 goto out_unlock;
 221         }
 222
 223         sk_stream_mem_reclaim(sk);
 224
 225         if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
 226                 goto out;
 227
 228         if (time_after(tp->ack.timeout, jiffies)) {
 229                 sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
 230                 goto out;
 231         }
 232         tp->ack.pending &= ~TCP_ACK_TIMER;
 233
 234         if (skb_queue_len(&tp->ucopy.prequeue)) {
 235                 struct sk_buff *skb;
 236
 237                 NET_ADD_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED,
 238                                  skb_queue_len(&tp->ucopy.prequeue));
 239
 240                 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
 241                         sk->sk_backlog_rcv(sk, skb);
 242
 243                 tp->ucopy.memory = 0;
 244         }
 245
 246         if (tcp_ack_scheduled(tp)) {
 247                 if (!tp->ack.pingpong) {
 248                         /* Delayed ACK missed: inflate ATO. */
 249                         tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
 250                 } else {
 251                         /* Delayed ACK missed: leave pingpong mode and
 252                          * deflate ATO.
 253                          */
 254                         tp->ack.pingpong = 0;
 255                         tp->ack.ato = TCP_ATO_MIN;
 256                 }
 257                 tcp_send_ack(sk);
 258                 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
 259         }
 260         TCP_CHECK_TIMER(sk);
 261
 262 out:
 263         if (tcp_memory_pressure)
 264                 sk_stream_mem_reclaim(sk);
 265 out_unlock:
 266         bh_unlock_sock(sk);
 267         sock_put(sk);
 268 }
 269
 270 static void tcp_probe_timer(struct sock *sk)
 271 {
 272         struct tcp_sock *tp = tcp_sk(sk);
 273         int max_probes;
 274
 275         if (tp->packets_out || !sk->sk_send_head) {
 276                 tp->probes_out = 0;
 277                 return;
 278         }
 279
 280         /* *WARNING* RFC 1122 forbids this
 281          *
 282          * It doesn't AFAIK, because we kill the retransmit timer -AK
 283          *
 284          * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
 285          * this behaviour in Solaris down as a bug fix. [AC]
 286          *
 287          * Let me to explain. probes_out is zeroed by incoming ACKs
 288          * even if they advertise zero window. Hence, connection is killed only
 289          * if we received no ACKs for normal connection timeout. It is not killed
 290          * only because window stays zero for some time, window may be zero
 291          * until armageddon and even later. We are in full accordance
 292          * with RFCs, only probe timer combines both retransmission timeout
 293          * and probe timeout in one bottle.                             --ANK
 294          */
 295         max_probes = sysctl_tcp_retries2;
 296
 297         if (sock_flag(sk, SOCK_DEAD)) {
 298                 int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
 299
 300                 max_probes = tcp_orphan_retries(sk, alive);
 301
 302                 if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
 303                         return;
 304         }
 305
 306         if (tp->probes_out > max_probes) {
 307                 tcp_write_err(sk);
 308         } else {
 309                 /* Only send another probe if we didn't close things up. */
 310                 tcp_send_probe0(sk);
 311         }
 312 }
 313
 314 /*
 315  *      The TCP retransmit timer.
 316  */
 317
 318 static void tcp_retransmit_timer(struct sock *sk)
 319 {
 320         struct tcp_sock *tp = tcp_sk(sk);
 321
 322         if (!tp->packets_out)
 323                 goto out;
 324
 325         BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
 326
 327         if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 328             !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
 329                 /* Receiver dastardly shrinks window. Our retransmits
 330                  * become zero probes, but we should not timeout this
 331                  * connection. If the socket is an orphan, time it out,
 332                  * we cannot allow such beasts to hang infinitely.
 333                  */
 334 #ifdef TCP_DEBUG
 335                 if (net_ratelimit()) {
 336                         struct inet_sock *inet = inet_sk(sk);
 337                         printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
 338                                NIPQUAD(inet->daddr), htons(inet->dport),
 339                                inet->num, tp->snd_una, tp->snd_nxt);
 340                 }
 341 #endif
 342                 if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
 343                         tcp_write_err(sk);
 344                         goto out;
 345                 }
 346                 tcp_enter_loss(sk, 0);
 347                 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
 348                 __sk_dst_reset(sk);
 349                 goto out_reset_timer;
 350         }
 351
 352         if (tcp_write_timeout(sk))
 353                 goto out;
 354
 355         if (tp->retransmits == 0) {
 356                 if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
 357                         if (tp->rx_opt.sack_ok) {
 358                                 if (tp->ca_state == TCP_CA_Recovery)
 359                                         NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
 360                                 else
 361                                         NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
 362                         } else {
 363                                 if (tp->ca_state == TCP_CA_Recovery)
 364                                         NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
 365                                 else
 366                                         NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
 367                         }
 368                 } else if (tp->ca_state == TCP_CA_Loss) {
 369                         NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
 370                 } else {
 371                         NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
 372                 }
 373         }
 374
 375         if (tcp_use_frto(sk)) {
 376                 tcp_enter_frto(sk);
 377         } else {
 378                 tcp_enter_loss(sk, 0);
 379         }
 380
 381         if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
 382                 /* Retransmission failed because of local congestion,
 383                  * do not backoff.
 384                  */
 385                 if (!tp->retransmits)
 386                         tp->retransmits=1;
 387                 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
 388                                      min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
 389                 goto out;
 390         }
 391
 392         /* Increase the timeout each time we retransmit.  Note that
 393          * we do not increase the rtt estimate.  rto is initialized
 394          * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
 395          * that doubling rto each time is the least we can get away with.
 396          * In KA9Q, Karn uses this for the first few times, and then
 397          * goes to quadratic.  netBSD doubles, but only goes up to *64,
 398          * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
 399          * defined in the protocol as the maximum possible RTT.  I guess
 400          * we'll have to use something other than TCP to talk to the
 401          * University of Mars.
 402          *
 403          * PAWS allows us longer timeouts and large windows, so once
 404          * implemented ftp to mars will work nicely. We will have to fix
 405          * the 120 second clamps though!
 406          */
 407         tp->backoff++;
 408         tp->retransmits++;
 409
 410 out_reset_timer:
 411         tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
 412         tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
 413         if (tp->retransmits > sysctl_tcp_retries1)
 414                 __sk_dst_reset(sk);
 415
 416 out:;
 417 }
 418
 419 static void tcp_write_timer(unsigned long data)
 420 {
 421         struct sock *sk = (struct sock*)data;
 422         struct tcp_sock *tp = tcp_sk(sk);
 423         int event;
 424
 425         bh_lock_sock(sk);
 426         if (sock_owned_by_user(sk)) {
 427                 /* Try again later */
 428                 sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
 429                 goto out_unlock;
 430         }
 431
 432         if (sk->sk_state == TCP_CLOSE || !tp->pending)
 433                 goto out;
 434
 435         if (time_after(tp->timeout, jiffies)) {
 436                 sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
 437                 goto out;
 438         }
 439
 440         event = tp->pending;
 441         tp->pending = 0;
 442
 443         switch (event) {
 444         case TCP_TIME_RETRANS:
 445                 tcp_retransmit_timer(sk);
 446                 break;
 447         case TCP_TIME_PROBE0:
 448                 tcp_probe_timer(sk);
 449                 break;
 450         }
 451         TCP_CHECK_TIMER(sk);
 452
 453 out:
 454         sk_stream_mem_reclaim(sk);
 455 out_unlock:
 456         bh_unlock_sock(sk);
 457         sock_put(sk);
 458 }
 459
 460 /*
 461  *      Timer for listening sockets
 462  */
 463
 464 static void tcp_synack_timer(struct sock *sk)
 465 {
 466         struct tcp_sock *tp = tcp_sk(sk);
 467         struct tcp_listen_opt *lopt = tp->listen_opt;
 468         int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
 469         int thresh = max_retries;
 470         unsigned long now = jiffies;
 471         struct open_request **reqp, *req;
 472         int i, budget;
 473
 474         if (lopt == NULL || lopt->qlen == 0)
 475                 return;
 476
 477         /* Normally all the openreqs are young and become mature
 478          * (i.e. converted to established socket) for first timeout.
 479          * If synack was not acknowledged for 3 seconds, it means
 480          * one of the following things: synack was lost, ack was lost,
 481          * rtt is high or nobody planned to ack (i.e. synflood).
 482          * When server is a bit loaded, queue is populated with old
 483          * open requests, reducing effective size of queue.
 484          * When server is well loaded, queue size reduces to zero
 485          * after several minutes of work. It is not synflood,
 486          * it is normal operation. The solution is pruning
 487          * too old entries overriding normal timeout, when
 488          * situation becomes dangerous.
 489          *
 490          * Essentially, we reserve half of room for young
 491          * embrions; and abort old ones without pity, if old
 492          * ones are about to clog our table.
 493          */
 494         if (lopt->qlen>>(lopt->max_qlen_log-1)) {
 495                 int young = (lopt->qlen_young<<1);
 496
 497                 while (thresh > 2) {
 498                         if (lopt->qlen < young)
 499                                 break;
 500                         thresh--;
 501                         young <<= 1;
 502                 }
 503         }
 504
 505         if (tp->defer_accept)
 506                 max_retries = tp->defer_accept;
 507
 508         budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
 509         i = lopt->clock_hand;
 510
 511         do {
 512                 reqp=&lopt->syn_table[i];
 513                 while ((req = *reqp) != NULL) {
 514                         if (time_after_eq(now, req->expires)) {
 515                                 if ((req->retrans < thresh ||
 516                                      (req->acked && req->retrans < max_retries))
 517                                     && !req->class->rtx_syn_ack(sk, req, NULL)) {
 518                                         unsigned long timeo;
 519
 520                                         if (req->retrans++ == 0)
 521                                                 lopt->qlen_young--;
 522                                         timeo = min((TCP_TIMEOUT_INIT << req->retrans),
 523                                                     TCP_RTO_MAX);
 524                                         req->expires = now + timeo;
 525                                         reqp = &req->dl_next;
 526                                         continue;
 527                                 }
 528
 529                                 /* Drop this request */
 530                                 write_lock(&tp->syn_wait_lock);
 531                                 *reqp = req->dl_next;
 532                                 write_unlock(&tp->syn_wait_lock);
 533                                 lopt->qlen--;
 534                                 if (req->retrans == 0)
 535                                         lopt->qlen_young--;
 536                                 tcp_openreq_free(req);
 537                                 continue;
 538                         }
 539                         reqp = &req->dl_next;
 540                 }
 541
 542                 i = (i+1)&(TCP_SYNQ_HSIZE-1);
 543
 544         } while (--budget > 0);
 545
 546         lopt->clock_hand = i;
 547
 548         if (lopt->qlen)
 549                 tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
 550 }
 551
 552 void tcp_delete_keepalive_timer (struct sock *sk)
 553 {
 554         sk_stop_timer(sk, &sk->sk_timer);
 555 }
 556
 557 void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
 558 {
 559         sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
 560 }
 561
 562 void tcp_set_keepalive(struct sock *sk, int val)
 563 {
 564         if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
 565                 return;
 566
 567         if (val && !sock_flag(sk, SOCK_KEEPOPEN))
 568                 tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
 569         else if (!val)
 570                 tcp_delete_keepalive_timer(sk);
 571 }
 572
 573
 574 static void tcp_keepalive_timer (unsigned long data)
 575 {
 576         struct sock *sk = (struct sock *) data;
 577         struct tcp_sock *tp = tcp_sk(sk);
 578         __u32 elapsed;
 579
 580         /* Only process if socket is not in use. */
 581         bh_lock_sock(sk);
 582         if (sock_owned_by_user(sk)) {
 583                 /* Try again later. */
 584                 tcp_reset_keepalive_timer (sk, HZ/20);
 585                 goto out;
 586         }
 587
 588         if (sk->sk_state == TCP_LISTEN) {
 589                 tcp_synack_timer(sk);
 590                 goto out;
 591         }
 592
 593         if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
 594                 if (tp->linger2 >= 0) {
 595                         int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
 596
 597                         if (tmo > 0) {
 598                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
 599                                 goto out;
 600                         }
 601                 }
 602                 tcp_send_active_reset(sk, GFP_ATOMIC);
 603                 goto death;
 604         }
 605
 606         if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
 607                 goto out;
 608
 609         elapsed = keepalive_time_when(tp);
 610
 611         /* It is alive without keepalive 8) */
 612         if (tp->packets_out || sk->sk_send_head)
 613                 goto resched;
 614
 615         elapsed = tcp_time_stamp - tp->rcv_tstamp;
 616
 617         if (elapsed >= keepalive_time_when(tp)) {
 618                 if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
 619                      (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
 620                         tcp_send_active_reset(sk, GFP_ATOMIC);
 621                         tcp_write_err(sk);
 622                         goto out;
 623                 }
 624                 if (tcp_write_wakeup(sk) <= 0) {
 625                         tp->probes_out++;
 626                         elapsed = keepalive_intvl_when(tp);
 627                 } else {
 628                         /* If keepalive was lost due to local congestion,
 629                          * try harder.
 630                          */
 631                         elapsed = TCP_RESOURCE_PROBE_INTERVAL;
 632                 }
 633         } else {
 634                 /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
 635                 elapsed = keepalive_time_when(tp) - elapsed;
 636         }
 637
 638         TCP_CHECK_TIMER(sk);
 639         sk_stream_mem_reclaim(sk);
 640
 641 resched:
 642         tcp_reset_keepalive_timer (sk, elapsed);
 643         goto out;
 644
 645 death:
 646         tcp_done(sk);
 647
 648 out:
 649         bh_unlock_sock(sk);
 650         sock_put(sk);
 651 }
 652
 653 EXPORT_SYMBOL(tcp_clear_xmit_timers);
 654 EXPORT_SYMBOL(tcp_delete_keepalive_timer);
 655 EXPORT_SYMBOL(tcp_init_xmit_timers);
 656 EXPORT_SYMBOL(tcp_reset_keepalive_timer);