ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv4 / ipvs / ip_vs_proto_tcp.c
1 /*
2  * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
3  *
4  * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
5  *
6  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
7  *              Julian Anastasov <ja@ssi.bg>
8  *
9  *              This program is free software; you can redistribute it and/or
10  *              modify it under the terms of the GNU General Public License
11  *              as published by the Free Software Foundation; either version
12  *              2 of the License, or (at your option) any later version.
13  *
14  * Changes:
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/ip.h>
20 #include <linux/tcp.h>                  /* for tcphdr */
21 #include <net/ip.h>
22 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
23 #include <linux/netfilter_ipv4.h>
24
25 #include <net/ip_vs.h>
26
27
28 static struct ip_vs_conn *
29 tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
30                 const struct iphdr *iph, unsigned int proto_off, int inverse)
31 {
32         __u16 ports[2];
33
34         if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
35                 return NULL;
36
37         if (likely(!inverse)) {
38                 return ip_vs_conn_in_get(iph->protocol,
39                                          iph->saddr, ports[0],
40                                          iph->daddr, ports[1]);
41         } else {
42                 return ip_vs_conn_in_get(iph->protocol,
43                                          iph->daddr, ports[1],
44                                          iph->saddr, ports[0]);
45         }
46 }
47
48 static struct ip_vs_conn *
49 tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
50                  const struct iphdr *iph, unsigned int proto_off, int inverse)
51 {
52         __u16 ports[2];
53
54         if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
55                 return NULL;
56
57         if (likely(!inverse)) {
58                 return ip_vs_conn_out_get(iph->protocol,
59                                           iph->saddr, ports[0],
60                                           iph->daddr, ports[1]);
61         } else {
62                 return ip_vs_conn_out_get(iph->protocol,
63                                           iph->daddr, ports[1],
64                                           iph->saddr, ports[0]);
65         }
66 }
67
68
69 static int
70 tcp_conn_schedule(struct sk_buff *skb,
71                   struct ip_vs_protocol *pp,
72                   int *verdict, struct ip_vs_conn **cpp)
73 {
74         struct ip_vs_service *svc;
75         struct tcphdr tcph;
76
77         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
78                 *verdict = NF_DROP;
79                 return 0;
80         }
81
82         if (tcph.syn &&
83             (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
84                                      skb->nh.iph->daddr, tcph.dest))) {
85                 if (ip_vs_todrop()) {
86                         /*
87                          * It seems that we are very loaded.
88                          * We have to drop this packet :(
89                          */
90                         ip_vs_service_put(svc);
91                         *verdict = NF_DROP;
92                         return 0;
93                 }
94
95                 /*
96                  * Let the virtual server select a real server for the
97                  * incoming connection, and create a connection entry.
98                  */
99                 *cpp = ip_vs_schedule(svc, skb);
100                 if (!*cpp) {
101                         *verdict = ip_vs_leave(svc, skb, pp);
102                         return 0;
103                 }
104                 ip_vs_service_put(svc);
105         }
106         return 1;
107 }
108
109
110 static inline void
111 tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip,
112                      u16 oldport, u16 newport)
113 {
114         tcph->check =
115                 ip_vs_check_diff(~oldip, newip,
116                                  ip_vs_check_diff(oldport ^ 0xFFFF,
117                                                   newport, tcph->check));
118 }
119
120
121 static int
122 tcp_snat_handler(struct sk_buff **pskb,
123                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
124 {
125         struct tcphdr *tcph;
126         unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
127
128         /* csum_check requires unshared skb */
129         if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
130                 return 0;
131
132         if (unlikely(cp->app != NULL)) {
133                 /* Some checks before mangling */
134                 if (pp->csum_check && !pp->csum_check(*pskb, pp))
135                         return 0;
136
137                 /* Call application helper if needed */
138                 if (!ip_vs_app_pkt_out(cp, pskb))
139                         return 0;
140         }
141
142         tcph = (void *)(*pskb)->nh.iph + tcphoff;
143         tcph->source = cp->vport;
144
145         /* Adjust TCP checksums */
146         if (!cp->app) {
147                 /* Only port and addr are changed, do fast csum update */
148                 tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
149                                      cp->dport, cp->vport);
150                 if ((*pskb)->ip_summed == CHECKSUM_HW)
151                         (*pskb)->ip_summed = CHECKSUM_NONE;
152         } else {
153                 /* full checksum calculation */
154                 tcph->check = 0;
155                 (*pskb)->csum = skb_checksum(*pskb, tcphoff,
156                                              (*pskb)->len - tcphoff, 0);
157                 tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
158                                                 (*pskb)->len - tcphoff,
159                                                 cp->protocol,
160                                                 (*pskb)->csum);
161                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
162                           pp->name, tcph->check,
163                           (char*)&(tcph->check) - (char*)tcph);
164         }
165         return 1;
166 }
167
168
169 static int
170 tcp_dnat_handler(struct sk_buff **pskb,
171                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
172 {
173         struct tcphdr *tcph;
174         unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
175
176         /* csum_check requires unshared skb */
177         if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
178                 return 0;
179
180         if (unlikely(cp->app != NULL)) {
181                 /* Some checks before mangling */
182                 if (pp->csum_check && !pp->csum_check(*pskb, pp))
183                         return 0;
184
185                 /*
186                  *      Attempt ip_vs_app call.
187                  *      It will fix ip_vs_conn and iph ack_seq stuff
188                  */
189                 if (!ip_vs_app_pkt_in(cp, pskb))
190                         return 0;
191         }
192
193         tcph = (void *)(*pskb)->nh.iph + tcphoff;
194         tcph->dest = cp->dport;
195
196         /*
197          *      Adjust TCP checksums
198          */
199         if (!cp->app) {
200                 /* Only port and addr are changed, do fast csum update */
201                 tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
202                                      cp->vport, cp->dport);
203                 if ((*pskb)->ip_summed == CHECKSUM_HW)
204                         (*pskb)->ip_summed = CHECKSUM_NONE;
205         } else {
206                 /* full checksum calculation */
207                 tcph->check = 0;
208                 (*pskb)->csum = skb_checksum(*pskb, tcphoff,
209                                              (*pskb)->len - tcphoff, 0);
210                 tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
211                                                 (*pskb)->len - tcphoff,
212                                                 cp->protocol,
213                                                 (*pskb)->csum);
214                 (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
215         }
216         return 1;
217 }
218
219
220 static int
221 tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
222 {
223         unsigned int tcphoff = skb->nh.iph->ihl*4;
224
225         switch (skb->ip_summed) {
226         case CHECKSUM_NONE:
227                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
228         case CHECKSUM_HW:
229                 if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
230                                       skb->len - tcphoff,
231                                       skb->nh.iph->protocol, skb->csum)) {
232                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
233                                          "Failed checksum for");
234                         return 0;
235                 }
236                 break;
237         default:
238                 /* CHECKSUM_UNNECESSARY */
239                 break;
240         }
241
242         return 1;
243 }
244
245
246 #define TCP_DIR_INPUT           0
247 #define TCP_DIR_OUTPUT          4
248 #define TCP_DIR_INPUT_ONLY      8
249
250 static int tcp_state_off[IP_VS_DIR_LAST] = {
251         [IP_VS_DIR_INPUT]               =       TCP_DIR_INPUT,
252         [IP_VS_DIR_OUTPUT]              =       TCP_DIR_OUTPUT,
253         [IP_VS_DIR_INPUT_ONLY]          =       TCP_DIR_INPUT_ONLY,
254 };
255
256 /*
257  *      Timeout table[state]
258  */
259 static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
260         [IP_VS_TCP_S_NONE]              =       2*HZ,
261         [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
262         [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
263         [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
264         [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
265         [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
266         [IP_VS_TCP_S_CLOSE]             =       10*HZ,
267         [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
268         [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
269         [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
270         [IP_VS_TCP_S_SYNACK]            =       120*HZ,
271         [IP_VS_TCP_S_LAST]              =       2*HZ,
272 };
273
274
#if 0

/*
 * FIXME: This is going to die
 *
 * Compiled-out alternative timeout table, same layout as tcp_timeouts.
 */
static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		= 2*HZ,
	[IP_VS_TCP_S_ESTABLISHED]	= 8*60*HZ,
	[IP_VS_TCP_S_SYN_SENT]		= 60*HZ,
	[IP_VS_TCP_S_SYN_RECV]		= 10*HZ,
	[IP_VS_TCP_S_FIN_WAIT]		= 60*HZ,
	[IP_VS_TCP_S_TIME_WAIT]		= 60*HZ,
	[IP_VS_TCP_S_CLOSE]		= 10*HZ,
	[IP_VS_TCP_S_CLOSE_WAIT]	= 60*HZ,
	[IP_VS_TCP_S_LAST_ACK]		= 30*HZ,
	[IP_VS_TCP_S_LISTEN]		= 2*60*HZ,
	[IP_VS_TCP_S_SYNACK]		= 100*HZ,
	[IP_VS_TCP_S_LAST]		= 2*HZ,
};

#endif
295
296 static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
297         [IP_VS_TCP_S_NONE]              =       "NONE",
298         [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
299         [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
300         [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
301         [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
302         [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
303         [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
304         [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
305         [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
306         [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
307         [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
308         [IP_VS_TCP_S_LAST]              =       "BUG!",
309 };
310
311 #define sNO IP_VS_TCP_S_NONE
312 #define sES IP_VS_TCP_S_ESTABLISHED
313 #define sSS IP_VS_TCP_S_SYN_SENT
314 #define sSR IP_VS_TCP_S_SYN_RECV
315 #define sFW IP_VS_TCP_S_FIN_WAIT
316 #define sTW IP_VS_TCP_S_TIME_WAIT
317 #define sCL IP_VS_TCP_S_CLOSE
318 #define sCW IP_VS_TCP_S_CLOSE_WAIT
319 #define sLA IP_VS_TCP_S_LAST_ACK
320 #define sLI IP_VS_TCP_S_LISTEN
321 #define sSA IP_VS_TCP_S_SYNACK
322
323 struct tcp_states_t {
324         int next_state[IP_VS_TCP_S_LAST];
325 };
326
327 static const char * tcp_state_name(int state)
328 {
329         if (state >= IP_VS_TCP_S_LAST)
330                 return "ERR!";
331         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
332 }
333
/*
 * Default TCP state transition table (the initial value of
 * tcp_state_table).
 *
 * Row index  = direction offset (TCP_DIR_INPUT, TCP_DIR_OUTPUT or
 *              TCP_DIR_INPUT_ONLY) + the value of tcp_state_idx()
 *              (0 = syn, 1 = fin, 2 = ack, 3 = rst).
 * Column     = current connection state, in the order given by the
 *              header comment above each group of rows.
 * Each entry = the state the connection moves to.
 */
334 static struct tcp_states_t tcp_states [] = {
335 /*      INPUT */
336 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
337 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
338 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
339 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
340 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
341
342 /*      OUTPUT */
343 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
344 /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
345 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
346 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
347 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
348
349 /*      INPUT-ONLY */
350 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
351 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
352 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
353 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
354 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
355 };
356
/*
 * Alternative TCP state transition table, selected by
 * tcp_timeout_change() when bit 0 of its flags argument (commented
 * there as "secure_tcp") is set.
 *
 * Same layout as tcp_states:
 * Row index  = direction offset (TCP_DIR_INPUT, TCP_DIR_OUTPUT or
 *              TCP_DIR_INPUT_ONLY) + the value of tcp_state_idx()
 *              (0 = syn, 1 = fin, 2 = ack, 3 = rst).
 * Column     = current connection state, in the order given by the
 *              header comment above each group of rows.
 * Each entry = the state the connection moves to.
 */
357 static struct tcp_states_t tcp_states_dos [] = {
358 /*      INPUT */
359 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
360 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
361 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
362 /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
363 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
364
365 /*      OUTPUT */
366 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
367 /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
368 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
369 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
370 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
371
372 /*      INPUT-ONLY */
373 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
374 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
375 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
376 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
377 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
378 };
379
380 static struct tcp_states_t *tcp_state_table = tcp_states;
381
382
383 static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
384 {
385         int on = (flags & 1);           /* secure_tcp */
386
387         /*
388         ** FIXME: change secure_tcp to independent sysctl var
389         ** or make it per-service or per-app because it is valid
390         ** for most if not for all of the applications. Something
391         ** like "capabilities" (flags) for each object.
392         */
393         tcp_state_table = (on? tcp_states_dos : tcp_states);
394 }
395
396 static int
397 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
398 {
399         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
400                                        tcp_state_name_table, sname, to);
401 }
402
403 static inline int tcp_state_idx(struct tcphdr *th)
404 {
405         if (th->rst)
406                 return 3;
407         if (th->syn)
408                 return 0;
409         if (th->fin)
410                 return 1;
411         if (th->ack)
412                 return 2;
413         return -1;
414 }
415
416 static inline void
417 set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
418               int direction, struct tcphdr *th)
419 {
420         int state_idx;
421         int new_state = IP_VS_TCP_S_CLOSE;
422         int state_off = tcp_state_off[direction];
423
424         /*
425          *    Update state offset to INPUT_ONLY if necessary
426          *    or delete NO_OUTPUT flag if output packet detected
427          */
428         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
429                 if (state_off == TCP_DIR_OUTPUT)
430                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
431                 else
432                         state_off = TCP_DIR_INPUT_ONLY;
433         }
434
435         if ((state_idx = tcp_state_idx(th)) < 0) {
436                 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
437                 goto tcp_state_out;
438         }
439
440         new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
441
442   tcp_state_out:
443         if (new_state != cp->state) {
444                 struct ip_vs_dest *dest = cp->dest;
445
446                 IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
447                           "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
448                           pp->name,
449                           (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
450                           th->syn? 'S' : '.',
451                           th->fin? 'F' : '.',
452                           th->ack? 'A' : '.',
453                           th->rst? 'R' : '.',
454                           NIPQUAD(cp->daddr), ntohs(cp->dport),
455                           NIPQUAD(cp->caddr), ntohs(cp->cport),
456                           tcp_state_name(cp->state),
457                           tcp_state_name(new_state),
458                           atomic_read(&cp->refcnt));
459                 if (dest) {
460                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
461                             (new_state != IP_VS_TCP_S_ESTABLISHED)) {
462                                 atomic_dec(&dest->activeconns);
463                                 atomic_inc(&dest->inactconns);
464                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
465                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
466                                    (new_state == IP_VS_TCP_S_ESTABLISHED)) {
467                                 atomic_inc(&dest->activeconns);
468                                 atomic_dec(&dest->inactconns);
469                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
470                         }
471                 }
472         }
473
474         cp->timeout = pp->timeout_table[cp->state = new_state];
475 }
476
477
478 /*
479  *      Handle state transitions
480  */
481 static int
482 tcp_state_transition(struct ip_vs_conn *cp, int direction,
483                      const struct sk_buff *skb,
484                      struct ip_vs_protocol *pp)
485 {
486         struct tcphdr tcph;
487
488         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0)
489                 return 0;
490
491         spin_lock(&cp->lock);
492         set_tcp_state(pp, cp, direction, &tcph);
493         spin_unlock(&cp->lock);
494
495         return 1;
496 }
497
498
499 /*
500  *      Hash table for TCP application incarnations
501  */
502 #define TCP_APP_TAB_BITS        4
503 #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
504 #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
505
506 static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
507 static spinlock_t tcp_app_lock = SPIN_LOCK_UNLOCKED;
508
509 static inline __u16 tcp_app_hashkey(__u16 port)
510 {
511         return ((port >> TCP_APP_TAB_BITS) ^ port) & TCP_APP_TAB_MASK;
512 }
513
514
515 static int tcp_register_app(struct ip_vs_app *inc)
516 {
517         struct ip_vs_app *i;
518         __u16 hash, port = inc->port;
519         int ret = 0;
520
521         hash = tcp_app_hashkey(port);
522
523         spin_lock_bh(&tcp_app_lock);
524         list_for_each_entry(i, &tcp_apps[hash], p_list) {
525                 if (i->port == port) {
526                         ret = -EEXIST;
527                         goto out;
528                 }
529         }
530         list_add(&inc->p_list, &tcp_apps[hash]);
531         atomic_inc(&ip_vs_protocol_tcp.appcnt);
532
533   out:
534         spin_unlock_bh(&tcp_app_lock);
535         return ret;
536 }
537
538
539 static void
540 tcp_unregister_app(struct ip_vs_app *inc)
541 {
542         spin_lock_bh(&tcp_app_lock);
543         atomic_dec(&ip_vs_protocol_tcp.appcnt);
544         list_del(&inc->p_list);
545         spin_unlock_bh(&tcp_app_lock);
546 }
547
548
549 static int
550 tcp_app_conn_bind(struct ip_vs_conn *cp)
551 {
552         int hash;
553         struct ip_vs_app *inc;
554         int result = 0;
555
556         /* Default binding: bind app only for NAT */
557         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
558                 return 0;
559
560         /* Lookup application incarnations and bind the right one */
561         hash = tcp_app_hashkey(cp->vport);
562
563         spin_lock(&tcp_app_lock);
564         list_for_each_entry(inc, &tcp_apps[hash], p_list) {
565                 if (inc->port == cp->vport) {
566                         if (unlikely(!ip_vs_app_inc_get(inc)))
567                                 break;
568                         spin_unlock(&tcp_app_lock);
569
570                         IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
571                                   "%u.%u.%u.%u:%u to app %s on port %u\n",
572                                   __FUNCTION__,
573                                   NIPQUAD(cp->caddr), ntohs(cp->cport),
574                                   NIPQUAD(cp->vaddr), ntohs(cp->vport),
575                                   inc->name, ntohs(inc->port));
576                         cp->app = inc;
577                         if (inc->init_conn)
578                                 result = inc->init_conn(inc, cp);
579                         goto out;
580                 }
581         }
582         spin_unlock(&tcp_app_lock);
583
584   out:
585         return result;
586 }
587
588
589 /*
590  *      Set LISTEN timeout. (ip_vs_conn_put will setup timer)
591  */
592 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
593 {
594         spin_lock(&cp->lock);
595         cp->state = IP_VS_TCP_S_LISTEN;
596         cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
597         spin_unlock(&cp->lock);
598 }
599
600
601 static void tcp_init(struct ip_vs_protocol *pp)
602 {
603         IP_VS_INIT_HASH_TABLE(tcp_apps);
604         pp->timeout_table = tcp_timeouts;
605 }
606
607
/* Protocol exit hook: nothing to do for TCP. */
static void tcp_exit(struct ip_vs_protocol *pp)
{
	/* intentionally empty */
}
611
612
613 struct ip_vs_protocol ip_vs_protocol_tcp = {
614         .name =                 "TCP",
615         .protocol =             IPPROTO_TCP,
616         .dont_defrag =          0,
617         .appcnt =               ATOMIC_INIT(0),
618         .init =                 tcp_init,
619         .exit =                 tcp_exit,
620         .register_app =         tcp_register_app,
621         .unregister_app =       tcp_unregister_app,
622         .conn_schedule =        tcp_conn_schedule,
623         .conn_in_get =          tcp_conn_in_get,
624         .conn_out_get =         tcp_conn_out_get,
625         .snat_handler =         tcp_snat_handler,
626         .dnat_handler =         tcp_dnat_handler,
627         .csum_check =           tcp_csum_check,
628         .state_name =           tcp_state_name,
629         .state_transition =     tcp_state_transition,
630         .app_conn_bind =        tcp_app_conn_bind,
631         .debug_packet =         ip_vs_tcpudp_debug_packet,
632         .timeout_change =       tcp_timeout_change,
633         .set_state_timeout =    tcp_set_state_timeout,
634 };