ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv4 / ipvs / ip_vs_proto_udp.c
1 /*
2  * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
3  *
4  * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
5  *
6  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
7  *              Julian Anastasov <ja@ssi.bg>
8  *
9  *              This program is free software; you can redistribute it and/or
10  *              modify it under the terms of the GNU General Public License
11  *              as published by the Free Software Foundation; either version
12  *              2 of the License, or (at your option) any later version.
13  *
14  * Changes:
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/netfilter_ipv4.h>
20
21 #include <net/ip_vs.h>
22
23
24 static struct ip_vs_conn *
25 udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
26                 const struct iphdr *iph, unsigned int proto_off, int inverse)
27 {
28         struct ip_vs_conn *cp;
29         __u16 ports[2];
30
31         if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
32                 return NULL;
33
34         if (likely(!inverse)) {
35                 cp = ip_vs_conn_in_get(iph->protocol,
36                                        iph->saddr, ports[0],
37                                        iph->daddr, ports[1]);
38         } else {
39                 cp = ip_vs_conn_in_get(iph->protocol,
40                                        iph->daddr, ports[1],
41                                        iph->saddr, ports[0]);
42         }
43
44         return cp;
45 }
46
47
48 static struct ip_vs_conn *
49 udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
50                  const struct iphdr *iph, unsigned int proto_off, int inverse)
51 {
52         struct ip_vs_conn *cp;
53         __u16 ports[2];
54
55         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, ports, sizeof(ports)) < 0)
56                 return NULL;
57
58         if (likely(!inverse)) {
59                 cp = ip_vs_conn_out_get(iph->protocol,
60                                         iph->saddr, ports[0],
61                                         iph->daddr, ports[1]);
62         } else {
63                 cp = ip_vs_conn_out_get(iph->protocol,
64                                         iph->daddr, ports[1],
65                                         iph->saddr, ports[0]);
66         }
67
68         return cp;
69 }
70
71
72 static int
73 udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
74                   int *verdict, struct ip_vs_conn **cpp)
75 {
76         struct ip_vs_service *svc;
77         struct udphdr udph;
78
79         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
80                 *verdict = NF_DROP;
81                 return 0;
82         }
83
84         if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
85                                      skb->nh.iph->daddr, udph.dest))) {
86                 if (ip_vs_todrop()) {
87                         /*
88                          * It seems that we are very loaded.
89                          * We have to drop this packet :(
90                          */
91                         ip_vs_service_put(svc);
92                         *verdict = NF_DROP;
93                         return 0;
94                 }
95
96                 /*
97                  * Let the virtual server select a real server for the
98                  * incoming connection, and create a connection entry.
99                  */
100                 *cpp = ip_vs_schedule(svc, skb);
101                 if (!*cpp) {
102                         *verdict = ip_vs_leave(svc, skb, pp);
103                         return 0;
104                 }
105                 ip_vs_service_put(svc);
106         }
107         return 1;
108 }
109
110
111 static inline void
112 udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
113                      u16 oldport, u16 newport)
114 {
115         uhdr->check =
116                 ip_vs_check_diff(~oldip, newip,
117                                  ip_vs_check_diff(oldport ^ 0xFFFF,
118                                                   newport, uhdr->check));
119         if (!uhdr->check)
120                 uhdr->check = 0xFFFF;
121 }
122
123 static int
124 udp_snat_handler(struct sk_buff **pskb,
125                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
126 {
127         struct udphdr *udph;
128         unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
129
130         /* csum_check requires unshared skb */
131         if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
132                 return 0;
133
134         if (unlikely(cp->app != NULL)) {
135                 /* Some checks before mangling */
136                 if (pp->csum_check && !pp->csum_check(*pskb, pp))
137                         return 0;
138
139                 /*
140                  *      Call application helper if needed
141                  */
142                 if (!ip_vs_app_pkt_out(cp, pskb))
143                         return 0;
144         }
145
146         udph = (void *)(*pskb)->nh.iph + udphoff;
147         udph->source = cp->vport;
148
149         /*
150          *      Adjust UDP checksums
151          */
152         if (!cp->app && (udph->check != 0)) {
153                 /* Only port and addr are changed, do fast csum update */
154                 udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
155                                      cp->dport, cp->vport);
156                 if ((*pskb)->ip_summed == CHECKSUM_HW)
157                         (*pskb)->ip_summed = CHECKSUM_NONE;
158         } else {
159                 /* full checksum calculation */
160                 udph->check = 0;
161                 (*pskb)->csum = skb_checksum(*pskb, udphoff,
162                                              (*pskb)->len - udphoff, 0);
163                 udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
164                                                 (*pskb)->len - udphoff,
165                                                 cp->protocol,
166                                                 (*pskb)->csum);
167                 if (udph->check == 0)
168                         udph->check = 0xFFFF;
169                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
170                           pp->name, udph->check,
171                           (char*)&(udph->check) - (char*)udph);
172         }
173         return 1;
174 }
175
176
177 static int
178 udp_dnat_handler(struct sk_buff **pskb,
179                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
180 {
181         struct udphdr *udph;
182         unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
183
184         /* csum_check requires unshared skb */
185         if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
186                 return 0;
187
188         if (unlikely(cp->app != NULL)) {
189                 /* Some checks before mangling */
190                 if (pp->csum_check && !pp->csum_check(*pskb, pp))
191                         return 0;
192
193                 /*
194                  *      Attempt ip_vs_app call.
195                  *      It will fix ip_vs_conn
196                  */
197                 if (!ip_vs_app_pkt_in(cp, pskb))
198                         return 0;
199         }
200
201         udph = (void *)(*pskb)->nh.iph + udphoff;
202         udph->dest = cp->dport;
203
204         /*
205          *      Adjust UDP checksums
206          */
207         if (!cp->app && (udph->check != 0)) {
208                 /* Only port and addr are changed, do fast csum update */
209                 udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
210                                      cp->vport, cp->dport);
211                 if ((*pskb)->ip_summed == CHECKSUM_HW)
212                         (*pskb)->ip_summed = CHECKSUM_NONE;
213         } else {
214                 /* full checksum calculation */
215                 udph->check = 0;
216                 (*pskb)->csum = skb_checksum(*pskb, udphoff,
217                                              (*pskb)->len - udphoff, 0);
218                 udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
219                                                 (*pskb)->len - udphoff,
220                                                 cp->protocol,
221                                                 (*pskb)->csum);
222                 if (udph->check == 0)
223                         udph->check = 0xFFFF;
224                 (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
225         }
226         return 1;
227 }
228
229
230 static int
231 udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
232 {
233         struct udphdr udph;
234         unsigned int udphoff = skb->nh.iph->ihl*4;
235
236         if (skb_copy_bits(skb, udphoff, &udph, sizeof(udph)) < 0)
237                 return 0;
238
239         if (udph.check != 0) {
240                 switch (skb->ip_summed) {
241                 case CHECKSUM_NONE:
242                         skb->csum = skb_checksum(skb, udphoff,
243                                                  skb->len - udphoff, 0);
244                 case CHECKSUM_HW:
245                         if (csum_tcpudp_magic(skb->nh.iph->saddr,
246                                               skb->nh.iph->daddr,
247                                               skb->len - udphoff,
248                                               skb->nh.iph->protocol,
249                                               skb->csum)) {
250                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
251                                                  "Failed checksum for");
252                                 return 0;
253                         }
254                         break;
255                 default:
256                         /* CHECKSUM_UNNECESSARY */
257                         break;
258                 }
259         }
260         return 1;
261 }
262
263
264 /*
265  *      Note: the caller guarantees that only one of register_app,
266  *      unregister_app or app_conn_bind is called each time.
267  */
268
/* The application table has 2^UDP_APP_TAB_BITS (= 16) hash buckets. */
#define UDP_APP_TAB_BITS        4
#define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
/* Mask that reduces a hash value to a valid bucket index. */
#define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)

/* Hash table of registered applications, linked through their p_list. */
static struct list_head udp_apps[UDP_APP_TAB_SIZE];
/* Taken around every walk or modification of udp_apps in this file. */
static spinlock_t udp_app_lock = SPIN_LOCK_UNLOCKED;
275
276 static inline __u16 udp_app_hashkey(__u16 port)
277 {
278         return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK;
279 }
280
281
282 static int udp_register_app(struct ip_vs_app *inc)
283 {
284         struct ip_vs_app *i;
285         __u16 hash, port = inc->port;
286         int ret = 0;
287
288         hash = udp_app_hashkey(port);
289
290
291         spin_lock_bh(&udp_app_lock);
292         list_for_each_entry(i, &udp_apps[hash], p_list) {
293                 if (i->port == port) {
294                         ret = -EEXIST;
295                         goto out;
296                 }
297         }
298         list_add(&inc->p_list, &udp_apps[hash]);
299         atomic_inc(&ip_vs_protocol_udp.appcnt);
300
301   out:
302         spin_unlock_bh(&udp_app_lock);
303         return ret;
304 }
305
306
/*
 *      Remove an application incarnation from the udp_apps hash table
 *      and decrement the UDP protocol's appcnt, both under udp_app_lock.
 *      No check is made here that inc is actually on a list.
 */
static void
udp_unregister_app(struct ip_vs_app *inc)
{
        spin_lock_bh(&udp_app_lock);
        atomic_dec(&ip_vs_protocol_udp.appcnt);
        list_del(&inc->p_list);
        spin_unlock_bh(&udp_app_lock);
}
315
316
317 static int udp_app_conn_bind(struct ip_vs_conn *cp)
318 {
319         int hash;
320         struct ip_vs_app *inc;
321         int result = 0;
322
323         /* Default binding: bind app only for NAT */
324         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
325                 return 0;
326
327         /* Lookup application incarnations and bind the right one */
328         hash = udp_app_hashkey(cp->vport);
329
330         spin_lock(&udp_app_lock);
331         list_for_each_entry(inc, &udp_apps[hash], p_list) {
332                 if (inc->port == cp->vport) {
333                         if (unlikely(!ip_vs_app_inc_get(inc)))
334                                 break;
335                         spin_unlock(&udp_app_lock);
336
337                         IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
338                                   "%u.%u.%u.%u:%u to app %s on port %u\n",
339                                   __FUNCTION__,
340                                   NIPQUAD(cp->caddr), ntohs(cp->cport),
341                                   NIPQUAD(cp->vaddr), ntohs(cp->vport),
342                                   inc->name, ntohs(inc->port));
343                         cp->app = inc;
344                         if (inc->init_conn)
345                                 result = inc->init_conn(inc, cp);
346                         goto out;
347                 }
348         }
349         spin_unlock(&udp_app_lock);
350
351   out:
352         return result;
353 }
354
355
/*
 *      Per-state timeout values, indexed by IP_VS_UDP_S_* and expressed
 *      as multiples of HZ.  udp_init() installs this array as the
 *      protocol's timeout_table.
 */
static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
        [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
        [IP_VS_UDP_S_LAST]              =       2*HZ,
};

/*
 *      Printable names for the UDP states, indexed by IP_VS_UDP_S_*.
 *      Used by udp_state_name() and udp_set_state_timeout().
 */
static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
        [IP_VS_UDP_S_NORMAL]            =       "UDP",
        [IP_VS_UDP_S_LAST]              =       "BUG!",
};
365
366
/*
 *      Set the timeout "to" for the state named "sname".
 *
 *      Thin wrapper: passes the protocol's timeout table, the number of
 *      UDP states (IP_VS_UDP_S_LAST) and the UDP state name table to
 *      ip_vs_set_state_timeout() and returns its result unchanged.
 */
static int
udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
        return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
                                       udp_state_name_table, sname, to);
}
373
374 static const char * udp_state_name(int state)
375 {
376         if (state >= IP_VS_UDP_S_LAST)
377                 return "ERR!";
378         return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
379 }
380
/*
 *      State transition hook for UDP.
 *
 *      direction and skb are not examined: every packet simply reloads
 *      the connection timeout from the IP_VS_UDP_S_NORMAL slot of the
 *      protocol's timeout table.  Always returns 1.
 */
static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
                     const struct sk_buff *skb,
                     struct ip_vs_protocol *pp)
{
        cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
        return 1;
}
389
/*
 *      Protocol init hook: initialise the udp_apps hash table and make
 *      udp_timeouts the protocol's timeout table.
 */
static void udp_init(struct ip_vs_protocol *pp)
{
        IP_VS_INIT_HASH_TABLE(udp_apps);
        pp->timeout_table = udp_timeouts;
}
395
/*
 *      Protocol exit hook: intentionally empty, nothing is torn down here.
 */
static void udp_exit(struct ip_vs_protocol *pp)
{
}
399
400
/*
 *      UDP protocol descriptor: ties the handlers defined in this file
 *      to the generic ip_vs_protocol interface.  Not static, so it is
 *      visible outside this file; udp_register_app() and
 *      udp_unregister_app() update its appcnt field.
 */
struct ip_vs_protocol ip_vs_protocol_udp = {
        .name =                 "UDP",
        .protocol =             IPPROTO_UDP,
        .dont_defrag =          0,
        .init =                 udp_init,
        .exit =                 udp_exit,
        .conn_schedule =        udp_conn_schedule,
        .conn_in_get =          udp_conn_in_get,
        .conn_out_get =         udp_conn_out_get,
        .snat_handler =         udp_snat_handler,
        .dnat_handler =         udp_dnat_handler,
        .csum_check =           udp_csum_check,
        .state_transition =     udp_state_transition,
        .state_name =           udp_state_name,
        .register_app =         udp_register_app,
        .unregister_app =       udp_unregister_app,
        .app_conn_bind =        udp_app_conn_bind,
        /* debug helper not defined in this file */
        .debug_packet =         ip_vs_tcpudp_debug_packet,
        /* no timeout_change hook for UDP */
        .timeout_change =       NULL,
        .set_state_timeout =    udp_set_state_timeout,
};