vserver 1.9.3
[linux-2.6.git] / net / ipv4 / netfilter / ip_fw_compat_masq.c
1 /* Masquerading compatibility layer.
2
3    Note that there are no restrictions on other programs binding to
4    ports 61000:65095 (in 2.0 and 2.2 they get EADDRINUSE).  Just DON'T
5    DO IT.
6  */
7
8 /* (C) 1999-2001 Paul `Rusty' Russell
9  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License version 2 as
13  * published by the Free Software Foundation.
14  */
15
16 #include <linux/skbuff.h>
17 #include <linux/in.h>
18 #include <linux/ip.h>
19 #include <linux/icmp.h>
20 #include <linux/udp.h>
21 #include <linux/netfilter_ipv4.h>
22 #include <linux/netdevice.h>
23 #include <linux/inetdevice.h>
24 #include <linux/proc_fs.h>
25 #include <linux/module.h>
26 #include <net/route.h>
27 #include <net/ip.h>
28
29 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
30 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
31
32 #include <linux/netfilter_ipv4/ip_conntrack.h>
33 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
34 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
35 #include <linux/netfilter_ipv4/ip_nat.h>
36 #include <linux/netfilter_ipv4/ip_nat_core.h>
37 #include <linux/netfilter_ipv4/listhelp.h>
38 #include "ip_fw_compat.h"
39
40 #if 0
41 #define DEBUGP printk
42 #else
43 #define DEBUGP(format, args...)
44 #endif
45
46 unsigned int
47 do_masquerade(struct sk_buff **pskb, const struct net_device *dev)
48 {
49         struct ip_nat_info *info;
50         enum ip_conntrack_info ctinfo;
51         struct ip_conntrack *ct;
52         unsigned int ret;
53
54         /* Sorry, only ICMP, TCP and UDP. */
55         if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP
56             && (*pskb)->nh.iph->protocol != IPPROTO_TCP
57             && (*pskb)->nh.iph->protocol != IPPROTO_UDP)
58                 return NF_DROP;
59
60         /* Feed it to connection tracking; in fact we're in NF_IP_FORWARD,
61            but connection tracking doesn't expect that */
62         ret = ip_conntrack_in(NF_IP_POST_ROUTING, pskb, dev, NULL, NULL);
63         if (ret != NF_ACCEPT) {
64                 DEBUGP("ip_conntrack_in returned %u.\n", ret);
65                 return ret;
66         }
67
68         ct = ip_conntrack_get(*pskb, &ctinfo);
69
70         if (!ct) {
71                 DEBUGP("ip_conntrack_in set to invalid conntrack.\n");
72                 return NF_DROP;
73         }
74
75         info = &ct->nat.info;
76
77         WRITE_LOCK(&ip_nat_lock);
78         /* Setup the masquerade, if not already */
79         if (!info->initialized) {
80                 u_int32_t newsrc;
81                 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = (*pskb)->nh.iph->daddr } } };
82                 struct rtable *rt;
83                 struct ip_nat_multi_range range;
84
85                 /* Pass 0 instead of saddr, since it's going to be changed
86                    anyway. */
87                 if (ip_route_output_key(&rt, &fl) != 0) {
88                         DEBUGP("ipnat_rule_masquerade: Can't reroute.\n");
89                         return NF_DROP;
90                 }
91                 newsrc = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
92                                           RT_SCOPE_UNIVERSE);
93                 ip_rt_put(rt);
94                 range = ((struct ip_nat_multi_range)
95                          { 1,
96                            {{IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED,
97                              newsrc, newsrc,
98                              { htons(61000) }, { htons(65095) } } } });
99
100                 ret = ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
101                 if (ret != NF_ACCEPT) {
102                         WRITE_UNLOCK(&ip_nat_lock);
103                         return ret;
104                 }
105         } else
106                 DEBUGP("Masquerading already done on this conn.\n");
107         WRITE_UNLOCK(&ip_nat_lock);
108
109         return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb);
110 }
111
112 void
113 check_for_masq_error(struct sk_buff **pskb)
114 {
115         enum ip_conntrack_info ctinfo;
116         struct ip_conntrack *ct;
117
118         ct = ip_conntrack_get(*pskb, &ctinfo);
119         /* Wouldn't be here if not tracked already => masq'ed ICMP
120            ping or error related to masq'd connection */
121         IP_NF_ASSERT(ct);
122         if (ctinfo == IP_CT_RELATED) {
123                 icmp_reply_translation(pskb, ct, NF_IP_PRE_ROUTING,
124                                        CTINFO2DIR(ctinfo));
125                 icmp_reply_translation(pskb, ct, NF_IP_POST_ROUTING,
126                                        CTINFO2DIR(ctinfo));
127         }
128 }
129
130 unsigned int
131 check_for_demasq(struct sk_buff **pskb)
132 {
133         struct ip_conntrack_tuple tuple;
134         struct ip_conntrack_protocol *protocol;
135         struct ip_conntrack_tuple_hash *h;
136         enum ip_conntrack_info ctinfo;
137         struct ip_conntrack *ct;
138         int ret;
139
140         protocol = ip_ct_find_proto((*pskb)->nh.iph->protocol);
141
142         /* We don't feed packets to conntrack system unless we know
143            they're part of an connection already established by an
144            explicit masq command. */
145         switch ((*pskb)->nh.iph->protocol) {
146         case IPPROTO_ICMP:
147                 /* ICMP errors. */
148                 protocol->error(*pskb, &ctinfo, NF_IP_PRE_ROUTING);
149                 ct = (struct ip_conntrack *)(*pskb)->nfct;
150                 if (ct) {
151                         /* We only do SNAT in the compatibility layer.
152                            So we can manipulate ICMP errors from
153                            server here (== DNAT).  Do SNAT icmp manips
154                            in POST_ROUTING handling. */
155                         if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
156                                 icmp_reply_translation(pskb, ct,
157                                                        NF_IP_PRE_ROUTING,
158                                                        CTINFO2DIR(ctinfo));
159                                 icmp_reply_translation(pskb, ct,
160                                                        NF_IP_POST_ROUTING,
161                                                        CTINFO2DIR(ctinfo));
162                         }
163                         return NF_ACCEPT;
164                 }
165                 /* Fall thru... */
166         case IPPROTO_TCP:
167         case IPPROTO_UDP:
168                 IP_NF_ASSERT(((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
169
170                 if (!ip_ct_get_tuple((*pskb)->nh.iph, *pskb,
171                                      (*pskb)->nh.iph->ihl*4, &tuple, protocol)) {
172                         if (net_ratelimit())
173                                 printk("ip_fw_compat_masq: Can't get tuple\n");
174                         return NF_ACCEPT;
175                 }
176                 break;
177
178         default:
179                 /* Not ours... */
180                 return NF_ACCEPT;
181         }
182         h = ip_conntrack_find_get(&tuple, NULL);
183
184         /* MUST be found, and MUST be reply. */
185         if (h && DIRECTION(h) == 1) {
186                 ret = ip_conntrack_in(NF_IP_PRE_ROUTING, pskb,
187                                       NULL, NULL, NULL);
188
189                 /* Put back the reference gained from find_get */
190                 nf_conntrack_put(&h->ctrack->ct_general);
191                 if (ret == NF_ACCEPT) {
192                         struct ip_conntrack *ct;
193                         ct = ip_conntrack_get(*pskb, &ctinfo);
194
195                         if (ct) {
196                                 struct ip_nat_info *info = &ct->nat.info;
197
198                                 do_bindings(ct, ctinfo, info,
199                                             NF_IP_PRE_ROUTING,
200                                             pskb);
201                         } else
202                                 if (net_ratelimit()) 
203                                         printk("ip_fw_compat_masq: conntrack"
204                                                " didn't like\n");
205                 }
206         } else {
207                 if (h)
208                         /* Put back the reference gained from find_get */
209                         nf_conntrack_put(&h->ctrack->ct_general);
210                 ret = NF_ACCEPT;
211         }
212
213         return ret;
214 }
215
/*
 * Compatibility stub for setting masquerading timeouts.
 *
 * Neither argument is examined: the function only logs a notice that the
 * timeouts are fixed and reports success (0).
 */
int ip_fw_masq_timeouts(void *user, int len)
{
        printk("Sorry: masquerading timeouts set 5DAYS/2MINS/60SECS\n");

        return 0;
}
221
/*
 * Map an IP protocol number to the name shown in the masquerade listing.
 * Anything other than TCP, UDP or ICMP yields a placeholder string.
 */
static const char *masq_proto_name(u_int16_t protonum)
{
        if (protonum == IPPROTO_TCP)
                return "TCP";
        if (protonum == IPPROTO_UDP)
                return "UDP";
        if (protonum == IPPROTO_ICMP)
                return "ICMP";

        return "MORE-CAFFEINE-FOR-RUSTY";
}
231
232 static unsigned int
233 print_masq(char *buffer, const struct ip_conntrack *conntrack)
234 {
235         char temp[129];
236
237         /* This is for backwards compatibility, but ick!.
238            We should never export jiffies to userspace.
239         */
240         sprintf(temp,"%s %08X:%04X %08X:%04X %04X %08X %6d %6d %7lu",
241                 masq_proto_name(conntrack->tuplehash[0].tuple.dst.protonum),
242                 ntohl(conntrack->tuplehash[0].tuple.src.ip),
243                 ntohs(conntrack->tuplehash[0].tuple.src.u.all),
244                 ntohl(conntrack->tuplehash[0].tuple.dst.ip),
245                 ntohs(conntrack->tuplehash[0].tuple.dst.u.all),
246                 ntohs(conntrack->tuplehash[1].tuple.dst.u.all),
247                 /* Sorry, no init_seq, delta or previous_delta (yet). */
248                 0, 0, 0,
249                 conntrack->timeout.expires - jiffies);
250
251         return sprintf(buffer, "%-127s\n", temp);
252 }
253
254 /* Returns true when finished. */
255 static int
256 masq_iterate(const struct ip_conntrack_tuple_hash *hash,
257              char *buffer, off_t offset, off_t *upto,
258              unsigned int *len, unsigned int maxlen)
259 {
260         unsigned int newlen;
261
262         IP_NF_ASSERT(hash->ctrack);
263
264         /* Only count originals */
265         if (DIRECTION(hash))
266                 return 0;
267
268         if ((*upto)++ < offset)
269                 return 0;
270
271         newlen = print_masq(buffer + *len, hash->ctrack);
272         if (*len + newlen > maxlen)
273                 return 1;
274         else *len += newlen;
275
276         return 0;
277 }
278
279 /* Everything in the hash is masqueraded. */
280 static int
281 masq_procinfo(char *buffer, char **start, off_t offset, int length)
282 {
283         unsigned int i;
284         int len = 0;
285         off_t upto = 1;
286
287         /* Header: first record */
288         if (offset == 0) {
289                 char temp[128];
290
291                 sprintf(temp,
292                         "Prc FromIP   FPrt ToIP     TPrt Masq Init-seq  Delta PDelta Expires (free=0,0,0)");
293                 len = sprintf(buffer, "%-127s\n", temp);
294                 offset = 1;
295         }
296
297         READ_LOCK(&ip_conntrack_lock);
298         /* Traverse hash; print originals then reply. */
299         for (i = 0; i < ip_conntrack_htable_size; i++) {
300                 if (LIST_FIND(&ip_conntrack_hash[i], masq_iterate,
301                               struct ip_conntrack_tuple_hash *,
302                               buffer, offset, &upto, &len, length))
303                         break;
304         }
305         READ_UNLOCK(&ip_conntrack_lock);
306
307         /* `start' hack - see fs/proc/generic.c line ~165 */
308         *start = (char *)((unsigned int)upto - offset);
309         return len;
310 }
311
312 int __init masq_init(void)
313 {
314         int ret;
315         struct proc_dir_entry *proc;
316
317         ret = ip_conntrack_init();
318         if (ret == 0) {
319                 ret = ip_nat_init();
320                 if (ret == 0) {
321                         proc = proc_net_create("ip_masquerade",
322                                                0, masq_procinfo);
323                         if (proc)
324                                 proc->owner = THIS_MODULE;
325                         else {
326                                 ip_nat_cleanup();
327                                 ip_conntrack_cleanup();
328                                 ret = -ENOMEM;
329                         }
330                 } else
331                         ip_conntrack_cleanup();
332         }
333
334         return ret;
335 }
336
/*
 * Tear down the masquerading compatibility layer, in the reverse order of
 * masq_init(): the proc entry goes first so that its read handler, which
 * walks the conntrack hash, can no longer be reached once NAT and
 * connection tracking are being cleaned up.
 */
void masq_cleanup(void)
{
        proc_net_remove("ip_masquerade");
        ip_nat_cleanup();
        ip_conntrack_cleanup();
}
341         proc_net_remove("ip_masquerade");
342 }