VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / net / ipv4 / netfilter / ip_fw_compat_masq.c
1 /* Masquerading compatibility layer.
2
3    Note that there are no restrictions on other programs binding to
4    ports 61000:65095 (in 2.0 and 2.2 they get EADDRINUSE).  Just DON'T
5    DO IT.
6  */
7
8 /* (C) 1999-2001 Paul `Rusty' Russell
9  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License version 2 as
13  * published by the Free Software Foundation.
14  */
15
16 #include <linux/skbuff.h>
17 #include <linux/in.h>
18 #include <linux/ip.h>
19 #include <linux/icmp.h>
20 #include <linux/udp.h>
21 #include <linux/netfilter_ipv4.h>
22 #include <linux/netdevice.h>
23 #include <linux/inetdevice.h>
24 #include <linux/proc_fs.h>
25 #include <linux/module.h>
26 #include <net/route.h>
27 #include <net/ip.h>
28
29 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
30 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
31
32 #include <linux/netfilter_ipv4/ip_conntrack.h>
33 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
34 #include <linux/netfilter_ipv4/ip_nat.h>
35 #include <linux/netfilter_ipv4/ip_nat_core.h>
36 #include <linux/netfilter_ipv4/listhelp.h>
37 #include "ip_fw_compat.h"
38
39 #if 0
40 #define DEBUGP printk
41 #else
42 #define DEBUGP(format, args...)
43 #endif
44
45 unsigned int
46 do_masquerade(struct sk_buff **pskb, const struct net_device *dev)
47 {
48         struct ip_nat_info *info;
49         enum ip_conntrack_info ctinfo;
50         struct ip_conntrack *ct;
51         unsigned int ret;
52
53         /* Sorry, only ICMP, TCP and UDP. */
54         if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP
55             && (*pskb)->nh.iph->protocol != IPPROTO_TCP
56             && (*pskb)->nh.iph->protocol != IPPROTO_UDP)
57                 return NF_DROP;
58
59         /* Feed it to connection tracking; in fact we're in NF_IP_FORWARD,
60            but connection tracking doesn't expect that */
61         ret = ip_conntrack_in(NF_IP_POST_ROUTING, pskb, dev, NULL, NULL);
62         if (ret != NF_ACCEPT) {
63                 DEBUGP("ip_conntrack_in returned %u.\n", ret);
64                 return ret;
65         }
66
67         ct = ip_conntrack_get(*pskb, &ctinfo);
68
69         if (!ct) {
70                 DEBUGP("ip_conntrack_in set to invalid conntrack.\n");
71                 return NF_DROP;
72         }
73
74         info = &ct->nat.info;
75
76         WRITE_LOCK(&ip_nat_lock);
77         /* Setup the masquerade, if not already */
78         if (!info->initialized) {
79                 u_int32_t newsrc;
80                 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = (*pskb)->nh.iph->daddr } } };
81                 struct rtable *rt;
82                 struct ip_nat_multi_range range;
83
84                 /* Pass 0 instead of saddr, since it's going to be changed
85                    anyway. */
86                 if (ip_route_output_key(&rt, &fl) != 0) {
87                         DEBUGP("ipnat_rule_masquerade: Can't reroute.\n");
88                         return NF_DROP;
89                 }
90                 newsrc = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
91                                           RT_SCOPE_UNIVERSE);
92                 ip_rt_put(rt);
93                 range = ((struct ip_nat_multi_range)
94                          { 1,
95                            {{IP_NAT_RANGE_MAP_IPS|IP_NAT_RANGE_PROTO_SPECIFIED,
96                              newsrc, newsrc,
97                              { htons(61000) }, { htons(65095) } } } });
98
99                 ret = ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
100                 if (ret != NF_ACCEPT) {
101                         WRITE_UNLOCK(&ip_nat_lock);
102                         return ret;
103                 }
104         } else
105                 DEBUGP("Masquerading already done on this conn.\n");
106         WRITE_UNLOCK(&ip_nat_lock);
107
108         return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb);
109 }
110
111 void
112 check_for_masq_error(struct sk_buff **pskb)
113 {
114         enum ip_conntrack_info ctinfo;
115         struct ip_conntrack *ct;
116
117         ct = ip_conntrack_get(*pskb, &ctinfo);
118         /* Wouldn't be here if not tracked already => masq'ed ICMP
119            ping or error related to masq'd connection */
120         IP_NF_ASSERT(ct);
121         if (ctinfo == IP_CT_RELATED) {
122                 icmp_reply_translation(pskb, ct, NF_IP_PRE_ROUTING,
123                                        CTINFO2DIR(ctinfo));
124                 icmp_reply_translation(pskb, ct, NF_IP_POST_ROUTING,
125                                        CTINFO2DIR(ctinfo));
126         }
127 }
128
129 unsigned int
130 check_for_demasq(struct sk_buff **pskb)
131 {
132         struct ip_conntrack_tuple tuple;
133         struct ip_conntrack_protocol *protocol;
134         struct ip_conntrack_tuple_hash *h;
135         enum ip_conntrack_info ctinfo;
136         struct ip_conntrack *ct;
137         int ret;
138
139         protocol = ip_ct_find_proto((*pskb)->nh.iph->protocol);
140
141         /* We don't feed packets to conntrack system unless we know
142            they're part of an connection already established by an
143            explicit masq command. */
144         switch ((*pskb)->nh.iph->protocol) {
145         case IPPROTO_ICMP:
146                 /* ICMP errors. */
147                 ct = icmp_error_track(*pskb, &ctinfo, NF_IP_PRE_ROUTING);
148                 if (ct) {
149                         /* We only do SNAT in the compatibility layer.
150                            So we can manipulate ICMP errors from
151                            server here (== DNAT).  Do SNAT icmp manips
152                            in POST_ROUTING handling. */
153                         if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
154                                 icmp_reply_translation(pskb, ct,
155                                                        NF_IP_PRE_ROUTING,
156                                                        CTINFO2DIR(ctinfo));
157                                 icmp_reply_translation(pskb, ct,
158                                                        NF_IP_POST_ROUTING,
159                                                        CTINFO2DIR(ctinfo));
160                         }
161                         return NF_ACCEPT;
162                 }
163                 /* Fall thru... */
164         case IPPROTO_TCP:
165         case IPPROTO_UDP:
166                 IP_NF_ASSERT(((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
167
168                 if (!get_tuple((*pskb)->nh.iph, *pskb, (*pskb)->nh.iph->ihl*4, &tuple, protocol)) {
169                         if (net_ratelimit())
170                                 printk("ip_fw_compat_masq: Can't get tuple\n");
171                         return NF_ACCEPT;
172                 }
173                 break;
174
175         default:
176                 /* Not ours... */
177                 return NF_ACCEPT;
178         }
179         h = ip_conntrack_find_get(&tuple, NULL);
180
181         /* MUST be found, and MUST be reply. */
182         if (h && DIRECTION(h) == 1) {
183                 ret = ip_conntrack_in(NF_IP_PRE_ROUTING, pskb,
184                                       NULL, NULL, NULL);
185
186                 /* Put back the reference gained from find_get */
187                 nf_conntrack_put(&h->ctrack->infos[0]);
188                 if (ret == NF_ACCEPT) {
189                         struct ip_conntrack *ct;
190                         ct = ip_conntrack_get(*pskb, &ctinfo);
191
192                         if (ct) {
193                                 struct ip_nat_info *info = &ct->nat.info;
194
195                                 do_bindings(ct, ctinfo, info,
196                                             NF_IP_PRE_ROUTING,
197                                             pskb);
198                         } else
199                                 if (net_ratelimit()) 
200                                         printk("ip_fw_compat_masq: conntrack"
201                                                " didn't like\n");
202                 }
203         } else {
204                 if (h)
205                         /* Put back the reference gained from find_get */
206                         nf_conntrack_put(&h->ctrack->infos[0]);
207                 ret = NF_ACCEPT;
208         }
209
210         return ret;
211 }
212
/*
 * Stub for setting masquerading timeouts: neither `user' nor `len' is
 * examined.  It only logs the fixed message below and reports success (0).
 */
int ip_fw_masq_timeouts(void *user, int len)
{
        printk("Sorry: masquerading timeouts set 5DAYS/2MINS/60SECS\n");

        return 0;
}
218
/*
 * Map an IP protocol number to the name shown in the masquerade listing.
 * Anything other than TCP, UDP or ICMP yields a placeholder string.
 */
static const char *masq_proto_name(u_int16_t protonum)
{
        if (protonum == IPPROTO_TCP)
                return "TCP";
        if (protonum == IPPROTO_UDP)
                return "UDP";
        if (protonum == IPPROTO_ICMP)
                return "ICMP";

        return "MORE-CAFFEINE-FOR-RUSTY";
}
228
229 static unsigned int
230 print_masq(char *buffer, const struct ip_conntrack *conntrack)
231 {
232         char temp[129];
233
234         /* This is for backwards compatibility, but ick!.
235            We should never export jiffies to userspace.
236         */
237         sprintf(temp,"%s %08X:%04X %08X:%04X %04X %08X %6d %6d %7lu",
238                 masq_proto_name(conntrack->tuplehash[0].tuple.dst.protonum),
239                 ntohl(conntrack->tuplehash[0].tuple.src.ip),
240                 ntohs(conntrack->tuplehash[0].tuple.src.u.all),
241                 ntohl(conntrack->tuplehash[0].tuple.dst.ip),
242                 ntohs(conntrack->tuplehash[0].tuple.dst.u.all),
243                 ntohs(conntrack->tuplehash[1].tuple.dst.u.all),
244                 /* Sorry, no init_seq, delta or previous_delta (yet). */
245                 0, 0, 0,
246                 conntrack->timeout.expires - jiffies);
247
248         return sprintf(buffer, "%-127s\n", temp);
249 }
250
251 /* Returns true when finished. */
252 static int
253 masq_iterate(const struct ip_conntrack_tuple_hash *hash,
254              char *buffer, off_t offset, off_t *upto,
255              unsigned int *len, unsigned int maxlen)
256 {
257         unsigned int newlen;
258
259         IP_NF_ASSERT(hash->ctrack);
260
261         /* Only count originals */
262         if (DIRECTION(hash))
263                 return 0;
264
265         if ((*upto)++ < offset)
266                 return 0;
267
268         newlen = print_masq(buffer + *len, hash->ctrack);
269         if (*len + newlen > maxlen)
270                 return 1;
271         else *len += newlen;
272
273         return 0;
274 }
275
276 /* Everything in the hash is masqueraded. */
277 static int
278 masq_procinfo(char *buffer, char **start, off_t offset, int length)
279 {
280         unsigned int i;
281         int len = 0;
282         off_t upto = 1;
283
284         /* Header: first record */
285         if (offset == 0) {
286                 char temp[128];
287
288                 sprintf(temp,
289                         "Prc FromIP   FPrt ToIP     TPrt Masq Init-seq  Delta PDelta Expires (free=0,0,0)");
290                 len = sprintf(buffer, "%-127s\n", temp);
291                 offset = 1;
292         }
293
294         READ_LOCK(&ip_conntrack_lock);
295         /* Traverse hash; print originals then reply. */
296         for (i = 0; i < ip_conntrack_htable_size; i++) {
297                 if (LIST_FIND(&ip_conntrack_hash[i], masq_iterate,
298                               struct ip_conntrack_tuple_hash *,
299                               buffer, offset, &upto, &len, length))
300                         break;
301         }
302         READ_UNLOCK(&ip_conntrack_lock);
303
304         /* `start' hack - see fs/proc/generic.c line ~165 */
305         *start = (char *)((unsigned int)upto - offset);
306         return len;
307 }
308
309 int __init masq_init(void)
310 {
311         int ret;
312         struct proc_dir_entry *proc;
313
314         ret = ip_conntrack_init();
315         if (ret == 0) {
316                 ret = ip_nat_init();
317                 if (ret == 0) {
318                         proc = proc_net_create("ip_masquerade",
319                                                0, masq_procinfo);
320                         if (proc)
321                                 proc->owner = THIS_MODULE;
322                         else {
323                                 ip_nat_cleanup();
324                                 ip_conntrack_cleanup();
325                                 ret = -ENOMEM;
326                         }
327                 } else
328                         ip_conntrack_cleanup();
329         }
330
331         return ret;
332 }
333
/*
 * Tear down the masquerading compatibility layer in the reverse order of
 * masq_init: proc entry, then NAT, then connection tracking.
 *
 * The proc entry goes first because its handler (masq_procinfo) walks
 * ip_conntrack_hash; it should not remain registered while conntrack is
 * being cleaned up.
 */
void masq_cleanup(void)
{
        proc_net_remove("ip_masquerade");
        ip_nat_cleanup();
        ip_conntrack_cleanup();
}