ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv4 / netfilter / ipt_REJECT.c
1 /*
2  * This is a module which is used for rejecting packets.
3  * Added support for customized reject packets (Jozsef Kadlecsik).
4  * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
5  */
6
7 /* (C) 1999-2001 Paul `Rusty' Russell
8  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License version 2 as
12  * published by the Free Software Foundation.
13  */
14
15 #include <linux/config.h>
16 #include <linux/module.h>
17 #include <linux/skbuff.h>
18 #include <linux/ip.h>
19 #include <linux/udp.h>
20 #include <linux/icmp.h>
21 #include <net/icmp.h>
22 #include <net/ip.h>
23 #include <net/tcp.h>
24 #include <net/route.h>
25 #include <linux/netfilter_ipv4/ip_tables.h>
26 #include <linux/netfilter_ipv4/ipt_REJECT.h>
27 #ifdef CONFIG_BRIDGE_NETFILTER
28 #include <linux/netfilter_bridge.h>
29 #endif
30
31 MODULE_LICENSE("GPL");
32 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
33 MODULE_DESCRIPTION("iptables REJECT target module");
34
/* Debug tracing: DEBUGP() expands to nothing unless the condition below is
 * changed to a non-zero value, in which case it becomes printk. */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
40
41 /* If the original packet is part of a connection, but the connection
42    is not confirmed, our manufactured reply will not be associated
43    with it, so we need to do this manually. */
44 static void connection_attach(struct sk_buff *new_skb, struct nf_ct_info *nfct)
45 {
46         void (*attach)(struct sk_buff *, struct nf_ct_info *);
47
48         /* Avoid module unload race with ip_ct_attach being NULLed out */
49         if (nfct && (attach = ip_ct_attach) != NULL) {
50                 mb(); /* Just to be sure: must be read before executing this */
51                 attach(new_skb, nfct);
52         }
53 }
54
55 static inline struct rtable *route_reverse(struct sk_buff *skb, int hook)
56 {
57         struct iphdr *iph = skb->nh.iph;
58         struct dst_entry *odst;
59         struct flowi fl = {};
60         struct rtable *rt;
61
62         /* We don't require ip forwarding to be enabled to be able to
63          * send a RST reply for bridged traffic. */
64         if (hook != NF_IP_FORWARD
65 #ifdef CONFIG_BRIDGE_NETFILTER
66             || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
67 #endif
68            ) {
69                 fl.nl_u.ip4_u.daddr = iph->saddr;
70                 if (hook == NF_IP_LOCAL_IN)
71                         fl.nl_u.ip4_u.saddr = iph->daddr;
72                 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
73
74                 if (ip_route_output_key(&rt, &fl) != 0)
75                         return NULL;
76         } else {
77                 /* non-local src, find valid iif to satisfy
78                  * rp-filter when calling ip_route_input. */
79                 fl.nl_u.ip4_u.daddr = iph->daddr;
80                 if (ip_route_output_key(&rt, &fl) != 0)
81                         return NULL;
82
83                 odst = skb->dst;
84                 if (ip_route_input(skb, iph->saddr, iph->daddr,
85                                    RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
86                         dst_release(&rt->u.dst);
87                         return NULL;
88                 }
89                 dst_release(&rt->u.dst);
90                 rt = (struct rtable *)skb->dst;
91                 skb->dst = odst;
92         }
93
94         if (rt->u.dst.error) {
95                 dst_release(&rt->u.dst);
96                 rt = NULL;
97         }
98
99         return rt;
100 }
101
102 /* Send RST reply */
103 static void send_reset(struct sk_buff *oldskb, int hook)
104 {
105         struct sk_buff *nskb;
106         struct tcphdr otcph, *tcph;
107         struct rtable *rt;
108         u_int16_t tmp_port;
109         u_int32_t tmp_addr;
110         int needs_ack;
111         int hh_len;
112
113         /* IP header checks: fragment. */
114         if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
115                 return;
116
117         if (skb_copy_bits(oldskb, oldskb->nh.iph->ihl*4,
118                           &otcph, sizeof(otcph)) < 0)
119                 return;
120
121         /* No RST for RST. */
122         if (otcph.rst)
123                 return;
124
125         /* FIXME: Check checksum --RR */
126         if ((rt = route_reverse(oldskb, hook)) == NULL)
127                 return;
128
129         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
130
131         /* We need a linear, writeable skb.  We also need to expand
132            headroom in case hh_len of incoming interface < hh_len of
133            outgoing interface */
134         nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
135                                GFP_ATOMIC);
136         if (!nskb) {
137                 dst_release(&rt->u.dst);
138                 return;
139         }
140
141         dst_release(nskb->dst);
142         nskb->dst = &rt->u.dst;
143
144         /* This packet will not be the same as the other: clear nf fields */
145         nf_conntrack_put(nskb->nfct);
146         nskb->nfct = NULL;
147         nskb->nfcache = 0;
148 #ifdef CONFIG_NETFILTER_DEBUG
149         nskb->nf_debug = 0;
150 #endif
151         nskb->nfmark = 0;
152 #ifdef CONFIG_BRIDGE_NETFILTER
153         nf_bridge_put(nskb->nf_bridge);
154         nskb->nf_bridge = NULL;
155 #endif
156
157         tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
158
159         /* Swap source and dest */
160         tmp_addr = nskb->nh.iph->saddr;
161         nskb->nh.iph->saddr = nskb->nh.iph->daddr;
162         nskb->nh.iph->daddr = tmp_addr;
163         tmp_port = tcph->source;
164         tcph->source = tcph->dest;
165         tcph->dest = tmp_port;
166
167         /* Truncate to length (no data) */
168         tcph->doff = sizeof(struct tcphdr)/4;
169         skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
170         nskb->nh.iph->tot_len = htons(nskb->len);
171
172         if (tcph->ack) {
173                 needs_ack = 0;
174                 tcph->seq = otcph.ack_seq;
175                 tcph->ack_seq = 0;
176         } else {
177                 needs_ack = 1;
178                 tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
179                                       + oldskb->len - oldskb->nh.iph->ihl*4
180                                       - (otcph.doff<<2));
181                 tcph->seq = 0;
182         }
183
184         /* Reset flags */
185         ((u_int8_t *)tcph)[13] = 0;
186         tcph->rst = 1;
187         tcph->ack = needs_ack;
188
189         tcph->window = 0;
190         tcph->urg_ptr = 0;
191
192         /* Adjust TCP checksum */
193         tcph->check = 0;
194         tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
195                                    nskb->nh.iph->saddr,
196                                    nskb->nh.iph->daddr,
197                                    csum_partial((char *)tcph,
198                                                 sizeof(struct tcphdr), 0));
199
200         /* Adjust IP TTL, DF */
201         nskb->nh.iph->ttl = MAXTTL;
202         /* Set DF, id = 0 */
203         nskb->nh.iph->frag_off = htons(IP_DF);
204         nskb->nh.iph->id = 0;
205
206         /* Adjust IP checksum */
207         nskb->nh.iph->check = 0;
208         nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, 
209                                            nskb->nh.iph->ihl);
210
211         /* "Never happens" */
212         if (nskb->len > dst_pmtu(nskb->dst))
213                 goto free_nskb;
214
215         connection_attach(nskb, oldskb->nfct);
216
217         NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
218                 ip_finish_output);
219         return;
220
221  free_nskb:
222         kfree_skb(nskb);
223 }
224
225 static void send_unreach(struct sk_buff *skb_in, int code)
226 {
227         struct iphdr *iph;
228         struct udphdr *udph;
229         struct icmphdr *icmph;
230         struct sk_buff *nskb;
231         u32 saddr;
232         u8 tos;
233         int hh_len, length;
234         struct rtable *rt = (struct rtable*)skb_in->dst;
235         unsigned char *data;
236
237         if (!rt)
238                 return;
239
240         /* FIXME: Use sysctl number. --RR */
241         if (!xrlim_allow(&rt->u.dst, 1*HZ))
242                 return;
243
244         iph = skb_in->nh.iph;
245
246         /* No replies to physical multicast/broadcast */
247         if (skb_in->pkt_type!=PACKET_HOST)
248                 return;
249
250         /* Now check at the protocol level */
251         if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))
252                 return;
253
254         /* Only reply to fragment 0. */
255         if (iph->frag_off&htons(IP_OFFSET))
256                 return;
257
258         /* Ensure we have at least 8 bytes of proto header. */
259         if (skb_in->len < skb_in->nh.iph->ihl*4 + 8)
260                 return;
261
262         /* if UDP checksum is set, verify it's correct */
263         if (iph->protocol == IPPROTO_UDP
264             && skb_in->tail-(u8*)iph >= sizeof(struct udphdr)) {
265                 int datalen = skb_in->len - (iph->ihl<<2);
266                 udph = (struct udphdr *)((char *)iph + (iph->ihl<<2));
267                 if (udph->check
268                     && csum_tcpudp_magic(iph->saddr, iph->daddr,
269                                          datalen, IPPROTO_UDP,
270                                          csum_partial((char *)udph, datalen,
271                                                       0)) != 0)
272                         return;
273         }
274
275         /* If we send an ICMP error to an ICMP error a mess would result.. */
276         if (iph->protocol == IPPROTO_ICMP
277             && skb_in->tail-(u8*)iph >= sizeof(struct icmphdr)) {
278                 icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
279
280                 if (skb_copy_bits(skb_in, skb_in->nh.iph->ihl*4,
281                                   icmph, sizeof(*icmph)) < 0)
282                         return;
283
284                 /* Between echo-reply (0) and timestamp (13),
285                    everything except echo-request (8) is an error.
286                    Also, anything greater than NR_ICMP_TYPES is
287                    unknown, and hence should be treated as an error... */
288                 if ((icmph->type < ICMP_TIMESTAMP
289                      && icmph->type != ICMP_ECHOREPLY
290                      && icmph->type != ICMP_ECHO)
291                     || icmph->type > NR_ICMP_TYPES)
292                         return;
293         }
294
295         saddr = iph->daddr;
296         if (!(rt->rt_flags & RTCF_LOCAL))
297                 saddr = 0;
298
299         tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL;
300
301         {
302                 struct flowi fl = { .nl_u = { .ip4_u =
303                                               { .daddr = skb_in->nh.iph->saddr,
304                                                 .saddr = saddr,
305                                                 .tos = RT_TOS(tos) } } };
306                 if (ip_route_output_key(&rt, &fl))
307                         return;
308         }
309         /* RFC says return as much as we can without exceeding 576 bytes. */
310         length = skb_in->len + sizeof(struct iphdr) + sizeof(struct icmphdr);
311
312         if (length > dst_pmtu(&rt->u.dst))
313                 length = dst_pmtu(&rt->u.dst);
314         if (length > 576)
315                 length = 576;
316
317         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
318
319         nskb = alloc_skb(hh_len + length, GFP_ATOMIC);
320         if (!nskb) {
321                 ip_rt_put(rt);
322                 return;
323         }
324
325         nskb->priority = 0;
326         nskb->dst = &rt->u.dst;
327         skb_reserve(nskb, hh_len);
328
329         /* Set up IP header */
330         iph = nskb->nh.iph
331                 = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
332         iph->version=4;
333         iph->ihl=5;
334         iph->tos=tos;
335         iph->tot_len = htons(length);
336
337         /* PMTU discovery never applies to ICMP packets. */
338         iph->frag_off = 0;
339
340         iph->ttl = MAXTTL;
341         ip_select_ident(iph, &rt->u.dst, NULL);
342         iph->protocol=IPPROTO_ICMP;
343         iph->saddr=rt->rt_src;
344         iph->daddr=rt->rt_dst;
345         iph->check=0;
346         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
347
348         /* Set up ICMP header. */
349         icmph = nskb->h.icmph
350                 = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
351         icmph->type = ICMP_DEST_UNREACH;
352         icmph->code = code;     
353         icmph->un.gateway = 0;
354         icmph->checksum = 0;
355         
356         /* Copy as much of original packet as will fit */
357         data = skb_put(nskb,
358                        length - sizeof(struct iphdr) - sizeof(struct icmphdr));
359
360         skb_copy_bits(skb_in, 0, data,
361                       length - sizeof(struct iphdr) - sizeof(struct icmphdr));
362
363         icmph->checksum = ip_compute_csum((unsigned char *)icmph,
364                                           length - sizeof(struct iphdr));
365
366         connection_attach(nskb, skb_in->nfct);
367
368         NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
369                 ip_finish_output);
370 }       
371
372 static unsigned int reject(struct sk_buff **pskb,
373                            const struct net_device *in,
374                            const struct net_device *out,
375                            unsigned int hooknum,
376                            const void *targinfo,
377                            void *userinfo)
378 {
379         const struct ipt_reject_info *reject = targinfo;
380
381         /* Our naive response construction doesn't deal with IP
382            options, and probably shouldn't try. */
383         if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
384                 return NF_DROP;
385
386         /* WARNING: This code causes reentry within iptables.
387            This means that the iptables jump stack is now crap.  We
388            must return an absolute verdict. --RR */
389         switch (reject->with) {
390         case IPT_ICMP_NET_UNREACHABLE:
391                 send_unreach(*pskb, ICMP_NET_UNREACH);
392                 break;
393         case IPT_ICMP_HOST_UNREACHABLE:
394                 send_unreach(*pskb, ICMP_HOST_UNREACH);
395                 break;
396         case IPT_ICMP_PROT_UNREACHABLE:
397                 send_unreach(*pskb, ICMP_PROT_UNREACH);
398                 break;
399         case IPT_ICMP_PORT_UNREACHABLE:
400                 send_unreach(*pskb, ICMP_PORT_UNREACH);
401                 break;
402         case IPT_ICMP_NET_PROHIBITED:
403                 send_unreach(*pskb, ICMP_NET_ANO);
404                 break;
405         case IPT_ICMP_HOST_PROHIBITED:
406                 send_unreach(*pskb, ICMP_HOST_ANO);
407                 break;
408         case IPT_ICMP_ADMIN_PROHIBITED:
409                 send_unreach(*pskb, ICMP_PKT_FILTERED);
410                 break;
411         case IPT_TCP_RESET:
412                 send_reset(*pskb, hooknum);
413         case IPT_ICMP_ECHOREPLY:
414                 /* Doesn't happen. */
415                 break;
416         }
417
418         return NF_DROP;
419 }
420
421 static int check(const char *tablename,
422                  const struct ipt_entry *e,
423                  void *targinfo,
424                  unsigned int targinfosize,
425                  unsigned int hook_mask)
426 {
427         const struct ipt_reject_info *rejinfo = targinfo;
428
429         if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
430                 DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
431                 return 0;
432         }
433
434         /* Only allow these for packet filtering. */
435         if (strcmp(tablename, "filter") != 0) {
436                 DEBUGP("REJECT: bad table `%s'.\n", tablename);
437                 return 0;
438         }
439         if ((hook_mask & ~((1 << NF_IP_LOCAL_IN)
440                            | (1 << NF_IP_FORWARD)
441                            | (1 << NF_IP_LOCAL_OUT))) != 0) {
442                 DEBUGP("REJECT: bad hook mask %X\n", hook_mask);
443                 return 0;
444         }
445
446         if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
447                 printk("REJECT: ECHOREPLY no longer supported.\n");
448                 return 0;
449         } else if (rejinfo->with == IPT_TCP_RESET) {
450                 /* Must specify that it's a TCP packet */
451                 if (e->ip.proto != IPPROTO_TCP
452                     || (e->ip.invflags & IPT_INV_PROTO)) {
453                         DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n");
454                         return 0;
455                 }
456         }
457
458         return 1;
459 }
460
461 static struct ipt_target ipt_reject_reg = {
462         .name           = "REJECT",
463         .target         = reject,
464         .checkentry     = check,
465         .me             = THIS_MODULE,
466 };
467
468 static int __init init(void)
469 {
470         return ipt_register_target(&ipt_reject_reg);
471 }
472
473 static void __exit fini(void)
474 {
475         ipt_unregister_target(&ipt_reject_reg);
476 }
477
/* Hook init()/fini() up as the module's load and unload handlers. */
module_init(init);
module_exit(fini);