2 * Packet matching code.
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
30 #include <linux/netfilter_ipv4/ip_tables.h>
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
/* Compile-time debug switches: uncomment to enable verbose rule-walk
 * (dprintf), userspace-interface (duprintf) tracing, or accept-all mode. */
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
40 #ifdef DEBUG_IP_FIREWALL
41 #define dprintf(format, args...) printk(format , ## args)
/* NOTE(review): excerpt — the #else / #endif lines pairing these
 * conditionals were elided by the sampling; the empty definitions
 * below are the no-debug fallbacks. */
43 #define dprintf(format, args...)
46 #ifdef DEBUG_IP_FIREWALL_USER
47 #define duprintf(format, args...) printk(format , ## args)
49 #define duprintf(format, args...)
52 #ifdef CONFIG_NETFILTER_DEBUG
/* Assertion that prints function/file/line on failure; compiles away
 * when CONFIG_NETFILTER_DEBUG is off (empty definition below). */
53 #define IP_NF_ASSERT(x) \
56 printk("IP_NF_ASSERT: %s:%s:%u\n", \
57 __FUNCTION__, __FILE__, __LINE__); \
60 #define IP_NF_ASSERT(x)
/* Round x up to the next SMP cache-line boundary. */
62 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
/* Single mutex serializing all userspace table operations. */
64 static DECLARE_MUTEX(ipt_mutex);
73 /* All the better to debug you with... */
79 We keep a set of rules for each CPU, so we can avoid write-locking
80 them in the softirq when updating the counters and therefore
81 only need to read-lock in the softirq; doing a write_lock_bh() in user
82 context stops packets coming through and allows user context to read
83 the counters or update the rules.
85 To be cache friendly on SMP, we arrange them like so:
87 ... cache-align padding ...
90 Hence the start of any table is given by get_table() below. */
/* NOTE(review): excerpt — the "struct ipt_table_info {" opening line
 * and the size/number fields were elided; only the tail of the struct
 * is visible below. */
92 /* The table itself */
97 /* Number of entries: FIXME. --RR */
99 /* Initial number of entries. Needed for module usage count */
100 unsigned int initial_entries;
102 /* Entry points and underflows */
/* Byte offsets (per hook) into the entry blob where traversal starts
 * and where the fallthrough/underflow rule sits. */
103 unsigned int hook_entry[NF_IP_NUMHOOKS];
104 unsigned int underflow[NF_IP_NUMHOOKS];
106 /* ipt_entry tables: one per CPU */
107 char entries[0] ____cacheline_aligned;
/* Global registries of targets, matches and tables; all guarded by
 * ipt_mutex (see ASSERT_READ_LOCK above). */
110 static LIST_HEAD(ipt_target)
111 static LIST_HEAD(ipt_match);
112 static LIST_HEAD(ipt_tables);
/* Accumulate byte (b) and packet (p) deltas into counter c. */
113 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
/* Offset of CPU p's copy of the rules within the per-CPU blob; 0 on
 * uniprocessor (the #ifdef SMP guard lines were elided from this excerpt). */
116 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
118 #define TABLE_OFFSET(t,p) 0
/* NOTE(review): semaphore-tracing wrappers; in the full source these sit
 * under a debug conditional — confirm before assuming they are live. */
122 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
127 /* Returns whether matches rule or not. */
/* Compares a packet's IP header against one rule's ipt_ip spec:
 * src/dst address+mask, in/out interface name (masked, word-at-a-time),
 * protocol, and fragment flag — each check invertible via invflags.
 * NOTE(review): excerpt — the signature's remaining parameters (indev,
 * outdev, isfrag), braces and return statements were elided. */
129 ip_packet_match(const struct iphdr *ip,
132 const struct ipt_ip *ipinfo,
/* XOR with the rule's inversion bit: FWINV(cond, flag) is cond,
 * inverted when the corresponding IPT_INV_* flag is set. */
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
140 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
142 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
144 dprintf("Source or dest mismatch.\n");
146 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
148 NIPQUAD(ipinfo->smsk.s_addr),
149 NIPQUAD(ipinfo->src.s_addr),
150 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
153 NIPQUAD(ipinfo->dmsk.s_addr),
154 NIPQUAD(ipinfo->dst.s_addr),
155 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
159 /* Look for ifname matches; this should unroll nicely. */
/* Interface names are compared one unsigned long at a time under the
 * rule's iniface_mask; ret accumulates any masked difference. */
160 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161 ret |= (((const unsigned long *)indev)[i]
162 ^ ((const unsigned long *)ipinfo->iniface)[i])
163 & ((const unsigned long *)ipinfo->iniface_mask)[i];
166 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167 dprintf("VIA in mismatch (%s vs %s).%s\n",
168 indev, ipinfo->iniface,
169 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
/* Same masked comparison for the outgoing interface. */
173 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174 ret |= (((const unsigned long *)outdev)[i]
175 ^ ((const unsigned long *)ipinfo->outiface)[i])
176 & ((const unsigned long *)ipinfo->outiface_mask)[i];
179 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180 dprintf("VIA out mismatch (%s vs %s).%s\n",
181 outdev, ipinfo->outiface,
182 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
186 /* Check specific protocol */
188 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189 dprintf("Packet protocol %hi does not match %hi.%s\n",
190 ip->protocol, ipinfo->proto,
191 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
195 /* If we have a fragment rule but the packet is not a fragment
196 * then we return zero */
197 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198 dprintf("Fragment rule but not fragment.%s\n",
199 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
/* Validates a user-supplied ipt_ip: rejects any flag or inversion-flag
 * bits outside the known masks. NOTE(review): excerpt — the return
 * statements and braces were elided. */
207 ip_checkentry(const struct ipt_ip *ip)
209 if (ip->flags & ~IPT_F_MASK) {
210 duprintf("Unknown flag bits set: %08X\n",
211 ip->flags & ~IPT_F_MASK);
214 if (ip->invflags & ~IPT_INV_MASK) {
215 duprintf("Unknown invflag bits set: %08X\n",
216 ip->invflags & ~IPT_INV_MASK);
/* Target invoked for ERROR entries: logs the error name carried in
 * targinfo. NOTE(review): excerpt — the return (NF_DROP in upstream)
 * was elided; confirm against the full source. */
223 ipt_error(struct sk_buff **pskb,
224 const struct net_device *in,
225 const struct net_device *out,
226 unsigned int hooknum,
227 const void *targinfo,
231 printk("ip_tables: error: `%s'\n", (char *)targinfo);
/* Per-match iterator callback used from IPT_MATCH_ITERATE: invokes the
 * match's kernel hook; a nonzero result here stops the iteration
 * (i.e. the rule does not match). NOTE(review): excerpt — return
 * statements elided. */
237 int do_match(struct ipt_entry_match *m,
238 const struct sk_buff *skb,
239 const struct net_device *in,
240 const struct net_device *out,
244 /* Stop iteration if it doesn't match */
245 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
/* Translate a byte offset within a table blob into an entry pointer. */
251 static inline struct ipt_entry *
252 get_entry(void *base, unsigned int offset)
254 return (struct ipt_entry *)(base + offset);
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
/* Core rule-traversal engine, called from the netfilter hooks.
 * Walks this CPU's copy of the table under read_lock_bh, matching each
 * entry; standard targets set the verdict or jump/return between
 * chains (using `back` as a one-deep-per-entry return stack stored in
 * the entries themselves), extension targets are invoked directly.
 * NOTE(review): excerpt — loop headers, several closing braces,
 * hotdrop handling and the final return were elided by sampling. */
259 ipt_do_table(struct sk_buff **pskb,
261 const struct net_device *in,
262 const struct net_device *out,
263 struct ipt_table *table,
266 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
271 /* Initializing verdict to NF_DROP keeps gcc happy. */
272 unsigned int verdict = NF_DROP;
273 const char *indev, *outdev;
275 struct ipt_entry *e, *back;
278 ip = (*pskb)->nh.iph;
279 datalen = (*pskb)->len - ip->ihl * 4;
/* NULL devices compare as an all-zero name so the unrolled word
 * comparison in ip_packet_match stays valid. */
280 indev = in ? in->name : nulldevname;
281 outdev = out ? out->name : nulldevname;
282 /* We handle fragments by dealing with the first fragment as
283 * if it was a normal packet. All other fragments are treated
284 * normally, except that they will NEVER match rules that ask
285 * things we don't know, ie. tcp syn flag or ports). If the
286 * rule is also a fragment-specific rule, non-fragments won't
288 offset = ntohs(ip->frag_off) & IP_OFFSET;
290 read_lock_bh(&table->lock);
291 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
/* Select this CPU's private copy of the rule blob. */
292 table_base = (void *)table->private->entries
293 + TABLE_OFFSET(table->private, smp_processor_id());
294 e = get_entry(table_base, table->private->hook_entry[hook]);
296 #ifdef CONFIG_NETFILTER_DEBUG
297 /* Check noone else using our table */
/* comefrom of entry 0 doubles as an in-use marker: 0xdead57ac =
 * idle, 0x57acc001 = traversal in progress. */
298 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
303 &((struct ipt_entry *)table_base)->comefrom,
304 ((struct ipt_entry *)table_base)->comefrom);
306 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
309 /* For return from builtin chain */
310 back = get_entry(table_base, table->private->underflow[hook]);
315 (*pskb)->nfcache |= e->nfcache;
316 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317 struct ipt_entry_target *t;
/* Nonzero from do_match means some match failed: skip rule. */
319 if (IPT_MATCH_ITERATE(e, do_match,
321 offset, &hotdrop) != 0)
324 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
326 t = ipt_get_target(e);
327 IP_NF_ASSERT(t->u.kernel.target);
328 /* Standard target? */
/* A NULL target function marks a standard target whose verdict is
 * either a final verdict (negative, encoded as -v - 1), IPT_RETURN,
 * or a jump offset into another chain. */
329 if (!t->u.kernel.target->target) {
332 v = ((struct ipt_standard_target *)t)->verdict;
334 /* Pop from stack? */
335 if (v != IPT_RETURN) {
336 verdict = (unsigned)(-v) - 1;
340 back = get_entry(table_base,
345 != (void *)e + e->next_offset) {
346 /* Save old back ptr in next entry */
347 struct ipt_entry *next
348 = (void *)e + e->next_offset;
350 = (void *)back - table_base;
351 /* set back pointer to next entry */
355 e = get_entry(table_base, v);
357 /* Targets which reenter must return
359 #ifdef CONFIG_NETFILTER_DEBUG
360 ((struct ipt_entry *)table_base)->comefrom
363 verdict = t->u.kernel.target->target(pskb,
369 #ifdef CONFIG_NETFILTER_DEBUG
370 if (((struct ipt_entry *)table_base)->comefrom
372 && verdict == IPT_CONTINUE) {
373 printk("Target %s reentered!\n",
374 t->u.kernel.target->name);
377 ((struct ipt_entry *)table_base)->comefrom
380 /* Target might have changed stuff. */
/* Re-read header pointers: the target may have reallocated or
 * mangled the skb (e.g. NAT). */
381 ip = (*pskb)->nh.iph;
382 datalen = (*pskb)->len - ip->ihl * 4;
384 if (verdict == IPT_CONTINUE)
385 e = (void *)e + e->next_offset;
/* Non-matching rule: fall through to the next entry. */
393 e = (void *)e + e->next_offset;
397 #ifdef CONFIG_NETFILTER_DEBUG
398 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
400 read_unlock_bh(&table->lock);
402 #ifdef DEBUG_ALLOW_ALL
411 /* If it succeeds, returns element and locks mutex */
/* Looks up `name` in one of the three registries under `mutex`;
 * on success the mutex is left held for the caller, and *error
 * carries the down_interruptible result. NOTE(review): excerpt —
 * the NULL-result unlock path and return were elided. */
413 find_inlist_lock_noload(struct list_head *head,
416 struct semaphore *mutex)
421 duprintf("find_inlist: searching for `%s' in %s.\n",
422 name, head == &ipt_target ? "ipt_target"
423 : head == &ipt_match ? "ipt_match"
424 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
427 *error = down_interruptible(mutex);
431 ret = list_named_find(head, name);
/* Without CONFIG_KMOD, lookup never autoloads: alias straight to the
 * noload variant (the surrounding #ifdef lines were elided). */
440 #define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
/* With CONFIG_KMOD: try the registry, and on miss request_module()
 * "<prefix><name>" (e.g. "ipt_tcp") and retry once. */
443 find_inlist_lock(struct list_head *head,
447 struct semaphore *mutex)
451 ret = find_inlist_lock_noload(head, name, error, mutex);
453 duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
454 request_module("%s%s", prefix, name);
455 ret = find_inlist_lock_noload(head, name, error, mutex);
/* Find table `name`, autoloading module "iptable_<name>" on miss;
 * leaves `mutex` held on success. */
462 static inline struct ipt_table *
463 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
465 return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
/* Find match `name`, autoloading module "ipt_<name>" on miss. */
468 static inline struct ipt_match *
469 find_match_lock(const char *name, int *error, struct semaphore *mutex)
471 return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
/* Find target `name`, autoloading module "ipt_<name>" on miss. */
474 static struct ipt_target *
475 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
477 return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
/* Convenience wrapper binding the global ipt_mutex. */
481 __ipt_find_target_lock(const char *name, int *error)
483 return ipt_find_target_lock(name,error,&ipt_mutex);
492 /* All zeroes == unconditional rule. */
/* Word-scan the ipt_ip spec; any nonzero word means the rule has a
 * condition. NOTE(review): excerpt — the return statements elided. */
494 unconditional(const struct ipt_ip *ip)
498 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
499 if (((__u32 *)ip)[i])
505 /* Figures out from what hook each rule can be called: returns 0 if
506 there are loops. Puts hook bitmask in comefrom. */
/* Depth-first walk over every valid hook's chain, without recursion:
 * e->counters.pcnt temporarily stores the back-pointer (reset to 0 on
 * the way out) and bit NF_IP_NUMHOOKS of comefrom marks "on current
 * path" for loop detection. NOTE(review): excerpt — several loop
 * bodies, braces and returns were elided. */
508 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
512 /* No recursion; use packet counter to save back ptrs (reset
513 to 0 as we leave), and comefrom to save source hook bitmask */
514 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
515 unsigned int pos = newinfo->hook_entry[hook];
517 = (struct ipt_entry *)(newinfo->entries + pos);
519 if (!(valid_hooks & (1 << hook)))
522 /* Set initial back pointer. */
523 e->counters.pcnt = pos;
526 struct ipt_standard_target *t
527 = (void *)ipt_get_target(e);
/* Re-visiting an entry already on the current path = loop. */
529 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
530 printk("iptables: loop hook %u pos %u %08X.\n",
531 hook, pos, e->comefrom);
535 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
537 /* Unconditional return/END. */
538 if (e->target_offset == sizeof(struct ipt_entry)
539 && (strcmp(t->target.u.user.name,
540 IPT_STANDARD_TARGET) == 0)
542 && unconditional(&e->ip)) {
543 unsigned int oldpos, size;
545 /* Return: backtrack through the last
548 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
549 #ifdef DEBUG_IP_FIREWALL_USER
551 & (1 << NF_IP_NUMHOOKS)) {
552 duprintf("Back unset "
/* Pop the saved back-pointer and clear the scratch counter. */
559 pos = e->counters.pcnt;
560 e->counters.pcnt = 0;
562 /* We're at the start. */
566 e = (struct ipt_entry *)
567 (newinfo->entries + pos);
568 } while (oldpos == pos + e->next_offset);
571 size = e->next_offset;
572 e = (struct ipt_entry *)
573 (newinfo->entries + pos + size);
574 e->counters.pcnt = pos;
577 int newpos = t->verdict;
579 if (strcmp(t->target.u.user.name,
580 IPT_STANDARD_TARGET) == 0
582 /* This a jump; chase it. */
583 duprintf("Jump rule %u -> %u\n",
586 /* ... this is a fallthru */
587 newpos = pos + e->next_offset;
589 e = (struct ipt_entry *)
590 (newinfo->entries + newpos);
591 e->counters.pcnt = pos;
596 duprintf("Finished chain %u\n", hook);
/* Iterator callback: tear down one match — call its destroy hook (if
 * any) and drop the module reference. When `i` is non-NULL, stop after
 * *i matches (used to unwind exactly the matches already checked). */
602 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
604 if (i && (*i)-- == 0)
607 if (m->u.kernel.match->destroy)
608 m->u.kernel.match->destroy(m->data,
609 m->u.match_size - sizeof(*m));
610 module_put(m->u.kernel.match->me);
/* Sanity-checks a standard target: exact (aligned) size, and a verdict
 * that is either a jump offset within the blob or a valid negative
 * verdict no lower than -NF_MAX_VERDICT - 1. NOTE(review): excerpt —
 * the size comparison's left operand and returns were elided. */
615 standard_check(const struct ipt_entry_target *t,
616 unsigned int max_offset)
618 struct ipt_standard_target *targ = (void *)t;
620 /* Check standard info. */
622 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
623 duprintf("standard_check: target size %u != %u\n",
625 IPT_ALIGN(sizeof(struct ipt_standard_target)));
629 if (targ->verdict >= 0
630 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
631 duprintf("ipt_standard_check: bad verdict (%i)\n",
636 if (targ->verdict < -NF_MAX_VERDICT - 1) {
637 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
/* Resolve one user-supplied match by name (autoload + ipt_mutex),
 * pin its module, and run its checkentry hook against the rule's
 * ipt_ip/hookmask; on checkentry failure the module ref is dropped.
 * NOTE(review): excerpt — up(&ipt_mutex), error returns and the
 * (*i)++ accounting were elided. */
645 check_match(struct ipt_entry_match *m,
647 const struct ipt_ip *ip,
648 unsigned int hookmask,
652 struct ipt_match *match;
654 match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
656 duprintf("check_match: `%s' not found\n", m->u.user.name);
659 if (!try_module_get(match->me)) {
663 m->u.kernel.match = match;
666 if (m->u.kernel.match->checkentry
667 && !m->u.kernel.match->checkentry(name, ip, m->data,
668 m->u.match_size - sizeof(*m),
670 module_put(m->u.kernel.match->me);
671 duprintf("ip_tables: check failed for `%s'.\n",
672 m->u.kernel.match->name);
/* Forward declaration so check_entry can recognize the built-in
 * standard target by address. */
680 static struct ipt_target ipt_standard_target;
/* Validate one whole rule: its ipt_ip spec, every match (check_match,
 * counting successes in j for unwinding), then its target — pinned,
 * standard_check'd if standard, otherwise its checkentry hook run.
 * Any failure unwinds the already-checked matches via cleanup_match.
 * NOTE(review): excerpt — mutex releases, (*i)++ and returns elided. */
683 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
686 struct ipt_entry_target *t;
687 struct ipt_target *target;
691 if (!ip_checkentry(&e->ip)) {
692 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
697 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
699 goto cleanup_matches;
701 t = ipt_get_target(e);
702 target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
704 duprintf("check_entry: `%s' not found\n", t->u.user.name);
705 goto cleanup_matches;
707 if (!try_module_get(target->me)) {
710 goto cleanup_matches;
712 t->u.kernel.target = target;
715 if (t->u.kernel.target == &ipt_standard_target) {
716 if (!standard_check(t, size)) {
718 goto cleanup_matches;
720 } else if (t->u.kernel.target->checkentry
721 && !t->u.kernel.target->checkentry(name, e, t->data,
725 module_put(t->u.kernel.target->me);
726 duprintf("ip_tables: check failed for `%s'.\n",
727 t->u.kernel.target->name);
729 goto cleanup_matches;
/* Unwind path: destroy only the j matches that passed. */
736 IPT_MATCH_ITERATE(e, cleanup_match, &j);
/* First-pass validation of one entry in the user blob: alignment,
 * bounds against `limit`, minimum size; records which entries sit
 * exactly at the user-declared hook entry / underflow offsets, and
 * zeroes the counters. NOTE(review): excerpt — the next_offset checks,
 * comefrom reset, (*i)++ and returns were elided. */
741 check_entry_size_and_hooks(struct ipt_entry *e,
742 struct ipt_table_info *newinfo,
744 unsigned char *limit,
745 const unsigned int *hook_entries,
746 const unsigned int *underflows,
751 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
752 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
753 duprintf("Bad offset %p\n", e);
758 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
759 duprintf("checking: element %p size %u\n",
764 /* Check hooks & underflows */
765 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
766 if ((unsigned char *)e - base == hook_entries[h])
767 newinfo->hook_entry[h] = hook_entries[h];
768 if ((unsigned char *)e - base == underflows[h])
769 newinfo->underflow[h] = underflows[h];
772 /* FIXME: underflows must be unconditional, standard verdicts
773 < 0 (not IPT_RETURN). --RR */
775 /* Clear counters and comefrom */
776 e->counters = ((struct ipt_counters) { 0, 0 });
/* Iterator callback: fully release one rule — destroy all matches,
 * call the target's destroy hook, and drop the target module ref.
 * Non-NULL `i` limits the walk to the first *i entries. */
784 cleanup_entry(struct ipt_entry *e, unsigned int *i)
786 struct ipt_entry_target *t;
788 if (i && (*i)-- == 0)
791 /* Cleanup all matches */
792 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
793 t = ipt_get_target(e);
794 if (t->u.kernel.target->destroy)
795 t->u.kernel.target->destroy(t->data,
796 t->u.target_size - sizeof(*t));
797 module_put(t->u.kernel.target->me);
801 /* Checks and translates the user-supplied table segment (held in
/* Full validation pipeline for a new table image: size/offset pass
 * (check_entry_size_and_hooks), hook/underflow completeness, loop
 * detection (mark_source_chains), per-entry semantic checks
 * (check_entry, unwound via cleanup_entry on failure), then replicate
 * CPU 0's blob to every other CPU slot. NOTE(review): excerpt —
 * entry-count verification condition and error returns were elided. */
804 translate_table(const char *name,
805 unsigned int valid_hooks,
806 struct ipt_table_info *newinfo,
809 const unsigned int *hook_entries,
810 const unsigned int *underflows)
815 newinfo->size = size;
816 newinfo->number = number;
818 /* Init all hooks to impossible value. */
819 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
820 newinfo->hook_entry[i] = 0xFFFFFFFF;
821 newinfo->underflow[i] = 0xFFFFFFFF;
824 duprintf("translate_table: size %u\n", newinfo->size);
826 /* Walk through entries, checking offsets. */
827 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
828 check_entry_size_and_hooks,
831 newinfo->entries + size,
832 hook_entries, underflows, &i);
837 duprintf("translate_table: %u not %u entries\n",
842 /* Check hooks all assigned */
843 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
844 /* Only hooks which are valid */
845 if (!(valid_hooks & (1 << i)))
847 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
848 duprintf("Invalid hook entry %u %u\n",
852 if (newinfo->underflow[i] == 0xFFFFFFFF) {
853 duprintf("Invalid underflow %u %u\n",
859 if (!mark_source_chains(newinfo, valid_hooks))
862 /* Finally, each sanity check must pass */
864 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
865 check_entry, name, size, &i);
/* On failure, tear down only the i entries already checked. */
868 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
873 /* And one copy for every other CPU */
874 for (i = 1; i < NR_CPUS; i++) {
875 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
877 SMP_ALIGN(newinfo->size));
/* Atomically swap a table's private info under write_lock_bh after
 * verifying the caller's counter count still matches; returns the old
 * info (for counter harvesting / freeing) or NULL on mismatch.
 * In debug builds, pre-marks each CPU's entry 0 comefrom with the
 * "idle" magic. NOTE(review): excerpt — error-path returns elided. */
884 replace_table(struct ipt_table *table,
885 unsigned int num_counters,
886 struct ipt_table_info *newinfo,
889 struct ipt_table_info *oldinfo;
891 #ifdef CONFIG_NETFILTER_DEBUG
893 struct ipt_entry *table_base;
896 for (i = 0; i < NR_CPUS; i++) {
898 (void *)newinfo->entries
899 + TABLE_OFFSET(newinfo, i);
901 table_base->comefrom = 0xdead57ac;
906 /* Do the substitution. */
907 write_lock_bh(&table->lock);
908 /* Check inside lock: is the old number correct? */
909 if (num_counters != table->private->number) {
910 duprintf("num_counters != table->private->number (%u/%u)\n",
911 num_counters, table->private->number);
912 write_unlock_bh(&table->lock);
916 oldinfo = table->private;
917 table->private = newinfo;
918 newinfo->initial_entries = oldinfo->initial_entries;
919 write_unlock_bh(&table->lock);
/* Iterator callback: fold one entry's counters into total[*i].
 * NOTE(review): excerpt — the (*i)++ / return lines were elided. */
926 add_entry_to_counter(const struct ipt_entry *e,
927 struct ipt_counters total[],
930 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/* Sum every CPU's per-entry counters into the flat `counters` array
 * (indexed by rule number), via add_entry_to_counter. Caller holds
 * the table write lock for a consistent snapshot. */
937 get_counters(const struct ipt_table_info *t,
938 struct ipt_counters counters[])
943 for (cpu = 0; cpu < NR_CPUS; cpu++) {
945 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
947 add_entry_to_counter,
/* Copy a table's rules out to userspace: snapshot the summed counters
 * under the write lock, bulk-copy CPU 0's blob, then walk it again
 * patching each entry's counters and rewriting kernel match/target
 * pointers back into their user-visible names. NOTE(review): excerpt —
 * error labels, vfree and returns were elided. */
954 copy_entries_to_user(unsigned int total_size,
955 struct ipt_table *table,
956 void __user *userptr)
958 unsigned int off, num, countersize;
960 struct ipt_counters *counters;
963 /* We need atomic snapshot of counters: rest doesn't change
964 (other than comefrom, which userspace doesn't care
966 countersize = sizeof(struct ipt_counters) * table->private->number;
967 counters = vmalloc(countersize);
969 if (counters == NULL)
972 /* First, sum counters... */
973 memset(counters, 0, countersize);
/* Write lock (not read): blocks packet-path updates so the summed
 * snapshot is consistent across CPUs. */
974 write_lock_bh(&table->lock);
975 get_counters(table->private, counters);
976 write_unlock_bh(&table->lock);
978 /* ... then copy entire thing from CPU 0... */
979 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
984 /* FIXME: use iterator macros --RR */
985 /* ... then go back and fix counters and names */
986 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
988 struct ipt_entry_match *m;
989 struct ipt_entry_target *t;
991 e = (struct ipt_entry *)(table->private->entries + off);
992 if (copy_to_user(userptr + off
993 + offsetof(struct ipt_entry, counters),
995 sizeof(counters[num])) != 0) {
1000 for (i = sizeof(struct ipt_entry);
1001 i < e->target_offset;
1002 i += m->u.match_size) {
1005 if (copy_to_user(userptr + off + i
1006 + offsetof(struct ipt_entry_match,
1008 m->u.kernel.match->name,
1009 strlen(m->u.kernel.match->name)+1)
1016 t = ipt_get_target(e);
1017 if (copy_to_user(userptr + off + e->target_offset
1018 + offsetof(struct ipt_entry_target,
1020 t->u.kernel.target->name,
1021 strlen(t->u.kernel.target->name)+1) != 0) {
/* IPT_SO_GET_ENTRIES backend: locate the table (locking ipt_mutex via
 * ipt_find_table_lock), verify the user's declared size matches, and
 * hand off to copy_entries_to_user. NOTE(review): excerpt — the
 * mutex release and error returns were elided. */
1033 get_entries(const struct ipt_get_entries *entries,
1034 struct ipt_get_entries __user *uptr)
1037 struct ipt_table *t;
1039 t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
1041 duprintf("t->private->number = %u\n",
1042 t->private->number);
1043 if (entries->size == t->private->size)
1044 ret = copy_entries_to_user(t->private->size,
1045 t, uptr->entrytable);
1047 duprintf("get_entries: I've got %u not %u!\n",
1054 duprintf("get_entries: Can't find %s!\n",
/* IPT_SO_SET_REPLACE backend: copy in the ipt_replace header and rule
 * blob, translate/validate it, swap it into the named table, adjust
 * the module use count, harvest the old counters back to userspace,
 * and free the old image. goto labels unwind in reverse acquisition
 * order. NOTE(review): excerpt — several returns, vfree calls and the
 * module_put on the count-adjustment branches were elided. */
1061 do_replace(void __user *user, unsigned int len)
1064 struct ipt_replace tmp;
1065 struct ipt_table *t;
1066 struct ipt_table_info *newinfo, *oldinfo;
1067 struct ipt_counters *counters;
1069 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1072 /* Hack: Causes ipchains to give correct error msg --RR */
1073 if (len != sizeof(tmp) + tmp.size)
1074 return -ENOPROTOOPT;
1076 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1077 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1080 newinfo = vmalloc(sizeof(struct ipt_table_info)
1081 + SMP_ALIGN(tmp.size) * NR_CPUS)
1085 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1091 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1096 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1098 ret = translate_table(tmp.name, tmp.valid_hooks,
1099 newinfo, tmp.size, tmp.num_entries,
1100 tmp.hook_entry, tmp.underflow);
1102 goto free_newinfo_counters;
1104 duprintf("ip_tables: Translated table\n");
1106 t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1108 goto free_newinfo_counters_untrans;
1111 if (tmp.valid_hooks != t->valid_hooks) {
1112 duprintf("Valid hook crap: %08X vs %08X\n",
1113 tmp.valid_hooks, t->valid_hooks);
1115 goto free_newinfo_counters_untrans_unlock;
1118 /* Get a reference in advance, we're not allowed fail later */
1119 if (!try_module_get(t->me)) {
1121 goto free_newinfo_counters_untrans_unlock;
1125 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1129 /* Update module usage count based on number of rules */
1130 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1131 oldinfo->number, oldinfo->initial_entries, newinfo->number);
/* Hold an extra module ref while rules beyond the initial set exist;
 * drop it again when the table shrinks back (branches elided here). */
1132 if ((oldinfo->number > oldinfo->initial_entries) ||
1133 (newinfo->number <= oldinfo->initial_entries))
1135 if ((oldinfo->number > oldinfo->initial_entries) &&
1136 (newinfo->number <= oldinfo->initial_entries))
1139 /* Get the old counters. */
1140 get_counters(oldinfo, counters);
1141 /* Decrease module usage counts and free resource */
1142 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1144 /* Silent error: too late now. */
1145 copy_to_user(tmp.counters, counters,
1146 sizeof(struct ipt_counters) * tmp.num_counters);
1153 free_newinfo_counters_untrans_unlock:
1155 free_newinfo_counters_untrans:
1156 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1157 free_newinfo_counters:
1164 /* We're lazy, and add to the first CPU; overflow works its fey magic
1165 * and everything is OK. */
/* Iterator callback for do_add_counters: add addme[*i] into the
 * entry's counters. NOTE(review): excerpt — (*i)++ and return elided. */
1167 add_counter_to_entry(struct ipt_entry *e,
1168 const struct ipt_counters addme[],
1172 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1174 (long unsigned int)e->counters.pcnt,
1175 (long unsigned int)e->counters.bcnt,
1176 (long unsigned int)addme[*i].pcnt,
1177 (long unsigned int)addme[*i].bcnt);
1180 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
/* IPT_SO_SET_ADD_COUNTERS backend: copy in the counters blob (length
 * re-validated against num_counters), find the table, and under the
 * write lock add each delta to its rule. NOTE(review): excerpt —
 * vfree/unlock/up cleanup labels and returns were elided. */
1187 do_add_counters(void __user *user, unsigned int len)
1190 struct ipt_counters_info tmp, *paddc;
1191 struct ipt_table *t;
1194 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1197 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1200 paddc = vmalloc(len);
1204 if (copy_from_user(paddc, user, len) != 0) {
1209 t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1213 write_lock_bh(&t->lock);
1214 if (t->private->number != paddc->num_counters) {
1216 goto unlock_up_free;
1220 IPT_ENTRY_ITERATE(t->private->entries,
1222 add_counter_to_entry,
1226 write_unlock_bh(&t->lock);
/* setsockopt entry point: requires CAP_NET_ADMIN, then dispatches to
 * do_replace or do_add_counters by cmd. NOTE(review): excerpt —
 * the -EPERM/-EINVAL returns and final return were elided. */
1235 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1239 if (!capable(CAP_NET_ADMIN))
1243 case IPT_SO_SET_REPLACE:
1244 ret = do_replace(user, len);
1247 case IPT_SO_SET_ADD_COUNTERS:
1248 ret = do_add_counters(user, len);
1252 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
/* getsockopt entry point: requires CAP_NET_ADMIN. GET_INFO copies out
 * an ipt_getinfo snapshot for a named table; GET_ENTRIES validates the
 * declared size then defers to get_entries. NOTE(review): excerpt —
 * switch braces, mutex releases and several returns were elided. */
1260 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1264 if (!capable(CAP_NET_ADMIN))
1268 case IPT_SO_GET_INFO: {
1269 char name[IPT_TABLE_MAXNAMELEN];
1270 struct ipt_table *t;
1272 if (*len != sizeof(struct ipt_getinfo)) {
1273 duprintf("length %u != %u\n", *len,
1274 sizeof(struct ipt_getinfo));
1279 if (copy_from_user(name, user, sizeof(name)) != 0) {
/* Force NUL-termination of the untrusted table name. */
1283 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1284 t = ipt_find_table_lock(name, &ret, &ipt_mutex);
1286 struct ipt_getinfo info;
1288 info.valid_hooks = t->valid_hooks;
1289 memcpy(info.hook_entry, t->private->hook_entry,
1290 sizeof(info.hook_entry));
1291 memcpy(info.underflow, t->private->underflow,
1292 sizeof(info.underflow));
1293 info.num_entries = t->private->number;
1294 info.size = t->private->size;
1295 memcpy(info.name, name, sizeof(info.name));
1297 if (copy_to_user(user, &info, *len) != 0)
1307 case IPT_SO_GET_ENTRIES: {
1308 struct ipt_get_entries get;
1310 if (*len < sizeof(get)) {
1311 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1313 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1315 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1316 duprintf("get_entries: %u != %u\n", *len,
1317 sizeof(struct ipt_get_entries) + get.size);
1320 ret = get_entries(&get, user);
1325 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1332 /* Registration hooks for targets. */
/* Insert a target into the global registry under ipt_mutex; fails if
 * the name is already present. NOTE(review): excerpt — up(&ipt_mutex)
 * and returns were elided. */
1334 ipt_register_target(struct ipt_target *target)
1338 ret = down_interruptible(&ipt_mutex);
1342 if (!list_named_insert(&ipt_target, target)) {
1343 duprintf("ipt_register_target: `%s' already in list!\n",
/* Remove a target from the registry (lock acquisition/release elided
 * in this excerpt). */
1352 ipt_unregister_target(struct ipt_target *target)
1355 LIST_DELETE(&ipt_target, target);
/* Insert a match into the global registry under ipt_mutex; fails if
 * the name is already present. NOTE(review): excerpt — unlock/returns
 * elided. */
1360 ipt_register_match(struct ipt_match *match)
1364 ret = down_interruptible(&ipt_mutex);
1368 if (!list_named_insert(&ipt_match, match)) {
1369 duprintf("ipt_register_match: `%s' already in list!\n",
/* Remove a match from the registry (lock handling elided in this
 * excerpt). */
1379 ipt_unregister_match(struct ipt_match *match)
1382 LIST_DELETE(&ipt_match, match);
/* Register a built-in table: allocate per-CPU space, copy in the
 * compiled-in initial ruleset, translate it, then (under ipt_mutex)
 * reject duplicates and install via replace_table — `bootstrap` is a
 * dummy ipt_table_info so replace_table has something to swap out.
 * NOTE(review): excerpt — error-path vfree/up and returns elided. */
1386 int ipt_register_table(struct ipt_table *table)
1389 struct ipt_table_info *newinfo;
1390 static struct ipt_table_info bootstrap
1391 = { 0, 0, 0, { 0 }, { 0 }, { } };
1393 newinfo = vmalloc(sizeof(struct ipt_table_info)
1394 + SMP_ALIGN(table->table->size) * NR_CPUS);
1398 memcpy(newinfo->entries, table->table->entries, table->table->size);
1400 ret = translate_table(table->name, table->valid_hooks,
1401 newinfo, table->table->size,
1402 table->table->num_entries,
1403 table->table->hook_entry,
1404 table->table->underflow);
1410 ret = down_interruptible(&ipt_mutex);
1416 /* Don't autoload: we'd eat our tail... */
1417 if (list_named_find(&ipt_tables, table->name)) {
1422 /* Simplifies replace_table code. */
1423 table->private = &bootstrap;
1424 if (!replace_table(table, 0, newinfo, &ret))
1427 duprintf("table->private->number = %u\n",
1428 table->private->number);
1430 /* save number of initial entries */
1431 table->private->initial_entries = table->private->number;
1433 rwlock_init(&table->lock);
1434 list_prepend(&ipt_tables, table);
/* Unregister a table: delist it, release every rule's matches/targets
 * and module refs, and free the per-CPU blob. NOTE(review): excerpt —
 * the ipt_mutex down/up around LIST_DELETE were elided. */
1445 void ipt_unregister_table(struct ipt_table *table)
1448 LIST_DELETE(&ipt_tables, table);
1451 /* Decrease module usage counts and free resources */
1452 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1453 cleanup_entry, NULL);
1454 vfree(table->private);
1457 /* Returns 1 if the port is matched by the range, 0 otherwise */
/* Inclusive [min,max] test, XOR'd with the invert flag. */
1459 port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
1463 ret = (port >= min && port <= max) ^ invert;
/* Scan the TCP options area (optlen bytes past the fixed header) for
 * `option`, copying it out via skb_header_pointer into a stack buffer
 * sized for the maximum option space (doff max 15 words). Single-byte
 * options advance by 1; others by their length byte (min 1 to avoid an
 * infinite loop). NOTE(review): excerpt — the NULL-pointer hotdrop
 * branch and final return were elided. */
1468 tcp_find_option(u_int8_t option,
1469 const struct sk_buff *skb,
1470 unsigned int optlen,
1474 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1475 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1478 duprintf("tcp_match: finding option\n");
1483 /* If we don't have the whole header, drop packet. */
1484 op = skb_header_pointer(skb,
1485 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1492 for (i = 0; i < optlen; ) {
1493 if (op[i] == option) return !invert;
1495 else i += op[i+1]?:1;
/* TCP match extension: rejects the classic offset-1 fragment attack,
 * pulls the TCP header (hotdropping truncated packets), then checks
 * source/destination port ranges, flag mask/compare (byte 13 of the
 * header), and optionally a specific TCP option. NOTE(review):
 * excerpt — fragment guards, hotdrop assignments and several returns
 * were elided. */
1502 tcp_match(const struct sk_buff *skb,
1503 const struct net_device *in,
1504 const struct net_device *out,
1505 const void *matchinfo,
1509 struct tcphdr _tcph, *th;
1510 const struct ipt_tcp *tcpinfo = matchinfo;
1515 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1516 causes this. Its a cracker trying to break in by doing a
1517 flag overwrite to pass the direction checks.
1520 duprintf("Dropping evil TCP offset=1 frag.\n");
1523 /* Must not be a fragment. */
/* Inversion helper bound to this rule's invflags. */
1527 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1529 th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1530 sizeof(_tcph), &_tcph);
1532 /* We've been asked to examine this packet, and we
1533 can't. Hence, no choice but to drop. */
1534 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1539 if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1541 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1543 if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1545 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1547 if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1548 == tcpinfo->flg_cmp,
1551 if (tcpinfo->option) {
/* doff smaller than the fixed header is malformed: hotdrop. */
1552 if (th->doff * 4 < sizeof(_tcph)) {
1556 if (!tcp_find_option(tcpinfo->option, skb,
1557 th->doff*4 - sizeof(_tcph),
1558 tcpinfo->invflags & IPT_TCP_INV_OPTION,
1565 /* Called when user tries to insert an entry of this type. */
/* Accept only rules with proto == TCP (not inverted), the exact
 * aligned ipt_tcp size, and no unknown inversion flags. */
1567 tcp_checkentry(const char *tablename,
1568 const struct ipt_ip *ip,
1570 unsigned int matchsize,
1571 unsigned int hook_mask)
1573 const struct ipt_tcp *tcpinfo = matchinfo;
1575 /* Must specify proto == TCP, and no unknown invflags */
1576 return ip->proto == IPPROTO_TCP
1577 && !(ip->invflags & IPT_INV_PROTO)
1578 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1579 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
/* UDP match extension: pull the UDP header (hotdropping truncated
 * packets) and test both port ranges with per-range inversion.
 * NOTE(review): excerpt — fragment guard and hotdrop branch bodies
 * were elided. */
1583 udp_match(const struct sk_buff *skb,
1584 const struct net_device *in,
1585 const struct net_device *out,
1586 const void *matchinfo,
1590 struct udphdr _udph, *uh;
1591 const struct ipt_udp *udpinfo = matchinfo;
1593 /* Must not be a fragment. */
1597 uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1598 sizeof(_udph), &_udph);
1600 /* We've been asked to examine this packet, and we
1601 can't. Hence, no choice but to drop. */
1602 duprintf("Dropping evil UDP tinygram.\n");
1607 return port_match(udpinfo->spts[0], udpinfo->spts[1],
1609 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1610 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1612 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1615 /* Called when user tries to insert an entry of this type. */
/* Load-time validation for a UDP match.  Unlike tcp_checkentry's single
 * boolean expression, each failure case gets its own branch with a
 * duprintf() diagnostic: wrong protocol / inverted protocol, wrong
 * match payload size, or unknown inversion flags.
 * NOTE(review): elided listing — the return-type line, the matchinfo
 * parameter, the `return 0;`/`return 1;` statements and closing braces
 * of each branch are not shown here. */
1617 udp_checkentry(const char *tablename,
1618 const struct ipt_ip *ip,
1620 unsigned int matchinfosize,
1621 unsigned int hook_mask)
1623 const struct ipt_udp *udpinfo = matchinfo;
1625 /* Must specify proto == UDP, and no unknown invflags */
1626 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1627 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1631 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1632 duprintf("ipt_udp: matchsize %u != %u\n",
1633 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1636 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1637 duprintf("ipt_udp: unknown flags %X\n",
1645 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
/* test_type == 0xFF acts as a wildcard that matches any ICMP type;
 * otherwise the type must match exactly and the code must fall in
 * [min_code, max_code].
 * NOTE(review): elided listing — the return type and the trailing
 * `invert` parameter with its XOR (orig. lines 1646, 1649-1650, 1652)
 * are missing; presumably the result is XORed with an invert flag —
 * confirm against the full source. */
1647 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1648 u_int8_t type, u_int8_t code,
1651 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
/* Per-packet ICMP match: read the ICMP header past the IP header
 * (offset ihl*4); unreadable headers are dropped as tinygrams, readable
 * ones are checked via icmp_type_code_match() against the configured
 * type/code range, honoring the IPT_ICMP_INV inversion flag.
 * NOTE(review): elided listing — the return type, trailing parameters,
 * the fragment-check branch after orig. line 1666, the drop path, and
 * the middle arguments of the icmp_type_code_match() call (orig. lines
 * 1682-1684: code bounds and ic->type/ic->code) are missing here. */
1656 icmp_match(const struct sk_buff *skb,
1657 const struct net_device *in,
1658 const struct net_device *out,
1659 const void *matchinfo,
1663 struct icmphdr _icmph, *ic;
1664 const struct ipt_icmp *icmpinfo = matchinfo;
1666 /* Must not be a fragment. */
1670 ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1671 sizeof(_icmph), &_icmph);
1673 /* We've been asked to examine this packet, and we
1674 * can't. Hence, no choice but to drop.
1676 duprintf("Dropping evil ICMP tinygram.\n");
1681 return icmp_type_code_match(icmpinfo->type,
1685 !!(icmpinfo->invflags&IPT_ICMP_INV));
1688 /* Called when user tries to insert an entry of this type. */
/* Load-time validation for an ICMP match, parallel to tcp_checkentry:
 * protocol must be ICMP (not inverted), the match payload must be the
 * aligned size of struct ipt_icmp, and only IPT_ICMP_INV may be set.
 * NOTE(review): elided listing — the return-type line, the matchinfo
 * parameter and the braces (orig. lines 1689, 1692, 1695, 1697, 1703)
 * are not shown here. */
1690 icmp_checkentry(const char *tablename,
1691 const struct ipt_ip *ip,
1693 unsigned int matchsize,
1694 unsigned int hook_mask)
1696 const struct ipt_icmp *icmpinfo = matchinfo;
1698 /* Must specify proto == ICMP, and no unknown invflags */
1699 return ip->proto == IPPROTO_ICMP
1700 && !(ip->invflags & IPT_INV_PROTO)
1701 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1702 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1705 /* The built-in targets: standard (NULL) and error. */
/* These two targets always exist without module loading: the standard
 * target implements ACCEPT/DROP/QUEUE/RETURN verdicts, the error
 * target handles malformed/unknown target names via ipt_error.
 * NOTE(review): elided listing — the remaining initializer fields and
 * closing braces (orig. lines 1708-1709, 1713) are not shown here. */
1706 static struct ipt_target ipt_standard_target = {
1707 .name = IPT_STANDARD_TARGET,
1710 static struct ipt_target ipt_error_target = {
1711 .name = IPT_ERROR_TARGET,
1712 .target = ipt_error,
/* Netfilter sockopt registration: routes the iptables userspace
 * set/get socket options in [IPT_BASE_CTL, IPT_SO_{SET,GET}_MAX] to
 * do_ipt_set_ctl()/do_ipt_get_ctl().
 * NOTE(review): elided listing — the .pf field (orig. line 1716) and
 * the closing brace are not shown here. */
1715 static struct nf_sockopt_ops ipt_sockopts = {
1717 .set_optmin = IPT_BASE_CTL,
1718 .set_optmax = IPT_SO_SET_MAX+1,
1719 .set = do_ipt_set_ctl,
1720 .get_optmin = IPT_BASE_CTL,
1721 .get_optmax = IPT_SO_GET_MAX+1,
1722 .get = do_ipt_get_ctl,
/* The three built-in protocol matches, registered at init(): each wires
 * its per-packet match callback and its load-time checkentry validator.
 * NOTE(review): elided listing — the .name fields and closing braces
 * (orig. lines 1726, 1729-1730, 1732, 1735-1736, 1738, 1741) are not
 * shown here. */
1725 static struct ipt_match tcp_matchstruct = {
1727 .match = &tcp_match,
1728 .checkentry = &tcp_checkentry,
1731 static struct ipt_match udp_matchstruct = {
1733 .match = &udp_match,
1734 .checkentry = &udp_checkentry,
1737 static struct ipt_match icmp_matchstruct = {
1739 .match = &icmp_match,
1740 .checkentry = &icmp_checkentry,
1743 #ifdef CONFIG_PROC_FS
/* /proc read helper: once *count has advanced past start_offset, append
 * the entry's name (the string stored immediately after its embedded
 * struct list_head) plus a newline into buffer at *pos.  When the line
 * would overflow `length` it signals the caller to stop iterating.
 * NOTE(review): elided listing — the body between the sprintf-overflow
 * check and the function's return statements (orig. lines 1755-1760)
 * is not shown here. */
1744 static inline int print_name(const char *i,
1745 off_t start_offset, char *buffer, int length,
1746 off_t *pos, unsigned int *count)
1748 if ((*count)++ >= start_offset) {
1749 unsigned int namelen;
1751 namelen = sprintf(buffer + *pos, "%s\n",
1752 i + sizeof(struct list_head));
1753 if (*pos + namelen > length) {
1754 /* Stop iterating */
/* Like print_name(), but skips the two built-in targets so only
 * module-registered targets appear in /proc/net/ip_tables_targets.
 * NOTE(review): elided listing — the skip branch's return (orig. line
 * 1767) and closing brace are not shown here. */
1762 static inline int print_target(const struct ipt_target *t,
1763 off_t start_offset, char *buffer, int length,
1764 off_t *pos, unsigned int *count)
1766 if (t == &ipt_standard_target || t == &ipt_error_target)
1768 return print_name((char *)t, start_offset, buffer, length, pos, count);
/* /proc/net/ip_tables_names read handler: walk ipt_tables under
 * ipt_mutex, printing one table name per line via print_name().
 * The `*start` assignment is the procfs "start hack" for resuming a
 * partial read (count - offset entries consumed this call).
 * NOTE(review): elided listing — the pos declaration, the EINTR
 * return, the up(&ipt_mutex) and the final return (orig. lines
 * 1773, 1777, 1782, 1787) are not shown here. */
1771 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1774 unsigned int count = 0;
1776 if (down_interruptible(&ipt_mutex) != 0)
1779 LIST_FIND(&ipt_tables, print_name, void *,
1780 offset, buffer, length, &pos, &count);
1784 /* `start' hack - see fs/proc/generic.c line ~105 */
1785 *start=(char *)((unsigned long)count-offset);
/* /proc/net/ip_tables_targets read handler: same shape as
 * ipt_get_tables() but walks ipt_target with print_target(), which
 * filters out the built-in standard/error targets.
 * NOTE(review): elided listing — pos declaration, EINTR return,
 * mutex release and final return are not shown here. */
1789 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1792 unsigned int count = 0;
1794 if (down_interruptible(&ipt_mutex) != 0)
1797 LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1798 offset, buffer, length, &pos, &count);
1802 *start = (char *)((unsigned long)count - offset);
/* /proc/net/ip_tables_matches read handler: same shape as
 * ipt_get_tables() but walks the ipt_match list.
 * NOTE(review): elided listing — pos declaration, EINTR return,
 * mutex release and final return are not shown here. */
1806 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1809 unsigned int count = 0;
1811 if (down_interruptible(&ipt_mutex) != 0)
1814 LIST_FIND(&ipt_match, print_name, void *,
1815 offset, buffer, length, &pos, &count);
1819 *start = (char *)((unsigned long)count - offset);
/* Table driving /proc registration in init()/fini(): entry name paired
 * with its get_info_t read handler; init() iterates until .name is
 * NULL, so a terminating sentinel entry (elided here, orig. line 1827)
 * must end the array. */
1823 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1824 { { "ip_tables_names", ipt_get_tables },
1825 { "ip_tables_targets", ipt_get_targets },
1826 { "ip_tables_matches", ipt_get_matches },
1828 #endif /*CONFIG_PROC_FS*/
/* Module init: append the built-in targets and matches to the global
 * lists (no locking needed yet — nothing else can hold the semaphore
 * during module load), register the iptables sockopt interface, then
 * (if procfs is enabled) create the three /proc/net entries, unwinding
 * already-created entries and the sockopt registration on failure.
 * NOTE(review): elided listing — local declarations (ret, i), error
 * returns, the goto/cleanup labels between orig. lines 1857-1868 and
 * the final `return 0;` are not shown here, so the exact unwind order
 * cannot be confirmed from this view. */
1830 static int __init init(void)
1834 /* Noone else will be downing sem now, so we won't sleep */
1836 list_append(&ipt_target, &ipt_standard_target);
1837 list_append(&ipt_target, &ipt_error_target);
1838 list_append(&ipt_match, &tcp_matchstruct);
1839 list_append(&ipt_match, &udp_matchstruct);
1840 list_append(&ipt_match, &icmp_matchstruct);
1843 /* Register setsockopt */
1844 ret = nf_register_sockopt(&ipt_sockopts);
1846 duprintf("Unable to register sockopts.\n");
1850 #ifdef CONFIG_PROC_FS
1852 struct proc_dir_entry *proc;
1855 for (i = 0; ipt_proc_entry[i].name; i++) {
1856 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1857 ipt_proc_entry[i].get_info);
/* On partial failure, remove entries created so far and undo the
 * sockopt registration (error path; surrounding branch elided). */
1860 proc_net_remove(ipt_proc_entry[i].name);
1861 nf_unregister_sockopt(&ipt_sockopts);
1864 proc->owner = THIS_MODULE;
1869 printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
/* Module exit: mirror of init() — drop the sockopt registration and
 * remove every /proc/net entry named in ipt_proc_entry.
 * NOTE(review): elided listing — braces, the loop index declaration
 * and the closing #endif (orig. lines 1874, 1877-1878, 1881-1882) are
 * not shown here. */
1873 static void __exit fini(void)
1875 nf_unregister_sockopt(&ipt_sockopts);
1876 #ifdef CONFIG_PROC_FS
1879 for (i = 0; ipt_proc_entry[i].name; i++)
1880 proc_net_remove(ipt_proc_entry[i].name);
/* Public API exported to other kernel modules (iptable_filter,
 * iptable_nat, match/target extension modules).  The two GPL-only
 * exports expose the internal lookup/lock pair used by in-tree code. */
1885 EXPORT_SYMBOL(ipt_register_table);
1886 EXPORT_SYMBOL(ipt_unregister_table);
1887 EXPORT_SYMBOL(ipt_register_match);
1888 EXPORT_SYMBOL(ipt_unregister_match);
1889 EXPORT_SYMBOL(ipt_do_table);
1890 EXPORT_SYMBOL(ipt_register_target);
1891 EXPORT_SYMBOL(ipt_unregister_target);
1892 EXPORT_SYMBOL_GPL(__ipt_find_target_lock);
1893 EXPORT_SYMBOL_GPL(__ipt_mutex_up);