2 * Packet matching code.
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
30 #include <linux/netfilter_ipv4/ip_tables.h>
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
/* Debug printk wrappers: real printk only when the matching
 * DEBUG_IP_FIREWALL* macro above is defined; no-ops otherwise.
 * NOTE(review): the #else/#endif lines are missing from this chunk. */
40 #ifdef DEBUG_IP_FIREWALL
41 #define dprintf(format, args...) printk(format , ## args)
43 #define dprintf(format, args...)
46 #ifdef DEBUG_IP_FIREWALL_USER
47 #define duprintf(format, args...) printk(format , ## args)
49 #define duprintf(format, args...)
/* Assertion helper: logs function/file/line on failure under
 * CONFIG_NETFILTER_DEBUG; compiles away otherwise. */
52 #ifdef CONFIG_NETFILTER_DEBUG
53 #define IP_NF_ASSERT(x) \
56 printk("IP_NF_ASSERT: %s:%s:%u\n", \
57 __FUNCTION__, __FILE__, __LINE__); \
60 #define IP_NF_ASSERT(x)
/* Round x up to an SMP cacheline multiple (used for per-CPU table copies). */
62 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
/* Table mutation is guarded by ipt_mutex; both "lock held" assertions
 * just check that ipt_mutex is currently down. */
65 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
66 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
67 #include <linux/netfilter_ipv4/lockhelp.h>
68 #include <linux/netfilter_ipv4/listhelp.h>
71 /* All the better to debug you with... */
/* Layout rationale: one copy of the rule blob per CPU so the softirq
 * path only needs a read lock while bumping counters. */
77 We keep a set of rules for each CPU, so we can avoid write-locking
78 them in the softirq when updating the counters and therefore
79 only need to read-lock in the softirq; doing a write_lock_bh() in user
80 context stops packets coming through and allows user context to read
81 the counters or update the rules.
83 To be cache friendly on SMP, we arrange them like so:
85 ... cache-align padding ...
88 Hence the start of any table is given by get_table() below. */
/* struct ipt_table_info: runtime bookkeeping for one table.
 * NOTE(review): the struct's opening lines are missing from this chunk. */
90 /* The table itself */
95 /* Number of entries: FIXME. --RR */
97 /* Initial number of entries. Needed for module usage count */
98 unsigned int initial_entries;
100 /* Entry points and underflows */
101 unsigned int hook_entry[NF_IP_NUMHOOKS];
102 unsigned int underflow[NF_IP_NUMHOOKS];
104 /* ipt_entry tables: one per CPU */
105 char entries[0] ____cacheline_aligned;
/* Global registries of targets, matches and tables (ipt_mutex-protected). */
108 static LIST_HEAD(ipt_target);
109 static LIST_HEAD(ipt_match);
110 static LIST_HEAD(ipt_tables);
/* Add b bytes / p packets to a counter pair. */
111 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
/* Byte offset of CPU p's copy of the entries; 0 on UP builds. */
114 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
116 #define TABLE_OFFSET(t,p) 0
/* Verbose semaphore tracing wrappers (debug builds only). */
120 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
121 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
122 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125 /* Returns whether matches rule or not. */
/* ip_packet_match: test one packet's IP header against one rule's
 * ipt_ip spec (addresses under mask, in/out interface, protocol,
 * fragment flag). Each test honours the rule's inversion flags.
 * NOTE(review): return statements are missing from this chunk;
 * presumably 0 on mismatch, nonzero on match — confirm upstream. */
127 ip_packet_match(const struct iphdr *ip,
130 const struct ipt_ip *ipinfo,
/* FWINV: apply the rule's inversion flag to a boolean test. */
136 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
/* Source/destination address check under their respective netmasks. */
138 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
140 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
142 dprintf("Source or dest mismatch.\n");
144 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
146 NIPQUAD(ipinfo->smsk.s_addr),
147 NIPQUAD(ipinfo->src.s_addr),
148 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
149 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
151 NIPQUAD(ipinfo->dmsk.s_addr),
152 NIPQUAD(ipinfo->dst.s_addr),
153 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
157 /* Look for ifname matches; this should unroll nicely. */
/* Compare interface names word-at-a-time under the rule's mask
 * (names are long-aligned, see nulldevname in ipt_do_table). */
158 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
159 ret |= (((const unsigned long *)indev)[i]
160 ^ ((const unsigned long *)ipinfo->iniface)[i])
161 & ((const unsigned long *)ipinfo->iniface_mask)[i];
164 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
165 dprintf("VIA in mismatch (%s vs %s).%s\n",
166 indev, ipinfo->iniface,
167 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
/* Same word-wise comparison for the outgoing interface. */
171 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
172 ret |= (((const unsigned long *)outdev)[i]
173 ^ ((const unsigned long *)ipinfo->outiface)[i])
174 & ((const unsigned long *)ipinfo->outiface_mask)[i];
177 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
178 dprintf("VIA out mismatch (%s vs %s).%s\n",
179 outdev, ipinfo->outiface,
180 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
184 /* Check specific protocol */
185 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
187 dprintf("Packet protocol %hi does not match %hi.%s\n",
188 ip->protocol, ipinfo->proto,
189 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
193 /* If we have a fragment rule but the packet is not a fragment
194 * then we return zero */
195 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
196 dprintf("Fragment rule but not fragment.%s\n",
197 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
/* ip_checkentry: validate a user-supplied ipt_ip spec — reject any
 * flag or inversion-flag bits outside the known masks. */
205 ip_checkentry(const struct ipt_ip *ip)
207 if (ip->flags & ~IPT_F_MASK) {
208 duprintf("Unknown flag bits set: %08X\n",
209 ip->flags & ~IPT_F_MASK);
212 if (ip->invflags & ~IPT_INV_MASK) {
213 duprintf("Unknown invflag bits set: %08X\n",
214 ip->invflags & ~IPT_INV_MASK);
/* ipt_error: target handler for the built-in ERROR target; logs the
 * error name carried in targinfo. NOTE(review): the return statement
 * is missing from this chunk. */
221 ipt_error(struct sk_buff **pskb,
222 const struct net_device *in,
223 const struct net_device *out,
224 unsigned int hooknum,
225 const void *targinfo,
229 printk("ip_tables: error: `%s'\n", (char *)targinfo);
/* do_match: IPT_MATCH_ITERATE callback — run one match's handler on
 * the packet; a nonzero return stops iteration (i.e. rule mismatch). */
235 int do_match(struct ipt_entry_match *m,
236 const struct sk_buff *skb,
237 const struct net_device *in,
238 const struct net_device *out,
242 /* Stop iteration if it doesn't match */
243 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
/* get_entry - locate the rule entry at a given byte offset.
 * @base:   start of the (per-CPU) entries blob
 * @offset: byte offset of the wanted entry within the blob
 *
 * Entries are variable-sized and addressed by byte offset, so this is
 * just pointer arithmetic with a cast.
 */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *blob = base;

	return (struct ipt_entry *)(blob + offset);
}
255 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
/* ipt_do_table: the main rule-traversal engine. Walks this CPU's copy
 * of the table from the hook's entry point, matching each rule and
 * executing its target; maintains a back-pointer "stack" encoded in
 * the entries themselves for RETURN from user-defined chains.
 * NOTE(review): many interior lines (braces, returns, loop head) are
 * missing from this chunk. */
257 ipt_do_table(struct sk_buff **pskb,
259 const struct net_device *in,
260 const struct net_device *out,
261 struct ipt_table *table,
/* Long-aligned so the word-wise ifname compare in ip_packet_match
 * is safe on the empty-name fallback. */
264 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
269 /* Initializing verdict to NF_DROP keeps gcc happy. */
270 unsigned int verdict = NF_DROP;
271 const char *indev, *outdev;
273 struct ipt_entry *e, *back;
276 ip = (*pskb)->nh.iph;
277 datalen = (*pskb)->len - ip->ihl * 4;
278 indev = in ? in->name : nulldevname;
279 outdev = out ? out->name : nulldevname;
280 /* We handle fragments by dealing with the first fragment as
281 * if it was a normal packet. All other fragments are treated
282 * normally, except that they will NEVER match rules that ask
283 * things we don't know, ie. tcp syn flag or ports). If the
284 * rule is also a fragment-specific rule, non-fragments won't
286 offset = ntohs(ip->frag_off) & IP_OFFSET;
/* Read-lock: counters update concurrently on this CPU's copy;
 * writers (rule replacement) take write_lock_bh. */
288 read_lock_bh(&table->lock);
289 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
290 table_base = (void *)table->private->entries
291 + TABLE_OFFSET(table->private, smp_processor_id());
292 e = get_entry(table_base, table->private->hook_entry[hook]);
294 #ifdef CONFIG_NETFILTER_DEBUG
295 /* Check noone else using our table */
296 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
297 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
298 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
301 &((struct ipt_entry *)table_base)->comefrom,
302 ((struct ipt_entry *)table_base)->comefrom);
/* Magic sentinel: marks "table in use by this CPU". */
304 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
307 /* For return from builtin chain */
308 back = get_entry(table_base, table->private->underflow[hook]);
313 (*pskb)->nfcache |= e->nfcache;
314 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
315 struct ipt_entry_target *t;
317 if (IPT_MATCH_ITERATE(e, do_match,
319 offset, &hotdrop) != 0)
/* All matches passed: account the packet before the target runs. */
322 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
324 t = ipt_get_target(e);
325 IP_NF_ASSERT(t->u.kernel.target);
326 /* Standard target? */
327 if (!t->u.kernel.target->target) {
330 v = ((struct ipt_standard_target *)t)->verdict;
332 /* Pop from stack? */
333 if (v != IPT_RETURN) {
/* Negative verdicts encode NF_* values as -(verdict)-1. */
334 verdict = (unsigned)(-v) - 1;
338 back = get_entry(table_base,
343 != (void *)e + e->next_offset) {
344 /* Save old back ptr in next entry */
345 struct ipt_entry *next
346 = (void *)e + e->next_offset;
348 = (void *)back - table_base;
349 /* set back pointer to next entry */
/* Non-negative verdict = jump offset into the blob. */
353 e = get_entry(table_base, v);
355 /* Targets which reenter must return
357 #ifdef CONFIG_NETFILTER_DEBUG
358 ((struct ipt_entry *)table_base)->comefrom
361 verdict = t->u.kernel.target->target(pskb,
367 #ifdef CONFIG_NETFILTER_DEBUG
368 if (((struct ipt_entry *)table_base)->comefrom
370 && verdict == IPT_CONTINUE) {
371 printk("Target %s reentered!\n",
372 t->u.kernel.target->name);
375 ((struct ipt_entry *)table_base)->comefrom
378 /* Target might have changed stuff. */
379 ip = (*pskb)->nh.iph;
380 datalen = (*pskb)->len - ip->ihl * 4;
382 if (verdict == IPT_CONTINUE)
383 e = (void *)e + e->next_offset;
/* Rule did not match: fall through to the next entry. */
391 e = (void *)e + e->next_offset;
395 #ifdef CONFIG_NETFILTER_DEBUG
/* Restore "table free" sentinel before unlocking. */
396 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
398 read_unlock_bh(&table->lock);
400 #ifdef DEBUG_ALLOW_ALL
409 /* If it succeeds, returns element and locks mutex */
/* find_inlist_lock_noload: take ipt_mutex interruptibly, then look up
 * a named element in one of the three registries. On success the
 * mutex is left held for the caller; *error reports the down()
 * failure, if any. */
411 find_inlist_lock_noload(struct list_head *head,
414 struct semaphore *mutex)
419 duprintf("find_inlist: searching for `%s' in %s.\n",
420 name, head == &ipt_target ? "ipt_target"
421 : head == &ipt_match ? "ipt_match"
422 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
425 *error = down_interruptible(mutex);
429 ret = list_named_find(head, name);
/* Without kmod support, lookup cannot autoload — alias straight to
 * the noload variant (prefix argument dropped). */
438 #define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
/* find_inlist_lock: as above, but on lookup failure tries to
 * request_module(prefix+name) once and retries the search. */
441 find_inlist_lock(struct list_head *head,
445 struct semaphore *mutex)
449 ret = find_inlist_lock_noload(head, name, error, mutex);
451 duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
452 request_module("%s%s", prefix, name);
453 ret = find_inlist_lock_noload(head, name, error, mutex);
/* ipt_find_table_lock: look up a table by name (autoloads
 * "iptable_<name>"); leaves ipt_mutex held on success. */
460 static inline struct ipt_table *
461 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
463 return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
/* find_match_lock: look up a match by name (autoloads "ipt_<name>");
 * leaves ipt_mutex held on success. */
466 static inline struct ipt_match *
467 find_match_lock(const char *name, int *error, struct semaphore *mutex)
469 return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
/* ipt_find_target_lock: look up a target by name (autoloads
 * "ipt_<name>"); leaves ipt_mutex held on success. */
473 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
475 return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
478 /* All zeroes == unconditional rule. */
/* unconditional: scan the ipt_ip spec word-by-word; any nonzero word
 * means the rule carries a condition. */
480 unconditional(const struct ipt_ip *ip)
484 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
485 if (((__u32 *)ip)[i])
491 /* Figures out from what hook each rule can be called: returns 0 if
492 there are loops. Puts hook bitmask in comefrom. */
/* mark_source_chains: depth-first walk over each valid hook's chain
 * graph. Recursion is avoided by temporarily stashing back-pointers
 * in e->counters.pcnt (restored to 0 on unwind); bit NF_IP_NUMHOOKS
 * in comefrom marks "currently on the walk stack" for loop detection. */
494 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
498 /* No recursion; use packet counter to save back ptrs (reset
499 to 0 as we leave), and comefrom to save source hook bitmask */
500 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
501 unsigned int pos = newinfo->hook_entry[hook];
503 = (struct ipt_entry *)(newinfo->entries + pos);
505 if (!(valid_hooks & (1 << hook)))
508 /* Set initial back pointer. */
509 e->counters.pcnt = pos;
512 struct ipt_standard_target *t
513 = (void *)ipt_get_target(e);
/* Revisiting an entry still on the stack => chain loop. */
515 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
516 printk("iptables: loop hook %u pos %u %08X.\n",
517 hook, pos, e->comefrom);
521 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
523 /* Unconditional return/END. */
524 if (e->target_offset == sizeof(struct ipt_entry)
525 && (strcmp(t->target.u.user.name,
526 IPT_STANDARD_TARGET) == 0)
528 && unconditional(&e->ip)) {
529 unsigned int oldpos, size;
531 /* Return: backtrack through the last
/* Clear the on-stack bit while unwinding. */
534 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
535 #ifdef DEBUG_IP_FIREWALL_USER
537 & (1 << NF_IP_NUMHOOKS)) {
538 duprintf("Back unset "
/* Pop: recover the saved back-pointer and zero the stash. */
545 pos = e->counters.pcnt;
546 e->counters.pcnt = 0;
548 /* We're at the start. */
552 e = (struct ipt_entry *)
553 (newinfo->entries + pos);
554 } while (oldpos == pos + e->next_offset);
/* Move on to the entry after the one we returned to. */
557 size = e->next_offset;
558 e = (struct ipt_entry *)
559 (newinfo->entries + pos + size);
560 e->counters.pcnt = pos;
563 int newpos = t->verdict;
565 if (strcmp(t->target.u.user.name,
566 IPT_STANDARD_TARGET) == 0
568 /* This a jump; chase it. */
569 duprintf("Jump rule %u -> %u\n",
572 /* ... this is a fallthru */
573 newpos = pos + e->next_offset;
575 e = (struct ipt_entry *)
576 (newinfo->entries + newpos);
577 e->counters.pcnt = pos;
582 duprintf("Finished chain %u\n", hook);
/* cleanup_match: IPT_MATCH_ITERATE callback — destroy one match and
 * drop its module reference. When i is non-NULL it acts as a
 * countdown so only the first *i matches are cleaned (partial-init
 * unwind); i == NULL cleans everything. */
588 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
590 if (i && (*i)-- == 0)
593 if (m->u.kernel.match->destroy)
594 m->u.kernel.match->destroy(m->data,
595 m->u.match_size - sizeof(*m));
596 module_put(m->u.kernel.match->me);
/* standard_check: validate a standard-target entry: exact (aligned)
 * target size, and a verdict that is either a jump offset inside the
 * blob or a legal negative NF_* encoding. */
601 standard_check(const struct ipt_entry_target *t,
602 unsigned int max_offset)
604 struct ipt_standard_target *targ = (void *)t;
606 /* Check standard info. */
608 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
609 duprintf("standard_check: target size %u != %u\n",
611 IPT_ALIGN(sizeof(struct ipt_standard_target)));
/* Non-negative verdict = jump target; must stay inside the blob. */
615 if (targ->verdict >= 0
616 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
617 duprintf("ipt_standard_check: bad verdict (%i)\n",
/* Negative verdict encodes -NF_*-1; reject anything lower. */
622 if (targ->verdict < -NF_MAX_VERDICT - 1) {
623 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
/* check_match: IPT_ENTRY/MATCH_ITERATE callback for rule validation —
 * resolve the named match (may autoload), pin its module, then run
 * its checkentry hook; on checkentry failure the module ref is
 * dropped again. */
631 check_match(struct ipt_entry_match *m,
633 const struct ipt_ip *ip,
634 unsigned int hookmask,
638 struct ipt_match *match;
640 match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
642 duprintf("check_match: `%s' not found\n", m->u.user.name);
/* Hold the match module for as long as the rule exists. */
645 if (!try_module_get(match->me)) {
649 m->u.kernel.match = match;
652 if (m->u.kernel.match->checkentry
653 && !m->u.kernel.match->checkentry(name, ip, m->data,
654 m->u.match_size - sizeof(*m),
656 module_put(m->u.kernel.match->me);
657 duprintf("ip_tables: check failed for `%s'.\n",
658 m->u.kernel.match->name);
/* Built-in standard target; compared by address in check_entry. */
666 static struct ipt_target ipt_standard_target;
/* check_entry: validate one complete rule — its ipt_ip spec, every
 * match, and its target (resolved, module pinned, checkentry run).
 * On any failure, already-checked matches are unwound via
 * cleanup_match with the running count j. */
669 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
672 struct ipt_entry_target *t;
673 struct ipt_target *target;
677 if (!ip_checkentry(&e->ip)) {
678 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
683 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
685 goto cleanup_matches;
687 t = ipt_get_target(e);
688 target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
690 duprintf("check_entry: `%s' not found\n", t->u.user.name);
691 goto cleanup_matches;
/* Hold the target module for the rule's lifetime. */
693 if (!try_module_get(target->me)) {
696 goto cleanup_matches;
698 t->u.kernel.target = target;
/* The built-in standard target gets its dedicated verdict check. */
701 if (t->u.kernel.target == &ipt_standard_target) {
702 if (!standard_check(t, size)) {
704 goto cleanup_matches;
706 } else if (t->u.kernel.target->checkentry
707 && !t->u.kernel.target->checkentry(name, e, t->data,
711 module_put(t->u.kernel.target->me);
712 duprintf("ip_tables: check failed for `%s'.\n",
713 t->u.kernel.target->name);
715 goto cleanup_matches;
/* Error path: undo the j matches initialised so far. */
722 IPT_MATCH_ITERATE(e, cleanup_match, &j);
/* check_entry_size_and_hooks: first-pass walk callback — verify each
 * entry's alignment and minimum size, record which entries coincide
 * with the user-declared hook entry points and underflows, and reset
 * per-entry bookkeeping. */
727 check_entry_size_and_hooks(struct ipt_entry *e,
728 struct ipt_table_info *newinfo,
730 unsigned char *limit,
731 const unsigned int *hook_entries,
732 const unsigned int *underflows,
/* Entry must be properly aligned and leave room before the limit. */
737 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
738 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
739 duprintf("Bad offset %p\n", e);
744 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
745 duprintf("checking: element %p size %u\n",
750 /* Check hooks & underflows */
751 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
752 if ((unsigned char *)e - base == hook_entries[h])
753 newinfo->hook_entry[h] = hook_entries[h];
754 if ((unsigned char *)e - base == underflows[h])
755 newinfo->underflow[h] = underflows[h];
758 /* FIXME: underflows must be unconditional, standard verdicts
759 < 0 (not IPT_RETURN). --RR */
761 /* Clear counters and comefrom */
762 e->counters = ((struct ipt_counters) { 0, 0 });
/* cleanup_entry: IPT_ENTRY_ITERATE callback — tear down one rule:
 * destroy all matches, then the target, dropping module refs.
 * Non-NULL i makes it a countdown so only the first *i entries are
 * cleaned (partial-init unwind). */
770 cleanup_entry(struct ipt_entry *e, unsigned int *i)
772 struct ipt_entry_target *t;
774 if (i && (*i)-- == 0)
777 /* Cleanup all matches */
778 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
779 t = ipt_get_target(e);
780 if (t->u.kernel.target->destroy)
781 t->u.kernel.target->destroy(t->data,
782 t->u.target_size - sizeof(*t));
783 module_put(t->u.kernel.target->me);
787 /* Checks and translates the user-supplied table segment (held in
/* translate_table: full validation pipeline for a new rule blob:
 * offset/size walk, hook-entry/underflow assignment checks, loop
 * detection via mark_source_chains, per-entry checkentry, and finally
 * replication of the blob to every other CPU's slot. */
790 translate_table(const char *name,
791 unsigned int valid_hooks,
792 struct ipt_table_info *newinfo,
795 const unsigned int *hook_entries,
796 const unsigned int *underflows)
801 newinfo->size = size;
802 newinfo->number = number;
804 /* Init all hooks to impossible value. */
805 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
806 newinfo->hook_entry[i] = 0xFFFFFFFF;
807 newinfo->underflow[i] = 0xFFFFFFFF;
810 duprintf("translate_table: size %u\n", newinfo->size);
812 /* Walk through entries, checking offsets. */
813 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
814 check_entry_size_and_hooks,
817 newinfo->entries + size,
818 hook_entries, underflows, &i);
823 duprintf("translate_table: %u not %u entries\n",
828 /* Check hooks all assigned */
829 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
830 /* Only hooks which are valid */
831 if (!(valid_hooks & (1 << i)))
/* 0xFFFFFFFF sentinel means no entry claimed this hook. */
833 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
834 duprintf("Invalid hook entry %u %u\n",
838 if (newinfo->underflow[i] == 0xFFFFFFFF) {
839 duprintf("Invalid underflow %u %u\n",
845 if (!mark_source_chains(newinfo, valid_hooks))
848 /* Finally, each sanity check must pass */
850 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
851 check_entry, name, size, &i);
/* Failure: unwind the i entries that passed check_entry. */
854 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
859 /* And one copy for every other CPU */
860 for (i = 1; i < NR_CPUS; i++) {
861 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
863 SMP_ALIGN(newinfo->size));
/* replace_table: atomically swap a table's private info under the
 * write lock, after verifying the caller's counter count still
 * matches. Returns the old info (for counter harvesting/cleanup) or
 * NULL with *error set. */
869 static struct ipt_table_info *
870 replace_table(struct ipt_table *table,
871 unsigned int num_counters,
872 struct ipt_table_info *newinfo,
875 struct ipt_table_info *oldinfo;
877 #ifdef CONFIG_NETFILTER_DEBUG
879 struct ipt_entry *table_base;
/* Seed every CPU copy's "table free" sentinel (see ipt_do_table). */
882 for (i = 0; i < NR_CPUS; i++) {
884 (void *)newinfo->entries
885 + TABLE_OFFSET(newinfo, i);
887 table_base->comefrom = 0xdead57ac;
892 /* Do the substitution. */
893 write_lock_bh(&table->lock);
894 /* Check inside lock: is the old number correct? */
895 if (num_counters != table->private->number) {
896 duprintf("num_counters != table->private->number (%u/%u)\n",
897 num_counters, table->private->number);
898 write_unlock_bh(&table->lock);
902 oldinfo = table->private;
903 table->private = newinfo;
/* Preserve the bootstrap entry count for module refcount logic. */
904 newinfo->initial_entries = oldinfo->initial_entries;
905 write_unlock_bh(&table->lock);
/* add_entry_to_counter: iterate callback — accumulate one entry's
 * byte/packet counters into the snapshot array at index *i. */
912 add_entry_to_counter(const struct ipt_entry *e,
913 struct ipt_counters total[],
916 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/* get_counters: sum every CPU copy's per-entry counters into one
 * counters[] array (caller holds the table lock for a consistent
 * snapshot). */
923 get_counters(const struct ipt_table_info *t,
924 struct ipt_counters counters[])
929 for (cpu = 0; cpu < NR_CPUS; cpu++) {
931 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
933 add_entry_to_counter,
/* copy_entries_to_user: export the table to userspace — snapshot the
 * summed counters, bulk-copy CPU 0's blob, then patch each copied
 * entry's counters and replace kernel match/target pointers with the
 * user-visible names. */
940 copy_entries_to_user(unsigned int total_size,
941 struct ipt_table *table,
942 void __user *userptr)
944 unsigned int off, num, countersize;
946 struct ipt_counters *counters;
949 /* We need atomic snapshot of counters: rest doesn't change
950 (other than comefrom, which userspace doesn't care
952 countersize = sizeof(struct ipt_counters) * table->private->number;
953 counters = vmalloc(countersize);
955 if (counters == NULL)
958 /* First, sum counters... */
959 memset(counters, 0, countersize);
/* Write lock: stops softirq counter updates during the snapshot. */
960 write_lock_bh(&table->lock);
961 get_counters(table->private, counters);
962 write_unlock_bh(&table->lock);
964 /* ... then copy entire thing from CPU 0... */
965 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
970 /* FIXME: use iterator macros --RR */
971 /* ... then go back and fix counters and names */
972 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
974 struct ipt_entry_match *m;
975 struct ipt_entry_target *t;
977 e = (struct ipt_entry *)(table->private->entries + off);
978 if (copy_to_user(userptr + off
979 + offsetof(struct ipt_entry, counters),
981 sizeof(counters[num])) != 0) {
/* Walk this entry's matches and write each match name back. */
986 for (i = sizeof(struct ipt_entry);
987 i < e->target_offset;
988 i += m->u.match_size) {
991 if (copy_to_user(userptr + off + i
992 + offsetof(struct ipt_entry_match,
994 m->u.kernel.match->name,
995 strlen(m->u.kernel.match->name)+1)
/* Finally the target name. */
1002 t = ipt_get_target(e);
1003 if (copy_to_user(userptr + off + e->target_offset
1004 + offsetof(struct ipt_entry_target,
1006 t->u.kernel.target->name,
1007 strlen(t->u.kernel.target->name)+1) != 0) {
/* get_entries: IPT_SO_GET_ENTRIES back-end — find the table, verify
 * the user's declared size matches, then hand off to
 * copy_entries_to_user. */
1019 get_entries(const struct ipt_get_entries *entries,
1020 struct ipt_get_entries __user *uptr)
1023 struct ipt_table *t;
1025 t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
1027 duprintf("t->private->number = %u\n",
1028 t->private->number);
1029 if (entries->size == t->private->size)
1030 ret = copy_entries_to_user(t->private->size,
1031 t, uptr->entrytable);
1033 duprintf("get_entries: I've got %u not %u!\n",
1040 duprintf("get_entries: Can't find %s!\n",
/* do_replace: IPT_SO_SET_REPLACE back-end — copy the replacement blob
 * from userspace, validate/translate it, atomically swap it into the
 * named table, harvest the old counters back to the user, and free
 * the old ruleset. Error paths unwind in reverse order via the
 * labels at the bottom. */
1047 do_replace(void __user *user, unsigned int len)
1050 struct ipt_replace tmp;
1051 struct ipt_table *t;
1052 struct ipt_table_info *newinfo, *oldinfo;
1053 struct ipt_counters *counters;
1055 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1058 /* Hack: Causes ipchains to give correct error msg --RR */
1059 if (len != sizeof(tmp) + tmp.size)
1060 return -ENOPROTOOPT;
1062 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1063 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
/* One aligned copy of the blob per possible CPU. */
1066 newinfo = vmalloc(sizeof(struct ipt_table_info)
1067 + SMP_ALIGN(tmp.size) * NR_CPUS);
1071 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1077 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters))
;
1082 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1084 ret = translate_table(tmp.name, tmp.valid_hooks,
1085 newinfo, tmp.size, tmp.num_entries,
1086 tmp.hook_entry, tmp.underflow);
1088 goto free_newinfo_counters;
1090 duprintf("ip_tables: Translated table\n");
1092 t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1094 goto free_newinfo_counters_untrans;
1097 if (tmp.valid_hooks != t->valid_hooks) {
1098 duprintf("Valid hook crap: %08X vs %08X\n",
1099 tmp.valid_hooks, t->valid_hooks);
1101 goto free_newinfo_counters_untrans_unlock;
1104 /* Get a reference in advance, we're not allowed fail later */
1105 if (!try_module_get(t->me)) {
1107 goto free_newinfo_counters_untrans_unlock;
1111 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1115 /* Update module usage count based on number of rules */
1116 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1117 oldinfo->number, oldinfo->initial_entries, newinfo->number);
/* NOTE(review): the module_get/module_put bodies of these two
 * conditions are missing from this chunk. */
1118 if ((oldinfo->number > oldinfo->initial_entries) ||
1119 (newinfo->number <= oldinfo->initial_entries))
1121 if ((oldinfo->number > oldinfo->initial_entries) &&
1122 (newinfo->number <= oldinfo->initial_entries))
1125 /* Get the old counters. */
1126 get_counters(oldinfo, counters);
1127 /* Decrease module usage counts and free resource */
1128 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1130 /* Silent error: too late now. */
1131 copy_to_user(tmp.counters, counters,
1132 sizeof(struct ipt_counters) * tmp.num_counters);
1139 free_newinfo_counters_untrans_unlock:
1141 free_newinfo_counters_untrans:
1142 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1143 free_newinfo_counters:
1150 /* We're lazy, and add to the first CPU; overflow works its fey magic
1151 * and everything is OK. */
/* add_counter_to_entry: iterate callback — add the user-supplied
 * delta at index *i onto this entry's counters. */
1153 add_counter_to_entry(struct ipt_entry *e,
1154 const struct ipt_counters addme[],
1158 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1160 (long unsigned int)e->counters.pcnt,
1161 (long unsigned int)e->counters.bcnt,
1162 (long unsigned int)addme[*i].pcnt,
1163 (long unsigned int)addme[*i].bcnt);
1166 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
/* do_add_counters: IPT_SO_SET_ADD_COUNTERS back-end — copy the
 * counters blob from userspace, validate its length and count against
 * the table, then add each delta onto the live entries under the
 * write lock. */
1173 do_add_counters(void __user *user, unsigned int len)
1176 struct ipt_counters_info tmp, *paddc;
1177 struct ipt_table *t;
1180 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
/* Length must exactly cover the header plus num_counters entries. */
1183 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1186 paddc = vmalloc(len);
1190 if (copy_from_user(paddc, user, len) != 0) {
1195 t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1199 write_lock_bh(&t->lock);
1200 if (t->private->number != paddc->num_counters) {
1202 goto unlock_up_free;
1206 IPT_ENTRY_ITERATE(t->private->entries,
1208 add_counter_to_entry,
1212 write_unlock_bh(&t->lock);
/* do_ipt_set_ctl: setsockopt entry point — requires CAP_NET_ADMIN,
 * then dispatches to do_replace or do_add_counters. */
1221 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1225 if (!capable(CAP_NET_ADMIN))
1229 case IPT_SO_SET_REPLACE:
1230 ret = do_replace(user, len);
1233 case IPT_SO_SET_ADD_COUNTERS:
1234 ret = do_add_counters(user, len);
1238 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
/* do_ipt_get_ctl: getsockopt entry point — requires CAP_NET_ADMIN;
 * serves table metadata (IPT_SO_GET_INFO) and the full ruleset
 * (IPT_SO_GET_ENTRIES). */
1246 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1250 if (!capable(CAP_NET_ADMIN))
1254 case IPT_SO_GET_INFO: {
1255 char name[IPT_TABLE_MAXNAMELEN];
1256 struct ipt_table *t;
1258 if (*len != sizeof(struct ipt_getinfo)) {
1259 duprintf("length %u != %u\n", *len,
1260 sizeof(struct ipt_getinfo));
1265 if (copy_from_user(name, user, sizeof(name)) != 0) {
/* Force NUL-termination of the user-supplied table name. */
1269 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1270 t = ipt_find_table_lock(name, &ret, &ipt_mutex);
1272 struct ipt_getinfo info;
1274 info.valid_hooks = t->valid_hooks;
1275 memcpy(info.hook_entry, t->private->hook_entry,
1276 sizeof(info.hook_entry));
1277 memcpy(info.underflow, t->private->underflow,
1278 sizeof(info.underflow));
1279 info.num_entries = t->private->number;
1280 info.size = t->private->size;
1281 strcpy(info.name, name);
1283 if (copy_to_user(user, &info, *len) != 0)
1293 case IPT_SO_GET_ENTRIES: {
1294 struct ipt_get_entries get;
1296 if (*len < sizeof(get)) {
1297 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1299 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1301 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1302 duprintf("get_entries: %u != %u\n", *len,
1303 sizeof(struct ipt_get_entries) + get.size);
1306 ret = get_entries(&get, user);
1311 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1318 /* Registration hooks for targets. */
/* ipt_register_target: add a target to the global registry under
 * ipt_mutex; fails if a target with the same name already exists. */
1320 ipt_register_target(struct ipt_target *target)
1324 ret = down_interruptible(&ipt_mutex);
1328 if (!list_named_insert(&ipt_target, target)) {
1329 duprintf("ipt_register_target: `%s' already in list!\n",
/* ipt_unregister_target: remove a target from the registry. */
1338 ipt_unregister_target(struct ipt_target *target)
1341 LIST_DELETE(&ipt_target, target);
/* ipt_register_match: add a match to the global registry under
 * ipt_mutex; fails if a match with the same name already exists. */
1346 ipt_register_match(struct ipt_match *match)
1350 ret = down_interruptible(&ipt_mutex);
1354 if (!list_named_insert(&ipt_match, match)) {
1355 duprintf("ipt_register_match: `%s' already in list!\n",
/* ipt_unregister_match: remove a match from the registry. */
1365 ipt_unregister_match(struct ipt_match *match)
1368 LIST_DELETE(&ipt_match, match);
/* ipt_register_table: install a new table — allocate per-CPU room,
 * translate/validate its bootstrap ruleset, then publish it in
 * ipt_tables under ipt_mutex. The static `bootstrap` info makes the
 * first replace_table call uniform with later replacements. */
1372 int ipt_register_table(struct ipt_table *table)
1375 struct ipt_table_info *newinfo;
1376 static struct ipt_table_info bootstrap
1377 = { 0, 0, 0, { 0 }, { 0 }, { } };
1379 newinfo = vmalloc(sizeof(struct ipt_table_info)
1380 + SMP_ALIGN(table->table->size) * NR_CPUS);
1384 memcpy(newinfo->entries, table->table->entries, table->table->size);
1386 ret = translate_table(table->name, table->valid_hooks,
1387 newinfo, table->table->size,
1388 table->table->num_entries,
1389 table->table->hook_entry,
1390 table->table->underflow);
1396 ret = down_interruptible(&ipt_mutex);
1402 /* Don't autoload: we'd eat our tail... */
1403 if (list_named_find(&ipt_tables, table->name)) {
1408 /* Simplifies replace_table code. */
1409 table->private = &bootstrap;
1410 if (!replace_table(table, 0, newinfo, &ret))
1413 duprintf("table->private->number = %u\n",
1414 table->private->number);
1416 /* save number of initial entries */
1417 table->private->initial_entries = table->private->number;
1419 table->lock = RW_LOCK_UNLOCKED;
1420 list_prepend(&ipt_tables, table);
/* ipt_unregister_table: remove a table from the registry, tear down
 * every rule (dropping match/target module refs), and free its
 * private info. */
1431 void ipt_unregister_table(struct ipt_table *table)
1434 LIST_DELETE(&ipt_tables, table);
1437 /* Decrease module usage counts and free resources */
1438 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1439 cleanup_entry, NULL);
1440 vfree(table->private);
/* port_match - test whether a port falls inside an inclusive range.
 * @min, @max: range bounds (inclusive)
 * @port:      port to test
 * @invert:    nonzero flips the result
 *
 * Returns 1 if the port is matched by the range, 0 otherwise.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (port >= min) && (port <= max);

	return in_range ^ invert;
}
/* tcp_find_option: scan a TCP header's options block for one option
 * kind, honouring invert. Copies the option bytes out of the skb
 * first; a short packet is treated as evil and (per the missing
 * lines) flagged for drop. */
1454 tcp_find_option(u_int8_t option,
1455 const struct sk_buff *skb,
1456 unsigned int optlen,
1460 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1461 char opt[60 - sizeof(struct tcphdr)];
1464 duprintf("tcp_match: finding option\n");
1465 /* If we don't have the whole header, drop packet. */
1466 if (skb_copy_bits(skb, skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
/* Walk TLV options: kind < 2 are single-byte (EOL/NOP); otherwise
 * advance by the length byte (minimum 1 to avoid an infinite loop). */
1472 for (i = 0; i < optlen; ) {
1473 if (opt[i] == option) return !invert;
1474 if (opt[i] < 2) i++;
1475 else i += opt[i+1]?:1;
/* tcp_match: the built-in `tcp` match — checks source/destination
 * port ranges, TCP flags under mask, and optionally a TCP option,
 * with per-check inversion. Fragments past offset 0 cannot be
 * inspected; offset-1 fragments are treated as hostile. */
1482 tcp_match(const struct sk_buff *skb,
1483 const struct net_device *in,
1484 const struct net_device *out,
1485 const void *matchinfo,
1490 const struct ipt_tcp *tcpinfo = matchinfo;
1495 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1496 causes this. Its a cracker trying to break in by doing a
1497 flag overwrite to pass the direction checks.
1500 duprintf("Dropping evil TCP offset=1 frag.\n");
1503 /* Must not be a fragment. */
/* FWINVTCP: apply this match's inversion flag to a boolean test. */
1507 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1509 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
1510 /* We've been asked to examine this packet, and we
1511 can't. Hence, no choice but to drop. */
1512 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1517 if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1519 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1521 if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1523 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
/* Byte 13 of the TCP header holds the flag bits. */
1525 if (!FWINVTCP((((unsigned char *)&tcph)[13] & tcpinfo->flg_mask)
1526 == tcpinfo->flg_cmp,
1529 if (tcpinfo->option) {
/* doff smaller than the fixed header is malformed. */
1530 if (tcph.doff * 4 < sizeof(tcph)) {
1534 if (!tcp_find_option(tcpinfo->option, skb, tcph.doff*4 - sizeof(tcph),
1535 tcpinfo->invflags & IPT_TCP_INV_OPTION,
1542 /* Called when user tries to insert an entry of this type. */
/* tcp_checkentry: accept the rule only if it is restricted to
 * proto TCP (not inverted), carries a correctly-sized ipt_tcp spec,
 * and uses no unknown inversion flags. */
1544 tcp_checkentry(const char *tablename,
1545 const struct ipt_ip *ip,
1547 unsigned int matchsize,
1548 unsigned int hook_mask)
1550 const struct ipt_tcp *tcpinfo = matchinfo;
1552 /* Must specify proto == TCP, and no unknown invflags */
1553 return ip->proto == IPPROTO_TCP
1554 && !(ip->invflags & IPT_INV_PROTO)
1555 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1556 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
/* udp_match: the built-in `udp` match — checks source and destination
 * port ranges with optional inversion; non-first fragments cannot be
 * inspected, and too-short packets are flagged for drop. */
1560 udp_match(const struct sk_buff *skb,
1561 const struct net_device *in,
1562 const struct net_device *out,
1563 const void *matchinfo,
1568 const struct ipt_udp *udpinfo = matchinfo;
1570 /* Must not be a fragment. */
1574 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
1575 /* We've been asked to examine this packet, and we
1576 can't. Hence, no choice but to drop. */
1577 duprintf("Dropping evil UDP tinygram.\n");
1582 return port_match(udpinfo->spts[0], udpinfo->spts[1],
1584 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1585 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1587 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1590 /* Called when user tries to insert an entry of this type. */
/* Validates a user-supplied UDP match rule, mirroring tcp_checkentry
 * but with a duprintf diagnostic for each failure mode: wrong
 * protocol / inverted protocol, wrong match size, or unknown invert
 * flags.
 * NOTE(review): the listing elides lines here (return type, the
 * matchinfo parameter, and the return statements of each branch are
 * not shown) — confirm against the full file. */
1592 udp_checkentry(const char *tablename,
1593 const struct ipt_ip *ip,
1595 unsigned int matchinfosize,
1596 unsigned int hook_mask)
1598 const struct ipt_udp *udpinfo = matchinfo;
1600 /* Must specify proto == UDP, and no unknown invflags */
1601 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1602 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1606 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1607 duprintf("ipt_udp: matchsize %u != %u\n",
1608 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1611 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1612 duprintf("ipt_udp: unknown flags %X\n",
1620 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
/* test_type == 0xFF acts as a wildcard that matches any ICMP type;
 * otherwise the type must match exactly and the code must fall in
 * [min_code, max_code].
 * NOTE(review): the listing elides the tail of this function (the
 * return type, an `invert` parameter, and the trailing XOR with the
 * invert flag are not shown) — confirm against the full file. */
1622 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1623 u_int8_t type, u_int8_t code,
1626 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
/* Match function for the built-in "icmp" match: copies the ICMP
 * header out of the skb (dropping the packet if it is too short) and
 * delegates to icmp_type_code_match with the configured type, code
 * range, and invert flag.
 * NOTE(review): the listing elides lines here (return type, trailing
 * parameters, the fragment check, the drop path, and part of the
 * icmp_type_code_match argument list are not shown). */
1631 icmp_match(const struct sk_buff *skb,
1632 const struct net_device *in,
1633 const struct net_device *out,
1634 const void *matchinfo,
1638 struct icmphdr icmph;
1639 const struct ipt_icmp *icmpinfo = matchinfo;
1641 /* Must not be a fragment. */
/* Fails (< 0) when the packet is shorter than a full ICMP header. */
1645 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph)) < 0){
1646 /* We've been asked to examine this packet, and we
1647 can't. Hence, no choice but to drop. */
1648 duprintf("Dropping evil ICMP tinygram.\n");
1653 return icmp_type_code_match(icmpinfo->type,
1656 icmph.type, icmph.code,
1657 !!(icmpinfo->invflags&IPT_ICMP_INV));
1660 /* Called when user tries to insert an entry of this type. */
/* Validates a user-supplied ICMP match rule: proto must be ICMP
 * without the protocol-invert flag, the match size must equal the
 * aligned size of struct ipt_icmp, and only the IPT_ICMP_INV invert
 * flag may be set.
 * NOTE(review): the listing elides lines here (the return type, the
 * matchinfo parameter, and the closing brace are not shown). */
1662 icmp_checkentry(const char *tablename,
1663 const struct ipt_ip *ip,
1665 unsigned int matchsize,
1666 unsigned int hook_mask)
1668 const struct ipt_icmp *icmpinfo = matchinfo;
1670 /* Must specify proto == ICMP, and no unknown invflags */
1671 return ip->proto == IPPROTO_ICMP
1672 && !(ip->invflags & IPT_INV_PROTO)
1673 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1674 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1677 /* The built-in targets: standard (NULL) and error. */
/* Standard target (ACCEPT/DROP/RETURN/jump verdicts); registered
 * unconditionally at module init.
 * NOTE(review): the listing elides the .target member and closing
 * brace of this initializer. */
1678 static struct ipt_target ipt_standard_target = {
1679 .name = IPT_STANDARD_TARGET,
/* Error target: placed at the end of each chain so a ruleset that
 * falls off the end hits ipt_error. Registered at module init.
 * NOTE(review): the listing elides the closing brace of this
 * initializer. */
1682 static struct ipt_target ipt_error_target = {
1683 .name = IPT_ERROR_TARGET,
1684 .target = ipt_error,
/* setsockopt/getsockopt hooks through which userspace iptables
 * loads and reads rulesets; both set and get share the IPT_BASE_CTL
 * option range.
 * NOTE(review): the listing elides the .pf member and closing brace
 * of this initializer. */
1687 static struct nf_sockopt_ops ipt_sockopts = {
1689 .set_optmin = IPT_BASE_CTL,
1690 .set_optmax = IPT_SO_SET_MAX+1,
1691 .set = do_ipt_set_ctl,
1692 .get_optmin = IPT_BASE_CTL,
1693 .get_optmax = IPT_SO_GET_MAX+1,
1694 .get = do_ipt_get_ctl,
/* Built-in "tcp" match registration: wires tcp_match/tcp_checkentry
 * into the match list at module init.
 * NOTE(review): the .name member and closing brace are elided in
 * this listing. */
1697 static struct ipt_match tcp_matchstruct = {
1699 .match = &tcp_match,
1700 .checkentry = &tcp_checkentry,
/* Built-in "udp" match registration: wires udp_match/udp_checkentry
 * into the match list at module init.
 * NOTE(review): the .name member and closing brace are elided in
 * this listing. */
1703 static struct ipt_match udp_matchstruct = {
1705 .match = &udp_match,
1706 .checkentry = &udp_checkentry,
/* Built-in "icmp" match registration: wires icmp_match/
 * icmp_checkentry into the match list at module init.
 * NOTE(review): the .name member and closing brace are elided in
 * this listing. */
1709 static struct ipt_match icmp_matchstruct = {
1711 .match = &icmp_match,
1712 .checkentry = &icmp_checkentry,
1715 #ifdef CONFIG_PROC_FS
/* /proc iteration helper shared by the three ipt_get_* readers:
 * once the running item count passes start_offset, sprintf the name
 * (stored immediately after the struct list_head of each registered
 * object) into the buffer and stop when the buffer would overflow.
 * NOTE(review): the listing elides lines here (parts of the buffer
 * accounting and the return statements are not shown). */
1716 static inline int print_name(const char *i,
1717 off_t start_offset, char *buffer, int length,
1718 off_t *pos, unsigned int *count)
1720 if ((*count)++ >= start_offset) {
1721 unsigned int namelen;
/* The name string is laid out directly after the list_head link. */
1723 namelen = sprintf(buffer + *pos, "%s\n",
1724 i + sizeof(struct list_head));
1725 if (*pos + namelen > length) {
1726 /* Stop iterating */
/* /proc/net/ip_tables_names read handler: walks the registered table
 * list under ipt_mutex and prints one table name per line via
 * print_name.
 * NOTE(review): the listing elides lines here (the `pos` declaration,
 * the up(&ipt_mutex), and the return are not shown). */
1734 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1737 unsigned int count = 0;
/* Bail out with -EINTR semantics if interrupted while waiting. */
1739 if (down_interruptible(&ipt_mutex) != 0)
1742 LIST_FIND(&ipt_tables, print_name, void *,
1743 offset, buffer, length, &pos, &count);
1747 /* `start' hack - see fs/proc/generic.c line ~105 */
1748 *start=(char *)((unsigned long)count-offset);
/* /proc/net/ip_tables_targets read handler: same pattern as
 * ipt_get_tables but walks the registered target list.
 * NOTE(review): the listing elides lines here (the `pos`
 * declaration, the up(&ipt_mutex), and the return are not shown). */
1752 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1755 unsigned int count = 0;
1757 if (down_interruptible(&ipt_mutex) != 0)
1760 LIST_FIND(&ipt_target, print_name, void *,
1761 offset, buffer, length, &pos, &count);
/* `start' hack - same as in ipt_get_tables. */
1765 *start = (char *)((unsigned long)count - offset);
/* /proc/net/ip_tables_matches read handler: same pattern as
 * ipt_get_tables but walks the registered match list.
 * NOTE(review): the listing elides lines here (the `pos`
 * declaration, the up(&ipt_mutex), and the return are not shown). */
1769 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1772 unsigned int count = 0;
1774 if (down_interruptible(&ipt_mutex) != 0)
1777 LIST_FIND(&ipt_match, print_name, void *,
1778 offset, buffer, length, &pos, &count);
/* `start' hack - same as in ipt_get_tables. */
1782 *start = (char *)((unsigned long)count - offset);
/* Table of /proc/net entries created by init() and removed by
 * fini(); iteration stops at a NULL name sentinel.
 * NOTE(review): the terminating { NULL, NULL } entry is elided in
 * this listing — the loops in init()/fini() rely on it. */
1786 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1787 { { "ip_tables_names", ipt_get_tables },
1788 { "ip_tables_targets", ipt_get_targets },
1789 { "ip_tables_matches", ipt_get_matches },
1791 #endif /*CONFIG_PROC_FS*/
/* Module init: registers the built-in targets and matches, the
 * sockopt interface, and (with CONFIG_PROC_FS) the three /proc/net
 * listing files. On proc failure, already-created entries and the
 * sockopt registration are unwound.
 * NOTE(review): the listing elides lines here (`ret`/`i`
 * declarations, the mutex down/up around list_append, the error
 * returns, and the final return 0 are not shown). */
1793 static int __init init(void)
1797 /* Noone else will be downing sem now, so we won't sleep */
1799 list_append(&ipt_target, &ipt_standard_target);
1800 list_append(&ipt_target, &ipt_error_target);
1801 list_append(&ipt_match, &tcp_matchstruct);
1802 list_append(&ipt_match, &udp_matchstruct);
1803 list_append(&ipt_match, &icmp_matchstruct);
1806 /* Register setsockopt */
1807 ret = nf_register_sockopt(&ipt_sockopts);
1809 duprintf("Unable to register sockopts.\n");
1813 #ifdef CONFIG_PROC_FS
1815 struct proc_dir_entry *proc;
1818 for (i = 0; ipt_proc_entry[i].name; i++) {
1819 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1820 ipt_proc_entry[i].get_info);
/* Failure path: remove entries created so far, then drop sockopts. */
1823 proc_net_remove(ipt_proc_entry[i].name);
1824 nf_unregister_sockopt(&ipt_sockopts);
1827 proc->owner = THIS_MODULE;
1832 printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
/* Module exit: unregisters the sockopt interface and removes the
 * /proc/net entries created by init().
 * NOTE(review): the listing elides lines here (the `i` declaration,
 * the #endif, and the closing brace are not shown). */
1836 static void __exit fini(void)
1838 nf_unregister_sockopt(&ipt_sockopts);
1839 #ifdef CONFIG_PROC_FS
1842 for (i = 0; ipt_proc_entry[i].name; i++)
1843 proc_net_remove(ipt_proc_entry[i].name);
/* Public API exported to other modules (e.g. iptable_filter,
 * iptable_nat and match/target extension modules). */
1848 EXPORT_SYMBOL(ipt_register_table);
1849 EXPORT_SYMBOL(ipt_unregister_table);
1850 EXPORT_SYMBOL(ipt_register_match);
1851 EXPORT_SYMBOL(ipt_unregister_match);
1852 EXPORT_SYMBOL(ipt_do_table);
1853 EXPORT_SYMBOL(ipt_register_target);
1854 EXPORT_SYMBOL(ipt_unregister_target);
1855 EXPORT_SYMBOL(ipt_find_target_lock);