2 * Packet matching code.
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
30 #include <linux/netfilter_ipv4/ip_tables.h>
/* NOTE(review): this listing is a truncated excerpt with original line
 * numbers embedded in each line; several lines (e.g. the #else/#endif of
 * the debug macro blocks) are missing — restore from the pristine source
 * before compiling.  The block below declares module metadata, the
 * dprintf/duprintf debug printk macros (compiled out unless the
 * DEBUG_IP_FIREWALL* defines are enabled), the IP_NF_ASSERT debug assert,
 * the SMP cache-line alignment helper, the global ipt_mutex protecting the
 * target/match/table lists, and debug wrappers around down()/up(). */
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
40 #ifdef DEBUG_IP_FIREWALL
41 #define dprintf(format, args...) printk(format , ## args)
43 #define dprintf(format, args...)
46 #ifdef DEBUG_IP_FIREWALL_USER
47 #define duprintf(format, args...) printk(format , ## args)
49 #define duprintf(format, args...)
52 #ifdef CONFIG_NETFILTER_DEBUG
53 #define IP_NF_ASSERT(x) \
56 printk("IP_NF_ASSERT: %s:%s:%u\n", \
57 __FUNCTION__, __FILE__, __LINE__); \
60 #define IP_NF_ASSERT(x)
62 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
64 /* Mutex protects lists (only traversed in user context). */
65 static DECLARE_MUTEX(ipt_mutex);
68 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #include <linux/netfilter_ipv4/lockhelp.h>
71 #include <linux/netfilter_ipv4/listhelp.h>
/* NOTE(review): struct ipt_table_info's opening "struct ... {" line and the
 * closing brace are missing from this truncated listing.  The visible
 * fields show the per-table bookkeeping: entry counts, per-hook entry
 * points and underflows, and a per-CPU array of rule blobs placed at
 * cache-aligned offsets (see TABLE_OFFSET).  The design comment explains
 * why: per-CPU copies avoid write-locking in softirq context. */
74 /* All the better to debug you with... */
80 We keep a set of rules for each CPU, so we can avoid write-locking
81 them in the softirq when updating the counters and therefore
82 only need to read-lock in the softirq; doing a write_lock_bh() in user
83 context stops packets coming through and allows user context to read
84 the counters or update the rules.
86 To be cache friendly on SMP, we arrange them like so:
88 ... cache-align padding ...
91 Hence the start of any table is given by get_table() below. */
93 /* The table itself */
98 /* Number of entries: FIXME. --RR */
100 /* Initial number of entries. Needed for module usage count */
101 unsigned int initial_entries;
103 /* Entry points and underflows */
104 unsigned int hook_entry[NF_IP_NUMHOOKS];
105 unsigned int underflow[NF_IP_NUMHOOKS];
107 /* ipt_entry tables: one per CPU */
108 char entries[0] ____cacheline_aligned;
/* Registered targets, matches and tables — protected by ipt_mutex. */
111 static LIST_HEAD(ipt_target);
112 static LIST_HEAD(ipt_match);
113 static LIST_HEAD(ipt_tables);
114 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
/* Byte offset of CPU p's copy of table t (0 on UP builds — the #ifdef
 * around the two definitions is missing from this listing). */
117 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
119 #define TABLE_OFFSET(t,p) 0
123 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
125 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
/* NOTE(review): truncated excerpt — the signature tail, early returns,
 * closing braces and the final "return 1" are missing from this listing.
 * Matches a packet's IP header against one rule's ipt_ip part:
 * source/dest address under mask, in/out interface names (compared
 * word-at-a-time against a mask so the loop unrolls), protocol, and the
 * fragment flag.  Every check can be inverted via ipinfo->invflags
 * (FWINV macro below). */
128 /* Returns whether matches rule or not. */
130 ip_packet_match(const struct iphdr *ip,
133 const struct ipt_ip *ipinfo,
139 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
141 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
143 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
145 dprintf("Source or dest mismatch.\n");
147 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
149 NIPQUAD(ipinfo->smsk.s_addr),
150 NIPQUAD(ipinfo->src.s_addr),
151 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
152 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
154 NIPQUAD(ipinfo->dmsk.s_addr),
155 NIPQUAD(ipinfo->dst.s_addr),
156 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
160 /* Look for ifname matches; this should unroll nicely. */
161 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
162 ret |= (((const unsigned long *)indev)[i]
163 ^ ((const unsigned long *)ipinfo->iniface)[i])
164 & ((const unsigned long *)ipinfo->iniface_mask)[i];
167 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
168 dprintf("VIA in mismatch (%s vs %s).%s\n",
169 indev, ipinfo->iniface,
170 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
174 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
175 ret |= (((const unsigned long *)outdev)[i]
176 ^ ((const unsigned long *)ipinfo->outiface)[i])
177 & ((const unsigned long *)ipinfo->outiface_mask)[i];
180 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
181 dprintf("VIA out mismatch (%s vs %s).%s\n",
182 outdev, ipinfo->outiface,
183 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
187 /* Check specific protocol */
189 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
190 dprintf("Packet protocol %hi does not match %hi.%s\n",
191 ip->protocol, ipinfo->proto,
192 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
196 /* If we have a fragment rule but the packet is not a fragment
197 * then we return zero */
198 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
199 dprintf("Fragment rule but not fragment.%s\n",
200 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
208 ip_checkentry(const struct ipt_ip *ip)
210 if (ip->flags & ~IPT_F_MASK) {
211 duprintf("Unknown flag bits set: %08X\n",
212 ip->flags & ~IPT_F_MASK);
215 if (ip->invflags & ~IPT_INV_MASK) {
216 duprintf("Unknown invflag bits set: %08X\n",
217 ip->invflags & ~IPT_INV_MASK);
224 ipt_error(struct sk_buff **pskb,
225 const struct net_device *in,
226 const struct net_device *out,
227 unsigned int hooknum,
228 const void *targinfo,
232 printk("ip_tables: error: `%s'\n", (char *)targinfo);
238 int do_match(struct ipt_entry_match *m,
239 const struct sk_buff *skb,
240 const struct net_device *in,
241 const struct net_device *out,
245 /* Stop iteration if it doesn't match */
246 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
/* Translate a byte offset within a table blob into an entry pointer.
 * (Reconstructed: braces were missing from the truncated listing.) */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}
/* NOTE(review): truncated excerpt — the signature tail, several locals,
 * loop head ("do { ... } while (!hotdrop)" in the pristine source),
 * closing braces and the final return are missing.  This is the packet
 * walker: under the table read-lock it starts at this hook's entry point
 * in the current CPU's rule copy, matches each rule, bumps its counters,
 * and interprets the target — standard verdicts pop/push a back-pointer
 * stack for user-defined chains, other targets are invoked directly.
 * CONFIG_NETFILTER_DEBUG sections stamp "comefrom" cookies to detect
 * concurrent use of the same per-CPU copy. */
258 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
260 ipt_do_table(struct sk_buff **pskb,
262 const struct net_device *in,
263 const struct net_device *out,
264 struct ipt_table *table,
267 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
272 /* Initializing verdict to NF_DROP keeps gcc happy. */
273 unsigned int verdict = NF_DROP;
274 const char *indev, *outdev;
276 struct ipt_entry *e, *back;
279 ip = (*pskb)->nh.iph;
280 datalen = (*pskb)->len - ip->ihl * 4;
281 indev = in ? in->name : nulldevname;
282 outdev = out ? out->name : nulldevname;
283 /* We handle fragments by dealing with the first fragment as
284 * if it was a normal packet. All other fragments are treated
285 * normally, except that they will NEVER match rules that ask
286 * things we don't know, ie. tcp syn flag or ports). If the
287 * rule is also a fragment-specific rule, non-fragments won't
289 offset = ntohs(ip->frag_off) & IP_OFFSET;
291 read_lock_bh(&table->lock);
292 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
293 table_base = (void *)table->private->entries
294 + TABLE_OFFSET(table->private, smp_processor_id());
295 e = get_entry(table_base, table->private->hook_entry[hook]);
297 #ifdef CONFIG_NETFILTER_DEBUG
298 /* Check noone else using our table */
299 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
300 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
301 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
304 &((struct ipt_entry *)table_base)->comefrom,
305 ((struct ipt_entry *)table_base)->comefrom);
307 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
310 /* For return from builtin chain */
311 back = get_entry(table_base, table->private->underflow[hook]);
316 (*pskb)->nfcache |= e->nfcache;
317 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
318 struct ipt_entry_target *t;
320 if (IPT_MATCH_ITERATE(e, do_match,
322 offset, &hotdrop) != 0)
325 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
327 t = ipt_get_target(e);
328 IP_NF_ASSERT(t->u.kernel.target);
329 /* Standard target? */
330 if (!t->u.kernel.target->target) {
333 v = ((struct ipt_standard_target *)t)->verdict;
335 /* Pop from stack? */
336 if (v != IPT_RETURN) {
337 verdict = (unsigned)(-v) - 1;
341 back = get_entry(table_base,
346 != (void *)e + e->next_offset) {
347 /* Save old back ptr in next entry */
348 struct ipt_entry *next
349 = (void *)e + e->next_offset;
351 = (void *)back - table_base;
352 /* set back pointer to next entry */
356 e = get_entry(table_base, v);
358 /* Targets which reenter must return
360 #ifdef CONFIG_NETFILTER_DEBUG
361 ((struct ipt_entry *)table_base)->comefrom
364 verdict = t->u.kernel.target->target(pskb,
370 #ifdef CONFIG_NETFILTER_DEBUG
371 if (((struct ipt_entry *)table_base)->comefrom
373 && verdict == IPT_CONTINUE) {
374 printk("Target %s reentered!\n",
375 t->u.kernel.target->name);
378 ((struct ipt_entry *)table_base)->comefrom
381 /* Target might have changed stuff. */
382 ip = (*pskb)->nh.iph;
383 datalen = (*pskb)->len - ip->ihl * 4;
385 if (verdict == IPT_CONTINUE)
386 e = (void *)e + e->next_offset;
394 e = (void *)e + e->next_offset;
398 #ifdef CONFIG_NETFILTER_DEBUG
399 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
401 read_unlock_bh(&table->lock);
403 #ifdef DEBUG_ALLOW_ALL
412 /* If it succeeds, returns element and locks mutex */
414 find_inlist_lock_noload(struct list_head *head,
417 struct semaphore *mutex)
422 duprintf("find_inlist: searching for `%s' in %s.\n",
423 name, head == &ipt_target ? "ipt_target"
424 : head == &ipt_match ? "ipt_match"
425 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
428 *error = down_interruptible(mutex);
432 ret = list_named_find(head, name);
/* NOTE(review): gappy excerpt of the CONFIG_KMOD split (the #ifndef/#else/
 * #endif lines are missing).  Without kmod, find_inlist_lock is just an
 * alias for the no-load lookup (the module-name prefix argument is
 * dropped); with kmod, a failed lookup triggers request_module() with
 * "prefix + name" and retries once. */
441 #define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
444 find_inlist_lock(struct list_head *head,
448 struct semaphore *mutex)
452 ret = find_inlist_lock_noload(head, name, error, mutex);
/* First lookup failed: try to autoload the module, then retry. */
454 duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
455 request_module("%s%s", prefix, name);
456 ret = find_inlist_lock_noload(head, name, error, mutex);
463 static inline struct ipt_table *
464 find_table_lock(const char *name, int *error, struct semaphore *mutex)
466 return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
469 static inline struct ipt_match *
470 find_match_lock(const char *name, int *error, struct semaphore *mutex)
472 return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
475 static inline struct ipt_target *
476 find_target_lock(const char *name, int *error, struct semaphore *mutex)
478 return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
481 /* All zeroes == unconditional rule. */
483 unconditional(const struct ipt_ip *ip)
487 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
488 if (((__u32 *)ip)[i])
/* NOTE(review): truncated excerpt — loop heads, several braces, the
 * "goto next" style labels and the final "return 1" are missing.  This
 * is the chain-graph walker run at table load: starting from each valid
 * hook entry point it follows rules, reusing counters.pcnt as a saved
 * back-pointer and comefrom as a visited/source-hook bitmask.  Detecting
 * a rule whose comefrom already carries the "currently on stack" bit
 * (1 << NF_IP_NUMHOOKS) means a loop — return 0 to reject the table.
 * Unconditional standard-target RETURNs backtrack; jumps are chased;
 * everything else falls through to the next rule. */
494 /* Figures out from what hook each rule can be called: returns 0 if
495 there are loops. Puts hook bitmask in comefrom. */
497 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
501 /* No recursion; use packet counter to save back ptrs (reset
502 to 0 as we leave), and comefrom to save source hook bitmask */
503 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
504 unsigned int pos = newinfo->hook_entry[hook];
506 = (struct ipt_entry *)(newinfo->entries + pos);
508 if (!(valid_hooks & (1 << hook)))
511 /* Set initial back pointer. */
512 e->counters.pcnt = pos;
515 struct ipt_standard_target *t
516 = (void *)ipt_get_target(e);
518 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
519 printk("iptables: loop hook %u pos %u %08X.\n",
520 hook, pos, e->comefrom);
524 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
526 /* Unconditional return/END. */
527 if (e->target_offset == sizeof(struct ipt_entry)
528 && (strcmp(t->target.u.user.name,
529 IPT_STANDARD_TARGET) == 0)
531 && unconditional(&e->ip)) {
532 unsigned int oldpos, size;
534 /* Return: backtrack through the last
537 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
538 #ifdef DEBUG_IP_FIREWALL_USER
540 & (1 << NF_IP_NUMHOOKS)) {
541 duprintf("Back unset "
548 pos = e->counters.pcnt;
549 e->counters.pcnt = 0;
551 /* We're at the start. */
555 e = (struct ipt_entry *)
556 (newinfo->entries + pos);
557 } while (oldpos == pos + e->next_offset);
560 size = e->next_offset;
561 e = (struct ipt_entry *)
562 (newinfo->entries + pos + size);
563 e->counters.pcnt = pos;
566 int newpos = t->verdict;
568 if (strcmp(t->target.u.user.name,
569 IPT_STANDARD_TARGET) == 0
571 /* This a jump; chase it. */
572 duprintf("Jump rule %u -> %u\n",
575 /* ... this is a fallthru */
576 newpos = pos + e->next_offset;
578 e = (struct ipt_entry *)
579 (newinfo->entries + newpos);
580 e->counters.pcnt = pos;
585 duprintf("Finished chain %u\n", hook);
591 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
593 if (i && (*i)-- == 0)
596 if (m->u.kernel.match->destroy)
597 m->u.kernel.match->destroy(m->data,
598 m->u.match_size - sizeof(*m));
599 module_put(m->u.kernel.match->me);
604 standard_check(const struct ipt_entry_target *t,
605 unsigned int max_offset)
607 struct ipt_standard_target *targ = (void *)t;
609 /* Check standard info. */
611 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
612 duprintf("standard_check: target size %u != %u\n",
614 IPT_ALIGN(sizeof(struct ipt_standard_target)));
618 if (targ->verdict >= 0
619 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
620 duprintf("ipt_standard_check: bad verdict (%i)\n",
625 if (targ->verdict < -NF_MAX_VERDICT - 1) {
626 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
634 check_match(struct ipt_entry_match *m,
636 const struct ipt_ip *ip,
637 unsigned int hookmask,
641 struct ipt_match *match;
643 match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
645 duprintf("check_match: `%s' not found\n", m->u.user.name);
648 if (!try_module_get(match->me)) {
652 m->u.kernel.match = match;
655 if (m->u.kernel.match->checkentry
656 && !m->u.kernel.match->checkentry(name, ip, m->data,
657 m->u.match_size - sizeof(*m),
659 module_put(m->u.kernel.match->me);
660 duprintf("ip_tables: check failed for `%s'.\n",
661 m->u.kernel.match->name);
/* Forward declaration so check_entry can special-case the built-in
 * standard target below. */
669 static struct ipt_target ipt_standard_target;
/* NOTE(review): truncated excerpt — the opening brace, the match-count
 * initialisation ("j = 0"), error-path details and the final returns are
 * missing.  check_entry validates one complete rule at load time: the
 * ipt_ip part, every match (check_match, counting successes in j so the
 * error path can unwind exactly that many), then the target — resolved
 * by name with a pinned module reference, verified either by
 * standard_check (for the standard target) or its own checkentry hook. */
672 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
675 struct ipt_entry_target *t;
676 struct ipt_target *target;
680 if (!ip_checkentry(&e->ip)) {
681 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
686 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
688 goto cleanup_matches;
690 t = ipt_get_target(e);
691 target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
693 duprintf("check_entry: `%s' not found\n", t->u.user.name);
694 goto cleanup_matches;
696 if (!try_module_get(target->me)) {
699 goto cleanup_matches;
701 t->u.kernel.target = target;
704 if (t->u.kernel.target == &ipt_standard_target) {
705 if (!standard_check(t, size)) {
707 goto cleanup_matches;
709 } else if (t->u.kernel.target->checkentry
710 && !t->u.kernel.target->checkentry(name, e, t->data,
714 module_put(t->u.kernel.target->me);
715 duprintf("ip_tables: check failed for `%s'.\n",
716 t->u.kernel.target->name);
718 goto cleanup_matches;
/* Error path: undo the j matches already accepted. */
725 IPT_MATCH_ITERATE(e, cleanup_match, &j);
/* NOTE(review): truncated excerpt — the opening brace, some size-check
 * lines, error returns and the trailing "(*i)++; return 0;" are missing.
 * Per-entry structural validation during translate_table: alignment and
 * bounds of the entry, minimum size (entry + target header), and
 * recording of hook entry/underflow offsets when this entry sits exactly
 * at one of the offsets userspace declared.  Also zeroes the counters. */
730 check_entry_size_and_hooks(struct ipt_entry *e,
731 struct ipt_table_info *newinfo,
733 unsigned char *limit,
734 const unsigned int *hook_entries,
735 const unsigned int *underflows,
740 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
741 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
742 duprintf("Bad offset %p\n", e);
747 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
748 duprintf("checking: element %p size %u\n",
753 /* Check hooks & underflows */
754 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
755 if ((unsigned char *)e - base == hook_entries[h])
756 newinfo->hook_entry[h] = hook_entries[h];
757 if ((unsigned char *)e - base == underflows[h])
758 newinfo->underflow[h] = underflows[h];
761 /* FIXME: underflows must be unconditional, standard verdicts
762 < 0 (not IPT_RETURN). --RR */
764 /* Clear counters and comefrom */
765 e->counters = ((struct ipt_counters) { 0, 0 });
773 cleanup_entry(struct ipt_entry *e, unsigned int *i)
775 struct ipt_entry_target *t;
777 if (i && (*i)-- == 0)
780 /* Cleanup all matches */
781 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
782 t = ipt_get_target(e);
783 if (t->u.kernel.target->destroy)
784 t->u.kernel.target->destroy(t->data,
785 t->u.target_size - sizeof(*t));
786 module_put(t->u.kernel.target->me);
/* NOTE(review): truncated excerpt — the signature's size/number
 * parameters, opening brace, entry-count check body, error returns and
 * the final "return ret;" are missing.  Full pipeline for validating a
 * user-supplied table blob: structural walk (check_entry_size_and_hooks),
 * entry-count check, all-hooks-assigned check, loop detection
 * (mark_source_chains), per-entry semantic checks (check_entry, with
 * unwind via cleanup_entry on failure), then replication of the blob to
 * every other CPU's cache-aligned slot. */
790 /* Checks and translates the user-supplied table segment (held in
793 translate_table(const char *name,
794 unsigned int valid_hooks,
795 struct ipt_table_info *newinfo,
798 const unsigned int *hook_entries,
799 const unsigned int *underflows)
804 newinfo->size = size;
805 newinfo->number = number;
807 /* Init all hooks to impossible value. */
808 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
809 newinfo->hook_entry[i] = 0xFFFFFFFF;
810 newinfo->underflow[i] = 0xFFFFFFFF;
813 duprintf("translate_table: size %u\n", newinfo->size);
815 /* Walk through entries, checking offsets. */
816 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
817 check_entry_size_and_hooks,
820 newinfo->entries + size,
821 hook_entries, underflows, &i);
826 duprintf("translate_table: %u not %u entries\n",
831 /* Check hooks all assigned */
832 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
833 /* Only hooks which are valid */
834 if (!(valid_hooks & (1 << i)))
836 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
837 duprintf("Invalid hook entry %u %u\n",
841 if (newinfo->underflow[i] == 0xFFFFFFFF) {
842 duprintf("Invalid underflow %u %u\n",
848 if (!mark_source_chains(newinfo, valid_hooks))
851 /* Finally, each sanity check must pass */
853 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
854 check_entry, name, size, &i);
/* Failure after i entries passed: unwind exactly those i. */
857 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
862 /* And one copy for every other CPU */
863 for (i = 1; i < NR_CPUS; i++) {
864 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
866 SMP_ALIGN(newinfo->size));
/* NOTE(review): truncated excerpt — the opening brace, debug-loop braces,
 * the *error assignment on the mismatch path, and the final "return
 * oldinfo;" are missing.  Atomically swaps a table's private info under
 * the table write-lock, after verifying the caller's counter count still
 * matches; returns the old info (for counter harvesting and cleanup) or
 * NULL on mismatch.  The debug section pre-stamps each CPU copy's
 * comefrom cookie. */
872 static struct ipt_table_info *
873 replace_table(struct ipt_table *table,
874 unsigned int num_counters,
875 struct ipt_table_info *newinfo,
878 struct ipt_table_info *oldinfo;
880 #ifdef CONFIG_NETFILTER_DEBUG
882 struct ipt_entry *table_base;
885 for (i = 0; i < NR_CPUS; i++) {
887 (void *)newinfo->entries
888 + TABLE_OFFSET(newinfo, i);
890 table_base->comefrom = 0xdead57ac;
895 /* Do the substitution. */
896 write_lock_bh(&table->lock);
897 /* Check inside lock: is the old number correct? */
898 if (num_counters != table->private->number) {
899 duprintf("num_counters != table->private->number (%u/%u)\n",
900 num_counters, table->private->number);
901 write_unlock_bh(&table->lock);
905 oldinfo = table->private;
906 table->private = newinfo;
907 newinfo->initial_entries = oldinfo->initial_entries;
908 write_unlock_bh(&table->lock);
915 add_entry_to_counter(const struct ipt_entry *e,
916 struct ipt_counters total[],
919 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
926 get_counters(const struct ipt_table_info *t,
927 struct ipt_counters counters[])
932 for (cpu = 0; cpu < NR_CPUS; cpu++) {
934 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
936 add_entry_to_counter,
/* NOTE(review): truncated excerpt — the opening brace, error-path gotos,
 * the vfree of the counters buffer and the final return are missing.
 * Copies a table's rules out to userspace: takes an atomic counter
 * snapshot under the write-lock, bulk-copies CPU 0's rule blob, then
 * walks it fixing up each entry's counters and rewriting the
 * kernel-internal match/target pointers back into their user-visible
 * names. */
943 copy_entries_to_user(unsigned int total_size,
944 struct ipt_table *table,
945 void __user *userptr)
947 unsigned int off, num, countersize;
949 struct ipt_counters *counters;
952 /* We need atomic snapshot of counters: rest doesn't change
953 (other than comefrom, which userspace doesn't care
955 countersize = sizeof(struct ipt_counters) * table->private->number;
956 counters = vmalloc(countersize);
958 if (counters == NULL)
961 /* First, sum counters... */
962 memset(counters, 0, countersize);
963 write_lock_bh(&table->lock);
964 get_counters(table->private, counters);
965 write_unlock_bh(&table->lock);
967 /* ... then copy entire thing from CPU 0... */
968 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
973 /* FIXME: use iterator macros --RR */
974 /* ... then go back and fix counters and names */
975 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
977 struct ipt_entry_match *m;
978 struct ipt_entry_target *t;
980 e = (struct ipt_entry *)(table->private->entries + off);
981 if (copy_to_user(userptr + off
982 + offsetof(struct ipt_entry, counters),
984 sizeof(counters[num])) != 0) {
989 for (i = sizeof(struct ipt_entry);
990 i < e->target_offset;
991 i += m->u.match_size) {
994 if (copy_to_user(userptr + off + i
995 + offsetof(struct ipt_entry_match,
997 m->u.kernel.match->name,
998 strlen(m->u.kernel.match->name)+1)
1005 t = ipt_get_target(e);
1006 if (copy_to_user(userptr + off + e->target_offset
1007 + offsetof(struct ipt_entry_target,
1009 t->u.kernel.target->name,
1010 strlen(t->u.kernel.target->name)+1) != 0) {
1022 get_entries(const struct ipt_get_entries *entries,
1023 struct ipt_get_entries __user *uptr)
1026 struct ipt_table *t;
1028 t = find_table_lock(entries->name, &ret, &ipt_mutex);
1030 duprintf("t->private->number = %u\n",
1031 t->private->number);
1032 if (entries->size == t->private->size)
1033 ret = copy_entries_to_user(t->private->size,
1034 t, uptr->entrytable);
1036 duprintf("get_entries: I've got %u not %u!\n",
1043 duprintf("get_entries: Can't find %s!\n",
/* NOTE(review): truncated excerpt — the opening brace, several error
 * returns/labels (vfree of newinfo/counters), the module_put calls on
 * the usage-count branches, and the final return are missing.  This is
 * the IPT_SO_SET_REPLACE back-end: copy in the header and rule blob,
 * allocate per-CPU table info and a counters array, translate/validate
 * the blob, then swap it in via replace_table while adjusting the module
 * usage count, harvesting the old counters back to userspace, and
 * freeing the old table.  Unwind labels handle each failure stage. */
1050 do_replace(void __user *user, unsigned int len)
1053 struct ipt_replace tmp;
1054 struct ipt_table *t;
1055 struct ipt_table_info *newinfo, *oldinfo;
1056 struct ipt_counters *counters;
1058 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1061 /* Hack: Causes ipchains to give correct error msg --RR */
1062 if (len != sizeof(tmp) + tmp.size)
1063 return -ENOPROTOOPT;
1065 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1066 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1069 newinfo = vmalloc(sizeof(struct ipt_table_info)
1070 + SMP_ALIGN(tmp.size) * NR_CPUS)
1074 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1080 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1085 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1087 ret = translate_table(tmp.name, tmp.valid_hooks,
1088 newinfo, tmp.size, tmp.num_entries,
1089 tmp.hook_entry, tmp.underflow);
1091 goto free_newinfo_counters;
1093 duprintf("ip_tables: Translated table\n");
1095 t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1097 goto free_newinfo_counters_untrans;
1100 if (tmp.valid_hooks != t->valid_hooks) {
1101 duprintf("Valid hook crap: %08X vs %08X\n",
1102 tmp.valid_hooks, t->valid_hooks);
1104 goto free_newinfo_counters_untrans_unlock;
1107 /* Get a reference in advance, we're not allowed fail later */
1108 if (!try_module_get(t->me)) {
1110 goto free_newinfo_counters_untrans_unlock;
1114 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1118 /* Update module usage count based on number of rules */
1119 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1120 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1121 if ((oldinfo->number > oldinfo->initial_entries) ||
1122 (newinfo->number <= oldinfo->initial_entries))
1124 if ((oldinfo->number > oldinfo->initial_entries) &&
1125 (newinfo->number <= oldinfo->initial_entries))
1128 /* Get the old counters. */
1129 get_counters(oldinfo, counters);
1130 /* Decrease module usage counts and free resource */
1131 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1133 /* Silent error: too late now. */
1134 copy_to_user(tmp.counters, counters,
1135 sizeof(struct ipt_counters) * tmp.num_counters);
1142 free_newinfo_counters_untrans_unlock:
1144 free_newinfo_counters_untrans:
1145 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1146 free_newinfo_counters:
1153 /* We're lazy, and add to the first CPU; overflow works its fey magic
1154 * and everything is OK. */
1156 add_counter_to_entry(struct ipt_entry *e,
1157 const struct ipt_counters addme[],
1161 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1163 (long unsigned int)e->counters.pcnt,
1164 (long unsigned int)e->counters.bcnt,
1165 (long unsigned int)addme[*i].pcnt,
1166 (long unsigned int)addme[*i].bcnt);
1169 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
/* NOTE(review): truncated excerpt — the opening brace, several error
 * returns, the "goto free" label, vfree(paddc), up(&ipt_mutex) and the
 * final return are missing.  IPT_SO_SET_ADD_COUNTERS back-end: copy in
 * the header and full counter array, validate the declared length and
 * counter count against the live table, then fold the deltas into CPU
 * 0's rule copy under the table write-lock. */
1176 do_add_counters(void __user *user, unsigned int len)
1179 struct ipt_counters_info tmp, *paddc;
1180 struct ipt_table *t;
1183 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1186 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1189 paddc = vmalloc(len);
1193 if (copy_from_user(paddc, user, len) != 0) {
1198 t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1202 write_lock_bh(&t->lock);
1203 if (t->private->number != paddc->num_counters) {
1205 goto unlock_up_free;
1209 IPT_ENTRY_ITERATE(t->private->entries,
1211 add_counter_to_entry,
1215 write_unlock_bh(&t->lock);
1224 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1228 if (!capable(CAP_NET_ADMIN))
1232 case IPT_SO_SET_REPLACE:
1233 ret = do_replace(user, len);
1236 case IPT_SO_SET_ADD_COUNTERS:
1237 ret = do_add_counters(user, len);
1241 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
/* NOTE(review): truncated excerpt — the opening brace, several error
 * assignments/breaks, the up(&ipt_mutex) on the INFO path and the final
 * return are missing.  getsockopt() entry point: requires CAP_NET_ADMIN;
 * IPT_SO_GET_INFO fills an ipt_getinfo from the named table's metadata,
 * IPT_SO_GET_ENTRIES validates the declared size then streams the rules
 * via get_entries. */
1249 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1253 if (!capable(CAP_NET_ADMIN))
1257 case IPT_SO_GET_INFO: {
1258 char name[IPT_TABLE_MAXNAMELEN];
1259 struct ipt_table *t;
1261 if (*len != sizeof(struct ipt_getinfo)) {
1262 duprintf("length %u != %u\n", *len,
1263 sizeof(struct ipt_getinfo));
1268 if (copy_from_user(name, user, sizeof(name)) != 0) {
/* Defensive: terminate even if userspace didn't. */
1272 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1273 t = find_table_lock(name, &ret, &ipt_mutex);
1275 struct ipt_getinfo info;
1277 info.valid_hooks = t->valid_hooks;
1278 memcpy(info.hook_entry, t->private->hook_entry,
1279 sizeof(info.hook_entry));
1280 memcpy(info.underflow, t->private->underflow,
1281 sizeof(info.underflow));
1282 info.num_entries = t->private->number;
1283 info.size = t->private->size;
1284 strcpy(info.name, name);
1286 if (copy_to_user(user, &info, *len) != 0)
1296 case IPT_SO_GET_ENTRIES: {
1297 struct ipt_get_entries get;
1299 if (*len < sizeof(get)) {
1300 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1302 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1304 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1305 duprintf("get_entries: %u != %u\n", *len,
1306 sizeof(struct ipt_get_entries) + get.size);
1309 ret = get_entries(&get, user);
1314 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1321 /* Registration hooks for targets. */
1323 ipt_register_target(struct ipt_target *target)
1327 ret = down_interruptible(&ipt_mutex);
1331 if (!list_named_insert(&ipt_target, target)) {
1332 duprintf("ipt_register_target: `%s' already in list!\n",
1341 ipt_unregister_target(struct ipt_target *target)
1344 LIST_DELETE(&ipt_target, target);
1349 ipt_register_match(struct ipt_match *match)
1353 ret = down_interruptible(&ipt_mutex);
1357 if (!list_named_insert(&ipt_match, match)) {
1358 duprintf("ipt_register_match: `%s' already in list!\n",
1368 ipt_unregister_match(struct ipt_match *match)
1371 LIST_DELETE(&ipt_match, match);
/* NOTE(review): truncated excerpt — the opening brace, -ENOMEM check,
 * error labels (free vmalloc'd newinfo, duplicate-name unwind), the
 * up(&ipt_mutex) and final returns are missing.  Registers a built-in
 * table: allocates per-CPU table info, copies in the table's initial
 * ruleset, validates it via translate_table, rejects duplicate names
 * (no autoload here — that would recurse), installs the ruleset through
 * replace_table with a zero-entry "bootstrap" placeholder, records the
 * initial entry count for module-usage accounting, and links the table
 * into ipt_tables. */
1375 int ipt_register_table(struct ipt_table *table)
1378 struct ipt_table_info *newinfo;
1379 static struct ipt_table_info bootstrap
1380 = { 0, 0, 0, { 0 }, { 0 }, { } };
1382 newinfo = vmalloc(sizeof(struct ipt_table_info)
1383 + SMP_ALIGN(table->table->size) * NR_CPUS);
1387 memcpy(newinfo->entries, table->table->entries, table->table->size);
1389 ret = translate_table(table->name, table->valid_hooks,
1390 newinfo, table->table->size,
1391 table->table->num_entries,
1392 table->table->hook_entry,
1393 table->table->underflow);
1399 ret = down_interruptible(&ipt_mutex);
1405 /* Don't autoload: we'd eat our tail... */
1406 if (list_named_find(&ipt_tables, table->name)) {
1411 /* Simplifies replace_table code. */
1412 table->private = &bootstrap;
1413 if (!replace_table(table, 0, newinfo, &ret))
1416 duprintf("table->private->number = %u\n",
1417 table->private->number);
1419 /* save number of initial entries */
1420 table->private->initial_entries = table->private->number;
1422 table->lock = RW_LOCK_UNLOCKED;
1423 list_prepend(&ipt_tables, table);
1434 void ipt_unregister_table(struct ipt_table *table)
1437 LIST_DELETE(&ipt_tables, table);
1440 /* Decrease module usage counts and free resources */
1441 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1442 cleanup_entry, NULL);
1443 vfree(table->private);
1446 /* Returns 1 if the port is matched by the range, 0 otherwise */
1448 port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
1452 ret = (port >= min && port <= max) ^ invert;
/* NOTE(review): truncated excerpt — the signature's invert/hotdrop
 * parameters, the hotdrop assignment on the short-read path, and the
 * returns are missing.  Scans a TCP header's options area (copied out of
 * the skb) for a given option kind, walking TLVs: kind 0/1 are single
 * bytes, others advance by their length byte (?: 1 guards a zero
 * length).  A short read sets hotdrop — can't examine, so drop. */
1457 tcp_find_option(u_int8_t option,
1458 const struct sk_buff *skb,
1459 unsigned int optlen,
1463 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1464 char opt[60 - sizeof(struct tcphdr)];
1467 duprintf("tcp_match: finding option\n");
1468 /* If we don't have the whole header, drop packet. */
1469 if (skb_copy_bits(skb, skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1475 for (i = 0; i < optlen; ) {
1476 if (opt[i] == option) return !invert;
1477 if (opt[i] < 2) i++;
1478 else i += opt[i+1]?:1;
/* NOTE(review): truncated excerpt — the signature tail (matchinfo,
 * offset, hotdrop), the offset==1 evil-fragment check, the tcph local,
 * several returns and the closing brace are missing.  The TCP match:
 * rejects offset-1 fragments (classic flag-overwrite attack), only
 * examines non-fragments, copies the TCP header out of the skb (short
 * read => hotdrop), then checks source/destination port ranges, the
 * flag mask/compare byte, and optionally a TCP option via
 * tcp_find_option.  All checks honour the TCP-specific invflags. */
1485 tcp_match(const struct sk_buff *skb,
1486 const struct net_device *in,
1487 const struct net_device *out,
1488 const void *matchinfo,
1493 const struct ipt_tcp *tcpinfo = matchinfo;
1498 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1499 causes this. Its a cracker trying to break in by doing a
1500 flag overwrite to pass the direction checks.
1503 duprintf("Dropping evil TCP offset=1 frag.\n");
1506 /* Must not be a fragment. */
1510 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1512 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
1513 /* We've been asked to examine this packet, and we
1514 can't. Hence, no choice but to drop. */
1515 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1520 if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1522 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1524 if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1526 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1528 if (!FWINVTCP((((unsigned char *)&tcph)[13] & tcpinfo->flg_mask)
1529 == tcpinfo->flg_cmp,
1532 if (tcpinfo->option) {
1533 if (tcph.doff * 4 < sizeof(tcph)) {
1537 if (!tcp_find_option(tcpinfo->option, skb, tcph.doff*4 - sizeof(tcph),
1538 tcpinfo->invflags & IPT_TCP_INV_OPTION,
1545 /* Called when user tries to insert an entry of this type. */
1547 tcp_checkentry(const char *tablename,
1548 const struct ipt_ip *ip,
1550 unsigned int matchsize,
1551 unsigned int hook_mask)
1553 const struct ipt_tcp *tcpinfo = matchinfo;
1555 /* Must specify proto == TCP, and no unknown invflags */
1556 return ip->proto == IPPROTO_TCP
1557 && !(ip->invflags & IPT_INV_PROTO)
1558 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1559 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
/* NOTE(review): truncated excerpt — the signature tail (offset,
 * hotdrop), the udph local, the fragment early-return and the hotdrop
 * assignment are missing.  The UDP match: skips fragments, copies the
 * UDP header out of the skb (short read => hotdrop), then requires both
 * the source and destination port to fall in their configured ranges,
 * each invertible via the UDP-specific invflags. */
1563 udp_match(const struct sk_buff *skb,
1564 const struct net_device *in,
1565 const struct net_device *out,
1566 const void *matchinfo,
1571 const struct ipt_udp *udpinfo = matchinfo;
1573 /* Must not be a fragment. */
1577 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
1578 /* We've been asked to examine this packet, and we
1579 can't. Hence, no choice but to drop. */
1580 duprintf("Dropping evil UDP tinygram.\n");
1585 return port_match(udpinfo->spts[0], udpinfo->spts[1],
1587 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1588 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1590 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1593 /* Called when user tries to insert an entry of this type. */
/*
 * udp_checkentry: validate an ipt_udp match at rule-insert time.
 * Unlike tcp_checkentry's single boolean expression, this variant
 * rejects step-by-step with a duprintf diagnostic for each failure:
 * wrong/inverted protocol, wrong matchinfo size, or unknown
 * inversion flags outside IPT_UDP_INV_MASK.
 * NOTE(review): the "return 0;" after each diagnostic and the final
 * "return 1;" are elided in this view.
 */
1595 udp_checkentry(const char *tablename,
1596 const struct ipt_ip *ip,
1598 unsigned int matchinfosize,
1599 unsigned int hook_mask)
1601 const struct ipt_udp *udpinfo = matchinfo;
1603 /* Must specify proto == UDP, and no unknown invflags */
1604 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1605 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1609 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1610 duprintf("ipt_udp: matchsize %u != %u\n",
1611 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1614 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1615 duprintf("ipt_udp: unknown flags %X\n",
1623 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
/*
 * A test_type of 0xFF acts as a wildcard (matches any ICMP type/code);
 * otherwise the type must match exactly and the code must fall within
 * [min_code, max_code].  The result is XORed with the caller's "invert"
 * flag (elided in this view) to implement match inversion.
 */
1625 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1626 u_int8_t type, u_int8_t code,
1629 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
/*
 * icmp_match: per-packet match routine for the built-in "icmp" match.
 * Mirrors udp_match: pull the ICMP header with skb_copy_bits (safe for
 * non-linear skbs), drop undersized packets as hostile, then defer the
 * type/code range comparison (with optional inversion) to
 * icmp_type_code_match.
 * NOTE(review): the offset parameter, fragment-check body and hotdrop
 * assignment are elided in this view.
 */
1634 icmp_match(const struct sk_buff *skb,
1635 const struct net_device *in,
1636 const struct net_device *out,
1637 const void *matchinfo,
1641 struct icmphdr icmph;
1642 const struct ipt_icmp *icmpinfo = matchinfo;
1644 /* Must not be a fragment. */
1648 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph)) < 0){
1649 /* We've been asked to examine this packet, and we
1650 can't. Hence, no choice but to drop. */
1651 duprintf("Dropping evil ICMP tinygram.\n");
1656 return icmp_type_code_match(icmpinfo->type,
1659 icmph.type, icmph.code,
1660 !!(icmpinfo->invflags&IPT_ICMP_INV));
1663 /* Called when user tries to insert an entry of this type. */
/*
 * icmp_checkentry: validate an ipt_icmp match at rule-insert time.
 * Same single-expression style as tcp_checkentry: proto must be ICMP
 * and not inverted, matchsize must equal the aligned ipt_icmp size,
 * and only the IPT_ICMP_INV inversion bit may be set.
 * Returns nonzero to accept, 0 to reject.
 */
1665 icmp_checkentry(const char *tablename,
1666 const struct ipt_ip *ip,
1668 unsigned int matchsize,
1669 unsigned int hook_mask)
1671 const struct ipt_icmp *icmpinfo = matchinfo;
1673 /* Must specify proto == ICMP, and no unknown invflags */
1674 return ip->proto == IPPROTO_ICMP
1675 && !(ip->invflags & IPT_INV_PROTO)
1676 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1677 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1680 /* The built-in targets: standard (NULL) and error. */
/* Standard target: named IPT_STANDARD_TARGET with no .target function
   (handled specially by the traversal code as a verdict/jump). */
1681 static struct ipt_target ipt_standard_target = {
1682 .name = IPT_STANDARD_TARGET,
/* Error target: placed at the end of each chain; hitting it means a
   ruleset inconsistency, handled by ipt_error (defined earlier in the
   file, outside this view). */
1685 static struct ipt_target ipt_error_target = {
1686 .name = IPT_ERROR_TARGET,
1687 .target = ipt_error,
/*
 * Netfilter sockopt registration: routes {set,get}sockopt calls in the
 * range [IPT_BASE_CTL, IPT_SO_{SET,GET}_MAX] to do_ipt_set_ctl /
 * do_ipt_get_ctl (the userspace iptables control interface).
 * The max fields are exclusive upper bounds, hence the +1.
 */
1690 static struct nf_sockopt_ops ipt_sockopts = {
1692 .set_optmin = IPT_BASE_CTL,
1693 .set_optmax = IPT_SO_SET_MAX+1,
1694 .set = do_ipt_set_ctl,
1695 .get_optmin = IPT_BASE_CTL,
1696 .get_optmax = IPT_SO_GET_MAX+1,
1697 .get = do_ipt_get_ctl,
/* Built-in protocol matches, registered unconditionally in init() below.
   Each pairs a per-packet match routine with its checkentry validator.
   (The .name initializers are elided in this view.) */
1700 static struct ipt_match tcp_matchstruct = {
1702 .match = &tcp_match,
1703 .checkentry = &tcp_checkentry,
1706 static struct ipt_match udp_matchstruct = {
1708 .match = &udp_match,
1709 .checkentry = &udp_checkentry,
1712 static struct ipt_match icmp_matchstruct = {
1714 .match = &icmp_match,
1715 .checkentry = &icmp_checkentry,
1718 #ifdef CONFIG_PROC_FS
/*
 * print_name: LIST_FIND callback shared by the three /proc read
 * handlers below.  Each list node stores its name immediately after
 * the embedded struct list_head, hence the pointer arithmetic.
 * Skips entries until *count reaches start_offset (so reads can resume
 * mid-list), then sprintf's one name per line into buffer at *pos.
 * Signals "stop iterating" when the next name would overflow length
 * (the stop/continue return values are elided in this view).
 */
1719 static inline int print_name(const char *i,
1720 off_t start_offset, char *buffer, int length,
1721 off_t *pos, unsigned int *count)
1723 if ((*count)++ >= start_offset) {
1724 unsigned int namelen;
1726 namelen = sprintf(buffer + *pos, "%s\n",
1727 i + sizeof(struct list_head));
1728 if (*pos + namelen > length) {
1729 /* Stop iterating */
/*
 * /proc/net/ip_tables_names read handler: lists registered table names,
 * one per line.  Takes ipt_mutex interruptibly (the lists are only
 * traversed in user context); the -EINTR/return path on failure is
 * elided in this view.
 */
1737 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1740 unsigned int count = 0;
1742 if (down_interruptible(&ipt_mutex) != 0)
1745 LIST_FIND(&ipt_tables, print_name, void *,
1746 offset, buffer, length, &pos, &count);
1750 /* `start' hack - see fs/proc/generic.c line ~105 */
1751 *start=(char *)((unsigned long)count-offset);
/*
 * /proc/net/ip_tables_targets read handler: same pattern as
 * ipt_get_tables, but walks the registered-target list.
 */
1755 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1758 unsigned int count = 0;
1760 if (down_interruptible(&ipt_mutex) != 0)
1763 LIST_FIND(&ipt_target, print_name, void *,
1764 offset, buffer, length, &pos, &count);
/* Same `start' hack as ipt_get_tables (see fs/proc/generic.c). */
1768 *start = (char *)((unsigned long)count - offset);
/*
 * /proc/net/ip_tables_matches read handler: same pattern as
 * ipt_get_tables, but walks the registered-match list.
 */
1772 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1775 unsigned int count = 0;
1777 if (down_interruptible(&ipt_mutex) != 0)
1780 LIST_FIND(&ipt_match, print_name, void *,
1781 offset, buffer, length, &pos, &count);
/* Same `start' hack as ipt_get_tables (see fs/proc/generic.c). */
1785 *start = (char *)((unsigned long)count - offset);
/* Table of /proc/net entries created in init() and removed in fini().
   Both loops iterate until .name is NULL, so the table must be
   NULL-terminated (terminator elided in this view). */
1789 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1790 { { "ip_tables_names", ipt_get_tables },
1791 { "ip_tables_targets", ipt_get_targets },
1792 { "ip_tables_matches", ipt_get_matches },
1794 #endif /*CONFIG_PROC_FS*/
/*
 * Module init: register the built-in targets/matches on the global
 * lists (safe without taking ipt_mutex -- nothing else can hold it
 * yet), register the iptables sockopt interface, then create the
 * /proc/net entries, unwinding already-created entries and the
 * sockopt registration if one fails.
 * NOTE(review): error returns and loop bodies are partially elided in
 * this view.  The printk year range ("2000-2002") disagrees with the
 * file header's copyright ("2000-2004") -- likely stale; confirm
 * against upstream before changing the message.
 */
1796 static int __init init(void)
1800 /* Noone else will be downing sem now, so we won't sleep */
1802 list_append(&ipt_target, &ipt_standard_target);
1803 list_append(&ipt_target, &ipt_error_target);
1804 list_append(&ipt_match, &tcp_matchstruct);
1805 list_append(&ipt_match, &udp_matchstruct);
1806 list_append(&ipt_match, &icmp_matchstruct);
1809 /* Register setsockopt */
1810 ret = nf_register_sockopt(&ipt_sockopts);
1812 duprintf("Unable to register sockopts.\n");
1816 #ifdef CONFIG_PROC_FS
1818 struct proc_dir_entry *proc;
1821 for (i = 0; ipt_proc_entry[i].name; i++) {
1822 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1823 ipt_proc_entry[i].get_info);
/* Creation failed: remove the entries created so far and undo the
   sockopt registration before bailing out. */
1826 proc_net_remove(ipt_proc_entry[i].name);
1827 nf_unregister_sockopt(&ipt_sockopts);
/* Pin the proc entry to this module so it cannot outlive us. */
1830 proc->owner = THIS_MODULE;
1835 printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
/*
 * Module exit: tear down in reverse order of init() -- unregister the
 * sockopt interface, then remove every /proc/net entry.
 */
1839 static void __exit fini(void)
1841 nf_unregister_sockopt(&ipt_sockopts);
1842 #ifdef CONFIG_PROC_FS
1845 for (i = 0; ipt_proc_entry[i].name; i++)
1846 proc_net_remove(ipt_proc_entry[i].name);
/* Public API exported to other modules (table/match/target registration
   plus the core ipt_do_table traversal entry point). */
1851 EXPORT_SYMBOL(ipt_register_table);
1852 EXPORT_SYMBOL(ipt_unregister_table);
1853 EXPORT_SYMBOL(ipt_register_match);
1854 EXPORT_SYMBOL(ipt_unregister_match);
1855 EXPORT_SYMBOL(ipt_do_table);
1856 EXPORT_SYMBOL(ipt_register_target);
1857 EXPORT_SYMBOL(ipt_unregister_target);