/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 * 	- increase module usage count as soon as we have rules inside
 * 	  a table
 */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
30 #include <linux/netfilter_ipv4/ip_tables.h>
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
/* Kernel-side debug printouts: compiled out entirely unless the
 * corresponding DEBUG_* macro above is enabled. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...) printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif
/* Soft assertion: logs location on failure instead of oopsing.
 * No-op when netfilter debugging is disabled. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
62 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
64 static DECLARE_MUTEX(ipt_mutex);
67 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
68 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #include <linux/netfilter_ipv4/lockhelp.h>
70 #include <linux/netfilter_ipv4/listhelp.h>
#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif
/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   To be cache friendly on SMP, we arrange them like so:
   [ n-entries ]
   ... cache-align padding ...
   [ n-entries ]

   Hence the start of any table is given by get_table() below.  */
92 /* The table itself */
97 /* Number of entries: FIXME. --RR */
99 /* Initial number of entries. Needed for module usage count */
100 unsigned int initial_entries;
102 /* Entry points and underflows */
103 unsigned int hook_entry[NF_IP_NUMHOOKS];
104 unsigned int underflow[NF_IP_NUMHOOKS];
106 /* ipt_entry tables: one per CPU */
107 char entries[0] ____cacheline_aligned;
/* Registries of targets, matches and tables; all protected by ipt_mutex. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset of CPU p's private copy of the rule blob within a table. */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif
/* Noisy wrappers around the mutex primitives; kept disabled since the
 * self-referential expansion only works when enabled deliberately. */
#if 0
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif
127 /* Returns whether matches rule or not. */
129 ip_packet_match(const struct iphdr *ip,
132 const struct ipt_ip *ipinfo,
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
140 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
142 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
144 dprintf("Source or dest mismatch.\n");
146 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
148 NIPQUAD(ipinfo->smsk.s_addr),
149 NIPQUAD(ipinfo->src.s_addr),
150 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
153 NIPQUAD(ipinfo->dmsk.s_addr),
154 NIPQUAD(ipinfo->dst.s_addr),
155 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
159 /* Look for ifname matches; this should unroll nicely. */
160 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161 ret |= (((const unsigned long *)indev)[i]
162 ^ ((const unsigned long *)ipinfo->iniface)[i])
163 & ((const unsigned long *)ipinfo->iniface_mask)[i];
166 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167 dprintf("VIA in mismatch (%s vs %s).%s\n",
168 indev, ipinfo->iniface,
169 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
173 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174 ret |= (((const unsigned long *)outdev)[i]
175 ^ ((const unsigned long *)ipinfo->outiface)[i])
176 & ((const unsigned long *)ipinfo->outiface_mask)[i];
179 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180 dprintf("VIA out mismatch (%s vs %s).%s\n",
181 outdev, ipinfo->outiface,
182 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
186 /* Check specific protocol */
188 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189 dprintf("Packet protocol %hi does not match %hi.%s\n",
190 ip->protocol, ipinfo->proto,
191 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
195 /* If we have a fragment rule but the packet is not a fragment
196 * then we return zero */
197 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198 dprintf("Fragment rule but not fragment.%s\n",
199 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
207 ip_checkentry(const struct ipt_ip *ip)
209 if (ip->flags & ~IPT_F_MASK) {
210 duprintf("Unknown flag bits set: %08X\n",
211 ip->flags & ~IPT_F_MASK);
214 if (ip->invflags & ~IPT_INV_MASK) {
215 duprintf("Unknown invflag bits set: %08X\n",
216 ip->invflags & ~IPT_INV_MASK);
223 ipt_error(struct sk_buff **pskb,
224 const struct net_device *in,
225 const struct net_device *out,
226 unsigned int hooknum,
227 const void *targinfo,
231 printk("ip_tables: error: `%s'\n", (char *)targinfo);
237 int do_match(struct ipt_entry_match *m,
238 const struct sk_buff *skb,
239 const struct net_device *in,
240 const struct net_device *out,
244 /* Stop iteration if it doesn't match */
245 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
/* Returns the rule entry at a byte offset into a table's rule blob. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
259 ipt_do_table(struct sk_buff **pskb,
261 const struct net_device *in,
262 const struct net_device *out,
263 struct ipt_table *table,
266 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
271 /* Initializing verdict to NF_DROP keeps gcc happy. */
272 unsigned int verdict = NF_DROP;
273 const char *indev, *outdev;
275 struct ipt_entry *e, *back;
278 ip = (*pskb)->nh.iph;
279 datalen = (*pskb)->len - ip->ihl * 4;
280 indev = in ? in->name : nulldevname;
281 outdev = out ? out->name : nulldevname;
282 /* We handle fragments by dealing with the first fragment as
283 * if it was a normal packet. All other fragments are treated
284 * normally, except that they will NEVER match rules that ask
285 * things we don't know, ie. tcp syn flag or ports). If the
286 * rule is also a fragment-specific rule, non-fragments won't
288 offset = ntohs(ip->frag_off) & IP_OFFSET;
290 read_lock_bh(&table->lock);
291 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
292 table_base = (void *)table->private->entries
293 + TABLE_OFFSET(table->private, smp_processor_id());
294 e = get_entry(table_base, table->private->hook_entry[hook]);
296 #ifdef CONFIG_NETFILTER_DEBUG
297 /* Check noone else using our table */
298 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
303 &((struct ipt_entry *)table_base)->comefrom,
304 ((struct ipt_entry *)table_base)->comefrom);
306 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
309 /* For return from builtin chain */
310 back = get_entry(table_base, table->private->underflow[hook]);
315 (*pskb)->nfcache |= e->nfcache;
316 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317 struct ipt_entry_target *t;
319 if (IPT_MATCH_ITERATE(e, do_match,
321 offset, &hotdrop) != 0)
324 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
326 t = ipt_get_target(e);
327 IP_NF_ASSERT(t->u.kernel.target);
328 /* Standard target? */
329 if (!t->u.kernel.target->target) {
332 v = ((struct ipt_standard_target *)t)->verdict;
334 /* Pop from stack? */
335 if (v != IPT_RETURN) {
336 verdict = (unsigned)(-v) - 1;
340 back = get_entry(table_base,
345 != (void *)e + e->next_offset) {
346 /* Save old back ptr in next entry */
347 struct ipt_entry *next
348 = (void *)e + e->next_offset;
350 = (void *)back - table_base;
351 /* set back pointer to next entry */
355 e = get_entry(table_base, v);
357 /* Targets which reenter must return
359 #ifdef CONFIG_NETFILTER_DEBUG
360 ((struct ipt_entry *)table_base)->comefrom
363 verdict = t->u.kernel.target->target(pskb,
369 #ifdef CONFIG_NETFILTER_DEBUG
370 if (((struct ipt_entry *)table_base)->comefrom
372 && verdict == IPT_CONTINUE) {
373 printk("Target %s reentered!\n",
374 t->u.kernel.target->name);
377 ((struct ipt_entry *)table_base)->comefrom
380 /* Target might have changed stuff. */
381 ip = (*pskb)->nh.iph;
382 datalen = (*pskb)->len - ip->ihl * 4;
384 if (verdict == IPT_CONTINUE)
385 e = (void *)e + e->next_offset;
393 e = (void *)e + e->next_offset;
397 #ifdef CONFIG_NETFILTER_DEBUG
398 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
400 read_unlock_bh(&table->lock);
402 #ifdef DEBUG_ALLOW_ALL
411 /* If it succeeds, returns element and locks mutex */
413 find_inlist_lock_noload(struct list_head *head,
416 struct semaphore *mutex)
421 duprintf("find_inlist: searching for `%s' in %s.\n",
422 name, head == &ipt_target ? "ipt_target"
423 : head == &ipt_match ? "ipt_match"
424 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
427 *error = down_interruptible(mutex);
431 ret = list_named_find(head, name);
#ifndef CONFIG_KMOD
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
/* As find_inlist_lock_noload, but tries request_module("<prefix><name>")
 * once before the second lookup if the first lookup fails. */
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *ret;

	ret = find_inlist_lock_noload(head, name, error, mutex);
	if (!ret) {
		duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
		request_module("%s%s", prefix, name);
		ret = find_inlist_lock_noload(head, name, error, mutex);
	}
	return ret;
}
#endif
462 static inline struct ipt_table *
463 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
465 return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
468 static inline struct ipt_match *
469 find_match_lock(const char *name, int *error, struct semaphore *mutex)
471 return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
475 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
477 return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
480 /* All zeroes == unconditional rule. */
482 unconditional(const struct ipt_ip *ip)
486 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
487 if (((__u32 *)ip)[i])
493 /* Figures out from what hook each rule can be called: returns 0 if
494 there are loops. Puts hook bitmask in comefrom. */
496 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
500 /* No recursion; use packet counter to save back ptrs (reset
501 to 0 as we leave), and comefrom to save source hook bitmask */
502 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
503 unsigned int pos = newinfo->hook_entry[hook];
505 = (struct ipt_entry *)(newinfo->entries + pos);
507 if (!(valid_hooks & (1 << hook)))
510 /* Set initial back pointer. */
511 e->counters.pcnt = pos;
514 struct ipt_standard_target *t
515 = (void *)ipt_get_target(e);
517 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
518 printk("iptables: loop hook %u pos %u %08X.\n",
519 hook, pos, e->comefrom);
523 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
525 /* Unconditional return/END. */
526 if (e->target_offset == sizeof(struct ipt_entry)
527 && (strcmp(t->target.u.user.name,
528 IPT_STANDARD_TARGET) == 0)
530 && unconditional(&e->ip)) {
531 unsigned int oldpos, size;
533 /* Return: backtrack through the last
536 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
537 #ifdef DEBUG_IP_FIREWALL_USER
539 & (1 << NF_IP_NUMHOOKS)) {
540 duprintf("Back unset "
547 pos = e->counters.pcnt;
548 e->counters.pcnt = 0;
550 /* We're at the start. */
554 e = (struct ipt_entry *)
555 (newinfo->entries + pos);
556 } while (oldpos == pos + e->next_offset);
559 size = e->next_offset;
560 e = (struct ipt_entry *)
561 (newinfo->entries + pos + size);
562 e->counters.pcnt = pos;
565 int newpos = t->verdict;
567 if (strcmp(t->target.u.user.name,
568 IPT_STANDARD_TARGET) == 0
570 /* This a jump; chase it. */
571 duprintf("Jump rule %u -> %u\n",
574 /* ... this is a fallthru */
575 newpos = pos + e->next_offset;
577 e = (struct ipt_entry *)
578 (newinfo->entries + newpos);
579 e->counters.pcnt = pos;
584 duprintf("Finished chain %u\n", hook);
590 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
592 if (i && (*i)-- == 0)
595 if (m->u.kernel.match->destroy)
596 m->u.kernel.match->destroy(m->data,
597 m->u.match_size - sizeof(*m));
598 module_put(m->u.kernel.match->me);
603 standard_check(const struct ipt_entry_target *t,
604 unsigned int max_offset)
606 struct ipt_standard_target *targ = (void *)t;
608 /* Check standard info. */
610 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
611 duprintf("standard_check: target size %u != %u\n",
613 IPT_ALIGN(sizeof(struct ipt_standard_target)));
617 if (targ->verdict >= 0
618 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
619 duprintf("ipt_standard_check: bad verdict (%i)\n",
624 if (targ->verdict < -NF_MAX_VERDICT - 1) {
625 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
633 check_match(struct ipt_entry_match *m,
635 const struct ipt_ip *ip,
636 unsigned int hookmask,
640 struct ipt_match *match;
642 match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
644 duprintf("check_match: `%s' not found\n", m->u.user.name);
647 if (!try_module_get(match->me)) {
651 m->u.kernel.match = match;
654 if (m->u.kernel.match->checkentry
655 && !m->u.kernel.match->checkentry(name, ip, m->data,
656 m->u.match_size - sizeof(*m),
658 module_put(m->u.kernel.match->me);
659 duprintf("ip_tables: check failed for `%s'.\n",
660 m->u.kernel.match->name);
668 static struct ipt_target ipt_standard_target;
671 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
674 struct ipt_entry_target *t;
675 struct ipt_target *target;
679 if (!ip_checkentry(&e->ip)) {
680 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
685 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
687 goto cleanup_matches;
689 t = ipt_get_target(e);
690 target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
692 duprintf("check_entry: `%s' not found\n", t->u.user.name);
693 goto cleanup_matches;
695 if (!try_module_get(target->me)) {
698 goto cleanup_matches;
700 t->u.kernel.target = target;
703 if (t->u.kernel.target == &ipt_standard_target) {
704 if (!standard_check(t, size)) {
706 goto cleanup_matches;
708 } else if (t->u.kernel.target->checkentry
709 && !t->u.kernel.target->checkentry(name, e, t->data,
713 module_put(t->u.kernel.target->me);
714 duprintf("ip_tables: check failed for `%s'.\n",
715 t->u.kernel.target->name);
717 goto cleanup_matches;
724 IPT_MATCH_ITERATE(e, cleanup_match, &j);
729 check_entry_size_and_hooks(struct ipt_entry *e,
730 struct ipt_table_info *newinfo,
732 unsigned char *limit,
733 const unsigned int *hook_entries,
734 const unsigned int *underflows,
739 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
740 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
741 duprintf("Bad offset %p\n", e);
746 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
747 duprintf("checking: element %p size %u\n",
752 /* Check hooks & underflows */
753 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
754 if ((unsigned char *)e - base == hook_entries[h])
755 newinfo->hook_entry[h] = hook_entries[h];
756 if ((unsigned char *)e - base == underflows[h])
757 newinfo->underflow[h] = underflows[h];
760 /* FIXME: underflows must be unconditional, standard verdicts
761 < 0 (not IPT_RETURN). --RR */
763 /* Clear counters and comefrom */
764 e->counters = ((struct ipt_counters) { 0, 0 });
772 cleanup_entry(struct ipt_entry *e, unsigned int *i)
774 struct ipt_entry_target *t;
776 if (i && (*i)-- == 0)
779 /* Cleanup all matches */
780 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
781 t = ipt_get_target(e);
782 if (t->u.kernel.target->destroy)
783 t->u.kernel.target->destroy(t->data,
784 t->u.target_size - sizeof(*t));
785 module_put(t->u.kernel.target->me);
789 /* Checks and translates the user-supplied table segment (held in
792 translate_table(const char *name,
793 unsigned int valid_hooks,
794 struct ipt_table_info *newinfo,
797 const unsigned int *hook_entries,
798 const unsigned int *underflows)
803 newinfo->size = size;
804 newinfo->number = number;
806 /* Init all hooks to impossible value. */
807 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
808 newinfo->hook_entry[i] = 0xFFFFFFFF;
809 newinfo->underflow[i] = 0xFFFFFFFF;
812 duprintf("translate_table: size %u\n", newinfo->size);
814 /* Walk through entries, checking offsets. */
815 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
816 check_entry_size_and_hooks,
819 newinfo->entries + size,
820 hook_entries, underflows, &i);
825 duprintf("translate_table: %u not %u entries\n",
830 /* Check hooks all assigned */
831 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
832 /* Only hooks which are valid */
833 if (!(valid_hooks & (1 << i)))
835 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
836 duprintf("Invalid hook entry %u %u\n",
840 if (newinfo->underflow[i] == 0xFFFFFFFF) {
841 duprintf("Invalid underflow %u %u\n",
847 if (!mark_source_chains(newinfo, valid_hooks))
850 /* Finally, each sanity check must pass */
852 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
853 check_entry, name, size, &i);
856 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
861 /* And one copy for every other CPU */
862 for (i = 1; i < NR_CPUS; i++) {
863 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
865 SMP_ALIGN(newinfo->size));
871 static struct ipt_table_info *
872 replace_table(struct ipt_table *table,
873 unsigned int num_counters,
874 struct ipt_table_info *newinfo,
877 struct ipt_table_info *oldinfo;
879 #ifdef CONFIG_NETFILTER_DEBUG
881 struct ipt_entry *table_base;
884 for (i = 0; i < NR_CPUS; i++) {
886 (void *)newinfo->entries
887 + TABLE_OFFSET(newinfo, i);
889 table_base->comefrom = 0xdead57ac;
894 /* Do the substitution. */
895 write_lock_bh(&table->lock);
896 /* Check inside lock: is the old number correct? */
897 if (num_counters != table->private->number) {
898 duprintf("num_counters != table->private->number (%u/%u)\n",
899 num_counters, table->private->number);
900 write_unlock_bh(&table->lock);
904 oldinfo = table->private;
905 table->private = newinfo;
906 newinfo->initial_entries = oldinfo->initial_entries;
907 write_unlock_bh(&table->lock);
914 add_entry_to_counter(const struct ipt_entry *e,
915 struct ipt_counters total[],
918 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
925 get_counters(const struct ipt_table_info *t,
926 struct ipt_counters counters[])
931 for (cpu = 0; cpu < NR_CPUS; cpu++) {
933 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
935 add_entry_to_counter,
942 copy_entries_to_user(unsigned int total_size,
943 struct ipt_table *table,
944 void __user *userptr)
946 unsigned int off, num, countersize;
948 struct ipt_counters *counters;
951 /* We need atomic snapshot of counters: rest doesn't change
952 (other than comefrom, which userspace doesn't care
954 countersize = sizeof(struct ipt_counters) * table->private->number;
955 counters = vmalloc(countersize);
957 if (counters == NULL)
960 /* First, sum counters... */
961 memset(counters, 0, countersize);
962 write_lock_bh(&table->lock);
963 get_counters(table->private, counters);
964 write_unlock_bh(&table->lock);
966 /* ... then copy entire thing from CPU 0... */
967 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
972 /* FIXME: use iterator macros --RR */
973 /* ... then go back and fix counters and names */
974 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
976 struct ipt_entry_match *m;
977 struct ipt_entry_target *t;
979 e = (struct ipt_entry *)(table->private->entries + off);
980 if (copy_to_user(userptr + off
981 + offsetof(struct ipt_entry, counters),
983 sizeof(counters[num])) != 0) {
988 for (i = sizeof(struct ipt_entry);
989 i < e->target_offset;
990 i += m->u.match_size) {
993 if (copy_to_user(userptr + off + i
994 + offsetof(struct ipt_entry_match,
996 m->u.kernel.match->name,
997 strlen(m->u.kernel.match->name)+1)
1004 t = ipt_get_target(e);
1005 if (copy_to_user(userptr + off + e->target_offset
1006 + offsetof(struct ipt_entry_target,
1008 t->u.kernel.target->name,
1009 strlen(t->u.kernel.target->name)+1) != 0) {
1021 get_entries(const struct ipt_get_entries *entries,
1022 struct ipt_get_entries __user *uptr)
1025 struct ipt_table *t;
1027 t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
1029 duprintf("t->private->number = %u\n",
1030 t->private->number);
1031 if (entries->size == t->private->size)
1032 ret = copy_entries_to_user(t->private->size,
1033 t, uptr->entrytable);
1035 duprintf("get_entries: I've got %u not %u!\n",
1042 duprintf("get_entries: Can't find %s!\n",
1049 do_replace(void __user *user, unsigned int len)
1052 struct ipt_replace tmp;
1053 struct ipt_table *t;
1054 struct ipt_table_info *newinfo, *oldinfo;
1055 struct ipt_counters *counters;
1057 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1060 /* Hack: Causes ipchains to give correct error msg --RR */
1061 if (len != sizeof(tmp) + tmp.size)
1062 return -ENOPROTOOPT;
1064 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1065 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1068 newinfo = vmalloc(sizeof(struct ipt_table_info)
1069 + SMP_ALIGN(tmp.size) * NR_CPUS);
1073 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1079 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1084 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1086 ret = translate_table(tmp.name, tmp.valid_hooks,
1087 newinfo, tmp.size, tmp.num_entries,
1088 tmp.hook_entry, tmp.underflow);
1090 goto free_newinfo_counters;
1092 duprintf("ip_tables: Translated table\n");
1094 t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1096 goto free_newinfo_counters_untrans;
1099 if (tmp.valid_hooks != t->valid_hooks) {
1100 duprintf("Valid hook crap: %08X vs %08X\n",
1101 tmp.valid_hooks, t->valid_hooks);
1103 goto free_newinfo_counters_untrans_unlock;
1106 /* Get a reference in advance, we're not allowed fail later */
1107 if (!try_module_get(t->me)) {
1109 goto free_newinfo_counters_untrans_unlock;
1113 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1117 /* Update module usage count based on number of rules */
1118 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1119 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1120 if ((oldinfo->number > oldinfo->initial_entries) ||
1121 (newinfo->number <= oldinfo->initial_entries))
1123 if ((oldinfo->number > oldinfo->initial_entries) &&
1124 (newinfo->number <= oldinfo->initial_entries))
1127 /* Get the old counters. */
1128 get_counters(oldinfo, counters);
1129 /* Decrease module usage counts and free resource */
1130 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1132 /* Silent error: too late now. */
1133 copy_to_user(tmp.counters, counters,
1134 sizeof(struct ipt_counters) * tmp.num_counters);
1141 free_newinfo_counters_untrans_unlock:
1143 free_newinfo_counters_untrans:
1144 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1145 free_newinfo_counters:
1152 /* We're lazy, and add to the first CPU; overflow works its fey magic
1153 * and everything is OK. */
1155 add_counter_to_entry(struct ipt_entry *e,
1156 const struct ipt_counters addme[],
1160 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1162 (long unsigned int)e->counters.pcnt,
1163 (long unsigned int)e->counters.bcnt,
1164 (long unsigned int)addme[*i].pcnt,
1165 (long unsigned int)addme[*i].bcnt);
1168 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1175 do_add_counters(void __user *user, unsigned int len)
1178 struct ipt_counters_info tmp, *paddc;
1179 struct ipt_table *t;
1182 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1185 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1188 paddc = vmalloc(len);
1192 if (copy_from_user(paddc, user, len) != 0) {
1197 t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1201 write_lock_bh(&t->lock);
1202 if (t->private->number != paddc->num_counters) {
1204 goto unlock_up_free;
1208 IPT_ENTRY_ITERATE(t->private->entries,
1210 add_counter_to_entry,
1214 write_unlock_bh(&t->lock);
1223 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1227 if (!capable(CAP_NET_ADMIN))
1231 case IPT_SO_SET_REPLACE:
1232 ret = do_replace(user, len);
1235 case IPT_SO_SET_ADD_COUNTERS:
1236 ret = do_add_counters(user, len);
1240 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1248 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1252 if (!capable(CAP_NET_ADMIN))
1256 case IPT_SO_GET_INFO: {
1257 char name[IPT_TABLE_MAXNAMELEN];
1258 struct ipt_table *t;
1260 if (*len != sizeof(struct ipt_getinfo)) {
1261 duprintf("length %u != %u\n", *len,
1262 sizeof(struct ipt_getinfo));
1267 if (copy_from_user(name, user, sizeof(name)) != 0) {
1271 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1272 t = ipt_find_table_lock(name, &ret, &ipt_mutex);
1274 struct ipt_getinfo info;
1276 info.valid_hooks = t->valid_hooks;
1277 memcpy(info.hook_entry, t->private->hook_entry,
1278 sizeof(info.hook_entry));
1279 memcpy(info.underflow, t->private->underflow,
1280 sizeof(info.underflow));
1281 info.num_entries = t->private->number;
1282 info.size = t->private->size;
1283 strcpy(info.name, name);
1285 if (copy_to_user(user, &info, *len) != 0)
1295 case IPT_SO_GET_ENTRIES: {
1296 struct ipt_get_entries get;
1298 if (*len < sizeof(get)) {
1299 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1301 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1303 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1304 duprintf("get_entries: %u != %u\n", *len,
1305 sizeof(struct ipt_get_entries) + get.size);
1308 ret = get_entries(&get, user);
1313 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1320 /* Registration hooks for targets. */
1322 ipt_register_target(struct ipt_target *target)
1326 ret = down_interruptible(&ipt_mutex);
1330 if (!list_named_insert(&ipt_target, target)) {
1331 duprintf("ipt_register_target: `%s' already in list!\n",
1340 ipt_unregister_target(struct ipt_target *target)
1343 LIST_DELETE(&ipt_target, target);
1348 ipt_register_match(struct ipt_match *match)
1352 ret = down_interruptible(&ipt_mutex);
1356 if (!list_named_insert(&ipt_match, match)) {
1357 duprintf("ipt_register_match: `%s' already in list!\n",
1367 ipt_unregister_match(struct ipt_match *match)
1370 LIST_DELETE(&ipt_match, match);
1374 int ipt_register_table(struct ipt_table *table)
1377 struct ipt_table_info *newinfo;
1378 static struct ipt_table_info bootstrap
1379 = { 0, 0, 0, { 0 }, { 0 }, { } };
1381 newinfo = vmalloc(sizeof(struct ipt_table_info)
1382 + SMP_ALIGN(table->table->size) * NR_CPUS);
1386 memcpy(newinfo->entries, table->table->entries, table->table->size);
1388 ret = translate_table(table->name, table->valid_hooks,
1389 newinfo, table->table->size,
1390 table->table->num_entries,
1391 table->table->hook_entry,
1392 table->table->underflow);
1398 ret = down_interruptible(&ipt_mutex);
1404 /* Don't autoload: we'd eat our tail... */
1405 if (list_named_find(&ipt_tables, table->name)) {
1410 /* Simplifies replace_table code. */
1411 table->private = &bootstrap;
1412 if (!replace_table(table, 0, newinfo, &ret))
1415 duprintf("table->private->number = %u\n",
1416 table->private->number);
1418 /* save number of initial entries */
1419 table->private->initial_entries = table->private->number;
1421 table->lock = RW_LOCK_UNLOCKED;
1422 list_prepend(&ipt_tables, table);
1433 void ipt_unregister_table(struct ipt_table *table)
1436 LIST_DELETE(&ipt_tables, table);
1439 /* Decrease module usage counts and free resources */
1440 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1441 cleanup_entry, NULL);
1442 vfree(table->private);
/* Returns 1 if the port is matched by the range, 0 otherwise */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int ret;

	ret = (port >= min && port <= max) ^ invert;
	return ret;
}
1456 tcp_find_option(u_int8_t option,
1457 const struct sk_buff *skb,
1458 unsigned int optlen,
1462 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1463 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1466 duprintf("tcp_match: finding option\n");
1471 /* If we don't have the whole header, drop packet. */
1472 op = skb_header_pointer(skb,
1473 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1480 for (i = 0; i < optlen; ) {
1481 if (op[i] == option) return !invert;
1483 else i += op[i+1]?:1;
1490 tcp_match(const struct sk_buff *skb,
1491 const struct net_device *in,
1492 const struct net_device *out,
1493 const void *matchinfo,
1497 struct tcphdr _tcph, *th;
1498 const struct ipt_tcp *tcpinfo = matchinfo;
1503 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1504 causes this. Its a cracker trying to break in by doing a
1505 flag overwrite to pass the direction checks.
1508 duprintf("Dropping evil TCP offset=1 frag.\n");
1511 /* Must not be a fragment. */
1515 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1517 th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1518 sizeof(_tcph), &_tcph);
1520 /* We've been asked to examine this packet, and we
1521 can't. Hence, no choice but to drop. */
1522 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1527 if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1529 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1531 if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1533 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1535 if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1536 == tcpinfo->flg_cmp,
1539 if (tcpinfo->option) {
1540 if (th->doff * 4 < sizeof(_tcph)) {
1544 if (!tcp_find_option(tcpinfo->option, skb,
1545 th->doff*4 - sizeof(_tcph),
1546 tcpinfo->invflags & IPT_TCP_INV_OPTION,
1553 /* Called when user tries to insert an entry of this type. */
1555 tcp_checkentry(const char *tablename,
1556 const struct ipt_ip *ip,
1558 unsigned int matchsize,
1559 unsigned int hook_mask)
1561 const struct ipt_tcp *tcpinfo = matchinfo;
1563 /* Must specify proto == TCP, and no unknown invflags */
1564 return ip->proto == IPPROTO_TCP
1565 && !(ip->invflags & IPT_INV_PROTO)
1566 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1567 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
/*
 * udp_match — match a packet against an ipt_udp source/destination
 * port-range rule.
 *
 * NOTE(review): lines are missing from this excerpt (original numbering
 * jumps); the return type, the fragment test guarded by the comment at
 * 1581, and the header-pointer NULL check before 1590 are not visible —
 * confirm against the full file.
 */
1571 udp_match(const struct sk_buff *skb,
1572 const struct net_device *in,
1573 const struct net_device *out,
1574 const void *matchinfo,
1578 struct udphdr _udph, *uh;
1579 const struct ipt_udp *udpinfo = matchinfo;
1581 /* Must not be a fragment. */
/* Copy the UDP header out of the skb (handles non-linear skbs). */
1585 uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1586 sizeof(_udph), &_udph);
1588 /* We've been asked to examine this packet, and we
1589 can't. Hence, no choice but to drop. */
1590 duprintf("Dropping evil UDP tinygram.\n");
/* Match iff both the source and destination ports fall inside their
 * configured [min,max] ranges; each comparison is individually
 * invertible via IPT_UDP_INV_{SRC,DST}PT. */
1595 return port_match(udpinfo->spts[0], udpinfo->spts[1],
1597 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1598 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1600 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1603 /* Called when user tries to insert an entry of this type. */
/*
 * udp_checkentry — validate an ipt_udp match when a rule is inserted.
 *
 * Rejects (with a debug message) when: the rule's protocol is not UDP
 * or the protocol test is inverted; the match payload size is wrong;
 * or unknown inversion flags are set.  Unlike tcp_checkentry, each
 * failure case gets its own duprintf diagnostic.
 *
 * NOTE(review): the excerpt omits the return type, the `matchinfo`
 * parameter, and the `return 0;`/`return 1;` lines inside and after
 * these if-blocks — confirm against the full file.
 */
1605 udp_checkentry(const char *tablename,
1606 const struct ipt_ip *ip,
1608 unsigned int matchinfosize,
1609 unsigned int hook_mask)
1611 const struct ipt_udp *udpinfo = matchinfo;
1613 /* Must specify proto == UDP, and no unknown invflags */
1614 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1615 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1619 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1620 duprintf("ipt_udp: matchsize %u != %u\n",
1621 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1624 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1625 duprintf("ipt_udp: unknown flags %X\n",
1633 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
/*
 * icmp_type_code_match — test an ICMP (type, code) pair against a
 * configured type plus [min_code, max_code] range.  test_type == 0xFF
 * acts as a wildcard that matches any type/code.
 * NOTE(review): the trailing `invert` parameter (original line 1637)
 * and the XOR against it are not visible in this excerpt.
 */
1635 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1636 u_int8_t type, u_int8_t code,
1639 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
/*
 * icmp_match — match a packet against an ipt_icmp type/code rule.
 *
 * Pulls the ICMP header via skb_header_pointer and delegates the
 * type/code comparison to icmp_type_code_match, with the result
 * invertible via IPT_ICMP_INV.
 *
 * NOTE(review): the return type, the fragment test guarded by the
 * comment at 1654, and the NULL-header drop path around 1664 are not
 * fully visible in this excerpt — confirm against the full file.
 */
1644 icmp_match(const struct sk_buff *skb,
1645 const struct net_device *in,
1646 const struct net_device *out,
1647 const void *matchinfo,
1651 struct icmphdr _icmph, *ic;
1652 const struct ipt_icmp *icmpinfo = matchinfo;
1654 /* Must not be a fragment. */
/* Copy the ICMP header out of the skb (handles non-linear skbs). */
1658 ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1659 sizeof(_icmph), &_icmph);
1661 /* We've been asked to examine this packet, and we
1662 * can't. Hence, no choice but to drop.
1664 duprintf("Dropping evil ICMP tinygram.\n");
1669 return icmp_type_code_match(icmpinfo->type,
1673 !!(icmpinfo->invflags&IPT_ICMP_INV));
1676 /* Called when user tries to insert an entry of this type. */
/*
 * icmp_checkentry — validate an ipt_icmp match when a rule is inserted.
 *
 * Accepts only when: the rule's protocol is ICMP, the protocol test is
 * not inverted, the match payload has the expected aligned size, and
 * no inversion flags outside IPT_ICMP_INV are set.  Same single-return
 * style as tcp_checkentry.
 *
 * NOTE(review): the return type and `matchinfo` parameter are not
 * visible in this excerpt — confirm against the full file.
 */
1678 icmp_checkentry(const char *tablename,
1679 const struct ipt_ip *ip,
1681 unsigned int matchsize,
1682 unsigned int hook_mask)
1684 const struct ipt_icmp *icmpinfo = matchinfo;
1686 /* Must specify proto == ICMP, and no unknown invflags */
1687 return ip->proto == IPPROTO_ICMP
1688 && !(ip->invflags & IPT_INV_PROTO)
1689 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1690 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1693 /* The built-in targets: standard (NULL) and error. */
/* Standard target (ACCEPT/DROP/QUEUE/RETURN verdicts); no .target
 * callback is visible here, consistent with the "(NULL)" note above. */
1694 static struct ipt_target ipt_standard_target = {
1695 .name = IPT_STANDARD_TARGET,
/* Error target: its .target callback (ipt_error) fires when a rule
 * references an unknown target name. */
1698 static struct ipt_target ipt_error_target = {
1699 .name = IPT_ERROR_TARGET,
1700 .target = ipt_error,
/* setsockopt/getsockopt registration: routes the IPT_SO_SET_*/
/* IPT_SO_GET_* control range to do_ipt_set_ctl / do_ipt_get_ctl
 * (defined elsewhere in this file). */
1703 static struct nf_sockopt_ops ipt_sockopts = {
1705 .set_optmin = IPT_BASE_CTL,
1706 .set_optmax = IPT_SO_SET_MAX+1,
1707 .set = do_ipt_set_ctl,
1708 .get_optmin = IPT_BASE_CTL,
1709 .get_optmax = IPT_SO_GET_MAX+1,
1710 .get = do_ipt_get_ctl,
/* The three built-in protocol matches, registered in init().  Each
 * pairs a packet-match callback with its rule-insertion validator.
 * NOTE(review): the .name initializers (original lines 1714/1720/1726)
 * are not visible in this excerpt. */
1713 static struct ipt_match tcp_matchstruct = {
1715 .match = &tcp_match,
1716 .checkentry = &tcp_checkentry,
1719 static struct ipt_match udp_matchstruct = {
1721 .match = &udp_match,
1722 .checkentry = &udp_checkentry,
1725 static struct ipt_match icmp_matchstruct = {
1727 .match = &icmp_match,
1728 .checkentry = &icmp_checkentry,
1731 #ifdef CONFIG_PROC_FS
/*
 * print_name — /proc read helper: once *count reaches start_offset,
 * sprintf the registered object's name (stored immediately after its
 * embedded struct list_head) into buffer at *pos, appending a newline.
 * When the line would overflow `length`, iteration stops (see the
 * comment at 1741).
 * NOTE(review): the buffer-overrun rollback and *pos advancement after
 * line 1742 are not visible in this excerpt.
 */
1732 static inline int print_name(const char *i,
1733 off_t start_offset, char *buffer, int length,
1734 off_t *pos, unsigned int *count)
1736 if ((*count)++ >= start_offset) {
1737 unsigned int namelen;
1739 namelen = sprintf(buffer + *pos, "%s\n",
1740 i + sizeof(struct list_head));
1741 if (*pos + namelen > length) {
1742 /* Stop iterating */
/*
 * print_target — like print_name, but skips the two built-in targets
 * (standard and error) so they never appear in /proc listings.
 * NOTE(review): the skip-path return value (original line 1755) is not
 * visible in this excerpt.
 */
1750 static inline int print_target(const struct ipt_target *t,
1751 off_t start_offset, char *buffer, int length,
1752 off_t *pos, unsigned int *count)
1754 if (t == &ipt_standard_target || t == &ipt_error_target)
1756 return print_name((char *)t, start_offset, buffer, length, pos, count);
/*
 * ipt_get_tables — legacy get_info_t /proc handler listing registered
 * table names.  Takes ipt_mutex (interruptibly) around the LIST_FIND
 * walk; print_name emits each entry's name into `buffer`.
 * NOTE(review): the `pos` declaration, the up(&ipt_mutex), and the
 * return statement are not visible in this excerpt.
 */
1759 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1762 unsigned int count = 0;
1764 if (down_interruptible(&ipt_mutex) != 0)
1767 LIST_FIND(&ipt_tables, print_name, void *,
1768 offset, buffer, length, &pos, &count);
1772 /* `start' hack - see fs/proc/generic.c line ~105 */
1773 *start=(char *)((unsigned long)count-offset);
/*
 * ipt_get_targets — /proc handler listing registered target names;
 * same structure as ipt_get_tables but walks ipt_target with
 * print_target (which hides the two built-ins).
 * NOTE(review): mutex release and return are not visible here.
 */
1777 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1780 unsigned int count = 0;
1782 if (down_interruptible(&ipt_mutex) != 0)
1785 LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1786 offset, buffer, length, &pos, &count);
/* Same `start' hack as ipt_get_tables (see fs/proc/generic.c). */
1790 *start = (char *)((unsigned long)count - offset);
/*
 * ipt_get_matches — /proc handler listing registered match names;
 * same structure as ipt_get_tables, walking ipt_match with print_name.
 * NOTE(review): mutex release and return are not visible here.
 */
1794 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1797 unsigned int count = 0;
1799 if (down_interruptible(&ipt_mutex) != 0)
1802 LIST_FIND(&ipt_match, print_name, void *,
1803 offset, buffer, length, &pos, &count);
/* Same `start' hack as ipt_get_tables (see fs/proc/generic.c). */
1807 *start = (char *)((unsigned long)count - offset);
/* Name -> handler table driving /proc registration in init() and
 * removal in fini(); iteration stops at a NULL .name sentinel (the
 * terminator entry itself is not visible in this excerpt). */
1811 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1812 { { "ip_tables_names", ipt_get_tables },
1813 { "ip_tables_targets", ipt_get_targets },
1814 { "ip_tables_matches", ipt_get_matches },
1816 #endif /*CONFIG_PROC_FS*/
/*
 * init — module initialization: seed the global target/match lists
 * with the built-ins, register the setsockopt/getsockopt interface,
 * and (under CONFIG_PROC_FS) create the three /proc/net listing
 * entries, unwinding already-created entries and the sockopt
 * registration on failure.
 * NOTE(review): several lines are missing from this excerpt (mutex
 * down/up around list_append, the error-return paths after 1834/1848,
 * and the final `return ret;`) — confirm against the full file.
 */
1818 static int __init init(void)
1822 /* Noone else will be downing sem now, so we won't sleep */
1824 list_append(&ipt_target, &ipt_standard_target);
1825 list_append(&ipt_target, &ipt_error_target);
1826 list_append(&ipt_match, &tcp_matchstruct);
1827 list_append(&ipt_match, &udp_matchstruct);
1828 list_append(&ipt_match, &icmp_matchstruct);
1831 /* Register setsockopt */
1832 ret = nf_register_sockopt(&ipt_sockopts);
1834 duprintf("Unable to register sockopts.\n");
1838 #ifdef CONFIG_PROC_FS
1840 struct proc_dir_entry *proc;
1843 for (i = 0; ipt_proc_entry[i].name; i++) {
1844 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1845 ipt_proc_entry[i].get_info);
/* Failure path: tear down entries created so far plus the sockopt
 * registration before bailing out. */
1848 proc_net_remove(ipt_proc_entry[i].name);
1849 nf_unregister_sockopt(&ipt_sockopts);
1852 proc->owner = THIS_MODULE;
1857 printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
/*
 * fini — module teardown: unregister the sockopt interface and remove
 * the /proc entries created by init() (reverse order of registration).
 * NOTE(review): the loop-index declaration and closing #endif are not
 * visible in this excerpt.
 */
1861 static void __exit fini(void)
1863 nf_unregister_sockopt(&ipt_sockopts);
1864 #ifdef CONFIG_PROC_FS
1867 for (i = 0; ipt_proc_entry[i].name; i++)
1868 proc_net_remove(ipt_proc_entry[i].name);
/* Public API exported to other modules (iptable_filter, match/target
 * extension modules, etc.).  The list may continue past this excerpt. */
1873 EXPORT_SYMBOL(ipt_register_table);
1874 EXPORT_SYMBOL(ipt_unregister_table);
1875 EXPORT_SYMBOL(ipt_register_match);
1876 EXPORT_SYMBOL(ipt_unregister_match);
1877 EXPORT_SYMBOL(ipt_do_table);
1878 EXPORT_SYMBOL(ipt_register_target);
1879 EXPORT_SYMBOL(ipt_unregister_target);
1880 EXPORT_SYMBOL(ipt_find_target_lock);