/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 * 	- increase module usage count as soon as we have rules inside
 * 	  a table
 * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
 * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
 */
#include <linux/config.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
38 MODULE_DESCRIPTION("IPv4 packet filter");
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

#ifdef CONFIG_NETFILTER_DEBUG
/* Print file/function/line when the asserted condition fails. */
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif

/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   Hence the start of any table is given by get_table() below.  */
82 /* Returns whether matches rule or not. */
84 ip_packet_match(const struct iphdr *ip,
87 const struct ipt_ip *ipinfo,
93 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
95 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
97 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
99 dprintf("Source or dest mismatch.\n");
101 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
103 NIPQUAD(ipinfo->smsk.s_addr),
104 NIPQUAD(ipinfo->src.s_addr),
105 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
106 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
108 NIPQUAD(ipinfo->dmsk.s_addr),
109 NIPQUAD(ipinfo->dst.s_addr),
110 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
114 /* Look for ifname matches; this should unroll nicely. */
115 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
116 ret |= (((const unsigned long *)indev)[i]
117 ^ ((const unsigned long *)ipinfo->iniface)[i])
118 & ((const unsigned long *)ipinfo->iniface_mask)[i];
121 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
122 dprintf("VIA in mismatch (%s vs %s).%s\n",
123 indev, ipinfo->iniface,
124 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
128 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
129 ret |= (((const unsigned long *)outdev)[i]
130 ^ ((const unsigned long *)ipinfo->outiface)[i])
131 & ((const unsigned long *)ipinfo->outiface_mask)[i];
134 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
135 dprintf("VIA out mismatch (%s vs %s).%s\n",
136 outdev, ipinfo->outiface,
137 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
141 /* Check specific protocol */
143 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
144 dprintf("Packet protocol %hi does not match %hi.%s\n",
145 ip->protocol, ipinfo->proto,
146 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
150 /* If we have a fragment rule but the packet is not a fragment
151 * then we return zero */
152 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
153 dprintf("Fragment rule but not fragment.%s\n",
154 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
162 ip_checkentry(const struct ipt_ip *ip)
164 if (ip->flags & ~IPT_F_MASK) {
165 duprintf("Unknown flag bits set: %08X\n",
166 ip->flags & ~IPT_F_MASK);
169 if (ip->invflags & ~IPT_INV_MASK) {
170 duprintf("Unknown invflag bits set: %08X\n",
171 ip->invflags & ~IPT_INV_MASK);
178 ipt_error(struct sk_buff **pskb,
179 const struct net_device *in,
180 const struct net_device *out,
181 unsigned int hooknum,
182 const void *targinfo,
186 printk("ip_tables: error: `%s'\n", (char *)targinfo);
192 int do_match(struct ipt_entry_match *m,
193 const struct sk_buff *skb,
194 const struct net_device *in,
195 const struct net_device *out,
199 /* Stop iteration if it doesn't match */
200 if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
201 skb->nh.iph->ihl*4, hotdrop))
/* Translate a byte offset into the table blob into an entry pointer. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}
213 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
215 ipt_do_table(struct sk_buff **pskb,
217 const struct net_device *in,
218 const struct net_device *out,
219 struct ipt_table *table,
222 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
227 /* Initializing verdict to NF_DROP keeps gcc happy. */
228 unsigned int verdict = NF_DROP;
229 const char *indev, *outdev;
231 struct ipt_entry *e, *back;
232 struct xt_table_info *private;
235 ip = (*pskb)->nh.iph;
236 datalen = (*pskb)->len - ip->ihl * 4;
237 indev = in ? in->name : nulldevname;
238 outdev = out ? out->name : nulldevname;
239 /* We handle fragments by dealing with the first fragment as
240 * if it was a normal packet. All other fragments are treated
241 * normally, except that they will NEVER match rules that ask
242 * things we don't know, ie. tcp syn flag or ports). If the
243 * rule is also a fragment-specific rule, non-fragments won't
245 offset = ntohs(ip->frag_off) & IP_OFFSET;
247 read_lock_bh(&table->lock);
248 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
249 private = table->private;
250 table_base = (void *)private->entries[smp_processor_id()];
251 e = get_entry(table_base, private->hook_entry[hook]);
253 /* For return from builtin chain */
254 back = get_entry(table_base, private->underflow[hook]);
259 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
260 struct ipt_entry_target *t;
262 if (IPT_MATCH_ITERATE(e, do_match,
264 offset, &hotdrop) != 0)
267 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
269 t = ipt_get_target(e);
270 IP_NF_ASSERT(t->u.kernel.target);
271 /* Standard target? */
272 if (!t->u.kernel.target->target) {
275 v = ((struct ipt_standard_target *)t)->verdict;
277 /* Pop from stack? */
278 if (v != IPT_RETURN) {
279 verdict = (unsigned)(-v) - 1;
283 back = get_entry(table_base,
287 if (table_base + v != (void *)e + e->next_offset
288 && !(e->ip.flags & IPT_F_GOTO)) {
289 /* Save old back ptr in next entry */
290 struct ipt_entry *next
291 = (void *)e + e->next_offset;
293 = (void *)back - table_base;
294 /* set back pointer to next entry */
298 e = get_entry(table_base, v);
300 /* Targets which reenter must return
302 #ifdef CONFIG_NETFILTER_DEBUG
303 ((struct ipt_entry *)table_base)->comefrom
306 verdict = t->u.kernel.target->target(pskb,
312 #ifdef CONFIG_NETFILTER_DEBUG
313 if (((struct ipt_entry *)table_base)->comefrom
315 && verdict == IPT_CONTINUE) {
316 printk("Target %s reentered!\n",
317 t->u.kernel.target->name);
320 ((struct ipt_entry *)table_base)->comefrom
323 /* Target might have changed stuff. */
324 ip = (*pskb)->nh.iph;
325 datalen = (*pskb)->len - ip->ihl * 4;
327 if (verdict == IPT_CONTINUE)
328 e = (void *)e + e->next_offset;
336 e = (void *)e + e->next_offset;
340 read_unlock_bh(&table->lock);
342 #ifdef DEBUG_ALLOW_ALL
351 /* All zeroes == unconditional rule. */
353 unconditional(const struct ipt_ip *ip)
357 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
358 if (((__u32 *)ip)[i])
364 /* Figures out from what hook each rule can be called: returns 0 if
365 there are loops. Puts hook bitmask in comefrom. */
367 mark_source_chains(struct xt_table_info *newinfo,
368 unsigned int valid_hooks, void *entry0)
372 /* No recursion; use packet counter to save back ptrs (reset
373 to 0 as we leave), and comefrom to save source hook bitmask */
374 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
375 unsigned int pos = newinfo->hook_entry[hook];
377 = (struct ipt_entry *)(entry0 + pos);
379 if (!(valid_hooks & (1 << hook)))
382 /* Set initial back pointer. */
383 e->counters.pcnt = pos;
386 struct ipt_standard_target *t
387 = (void *)ipt_get_target(e);
389 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
390 printk("iptables: loop hook %u pos %u %08X.\n",
391 hook, pos, e->comefrom);
395 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
397 /* Unconditional return/END. */
398 if (e->target_offset == sizeof(struct ipt_entry)
399 && (strcmp(t->target.u.user.name,
400 IPT_STANDARD_TARGET) == 0)
402 && unconditional(&e->ip)) {
403 unsigned int oldpos, size;
405 /* Return: backtrack through the last
408 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
409 #ifdef DEBUG_IP_FIREWALL_USER
411 & (1 << NF_IP_NUMHOOKS)) {
412 duprintf("Back unset "
419 pos = e->counters.pcnt;
420 e->counters.pcnt = 0;
422 /* We're at the start. */
426 e = (struct ipt_entry *)
428 } while (oldpos == pos + e->next_offset);
431 size = e->next_offset;
432 e = (struct ipt_entry *)
433 (entry0 + pos + size);
434 e->counters.pcnt = pos;
437 int newpos = t->verdict;
439 if (strcmp(t->target.u.user.name,
440 IPT_STANDARD_TARGET) == 0
442 /* This a jump; chase it. */
443 duprintf("Jump rule %u -> %u\n",
446 /* ... this is a fallthru */
447 newpos = pos + e->next_offset;
449 e = (struct ipt_entry *)
451 e->counters.pcnt = pos;
456 duprintf("Finished chain %u\n", hook);
462 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
464 if (i && (*i)-- == 0)
467 if (m->u.kernel.match->destroy)
468 m->u.kernel.match->destroy(m->data,
469 m->u.match_size - sizeof(*m));
470 module_put(m->u.kernel.match->me);
475 standard_check(const struct ipt_entry_target *t,
476 unsigned int max_offset)
478 struct ipt_standard_target *targ = (void *)t;
480 /* Check standard info. */
482 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
483 duprintf("standard_check: target size %u != %u\n",
485 IPT_ALIGN(sizeof(struct ipt_standard_target)));
489 if (targ->verdict >= 0
490 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
491 duprintf("ipt_standard_check: bad verdict (%i)\n",
496 if (targ->verdict < -NF_MAX_VERDICT - 1) {
497 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
505 check_match(struct ipt_entry_match *m,
507 const struct ipt_ip *ip,
508 unsigned int hookmask,
511 struct ipt_match *match;
513 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
515 "ipt_%s", m->u.user.name);
516 if (IS_ERR(match) || !match) {
517 duprintf("check_match: `%s' not found\n", m->u.user.name);
518 return match ? PTR_ERR(match) : -ENOENT;
520 m->u.kernel.match = match;
522 if (m->u.kernel.match->checkentry
523 && !m->u.kernel.match->checkentry(name, ip, m->data,
524 m->u.match_size - sizeof(*m),
526 module_put(m->u.kernel.match->me);
527 duprintf("ip_tables: check failed for `%s'.\n",
528 m->u.kernel.match->name);
536 static struct ipt_target ipt_standard_target;
539 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
542 struct ipt_entry_target *t;
543 struct ipt_target *target;
547 if (!ip_checkentry(&e->ip)) {
548 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
553 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
555 goto cleanup_matches;
557 t = ipt_get_target(e);
558 target = try_then_request_module(xt_find_target(AF_INET,
561 "ipt_%s", t->u.user.name);
562 if (IS_ERR(target) || !target) {
563 duprintf("check_entry: `%s' not found\n", t->u.user.name);
564 ret = target ? PTR_ERR(target) : -ENOENT;
565 goto cleanup_matches;
567 t->u.kernel.target = target;
569 if (t->u.kernel.target == &ipt_standard_target) {
570 if (!standard_check(t, size)) {
572 goto cleanup_matches;
574 } else if (t->u.kernel.target->checkentry
575 && !t->u.kernel.target->checkentry(name, e, t->data,
579 module_put(t->u.kernel.target->me);
580 duprintf("ip_tables: check failed for `%s'.\n",
581 t->u.kernel.target->name);
583 goto cleanup_matches;
590 IPT_MATCH_ITERATE(e, cleanup_match, &j);
595 check_entry_size_and_hooks(struct ipt_entry *e,
596 struct xt_table_info *newinfo,
598 unsigned char *limit,
599 const unsigned int *hook_entries,
600 const unsigned int *underflows,
605 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
606 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
607 duprintf("Bad offset %p\n", e);
612 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
613 duprintf("checking: element %p size %u\n",
618 /* Check hooks & underflows */
619 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
620 if ((unsigned char *)e - base == hook_entries[h])
621 newinfo->hook_entry[h] = hook_entries[h];
622 if ((unsigned char *)e - base == underflows[h])
623 newinfo->underflow[h] = underflows[h];
626 /* FIXME: underflows must be unconditional, standard verdicts
627 < 0 (not IPT_RETURN). --RR */
629 /* Clear counters and comefrom */
630 e->counters = ((struct xt_counters) { 0, 0 });
638 cleanup_entry(struct ipt_entry *e, unsigned int *i)
640 struct ipt_entry_target *t;
642 if (i && (*i)-- == 0)
645 /* Cleanup all matches */
646 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
647 t = ipt_get_target(e);
648 if (t->u.kernel.target->destroy)
649 t->u.kernel.target->destroy(t->data,
650 t->u.target_size - sizeof(*t));
651 module_put(t->u.kernel.target->me);
655 /* Checks and translates the user-supplied table segment (held in
658 translate_table(const char *name,
659 unsigned int valid_hooks,
660 struct xt_table_info *newinfo,
664 const unsigned int *hook_entries,
665 const unsigned int *underflows)
670 newinfo->size = size;
671 newinfo->number = number;
673 /* Init all hooks to impossible value. */
674 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
675 newinfo->hook_entry[i] = 0xFFFFFFFF;
676 newinfo->underflow[i] = 0xFFFFFFFF;
679 duprintf("translate_table: size %u\n", newinfo->size);
681 /* Walk through entries, checking offsets. */
682 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
683 check_entry_size_and_hooks,
687 hook_entries, underflows, &i);
692 duprintf("translate_table: %u not %u entries\n",
697 /* Check hooks all assigned */
698 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
699 /* Only hooks which are valid */
700 if (!(valid_hooks & (1 << i)))
702 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
703 duprintf("Invalid hook entry %u %u\n",
707 if (newinfo->underflow[i] == 0xFFFFFFFF) {
708 duprintf("Invalid underflow %u %u\n",
714 if (!mark_source_chains(newinfo, valid_hooks, entry0))
717 /* Finally, each sanity check must pass */
719 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
720 check_entry, name, size, &i);
723 IPT_ENTRY_ITERATE(entry0, newinfo->size,
728 /* And one copy for every other CPU */
730 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
731 memcpy(newinfo->entries[i], entry0, newinfo->size);
739 add_entry_to_counter(const struct ipt_entry *e,
740 struct xt_counters total[],
743 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
750 set_entry_to_counter(const struct ipt_entry *e,
751 struct ipt_counters total[],
754 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
761 get_counters(const struct xt_table_info *t,
762 struct xt_counters counters[])
768 /* Instead of clearing (by a previous call to memset())
769 * the counters and using adds, we set the counters
770 * with data used by 'current' CPU
771 * We dont care about preemption here.
773 curcpu = raw_smp_processor_id();
776 IPT_ENTRY_ITERATE(t->entries[curcpu],
778 set_entry_to_counter,
786 IPT_ENTRY_ITERATE(t->entries[cpu],
788 add_entry_to_counter,
795 copy_entries_to_user(unsigned int total_size,
796 struct ipt_table *table,
797 void __user *userptr)
799 unsigned int off, num, countersize;
801 struct xt_counters *counters;
802 struct xt_table_info *private = table->private;
806 /* We need atomic snapshot of counters: rest doesn't change
807 (other than comefrom, which userspace doesn't care
809 countersize = sizeof(struct xt_counters) * private->number;
810 counters = vmalloc_node(countersize, numa_node_id());
812 if (counters == NULL)
815 /* First, sum counters... */
816 write_lock_bh(&table->lock);
817 get_counters(private, counters);
818 write_unlock_bh(&table->lock);
820 /* choose the copy that is on our node/cpu, ...
821 * This choice is lazy (because current thread is
822 * allowed to migrate to another cpu)
824 loc_cpu_entry = private->entries[raw_smp_processor_id()];
825 /* ... then copy entire thing ... */
826 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
831 /* FIXME: use iterator macros --RR */
832 /* ... then go back and fix counters and names */
833 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
835 struct ipt_entry_match *m;
836 struct ipt_entry_target *t;
838 e = (struct ipt_entry *)(loc_cpu_entry + off);
839 if (copy_to_user(userptr + off
840 + offsetof(struct ipt_entry, counters),
842 sizeof(counters[num])) != 0) {
847 for (i = sizeof(struct ipt_entry);
848 i < e->target_offset;
849 i += m->u.match_size) {
852 if (copy_to_user(userptr + off + i
853 + offsetof(struct ipt_entry_match,
855 m->u.kernel.match->name,
856 strlen(m->u.kernel.match->name)+1)
863 t = ipt_get_target(e);
864 if (copy_to_user(userptr + off + e->target_offset
865 + offsetof(struct ipt_entry_target,
867 t->u.kernel.target->name,
868 strlen(t->u.kernel.target->name)+1) != 0) {
880 get_entries(const struct ipt_get_entries *entries,
881 struct ipt_get_entries __user *uptr)
886 t = xt_find_table_lock(AF_INET, entries->name);
887 if (t && !IS_ERR(t)) {
888 struct xt_table_info *private = t->private;
889 duprintf("t->private->number = %u\n",
891 if (entries->size == private->size)
892 ret = copy_entries_to_user(private->size,
893 t, uptr->entrytable);
895 duprintf("get_entries: I've got %u not %u!\n",
903 ret = t ? PTR_ERR(t) : -ENOENT;
909 do_replace(void __user *user, unsigned int len)
912 struct ipt_replace tmp;
914 struct xt_table_info *newinfo, *oldinfo;
915 struct xt_counters *counters;
916 void *loc_cpu_entry, *loc_cpu_old_entry;
918 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
921 /* Hack: Causes ipchains to give correct error msg --RR */
922 if (len != sizeof(tmp) + tmp.size)
926 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
929 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
932 newinfo = xt_alloc_table_info(tmp.size);
936 /* choose the copy that is our node/cpu */
937 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
938 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
944 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
950 ret = translate_table(tmp.name, tmp.valid_hooks,
951 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
952 tmp.hook_entry, tmp.underflow);
954 goto free_newinfo_counters;
956 duprintf("ip_tables: Translated table\n");
958 t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
959 "iptable_%s", tmp.name);
960 if (!t || IS_ERR(t)) {
961 ret = t ? PTR_ERR(t) : -ENOENT;
962 goto free_newinfo_counters_untrans;
966 if (tmp.valid_hooks != t->valid_hooks) {
967 duprintf("Valid hook crap: %08X vs %08X\n",
968 tmp.valid_hooks, t->valid_hooks);
973 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
977 /* Update module usage count based on number of rules */
978 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
979 oldinfo->number, oldinfo->initial_entries, newinfo->number);
980 if ((oldinfo->number > oldinfo->initial_entries) ||
981 (newinfo->number <= oldinfo->initial_entries))
983 if ((oldinfo->number > oldinfo->initial_entries) &&
984 (newinfo->number <= oldinfo->initial_entries))
987 /* Get the old counters. */
988 get_counters(oldinfo, counters);
989 /* Decrease module usage counts and free resource */
990 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
991 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
992 xt_free_table_info(oldinfo);
993 if (copy_to_user(tmp.counters, counters,
994 sizeof(struct xt_counters) * tmp.num_counters) != 0)
1003 free_newinfo_counters_untrans:
1004 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1005 free_newinfo_counters:
1008 xt_free_table_info(newinfo);
1012 /* We're lazy, and add to the first CPU; overflow works its fey magic
1013 * and everything is OK. */
1015 add_counter_to_entry(struct ipt_entry *e,
1016 const struct xt_counters addme[],
1020 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1022 (long unsigned int)e->counters.pcnt,
1023 (long unsigned int)e->counters.bcnt,
1024 (long unsigned int)addme[*i].pcnt,
1025 (long unsigned int)addme[*i].bcnt);
1028 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1035 do_add_counters(void __user *user, unsigned int len)
1038 struct xt_counters_info tmp, *paddc;
1039 struct ipt_table *t;
1040 struct xt_table_info *private;
1042 void *loc_cpu_entry;
1044 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1047 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1050 paddc = vmalloc_node(len, numa_node_id());
1054 if (copy_from_user(paddc, user, len) != 0) {
1059 t = xt_find_table_lock(AF_INET, tmp.name);
1060 if (!t || IS_ERR(t)) {
1061 ret = t ? PTR_ERR(t) : -ENOENT;
1065 write_lock_bh(&t->lock);
1066 private = t->private;
1067 if (private->number != tmp.num_counters) {
1069 goto unlock_up_free;
1073 /* Choose the copy that is on our node */
1074 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1075 IPT_ENTRY_ITERATE(loc_cpu_entry,
1077 add_counter_to_entry,
1081 write_unlock_bh(&t->lock);
1091 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1095 if (!capable(CAP_NET_ADMIN))
1099 case IPT_SO_SET_REPLACE:
1100 ret = do_replace(user, len);
1103 case IPT_SO_SET_ADD_COUNTERS:
1104 ret = do_add_counters(user, len);
1108 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1116 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1120 if (!capable(CAP_NET_ADMIN))
1124 case IPT_SO_GET_INFO: {
1125 char name[IPT_TABLE_MAXNAMELEN];
1126 struct ipt_table *t;
1128 if (*len != sizeof(struct ipt_getinfo)) {
1129 duprintf("length %u != %u\n", *len,
1130 sizeof(struct ipt_getinfo));
1135 if (copy_from_user(name, user, sizeof(name)) != 0) {
1139 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1141 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1142 "iptable_%s", name);
1143 if (t && !IS_ERR(t)) {
1144 struct ipt_getinfo info;
1145 struct xt_table_info *private = t->private;
1147 info.valid_hooks = t->valid_hooks;
1148 memcpy(info.hook_entry, private->hook_entry,
1149 sizeof(info.hook_entry));
1150 memcpy(info.underflow, private->underflow,
1151 sizeof(info.underflow));
1152 info.num_entries = private->number;
1153 info.size = private->size;
1154 memcpy(info.name, name, sizeof(info.name));
1156 if (copy_to_user(user, &info, *len) != 0)
1163 ret = t ? PTR_ERR(t) : -ENOENT;
1167 case IPT_SO_GET_ENTRIES: {
1168 struct ipt_get_entries get;
1170 if (*len < sizeof(get)) {
1171 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1173 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1175 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1176 duprintf("get_entries: %u != %u\n", *len,
1177 sizeof(struct ipt_get_entries) + get.size);
1180 ret = get_entries(&get, user);
1184 case IPT_SO_GET_REVISION_MATCH:
1185 case IPT_SO_GET_REVISION_TARGET: {
1186 struct ipt_get_revision rev;
1189 if (*len != sizeof(rev)) {
1193 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1198 if (cmd == IPT_SO_GET_REVISION_TARGET)
1203 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1206 "ipt_%s", rev.name);
1211 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1218 int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
1221 struct xt_table_info *newinfo;
1222 static struct xt_table_info bootstrap
1223 = { 0, 0, 0, { 0 }, { 0 }, { } };
1224 void *loc_cpu_entry;
1226 newinfo = xt_alloc_table_info(repl->size);
1230 /* choose the copy on our node/cpu
1231 * but dont care of preemption
1233 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1234 memcpy(loc_cpu_entry, repl->entries, repl->size);
1236 ret = translate_table(table->name, table->valid_hooks,
1237 newinfo, loc_cpu_entry, repl->size,
1242 xt_free_table_info(newinfo);
1246 if (xt_register_table(table, &bootstrap, newinfo) != 0) {
1247 xt_free_table_info(newinfo);
1254 void ipt_unregister_table(struct ipt_table *table)
1256 struct xt_table_info *private;
1257 void *loc_cpu_entry;
1259 private = xt_unregister_table(table);
1261 /* Decrease module usage counts and free resources */
1262 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1263 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
1264 xt_free_table_info(private);
/* Returns 1 if the type and code is matched by the range, 0 otherwise.
 * test_type == 0xFF is the userspace wildcard matching any ICMP type;
 * invert (0 or 1) flips the result. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	return ((test_type == 0xFF) ||
		(type == test_type && code >= min_code && code <= max_code))
		^ invert;
}
1278 icmp_match(const struct sk_buff *skb,
1279 const struct net_device *in,
1280 const struct net_device *out,
1281 const void *matchinfo,
1283 unsigned int protoff,
1286 struct icmphdr _icmph, *ic;
1287 const struct ipt_icmp *icmpinfo = matchinfo;
1289 /* Must not be a fragment. */
1293 ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1295 /* We've been asked to examine this packet, and we
1296 * can't. Hence, no choice but to drop.
1298 duprintf("Dropping evil ICMP tinygram.\n");
1303 return icmp_type_code_match(icmpinfo->type,
1307 !!(icmpinfo->invflags&IPT_ICMP_INV));
1310 /* Called when user tries to insert an entry of this type. */
1312 icmp_checkentry(const char *tablename,
1315 unsigned int matchsize,
1316 unsigned int hook_mask)
1318 const struct ipt_ip *ip = info;
1319 const struct ipt_icmp *icmpinfo = matchinfo;
1321 /* Must specify proto == ICMP, and no unknown invflags */
1322 return ip->proto == IPPROTO_ICMP
1323 && !(ip->invflags & IPT_INV_PROTO)
1324 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1325 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1328 /* The built-in targets: standard (NULL) and error. */
1329 static struct ipt_target ipt_standard_target = {
1330 .name = IPT_STANDARD_TARGET,
1333 static struct ipt_target ipt_error_target = {
1334 .name = IPT_ERROR_TARGET,
1335 .target = ipt_error,
1338 static struct nf_sockopt_ops ipt_sockopts = {
1340 .set_optmin = IPT_BASE_CTL,
1341 .set_optmax = IPT_SO_SET_MAX+1,
1342 .set = do_ipt_set_ctl,
1343 .get_optmin = IPT_BASE_CTL,
1344 .get_optmax = IPT_SO_GET_MAX+1,
1345 .get = do_ipt_get_ctl,
1348 static struct ipt_match icmp_matchstruct = {
1350 .match = &icmp_match,
1351 .checkentry = &icmp_checkentry,
1354 static int __init init(void)
1358 xt_proto_init(AF_INET);
1360 /* Noone else will be downing sem now, so we won't sleep */
1361 xt_register_target(AF_INET, &ipt_standard_target);
1362 xt_register_target(AF_INET, &ipt_error_target);
1363 xt_register_match(AF_INET, &icmp_matchstruct);
1365 /* Register setsockopt */
1366 ret = nf_register_sockopt(&ipt_sockopts);
1368 duprintf("Unable to register sockopts.\n");
1372 printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
1376 static void __exit fini(void)
1378 nf_unregister_sockopt(&ipt_sockopts);
1380 xt_unregister_match(AF_INET, &icmp_matchstruct);
1381 xt_unregister_target(AF_INET, &ipt_error_target);
1382 xt_unregister_target(AF_INET, &ipt_standard_target);
1384 xt_proto_fini(AF_INET);
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);