ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv4 / netfilter / arp_tables.c
1 /*
2  * Packet matching code for ARP packets.
3  *
4  * Based heavily, if not almost entirely, upon ip_tables.c framework.
5  *
6  * Some ARP specific bits are:
7  *
8  * Copyright (C) 2002 David S. Miller (davem@redhat.com)
9  *
10  */
11
12 #include <linux/config.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/if_arp.h>
17 #include <linux/kmod.h>
18 #include <linux/vmalloc.h>
19 #include <linux/proc_fs.h>
20 #include <linux/module.h>
21 #include <linux/init.h>
22
23 #include <asm/uaccess.h>
24 #include <asm/semaphore.h>
25
26 #include <linux/netfilter_arp/arp_tables.h>
27
28 MODULE_LICENSE("GPL");
29 MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
30 MODULE_DESCRIPTION("arptables core");
31
32 /*#define DEBUG_ARP_TABLES*/
33 /*#define DEBUG_ARP_TABLES_USER*/
34
/* dprintf(): debug output, forwarded to printk() only when
 * DEBUG_ARP_TABLES is defined; otherwise the call expands to nothing.
 */
#ifndef DEBUG_ARP_TABLES
#define dprintf(fmt, args...)
#else
#define dprintf(fmt, args...)  printk(fmt , ## args)
#endif

/* duprintf(): same idea, switched by DEBUG_ARP_TABLES_USER. */
#ifndef DEBUG_ARP_TABLES_USER
#define duprintf(fmt, args...)
#else
#define duprintf(fmt, args...) printk(fmt , ## args)
#endif

/* ARP_NF_ASSERT(cond): with CONFIG_NETFILTER_DEBUG, print the function,
 * file and line of the call site when cond is false.  Without it the
 * macro (and therefore its argument) disappears entirely.
 */
#ifndef CONFIG_NETFILTER_DEBUG
#define ARP_NF_ASSERT(cond)
#else
#define ARP_NF_ASSERT(cond)                                     \
do {                                                            \
        if (!(cond))                                            \
                printk("ARP_NF_ASSERT: %s:%s:%u\n",             \
                       __FUNCTION__, __FILE__, __LINE__);       \
} while(0)
#endif

/* Round len up to a multiple of SMP_CACHE_BYTES.  The mask arithmetic
 * is only correct if SMP_CACHE_BYTES is a power of two.
 */
#define SMP_ALIGN(len) (((len) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
58
59 static DECLARE_MUTEX(arpt_mutex);
60
61 #define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
62 #define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
63 #include <linux/netfilter_ipv4/lockhelp.h>
64 #include <linux/netfilter_ipv4/listhelp.h>
65
66 struct arpt_table_info {
67         unsigned int size;
68         unsigned int number;
69         unsigned int initial_entries;
70         unsigned int hook_entry[NF_ARP_NUMHOOKS];
71         unsigned int underflow[NF_ARP_NUMHOOKS];
72         char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
73 };
74
/* Registered targets, searched by name in find_target_lock(). */
static LIST_HEAD(arpt_target);
/* Registered tables, searched by name in find_table_lock(). */
static LIST_HEAD(arpt_tables);
/* Add a byte count and a packet count to a counter pair. */
#define ADD_COUNTER(ctr,bytes,pkts) do { (ctr).bcnt += (bytes); (ctr).pcnt += (pkts); } while(0)

/* Byte offset of a given CPU's copy of the rule blob inside
 * arpt_table_info.entries.  Without CONFIG_SMP there is only one copy.
 */
#ifndef CONFIG_SMP
#define TABLE_OFFSET(info,cpu) 0
#else
#define TABLE_OFFSET(info,cpu) (SMP_ALIGN((info)->size)*(cpu))
#endif
84
85 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
86                                       char *hdr_addr, int len)
87 {
88         int i, ret;
89
90         if (len > ARPT_DEV_ADDR_LEN_MAX)
91                 len = ARPT_DEV_ADDR_LEN_MAX;
92
93         ret = 0;
94         for (i = 0; i < len; i++)
95                 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
96
97         return (ret != 0);
98 }
99
100 /* Returns whether packet matches rule or not. */
101 static inline int arp_packet_match(const struct arphdr *arphdr,
102                                    struct net_device *dev,
103                                    const char *indev,
104                                    const char *outdev,
105                                    const struct arpt_arp *arpinfo)
106 {
107         char *arpptr = (char *)(arphdr + 1);
108         char *src_devaddr, *tgt_devaddr;
109         u32 *src_ipaddr, *tgt_ipaddr;
110         int i, ret;
111
112 #define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
113
114         if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
115                   ARPT_INV_ARPOP)) {
116                 dprintf("ARP operation field mismatch.\n");
117                 dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
118                         arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
119                 return 0;
120         }
121
122         if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
123                   ARPT_INV_ARPHRD)) {
124                 dprintf("ARP hardware address format mismatch.\n");
125                 dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
126                         arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
127                 return 0;
128         }
129
130         if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
131                   ARPT_INV_ARPPRO)) {
132                 dprintf("ARP protocol address format mismatch.\n");
133                 dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
134                         arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
135                 return 0;
136         }
137
138         if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
139                   ARPT_INV_ARPHLN)) {
140                 dprintf("ARP hardware address length mismatch.\n");
141                 dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
142                         arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
143                 return 0;
144         }
145
146         src_devaddr = arpptr;
147         arpptr += dev->addr_len;
148         src_ipaddr = (u32 *) arpptr;
149         arpptr += sizeof(u32);
150         tgt_devaddr = arpptr;
151         arpptr += dev->addr_len;
152         tgt_ipaddr = (u32 *) arpptr;
153
154         if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
155                   ARPT_INV_SRCDEVADDR) ||
156             FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
157                   ARPT_INV_TGTDEVADDR)) {
158                 dprintf("Source or target device address mismatch.\n");
159
160                 return 0;
161         }
162
163         if (FWINV(((*src_ipaddr) & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
164                   ARPT_INV_SRCIP) ||
165             FWINV((((*tgt_ipaddr) & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
166                   ARPT_INV_TGTIP)) {
167                 dprintf("Source or target IP address mismatch.\n");
168
169                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
170                         NIPQUAD(*src_ipaddr),
171                         NIPQUAD(arpinfo->smsk.s_addr),
172                         NIPQUAD(arpinfo->src.s_addr),
173                         arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
174                 dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
175                         NIPQUAD(*tgt_ipaddr),
176                         NIPQUAD(arpinfo->tmsk.s_addr),
177                         NIPQUAD(arpinfo->tgt.s_addr),
178                         arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
179                 return 0;
180         }
181
182         /* Look for ifname matches; this should unroll nicely. */
183         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
184                 ret |= (((const unsigned long *)indev)[i]
185                         ^ ((const unsigned long *)arpinfo->iniface)[i])
186                         & ((const unsigned long *)arpinfo->iniface_mask)[i];
187         }
188
189         if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
190                 dprintf("VIA in mismatch (%s vs %s).%s\n",
191                         indev, arpinfo->iniface,
192                         arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
193                 return 0;
194         }
195
196         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
197                 ret |= (((const unsigned long *)outdev)[i]
198                         ^ ((const unsigned long *)arpinfo->outiface)[i])
199                         & ((const unsigned long *)arpinfo->outiface_mask)[i];
200         }
201
202         if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
203                 dprintf("VIA out mismatch (%s vs %s).%s\n",
204                         outdev, arpinfo->outiface,
205                         arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
206                 return 0;
207         }
208
209         return 1;
210 }
211
212 static inline int arp_checkentry(const struct arpt_arp *arp)
213 {
214         if (arp->flags & ~ARPT_F_MASK) {
215                 duprintf("Unknown flag bits set: %08X\n",
216                          arp->flags & ~ARPT_F_MASK);
217                 return 0;
218         }
219         if (arp->invflags & ~ARPT_INV_MASK) {
220                 duprintf("Unknown invflag bits set: %08X\n",
221                          arp->invflags & ~ARPT_INV_MASK);
222                 return 0;
223         }
224
225         return 1;
226 }
227
228 static unsigned int arpt_error(struct sk_buff **pskb,
229                                unsigned int hooknum,
230                                const struct net_device *in,
231                                const struct net_device *out,
232                                const void *targinfo,
233                                void *userinfo)
234 {
235         if (net_ratelimit())
236                 printk("arp_tables: error: '%s'\n", (char *)targinfo);
237
238         return NF_DROP;
239 }
240
/* Return the entry located offset bytes past base. */
static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
{
        char *where = (char *)base + offset;

        return (struct arpt_entry *)where;
}
245
246 unsigned int arpt_do_table(struct sk_buff **pskb,
247                            unsigned int hook,
248                            const struct net_device *in,
249                            const struct net_device *out,
250                            struct arpt_table *table,
251                            void *userdata)
252 {
253         static const char nulldevname[IFNAMSIZ];
254         unsigned int verdict = NF_DROP;
255         struct arphdr *arp;
256         int hotdrop = 0;
257         struct arpt_entry *e, *back;
258         const char *indev, *outdev;
259         void *table_base;
260
261         /* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
262         if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
263                                      (2 * (*pskb)->dev->addr_len) +
264                                      (2 * sizeof(u32)))))
265                 return NF_DROP;
266
267         indev = in ? in->name : nulldevname;
268         outdev = out ? out->name : nulldevname;
269
270         read_lock_bh(&table->lock);
271         table_base = (void *)table->private->entries
272                 + TABLE_OFFSET(table->private,
273                                smp_processor_id());
274         e = get_entry(table_base, table->private->hook_entry[hook]);
275         back = get_entry(table_base, table->private->underflow[hook]);
276
277         arp = (*pskb)->nh.arph;
278         do {
279                 if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
280                         struct arpt_entry_target *t;
281                         int hdr_len;
282
283                         hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
284                                 (2 * (*pskb)->dev->addr_len);
285                         ADD_COUNTER(e->counters, hdr_len, 1);
286
287                         t = arpt_get_target(e);
288
289                         /* Standard target? */
290                         if (!t->u.kernel.target->target) {
291                                 int v;
292
293                                 v = ((struct arpt_standard_target *)t)->verdict;
294                                 if (v < 0) {
295                                         /* Pop from stack? */
296                                         if (v != ARPT_RETURN) {
297                                                 verdict = (unsigned)(-v) - 1;
298                                                 break;
299                                         }
300                                         e = back;
301                                         back = get_entry(table_base,
302                                                          back->comefrom);
303                                         continue;
304                                 }
305                                 if (table_base + v
306                                     != (void *)e + e->next_offset) {
307                                         /* Save old back ptr in next entry */
308                                         struct arpt_entry *next
309                                                 = (void *)e + e->next_offset;
310                                         next->comefrom =
311                                                 (void *)back - table_base;
312
313                                         /* set back pointer to next entry */
314                                         back = next;
315                                 }
316
317                                 e = get_entry(table_base, v);
318                         } else {
319                                 /* Targets which reenter must return
320                                  * abs. verdicts
321                                  */
322                                 verdict = t->u.kernel.target->target(pskb,
323                                                                      hook,
324                                                                      in, out,
325                                                                      t->data,
326                                                                      userdata);
327
328                                 /* Target might have changed stuff. */
329                                 arp = (*pskb)->nh.arph;
330
331                                 if (verdict == ARPT_CONTINUE)
332                                         e = (void *)e + e->next_offset;
333                                 else
334                                         /* Verdict */
335                                         break;
336                         }
337                 } else {
338                         e = (void *)e + e->next_offset;
339                 }
340         } while (!hotdrop);
341         read_unlock_bh(&table->lock);
342
343         if (hotdrop)
344                 return NF_DROP;
345         else
346                 return verdict;
347 }
348
349 static inline void *find_inlist_lock_noload(struct list_head *head,
350                                             const char *name,
351                                             int *error,
352                                             struct semaphore *mutex)
353 {
354         void *ret;
355
356         *error = down_interruptible(mutex);
357         if (*error != 0)
358                 return NULL;
359
360         ret = list_named_find(head, name);
361         if (!ret) {
362                 *error = -ENOENT;
363                 up(mutex);
364         }
365         return ret;
366 }
367
/* find_inlist_lock(): like find_inlist_lock_noload(), but with
 * CONFIG_KMOD a failed lookup triggers request_module() for
 * "<prefix><name>" followed by one retry.  Without CONFIG_KMOD the
 * prefix is ignored and the lookup is done once.
 */
#ifdef CONFIG_KMOD
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *found = find_inlist_lock_noload(head, name, error, mutex);

        if (found)
                return found;

        duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
        request_module("%s%s", prefix, name);
        return find_inlist_lock_noload(head, name, error, mutex);
}
#else
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#endif
390
391 static inline struct arpt_table *find_table_lock(const char *name, int *error, struct semaphore *mutex)
392 {
393         return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex);
394 }
395
396 static inline struct arpt_target *find_target_lock(const char *name, int *error, struct semaphore *mutex)
397 {
398         return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex);
399 }
400
401 /* All zeroes == unconditional rule. */
402 static inline int unconditional(const struct arpt_arp *arp)
403 {
404         unsigned int i;
405
406         for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
407                 if (((__u32 *)arp)[i])
408                         return 0;
409
410         return 1;
411 }
412
413 /* Figures out from what hook each rule can be called: returns 0 if
414  * there are loops.  Puts hook bitmask in comefrom.
415  */
416 static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
417 {
418         unsigned int hook;
419
420         /* No recursion; use packet counter to save back ptrs (reset
421          * to 0 as we leave), and comefrom to save source hook bitmask.
422          */
423         for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
424                 unsigned int pos = newinfo->hook_entry[hook];
425                 struct arpt_entry *e
426                         = (struct arpt_entry *)(newinfo->entries + pos);
427
428                 if (!(valid_hooks & (1 << hook)))
429                         continue;
430
431                 /* Set initial back pointer. */
432                 e->counters.pcnt = pos;
433
434                 for (;;) {
435                         struct arpt_standard_target *t
436                                 = (void *)arpt_get_target(e);
437
438                         if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
439                                 printk("arptables: loop hook %u pos %u %08X.\n",
440                                        hook, pos, e->comefrom);
441                                 return 0;
442                         }
443                         e->comefrom
444                                 |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
445
446                         /* Unconditional return/END. */
447                         if (e->target_offset == sizeof(struct arpt_entry)
448                             && (strcmp(t->target.u.user.name,
449                                        ARPT_STANDARD_TARGET) == 0)
450                             && t->verdict < 0
451                             && unconditional(&e->arp)) {
452                                 unsigned int oldpos, size;
453
454                                 /* Return: backtrack through the last
455                                  * big jump.
456                                  */
457                                 do {
458                                         e->comefrom ^= (1<<NF_ARP_NUMHOOKS);
459                                         oldpos = pos;
460                                         pos = e->counters.pcnt;
461                                         e->counters.pcnt = 0;
462
463                                         /* We're at the start. */
464                                         if (pos == oldpos)
465                                                 goto next;
466
467                                         e = (struct arpt_entry *)
468                                                 (newinfo->entries + pos);
469                                 } while (oldpos == pos + e->next_offset);
470
471                                 /* Move along one */
472                                 size = e->next_offset;
473                                 e = (struct arpt_entry *)
474                                         (newinfo->entries + pos + size);
475                                 e->counters.pcnt = pos;
476                                 pos += size;
477                         } else {
478                                 int newpos = t->verdict;
479
480                                 if (strcmp(t->target.u.user.name,
481                                            ARPT_STANDARD_TARGET) == 0
482                                     && newpos >= 0) {
483                                         /* This a jump; chase it. */
484                                         duprintf("Jump rule %u -> %u\n",
485                                                  pos, newpos);
486                                 } else {
487                                         /* ... this is a fallthru */
488                                         newpos = pos + e->next_offset;
489                                 }
490                                 e = (struct arpt_entry *)
491                                         (newinfo->entries + newpos);
492                                 e->counters.pcnt = pos;
493                                 pos = newpos;
494                         }
495                 }
496                 next:
497                 duprintf("Finished chain %u\n", hook);
498         }
499         return 1;
500 }
501
502 static inline int standard_check(const struct arpt_entry_target *t,
503                                  unsigned int max_offset)
504 {
505         struct arpt_standard_target *targ = (void *)t;
506
507         /* Check standard info. */
508         if (t->u.target_size
509             != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
510                 duprintf("arpt_standard_check: target size %u != %Zu\n",
511                          t->u.target_size,
512                          ARPT_ALIGN(sizeof(struct arpt_standard_target)));
513                 return 0;
514         }
515
516         if (targ->verdict >= 0
517             && targ->verdict > max_offset - sizeof(struct arpt_entry)) {
518                 duprintf("arpt_standard_check: bad verdict (%i)\n",
519                          targ->verdict);
520                 return 0;
521         }
522
523         if (targ->verdict < -NF_MAX_VERDICT - 1) {
524                 duprintf("arpt_standard_check: bad negative verdict (%i)\n",
525                          targ->verdict);
526                 return 0;
527         }
528         return 1;
529 }
530
531 static struct arpt_target arpt_standard_target;
532
533 static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
534                               unsigned int *i)
535 {
536         struct arpt_entry_target *t;
537         struct arpt_target *target;
538         int ret;
539
540         if (!arp_checkentry(&e->arp)) {
541                 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
542                 return -EINVAL;
543         }
544
545         t = arpt_get_target(e);
546         target = find_target_lock(t->u.user.name, &ret, &arpt_mutex);
547         if (!target) {
548                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
549                 goto out;
550         }
551         if (!try_module_get((target->me))) {
552                 ret = -ENOENT;
553                 goto out_unlock;
554         }
555         t->u.kernel.target = target;
556         up(&arpt_mutex);
557
558         if (t->u.kernel.target == &arpt_standard_target) {
559                 if (!standard_check(t, size)) {
560                         ret = -EINVAL;
561                         goto out;
562                 }
563         } else if (t->u.kernel.target->checkentry
564                    && !t->u.kernel.target->checkentry(name, e, t->data,
565                                                       t->u.target_size
566                                                       - sizeof(*t),
567                                                       e->comefrom)) {
568                 module_put(t->u.kernel.target->me);
569                 duprintf("arp_tables: check failed for `%s'.\n",
570                          t->u.kernel.target->name);
571                 ret = -EINVAL;
572                 goto out;
573         }
574
575         (*i)++;
576         return 0;
577
578 out_unlock:
579         up(&arpt_mutex);
580 out:
581         return ret;
582 }
583
584 static inline int check_entry_size_and_hooks(struct arpt_entry *e,
585                                              struct arpt_table_info *newinfo,
586                                              unsigned char *base,
587                                              unsigned char *limit,
588                                              const unsigned int *hook_entries,
589                                              const unsigned int *underflows,
590                                              unsigned int *i)
591 {
592         unsigned int h;
593
594         if ((unsigned long)e % __alignof__(struct arpt_entry) != 0
595             || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
596                 duprintf("Bad offset %p\n", e);
597                 return -EINVAL;
598         }
599
600         if (e->next_offset
601             < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
602                 duprintf("checking: element %p size %u\n",
603                          e, e->next_offset);
604                 return -EINVAL;
605         }
606
607         /* Check hooks & underflows */
608         for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
609                 if ((unsigned char *)e - base == hook_entries[h])
610                         newinfo->hook_entry[h] = hook_entries[h];
611                 if ((unsigned char *)e - base == underflows[h])
612                         newinfo->underflow[h] = underflows[h];
613         }
614
615         /* FIXME: underflows must be unconditional, standard verdicts
616            < 0 (not ARPT_RETURN). --RR */
617
618         /* Clear counters and comefrom */
619         e->counters = ((struct arpt_counters) { 0, 0 });
620         e->comefrom = 0;
621
622         (*i)++;
623         return 0;
624 }
625
626 static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
627 {
628         struct arpt_entry_target *t;
629
630         if (i && (*i)-- == 0)
631                 return 1;
632
633         t = arpt_get_target(e);
634         if (t->u.kernel.target->destroy)
635                 t->u.kernel.target->destroy(t->data,
636                                             t->u.target_size - sizeof(*t));
637         module_put(t->u.kernel.target->me);
638         return 0;
639 }
640
641 /* Checks and translates the user-supplied table segment (held in
642  * newinfo).
643  */
644 static int translate_table(const char *name,
645                            unsigned int valid_hooks,
646                            struct arpt_table_info *newinfo,
647                            unsigned int size,
648                            unsigned int number,
649                            const unsigned int *hook_entries,
650                            const unsigned int *underflows)
651 {
652         unsigned int i;
653         int ret;
654
655         newinfo->size = size;
656         newinfo->number = number;
657
658         /* Init all hooks to impossible value. */
659         for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
660                 newinfo->hook_entry[i] = 0xFFFFFFFF;
661                 newinfo->underflow[i] = 0xFFFFFFFF;
662         }
663
664         duprintf("translate_table: size %u\n", newinfo->size);
665         i = 0;
666
667         /* Walk through entries, checking offsets. */
668         ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
669                                  check_entry_size_and_hooks,
670                                  newinfo,
671                                  newinfo->entries,
672                                  newinfo->entries + size,
673                                  hook_entries, underflows, &i);
674         duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
675         if (ret != 0)
676                 return ret;
677
678         if (i != number) {
679                 duprintf("translate_table: %u not %u entries\n",
680                          i, number);
681                 return -EINVAL;
682         }
683
684         /* Check hooks all assigned */
685         for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
686                 /* Only hooks which are valid */
687                 if (!(valid_hooks & (1 << i)))
688                         continue;
689                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
690                         duprintf("Invalid hook entry %u %u\n",
691                                  i, hook_entries[i]);
692                         return -EINVAL;
693                 }
694                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
695                         duprintf("Invalid underflow %u %u\n",
696                                  i, underflows[i]);
697                         return -EINVAL;
698                 }
699         }
700
701         if (!mark_source_chains(newinfo, valid_hooks)) {
702                 duprintf("Looping hook\n");
703                 return -ELOOP;
704         }
705
706         /* Finally, each sanity check must pass */
707         i = 0;
708         ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
709                                  check_entry, name, size, &i);
710
711         if (ret != 0) {
712                 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
713                                    cleanup_entry, &i);
714                 return ret;
715         }
716
717         /* And one copy for every other CPU */
718         for (i = 1; i < NR_CPUS; i++) {
719                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
720                        newinfo->entries,
721                        SMP_ALIGN(newinfo->size));
722         }
723
724         return ret;
725 }
726
727 static struct arpt_table_info *replace_table(struct arpt_table *table,
728                                              unsigned int num_counters,
729                                              struct arpt_table_info *newinfo,
730                                              int *error)
731 {
732         struct arpt_table_info *oldinfo;
733
734         /* Do the substitution. */
735         write_lock_bh(&table->lock);
736         /* Check inside lock: is the old number correct? */
737         if (num_counters != table->private->number) {
738                 duprintf("num_counters != table->private->number (%u/%u)\n",
739                          num_counters, table->private->number);
740                 write_unlock_bh(&table->lock);
741                 *error = -EAGAIN;
742                 return NULL;
743         }
744         oldinfo = table->private;
745         table->private = newinfo;
746         newinfo->initial_entries = oldinfo->initial_entries;
747         write_unlock_bh(&table->lock);
748
749         return oldinfo;
750 }
751
752 /* Gets counters. */
753 static inline int add_entry_to_counter(const struct arpt_entry *e,
754                                        struct arpt_counters total[],
755                                        unsigned int *i)
756 {
757         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
758
759         (*i)++;
760         return 0;
761 }
762
763 static void get_counters(const struct arpt_table_info *t,
764                          struct arpt_counters counters[])
765 {
766         unsigned int cpu;
767         unsigned int i;
768
769         for (cpu = 0; cpu < NR_CPUS; cpu++) {
770                 i = 0;
771                 ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
772                                    t->size,
773                                    add_entry_to_counter,
774                                    counters,
775                                    &i);
776         }
777 }
778
779 static int copy_entries_to_user(unsigned int total_size,
780                                 struct arpt_table *table,
781                                 void *userptr)
782 {
783         unsigned int off, num, countersize;
784         struct arpt_entry *e;
785         struct arpt_counters *counters;
786         int ret = 0;
787
788         /* We need atomic snapshot of counters: rest doesn't change
789          * (other than comefrom, which userspace doesn't care
790          * about).
791          */
792         countersize = sizeof(struct arpt_counters) * table->private->number;
793         counters = vmalloc(countersize);
794
795         if (counters == NULL)
796                 return -ENOMEM;
797
798         /* First, sum counters... */
799         memset(counters, 0, countersize);
800         write_lock_bh(&table->lock);
801         get_counters(table->private, counters);
802         write_unlock_bh(&table->lock);
803
804         /* ... then copy entire thing from CPU 0... */
805         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
806                 ret = -EFAULT;
807                 goto free_counters;
808         }
809
810         /* FIXME: use iterator macros --RR */
811         /* ... then go back and fix counters and names */
812         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
813                 struct arpt_entry_target *t;
814
815                 e = (struct arpt_entry *)(table->private->entries + off);
816                 if (copy_to_user(userptr + off
817                                  + offsetof(struct arpt_entry, counters),
818                                  &counters[num],
819                                  sizeof(counters[num])) != 0) {
820                         ret = -EFAULT;
821                         goto free_counters;
822                 }
823
824                 t = arpt_get_target(e);
825                 if (copy_to_user(userptr + off + e->target_offset
826                                  + offsetof(struct arpt_entry_target,
827                                             u.user.name),
828                                  t->u.kernel.target->name,
829                                  strlen(t->u.kernel.target->name)+1) != 0) {
830                         ret = -EFAULT;
831                         goto free_counters;
832                 }
833         }
834
835  free_counters:
836         vfree(counters);
837         return ret;
838 }
839
840 static int get_entries(const struct arpt_get_entries *entries,
841                        struct arpt_get_entries *uptr)
842 {
843         int ret;
844         struct arpt_table *t;
845
846         t = find_table_lock(entries->name, &ret, &arpt_mutex);
847         if (t) {
848                 duprintf("t->private->number = %u\n",
849                          t->private->number);
850                 if (entries->size == t->private->size)
851                         ret = copy_entries_to_user(t->private->size,
852                                                    t, uptr->entrytable);
853                 else {
854                         duprintf("get_entries: I've got %u not %u!\n",
855                                  t->private->size,
856                                  entries->size);
857                         ret = -EINVAL;
858                 }
859                 up(&arpt_mutex);
860         } else
861                 duprintf("get_entries: Can't find %s!\n",
862                          entries->name);
863
864         return ret;
865 }
866
867 static int do_replace(void *user, unsigned int len)
868 {
869         int ret;
870         struct arpt_replace tmp;
871         struct arpt_table *t;
872         struct arpt_table_info *newinfo, *oldinfo;
873         struct arpt_counters *counters;
874
875         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
876                 return -EFAULT;
877
878         /* Hack: Causes ipchains to give correct error msg --RR */
879         if (len != sizeof(tmp) + tmp.size)
880                 return -ENOPROTOOPT;
881
882         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
883         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
884                 return -ENOMEM;
885
886         newinfo = vmalloc(sizeof(struct arpt_table_info)
887                           + SMP_ALIGN(tmp.size) * NR_CPUS);
888         if (!newinfo)
889                 return -ENOMEM;
890
891         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
892                            tmp.size) != 0) {
893                 ret = -EFAULT;
894                 goto free_newinfo;
895         }
896
897         counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters));
898         if (!counters) {
899                 ret = -ENOMEM;
900                 goto free_newinfo;
901         }
902         memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
903
904         ret = translate_table(tmp.name, tmp.valid_hooks,
905                               newinfo, tmp.size, tmp.num_entries,
906                               tmp.hook_entry, tmp.underflow);
907         if (ret != 0)
908                 goto free_newinfo_counters;
909
910         duprintf("arp_tables: Translated table\n");
911
912         t = find_table_lock(tmp.name, &ret, &arpt_mutex);
913         if (!t)
914                 goto free_newinfo_counters_untrans;
915
916         /* You lied! */
917         if (tmp.valid_hooks != t->valid_hooks) {
918                 duprintf("Valid hook crap: %08X vs %08X\n",
919                          tmp.valid_hooks, t->valid_hooks);
920                 ret = -EINVAL;
921                 goto free_newinfo_counters_untrans_unlock;
922         }
923
924         /* Get a reference in advance, we're not allowed fail later */
925         if (!try_module_get(t->me)) {
926                 ret = -EBUSY;
927                 goto free_newinfo_counters_untrans_unlock;
928         }
929
930         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
931         if (!oldinfo)
932                 goto put_module;
933
934         /* Update module usage count based on number of rules */
935         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
936                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
937         if ((oldinfo->number > oldinfo->initial_entries) || 
938             (newinfo->number <= oldinfo->initial_entries)) 
939                 module_put(t->me);
940         if ((oldinfo->number > oldinfo->initial_entries) &&
941             (newinfo->number <= oldinfo->initial_entries))
942                 module_put(t->me);
943
944         /* Get the old counters. */
945         get_counters(oldinfo, counters);
946         /* Decrease module usage counts and free resource */
947         ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
948         vfree(oldinfo);
949         /* Silent error: too late now. */
950         copy_to_user(tmp.counters, counters,
951                      sizeof(struct arpt_counters) * tmp.num_counters);
952         vfree(counters);
953         up(&arpt_mutex);
954         return 0;
955
956  put_module:
957         module_put(t->me);
958  free_newinfo_counters_untrans_unlock:
959         up(&arpt_mutex);
960  free_newinfo_counters_untrans:
961         ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
962  free_newinfo_counters:
963         vfree(counters);
964  free_newinfo:
965         vfree(newinfo);
966         return ret;
967 }
968
969 /* We're lazy, and add to the first CPU; overflow works its fey magic
970  * and everything is OK.
971  */
972 static inline int add_counter_to_entry(struct arpt_entry *e,
973                                        const struct arpt_counters addme[],
974                                        unsigned int *i)
975 {
976
977         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
978
979         (*i)++;
980         return 0;
981 }
982
983 static int do_add_counters(void *user, unsigned int len)
984 {
985         unsigned int i;
986         struct arpt_counters_info tmp, *paddc;
987         struct arpt_table *t;
988         int ret;
989
990         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
991                 return -EFAULT;
992
993         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters))
994                 return -EINVAL;
995
996         paddc = vmalloc(len);
997         if (!paddc)
998                 return -ENOMEM;
999
1000         if (copy_from_user(paddc, user, len) != 0) {
1001                 ret = -EFAULT;
1002                 goto free;
1003         }
1004
1005         t = find_table_lock(tmp.name, &ret, &arpt_mutex);
1006         if (!t)
1007                 goto free;
1008
1009         write_lock_bh(&t->lock);
1010         if (t->private->number != paddc->num_counters) {
1011                 ret = -EINVAL;
1012                 goto unlock_up_free;
1013         }
1014
1015         i = 0;
1016         ARPT_ENTRY_ITERATE(t->private->entries,
1017                            t->private->size,
1018                            add_counter_to_entry,
1019                            paddc->counters,
1020                            &i);
1021  unlock_up_free:
1022         write_unlock_bh(&t->lock);
1023         up(&arpt_mutex);
1024  free:
1025         vfree(paddc);
1026
1027         return ret;
1028 }
1029
1030 static int do_arpt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1031 {
1032         int ret;
1033
1034         if (!capable(CAP_NET_ADMIN))
1035                 return -EPERM;
1036
1037         switch (cmd) {
1038         case ARPT_SO_SET_REPLACE:
1039                 ret = do_replace(user, len);
1040                 break;
1041
1042         case ARPT_SO_SET_ADD_COUNTERS:
1043                 ret = do_add_counters(user, len);
1044                 break;
1045
1046         default:
1047                 duprintf("do_arpt_set_ctl:  unknown request %i\n", cmd);
1048                 ret = -EINVAL;
1049         }
1050
1051         return ret;
1052 }
1053
1054 static int do_arpt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1055 {
1056         int ret;
1057
1058         if (!capable(CAP_NET_ADMIN))
1059                 return -EPERM;
1060
1061         switch (cmd) {
1062         case ARPT_SO_GET_INFO: {
1063                 char name[ARPT_TABLE_MAXNAMELEN];
1064                 struct arpt_table *t;
1065
1066                 if (*len != sizeof(struct arpt_getinfo)) {
1067                         duprintf("length %u != %Zu\n", *len,
1068                                  sizeof(struct arpt_getinfo));
1069                         ret = -EINVAL;
1070                         break;
1071                 }
1072
1073                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1074                         ret = -EFAULT;
1075                         break;
1076                 }
1077                 name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
1078                 t = find_table_lock(name, &ret, &arpt_mutex);
1079                 if (t) {
1080                         struct arpt_getinfo info;
1081
1082                         info.valid_hooks = t->valid_hooks;
1083                         memcpy(info.hook_entry, t->private->hook_entry,
1084                                sizeof(info.hook_entry));
1085                         memcpy(info.underflow, t->private->underflow,
1086                                sizeof(info.underflow));
1087                         info.num_entries = t->private->number;
1088                         info.size = t->private->size;
1089                         strcpy(info.name, name);
1090
1091                         if (copy_to_user(user, &info, *len) != 0)
1092                                 ret = -EFAULT;
1093                         else
1094                                 ret = 0;
1095
1096                         up(&arpt_mutex);
1097                 }
1098         }
1099         break;
1100
1101         case ARPT_SO_GET_ENTRIES: {
1102                 struct arpt_get_entries get;
1103
1104                 if (*len < sizeof(get)) {
1105                         duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
1106                         ret = -EINVAL;
1107                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1108                         ret = -EFAULT;
1109                 } else if (*len != sizeof(struct arpt_get_entries) + get.size) {
1110                         duprintf("get_entries: %u != %Zu\n", *len,
1111                                  sizeof(struct arpt_get_entries) + get.size);
1112                         ret = -EINVAL;
1113                 } else
1114                         ret = get_entries(&get, user);
1115                 break;
1116         }
1117
1118         default:
1119                 duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
1120                 ret = -EINVAL;
1121         }
1122
1123         return ret;
1124 }
1125
1126 /* Registration hooks for targets. */
1127 int arpt_register_target(struct arpt_target *target)
1128 {
1129         int ret;
1130
1131         ret = down_interruptible(&arpt_mutex);
1132         if (ret != 0)
1133                 return ret;
1134
1135         if (!list_named_insert(&arpt_target, target)) {
1136                 duprintf("arpt_register_target: `%s' already in list!\n",
1137                          target->name);
1138                 ret = -EINVAL;
1139         }
1140         up(&arpt_mutex);
1141         return ret;
1142 }
1143
1144 void arpt_unregister_target(struct arpt_target *target)
1145 {
1146         down(&arpt_mutex);
1147         LIST_DELETE(&arpt_target, target);
1148         up(&arpt_mutex);
1149 }
1150
1151 int arpt_register_table(struct arpt_table *table)
1152 {
1153         int ret;
1154         struct arpt_table_info *newinfo;
1155         static struct arpt_table_info bootstrap
1156                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1157
1158         newinfo = vmalloc(sizeof(struct arpt_table_info)
1159                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1160         if (!newinfo) {
1161                 ret = -ENOMEM;
1162                 return ret;
1163         }
1164         memcpy(newinfo->entries, table->table->entries, table->table->size);
1165
1166         ret = translate_table(table->name, table->valid_hooks,
1167                               newinfo, table->table->size,
1168                               table->table->num_entries,
1169                               table->table->hook_entry,
1170                               table->table->underflow);
1171         duprintf("arpt_register_table: translate table gives %d\n", ret);
1172         if (ret != 0) {
1173                 vfree(newinfo);
1174                 return ret;
1175         }
1176
1177         ret = down_interruptible(&arpt_mutex);
1178         if (ret != 0) {
1179                 vfree(newinfo);
1180                 return ret;
1181         }
1182
1183         /* Don't autoload: we'd eat our tail... */
1184         if (list_named_find(&arpt_tables, table->name)) {
1185                 ret = -EEXIST;
1186                 goto free_unlock;
1187         }
1188
1189         /* Simplifies replace_table code. */
1190         table->private = &bootstrap;
1191         if (!replace_table(table, 0, newinfo, &ret))
1192                 goto free_unlock;
1193
1194         duprintf("table->private->number = %u\n",
1195                  table->private->number);
1196         
1197         /* save number of initial entries */
1198         table->private->initial_entries = table->private->number;
1199
1200         table->lock = RW_LOCK_UNLOCKED;
1201         list_prepend(&arpt_tables, table);
1202
1203  unlock:
1204         up(&arpt_mutex);
1205         return ret;
1206
1207  free_unlock:
1208         vfree(newinfo);
1209         goto unlock;
1210 }
1211
1212 void arpt_unregister_table(struct arpt_table *table)
1213 {
1214         down(&arpt_mutex);
1215         LIST_DELETE(&arpt_tables, table);
1216         up(&arpt_mutex);
1217
1218         /* Decrease module usage counts and free resources */
1219         ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1220                            cleanup_entry, NULL);
1221         vfree(table->private);
1222 }
1223
1224 /* The built-in targets: standard (NULL) and error. */
1225 static struct arpt_target arpt_standard_target = {
1226         .name           = ARPT_STANDARD_TARGET,
1227 };
1228
1229 static struct arpt_target arpt_error_target = {
1230         .name           = ARPT_ERROR_TARGET,
1231         .target         = arpt_error,
1232 };
1233
1234 static struct nf_sockopt_ops arpt_sockopts = {
1235         .pf             = PF_INET,
1236         .set_optmin     = ARPT_BASE_CTL,
1237         .set_optmax     = ARPT_SO_SET_MAX+1,
1238         .set            = do_arpt_set_ctl,
1239         .get_optmin     = ARPT_BASE_CTL,
1240         .get_optmax     = ARPT_SO_GET_MAX+1,
1241         .get            = do_arpt_get_ctl,
1242 };
1243
1244 #ifdef CONFIG_PROC_FS
1245 static inline int print_name(const struct arpt_table *t,
1246                              off_t start_offset, char *buffer, int length,
1247                              off_t *pos, unsigned int *count)
1248 {
1249         if ((*count)++ >= start_offset) {
1250                 unsigned int namelen;
1251
1252                 namelen = sprintf(buffer + *pos, "%s\n", t->name);
1253                 if (*pos + namelen > length) {
1254                         /* Stop iterating */
1255                         return 1;
1256                 }
1257                 *pos += namelen;
1258         }
1259         return 0;
1260 }
1261
1262 static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
1263 {
1264         off_t pos = 0;
1265         unsigned int count = 0;
1266
1267         if (down_interruptible(&arpt_mutex) != 0)
1268                 return 0;
1269
1270         LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
1271                   offset, buffer, length, &pos, &count);
1272
1273         up(&arpt_mutex);
1274
1275         /* `start' hack - see fs/proc/generic.c line ~105 */
1276         *start=(char *)((unsigned long)count-offset);
1277         return pos;
1278 }
1279 #endif /*CONFIG_PROC_FS*/
1280
1281 static int __init init(void)
1282 {
1283         int ret;
1284
1285         /* Noone else will be downing sem now, so we won't sleep */
1286         down(&arpt_mutex);
1287         list_append(&arpt_target, &arpt_standard_target);
1288         list_append(&arpt_target, &arpt_error_target);
1289         up(&arpt_mutex);
1290
1291         /* Register setsockopt */
1292         ret = nf_register_sockopt(&arpt_sockopts);
1293         if (ret < 0) {
1294                 duprintf("Unable to register sockopts.\n");
1295                 return ret;
1296         }
1297
1298 #ifdef CONFIG_PROC_FS
1299         {
1300                 struct proc_dir_entry *proc;
1301
1302                 proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
1303                 if (!proc) {
1304                         nf_unregister_sockopt(&arpt_sockopts);
1305                         return -ENOMEM;
1306                 }
1307                 proc->owner = THIS_MODULE;
1308         }
1309 #endif
1310
1311         printk("arp_tables: (C) 2002 David S. Miller\n");
1312         return 0;
1313 }
1314
1315 static void __exit fini(void)
1316 {
1317         nf_unregister_sockopt(&arpt_sockopts);
1318 #ifdef CONFIG_PROC_FS
1319         proc_net_remove("arp_tables_names");
1320 #endif
1321 }
1322
1323 EXPORT_SYMBOL(arpt_register_table);
1324 EXPORT_SYMBOL(arpt_unregister_table);
1325 EXPORT_SYMBOL(arpt_do_table);
1326 EXPORT_SYMBOL(arpt_register_target);
1327 EXPORT_SYMBOL(arpt_unregister_target);
1328
1329 module_init(init);
1330 module_exit(fini);