kipfw/ipfw2_mod.c

   1 /*
   2  * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  * 1. Redistributions of source code must retain the above copyright
   8  *    notice, this list of conditions and the following disclaimer.
   9  * 2. Redistributions in binary form must reproduce the above copyright
  10  *    notice, this list of conditions and the following disclaimer in the
  11  *    documentation and/or other materials provided with the distribution.
  12  *
  13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  23  * SUCH DAMAGE.
  24  */
  25
  26 /*
  27  * $Id: ipfw2_mod.c 12501 2014-01-10 01:09:14Z luigi $
  28  *
  29  * The main interface to build ipfw+dummynet as a linux module.
  30  * (and possibly as a windows module as well, though that part
  31  * is not complete yet).
  32  *
  33  * The control interface uses the sockopt mechanism
  34  * on a socket(AF_INET, SOCK_RAW, IPPROTO_RAW).
  35  *
  36  * The data interface uses the netfilter interface, at the moment
  37  * hooked to the PRE_ROUTING and POST_ROUTING hooks.
  38  * Unfortunately the netfilter interface is a moving target,
  39  * so we need a set of macros to adapt to the various cases.
  40  *
   41  * In the netfilter hook we just mark each packet as 'QUEUE' and then
   42  * let the queue handler do the whole work (filtering and
   43  * possibly emulation).
  44  * As we receive packets, we wrap them with an mbuf descriptor
  45  * so the existing ipfw+dummynet code runs unmodified.
  46  */
  47
  48 #include <sys/cdefs.h>
  49 #include <sys/mbuf.h>                   /* sizeof struct mbuf */
  50 #include <sys/param.h>                  /* NGROUPS */
  51
  52 #ifndef D
  53 #define ND(fmt, ...) do {} while (0)
  54 #define D1(fmt, ...) do {} while (0)
  55 #define D(fmt, ...) printf("%-10s " fmt "\n",      \
  56         __FUNCTION__, ## __VA_ARGS__)
  57 #endif
  58
  59 #ifdef __linux__
   #include <linux/module.h>
   #include <linux/kernel.h>
   #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38)
   #include <linux/err.h>                  /* IS_ERR, PTR_ERR */
   #endif
  62
  63 #ifndef CONFIG_NETFILTER
  64 #error should configure netfilter (broken on 2.6.26 and below ?)
  65 #endif
  66
  67 #include <linux/netfilter.h>
  68 #include <linux/netfilter_ipv4.h>       /* NF_IP_PRI_FILTER */
  69
  70 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
  71 #include <net/netfilter/nf_queue.h>     /* nf_queue */
  72 #endif
  73
  74 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
  75 #define __read_mostly
  76 #endif
  77
  78 #endif /* !__linux__ */
  79
  80 #include <netinet/in.h>                 /* in_addr */
  81 #include <netinet/ip_fw.h>              /* ip_fw_ctl_t, ip_fw_chk_t */
  82 #include <netinet/ipfw/ip_fw_private.h>         /* ip_fw_ctl_t, ip_fw_chk_t */
  83 #include <netinet/ip_dummynet.h>        /* ip_dn_ctl_t, ip_dn_io_t */
  84 #include <net/pfil.h>                   /* PFIL_IN, PFIL_OUT */
  85
  86 #ifdef __linux__
  87
  88 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
  89 /* XXX was < 2.6.0:  inet_hashtables.h is introduced in 2.6.14 */
  90 // #warning --- inet_hashtables not present on 2.4
  91 #include <linux/tcp.h>
  92 #include <net/route.h>
  93 #include <net/sock.h>
  94 static inline int inet_iif(const struct sk_buff *skb)
  95 {
  96         return ((struct rtable *)skb->dst)->rt_iif;
  97 }
  98
  99 #else
 100 #include <net/inet_hashtables.h>        /* inet_lookup */
 101 #endif
 102 #endif /* __linux__ */
 103
 104 #include <net/route.h>                  /* inet_iif */
 105
 106 /*
 107  * Here we allocate some global variables used in the firewall.
 108  */
 109 //ip_dn_ctl_t    *ip_dn_ctl_ptr;
 110 int (*ip_dn_ctl_ptr)(struct sockopt *);
 111
 112 ip_fw_ctl_t    *ip_fw_ctl_ptr;
 113
 114 int     (*ip_dn_io_ptr)(struct mbuf **m, int dir, struct ip_fw_args *fwa);
 115 ip_fw_chk_t    *ip_fw_chk_ptr;
 116
 117 void            (*bridge_dn_p)(struct mbuf *, struct ifnet *);
 118
 119 /* Divert hooks. */
 120 void (*ip_divert_ptr)(struct mbuf *m, int incoming);
 121
 122 /* ng_ipfw hooks. */
 123 ng_ipfw_input_t *ng_ipfw_input_p = NULL;
 124
 125 /*---
 126  * Glue code to implement the registration of children with the parent.
 127  * Each child should call my_mod_register() when linking, so that
 128  * module_init() and module_exit() can call init_children() and
 129  * fini_children() to provide the necessary initialization.
 130  * We use the same mechanism for MODULE_ and SYSINIT_.
 131  * The former only get a pointer to the moduledata,
 132  * the latter have two function pointers (init/uninit)
 133  */
 134 #include <sys/module.h>
 135 struct mod_args {
 136         const char *name;
 137         int order;
 138         struct moduledata *mod;
 139         void (*init)(void), (*uninit)(void);
 140 };
 141
 142 static unsigned int mod_idx;
 143 static struct mod_args mods[10];        /* hard limit to 10 modules */
 144
 145 int
 146 my_mod_register(const char *name, int order,
 147         struct moduledata *mod, void *init, void *uninit);
 148 /*
 149  * my_mod_register should be called automatically as the init
 150  * functions in the submodules. Unfortunately this compiler/linker
 151  * trick is not supported yet so we call it manually.
 152  */
 153 int
 154 my_mod_register(const char *name, int order,
 155         struct moduledata *mod, void *init, void *uninit)
 156 {
 157         struct mod_args m;
 158
 159         m.name = name;
 160         m.order = order;
 161         m.mod = mod;
 162         m.init = init;
 163         m.uninit = uninit;
 164
 165         printf("%s %s called\n", __FUNCTION__, name);
 166         if (mod_idx < sizeof(mods) / sizeof(mods[0]))
 167                 mods[mod_idx++] = m;
 168         return 0;
 169 }
 170
 171 static void
 172 init_children(void)
 173 {
 174         unsigned int i;
 175
 176         /* Call the functions registered at init time. */
 177         printf("%s mod_idx value %d\n", __FUNCTION__, mod_idx);
 178         for (i = 0; i < mod_idx; i++) {
 179                 struct mod_args *m = &mods[i];
 180                 printf("+++ start module %d %s %s at %p order 0x%x\n",
 181                         i, m->name, m->mod ? m->mod->name : "SYSINIT",
 182                         m->mod, m->order);
 183                 if (m->mod && m->mod->evhand)
 184                         m->mod->evhand(NULL, MOD_LOAD, m->mod->priv);
 185                 else if (m->init)
 186                         m->init();
 187         }
 188 }
 189
 190 static void
 191 fini_children(void)
 192 {
 193         int i;
 194
 195         /* Call the functions registered at init time. */
 196         for (i = mod_idx - 1; i >= 0; i--) {
 197                 struct mod_args *m = &mods[i];
 198                 printf("+++ end module %d %s %s at %p order 0x%x\n",
 199                         i, m->name, m->mod ? m->mod->name : "SYSINIT",
 200                         m->mod, m->order);
 201                 if (m->mod && m->mod->evhand)
 202                         m->mod->evhand(NULL, MOD_UNLOAD, m->mod->priv);
 203                 else if (m->uninit)
 204                         m->uninit();
 205         }
 206 }
 207 /*--- end of module binding helper functions ---*/
 208
 209 /*---
 210  * Control hooks:
 211  * ipfw_ctl_h() is a wrapper for linux to FreeBSD sockopt call convention.
 212  * then call the ipfw handler in order to manage requests.
 213  * In turn this is called by the linux set/get handlers.
 214  */
 215 static int
 216 ipfw_ctl_h(struct sockopt *s, int cmd, int dir, int len, void __user *user)
 217 {
 218         struct thread t;
 219         int ret = EINVAL;
 220
 221         memset(s, 0, sizeof(*s));
 222         s->sopt_name = cmd;
 223         s->sopt_dir = dir;
 224         s->sopt_valsize = len;
 225         s->sopt_val = user;
 226
 227         /* sopt_td is not used but it is referenced */
 228         memset(&t, 0, sizeof(t));
 229         s->sopt_td = &t;
 230
 231         //printf("%s called with cmd %d len %d sopt %p user %p\n", __FUNCTION__, cmd, len, s, user);
 232
 233         if (ip_fw_ctl_ptr && cmd != IP_DUMMYNET3 && (cmd == IP_FW3 ||
 234             cmd < IP_DUMMYNET_CONFIGURE))
 235                 ret = ip_fw_ctl_ptr(s);
 236         else if (ip_dn_ctl_ptr && (cmd == IP_DUMMYNET3 ||
 237             cmd >= IP_DUMMYNET_CONFIGURE))
 238                 ret = ip_dn_ctl_ptr(s);
 239
 240         return -ret;    /* errors are < 0 on linux */
 241 }
 242
 243 #ifdef linux
 244 /*
 245  * Convert an mbuf into an skbuff
 246  * At the moment this only works for ip packets fully contained
 247  * in a single mbuf. We assume that on entry ip_len and ip_off are
 248  * in host format, and the ip checksum is not computed.
 249  */
 250 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* check boundary */
 251 int dst_output(struct skbuff *s)
 252 {
 253         return 0;
 254 }
 255
 256 struct sk_buff *
 257 mbuf2skbuff(struct mbuf* m)
 258 {
 259         return NULL;
 260 }
 261 #else
 262 struct sk_buff *
 263 mbuf2skbuff(struct mbuf* m)
 264 {
 265         struct sk_buff *skb;
 266         size_t len = m->m_pkthdr.len;
 267
 268         /* used to lookup the routing table */
 269         struct rtable *r;
 270         struct flowi fl;
 271         int ret = 0;    /* success for ip_route_output_key() */
 272
 273         struct ip *ip = mtod(m, struct ip *);
 274
 275         /* XXX ip_output has ip_len and ip_off in network format,
 276          * linux expects host format */
 277         ip->ip_len = ntohs(ip->ip_len);
 278         ip->ip_off = ntohs(ip->ip_off);
 279
 280         ip->ip_sum = 0;
 281         ip->ip_sum = in_cksum(m, ip->ip_hl<<2);
 282
 283         /* fill flowi struct, we need just the dst addr, see XXX */
 284         bzero(&fl, sizeof(fl));
 285         flow_daddr.daddr = ip->ip_dst.s_addr;
 286
 287         /*
 288          * ip_route_output_key() should increment
 289          * r->u.dst.__use and call a dst_hold(dst)
 290          * XXX verify how we release the resources.
 291          */
 292 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38) /* check boundary */
 293         r = ip_route_output_key(&init_net, &fl.u.ip4);
 294 #elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) /* check boundary */
 295         ret = ip_route_output_key(&init_net, &r, &fl);
 296 #else
 297         ret = ip_route_output_key(&r, &fl);
 298 #endif
 299         if (ret != 0 || r == NULL ) {
 300                 printf("NO ROUTE FOUND\n");
 301                 return NULL;
 302         }
 303
 304         /* allocate the skbuff and the data */
 305         skb = alloc_skb(len + sizeof(struct ethhdr), GFP_ATOMIC);
 306         if (skb == NULL) {
 307                 printf("%s: can not allocate SKB buffers.\n", __FUNCTION__);
 308                 return NULL;
 309         }
 310
 311         skb->protocol = htons(ETH_P_IP); // XXX 8 or 16 bit ?
 312         /* sk_dst_set XXX take the lock (?) */
 313 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
 314         skb_dst_set(skb, &r->u.dst);
 315 #else
 316         skb_dst_set(skb, &r->dst);
 317 #endif
 318         skb->dev = skb_dst(skb)->dev;
 319
 320         /* reserve space for ethernet header */
 321         skb_reserve(skb, sizeof(struct ethhdr));
 322
 323 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
 324         skb_reset_network_header(skb); // skb->network_header = skb->data - skb->head
 325 #else
 326         skb->nh.raw = skb->data;
 327 #endif
 328         /* set skbuff tail pointers and copy content */
 329         skb_put(skb, len);
 330         memcpy(skb->data, m->m_data, len);
 331
 332         return skb;
 333 }
 334 #endif /* linux 2.6+ */
 335 #endif /* linux */
 336
 337
 338 /*
 339  * This function is called to reinject packets to the
 340  * kernel stack within the linux netfilter system
 341  * or to send a new created mbuf.
 342  * In the first case we have a valid sk_buff pointer
 343  * encapsulated within the fake mbuf, so we can call
 344  * the reinject function trough netisr_dispatch.
 345  * In the last case we need to build a sk_buff from scratch,
 346  * before sending out the packet.
 347  */
 348 int
 349 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
 350     struct ip_moptions *imo, struct inpcb *inp)
 351 {
 352         (void)opt; (void)ro; (void)flags; (void)imo; (void)inp; /* UNUSED */
 353         if ( m->m_skb != NULL ) { /* reinjected packet, just call dispatch */
 354                 ND("sending... ");
 355                 netisr_dispatch(0, m);
 356         } else {
 357                 /* self-generated packet, wrap as appropriate and send */
 358 #ifdef __linux__
 359                 struct sk_buff *skb = mbuf2skbuff(m);
 360
 361                 if (skb != NULL)
 362                         dst_output(skb);
 363 #else /* Windows */
 364                 D("unimplemented.");
 365 #endif
 366                 FREE_PKT(m);
 367         }
 368         return 0;
 369 }
 370
 371 /*
 372  * setsockopt hook has no return value other than the error code.
 373  */
 374 int
 375 do_ipfw_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 376 {
 377         struct sockopt s;       /* pass arguments */
 378         (void)sk;               /* UNUSED */
 379         return ipfw_ctl_h(&s, cmd, SOPT_SET, len, user);
 380 }
 381
 382 /*
 383  * getsockopt can can return a block of data in response.
 384  */
 385 int
 386 do_ipfw_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 387 {
 388         struct sockopt s;       /* pass arguments */
 389         int ret = ipfw_ctl_h(&s, cmd, SOPT_GET, *len, user);
 390
 391         (void)sk;               /* UNUSED */
 392         *len = s.sopt_valsize;  /* return length back to the caller */
 393         return ret;
 394 }
 395
 396 #ifdef __linux__
 397
 398 /*
 399  * declare our [get|set]sockopt hooks
 400  */
 401 static struct nf_sockopt_ops ipfw_sockopts = {
 402         .pf             = PF_INET,
 403         .set_optmin     = _IPFW_SOCKOPT_BASE,
 404         .set_optmax     = _IPFW_SOCKOPT_END,
 405         .set            = do_ipfw_set_ctl,
 406         .get_optmin     = _IPFW_SOCKOPT_BASE,
 407         .get_optmax     = _IPFW_SOCKOPT_END,
 408         .get            = do_ipfw_get_ctl,
 409 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
 410         .owner          = THIS_MODULE,
 411 #endif
 412 };
 413
 414 /*----
 415  * We need a number of macros to adapt to the various APIs in
 416  * different linux versions. Among them:
 417  *
 418  * - the hook names change between macros (NF_IP*) and enum NF_INET_*
 419  *
 420  * - the second argument to the netfilter hook is
 421  *      struct sk_buff **       in kernels <= 2.6.22
 422  *      struct sk_buff *        in kernels > 2.6.22
 423  *
 424  * - NF_STOP is not defined before 2.6 so we remap it to NF_ACCEPT
 425  *
 426  * - the packet descriptor passed to the queue handler is
 427  *      struct nf_info          in kernels <= 2.6.24
  428  *      struct nf_queue_entry   in kernels > 2.6.24
 429  *
 430  * - the arguments to the queue handler also change;
 431  */
 432
 433 /*
 434  * declare hook to grab packets from the netfilter interface.
 435  * The NF_* names change in different versions of linux, in some
 436  * cases they are #defines, in others they are enum, so we
 437  * need to adapt.
 438  */
 439 #ifndef NF_IP_PRE_ROUTING
 440 #define NF_IP_PRE_ROUTING       NF_INET_PRE_ROUTING
 441 #endif
 442 #ifndef NF_IP_POST_ROUTING
 443 #define NF_IP_POST_ROUTING      NF_INET_POST_ROUTING
 444 #endif
 445
 446 /*
 447  * ipfw hooks into the POST_ROUTING and the PRE_ROUTING chains.
 448  * PlanetLab sets skb_tag to the slice id in the LOCAL_INPUT and
 449  * POST_ROUTING chains, so if we want to use that information we
 450  * need to hook the LOCAL_INPUT chain instead of the PRE_ROUTING.
 451  * However at the moment the skb_tag info is not reliable so
 452  * we stay with the standard hooks.
 453  */
 454 #if 0 // defined(IPFW_PLANETLAB)
 455 #define IPFW_HOOK_IN NF_IP_LOCAL_IN
 456 #else
 457 #define IPFW_HOOK_IN NF_IP_PRE_ROUTING
 458 #endif
 459
 460 /*
 461  * The main netfilter hook.
 462  * To make life simple, we queue everything and then do all the
 463  * decision in the queue handler.
 464  *
 465  * XXX note that in 2.4 and up to 2.6.22 the skbuf is passed as sk_buff**
 466  * so we have an #ifdef to set the proper argument type.
 467  */
 468 static unsigned int
 469 call_ipfw(
 470 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0)
 471         unsigned int hooknum,
 472 #else
 473         const struct nf_hook_ops *hooknum,
 474 #endif
 475
 476 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // in 2.6.22 we have **
 477         struct sk_buff  **skb,
 478 #else
 479         struct sk_buff  *skb,
 480 #endif
 481         const struct net_device *in, const struct net_device *out,
 482         int (*okfn)(struct sk_buff *))
 483 {
 484         (void)hooknum; (void)skb; (void)in; (void)out; (void)okfn; /* UNUSED */
 485         return NF_QUEUE;
 486 }
 487
 488 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) /* XXX was 2.6.0 */
 489 #define NF_STOP         NF_ACCEPT
 490 #endif
 491
 492 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
 493
 494 /*
 495  * nf_queue_entry is a recent addition, in previous versions
 496  * of the code the struct is called nf_info.
 497  */
 498 #define nf_queue_entry  nf_info /* for simplicity */
 499
 500 /* also, 2.4 and perhaps something else have different arguments */
 501 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) /* XXX unsure */
 502 /* on 2.4 we use nf_info */
 503 #define QH_ARGS         struct sk_buff *skb, struct nf_info *info, void *data
 504 #else   /* 2.6.14. 2.6.24 */
 505 #define QH_ARGS         struct sk_buff *skb, struct nf_info *info, unsigned int qnum, void *data
 506 #endif
 507
 508 #define DEFINE_SKB      /* nothing, already an argument */
 509 #define REINJECT(_inf, _verd)   nf_reinject(skb, _inf, _verd)
 510
 511 #else   /* 2.6.25 and above */
 512
 513 #define QH_ARGS         struct nf_queue_entry *info, unsigned int queuenum
 514 #define DEFINE_SKB      struct sk_buff *skb = info->skb;
 515 #define REINJECT(_inf, _verd)   nf_reinject(_inf, _verd)
 516 #endif
 517
 518 /*
 519  * used by dummynet when dropping packets
 520  * XXX use dummynet_send()
 521  */
 522 void
 523 reinject_drop(struct mbuf* m)
 524 {
 525 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) /* unsure on the exact boundary */
 526         struct sk_buff *skb = (struct sk_buff *)m;
 527 #endif
 528         REINJECT(m->queue_entry, NF_DROP);
 529 }
 530
 531 /*
 532  * The real call to the firewall. nf_queue_entry points to the skbuf,
 533  * and eventually we need to return both through nf_reinject().
 534  */
 535 static int
 536 ipfw2_queue_handler(QH_ARGS)
 537 {
 538         DEFINE_SKB      /* no semicolon here, goes in the macro */
 539         int ret = 0;    /* return value */
 540         struct mbuf *m;
 541
 542 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
 543         if (skb->nh.iph == NULL) {
 544                 printf("null dp, len %d reinject now\n", skb->len);
 545                 REINJECT(info, NF_ACCEPT);
 546                 return 0;
 547         }
 548 #endif
 549         m = malloc(sizeof(*m), 0, 0);
 550         if (m == NULL) {
 551                 printf("malloc fail, len %d reinject now\n", skb->len);
 552                 REINJECT(info, NF_ACCEPT);
 553                 return 0;
 554         }
 555
 556         m->m_skb = skb;
 557         m->m_len = skb->len;            /* len from ip header to end */
 558         m->m_pkthdr.len = skb->len;     /* total packet len */
 559         m->m_pkthdr.rcvif = info->indev;
 560         m->queue_entry = info;
 561 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) /* XXX was 2.6.0 */
 562         m->m_data = (char *)skb->nh.iph;
 563 #else
 564         m->m_data = (char *)skb_network_header(skb);    // XXX unsigned ? */
 565 #endif
 566
 567         /* XXX add the interface */
 568         if (info->hook == IPFW_HOOK_IN) {
 569                 ret = ipfw_check_hook(NULL, &m, info->indev, PFIL_IN, NULL);
 570         } else {
 571                 ret = ipfw_check_hook(NULL, &m, info->outdev, PFIL_OUT, NULL);
 572         }
 573
 574         if (m != NULL) {        /* Accept. reinject and free the mbuf */
 575                 REINJECT(info, NF_ACCEPT);
 576                 m_freem(m);
 577         } else if (ret == 0) {
 578                 /* dummynet has kept the packet, will reinject later. */
 579         } else {
 580                 /*
 581                  * Packet dropped by ipfw or dummynet. Nothing to do as
 582                  * FREE_PKT already did a reinject as NF_DROP
 583                  */
 584         }
 585         return 0;
 586 }
 587
 588 struct route;
 589 struct ip_moptions;
 590 struct inpcb;
 591
 592 /* XXX should include prototypes for netisr_dispatch and ip_output */
 593 /*
 594  * The reinjection routine after a packet comes out from dummynet.
 595  * We must update the skb timestamp so ping reports the right time.
 596  * This routine is also used (with num == -1) as FREE_PKT. XXX
 597  */
 598 void
 599 netisr_dispatch(int num, struct mbuf *m)
 600 {
 601         struct nf_queue_entry *info = m->queue_entry;
 602         struct sk_buff *skb = m->m_skb; /* always used */
 603
 604         /*
 605          * This function can be called by the FREE_PKT()
 606          * used when ipfw generate their own mbuf packets
 607          * or by the mbuf2skbuff() function.
 608          */
 609         m_freem(m);
 610
 611         /* XXX check
 612          * info is null in the case of a real mbuf
 613          * (one created by the ipfw code without a
 614          * valid sk_buff pointer
 615          */
 616         if (info == NULL)
 617                 return;
 618
 619 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)        // XXX above 2.6.x ?
 620         __net_timestamp(skb);   /* update timestamp */
 621 #endif
 622
 623         /* XXX to obey one-pass, possibly call the queue handler here */
 624         REINJECT(info, ((num == -1)?NF_DROP:NF_STOP));  /* accept but no more firewall */
 625 }
 626
 627 /*
 628  * socket lookup function for linux.
 629  * This code is used to associate uid, gid, jail/xid to packets,
 630  * and store the info in a cache *ugp where they can be accessed quickly.
 631  * The function returns 1 if the info is found, -1 otherwise.
 632  *
 633  * We do this only on selected protocols: TCP, ...
 634  *
 635  * The chain is the following
 636  *   sk_buff*  sock*  socket*    file*
 637  *      skb  ->  sk ->sk_socket->file ->f_owner    ->pid
 638  *      skb  ->  sk ->sk_socket->file ->f_uid (direct)
 639  *      skb  ->  sk ->sk_socket->file ->f_cred->fsuid (2.6.29+)
 640  *
 641  * Related headers:
 642  * linux/skbuff.h       struct skbuff
 643  * net/sock.h           struct sock
 644  * linux/net.h          struct socket
 645  * linux/fs.h           struct file
 646  *
 647  * With vserver we may have sk->sk_xid and sk->sk_nid that
 648  * which we store in fw_groups[1] (matches O_JAIL) and fw_groups[2]
 649  * (no matches yet)
 650  *
 651  * Note- for locally generated, outgoing packets we should not need
 652  * need a lookup because the sk_buff already points to the socket where
 653  * the info is.
 654  */
 655 extern struct inet_hashinfo tcp_hashinfo;
 656 int
 657 linux_lookup(const int proto, const __be32 saddr, const __be16 sport,
 658                 const __be32 daddr, const __be16 dport,
 659                 struct sk_buff *skb, int dir, struct bsd_ucred *u)
 660 {
 661 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,13)        /* XXX was 2.6.0 */
 662         return -1;
 663 #else
 664         struct sock *sk;
 665         int ret = -1;   /* default return value */
 666         int st = -1;    /* state */
 667
 668
 669         if (proto != IPPROTO_TCP)       /* XXX extend for UDP */
 670                 return -1;
 671
 672         if ((dir ? (void *)skb_dst(skb) : (void *)skb->dev) == NULL) {
 673                 panic(" -- this should not happen\n");
 674                 return -1;
 675         }
 676
 677         if (skb->sk) {
 678                 sk = skb->sk;
 679         } else {
 680                 /*
 681                  * Try a lookup. On a match, sk has a refcount that we must
 682                  * release on exit (we know it because skb->sk = NULL).
 683                  *
 684                  * inet_lookup above 2.6.24 has an additional 'net' parameter
 685                  * so we use a macro to conditionally supply it.
 686                  * swap dst and src depending on the direction.
 687                  */
 688 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24)
 689 #define _OPT_NET_ARG
 690 #else
 691 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
 692 /* there is no dev_net() on 2.6.25 */
 693 #define _OPT_NET_ARG (skb->dev->nd_net),
 694 #else   /* 2.6.26 and above */
 695 #define _OPT_NET_ARG dev_net(skb->dev),
 696 #endif
 697 #endif
 698                 sk =  (dir) ? /* dir != 0 on output */
 699                     inet_lookup(_OPT_NET_ARG &tcp_hashinfo,
 700                         daddr, dport, saddr, sport,     // match outgoing
 701                         inet_iif(skb)) :
 702                     inet_lookup(_OPT_NET_ARG &tcp_hashinfo,
 703                         saddr, sport, daddr, dport,     // match incoming
 704                         skb->dev->ifindex);
 705 #undef _OPT_NET_ARG
 706
 707                 if (sk == NULL) /* no match, nothing to be done */
 708                         return -1;
 709         }
 710         ret = 1;        /* retrying won't make things better */
 711         st = sk->sk_state;
 712 #ifdef CONFIG_VSERVER
 713         u->xid = sk->sk_xid;
 714         u->nid = sk->sk_nid;
 715 #else
 716         u->xid = u->nid = 0;
 717 #endif
 718         /*
 719          * Exclude tcp states where sk points to a inet_timewait_sock which
 720          * has no sk_socket field (surely TCP_TIME_WAIT, perhaps more).
 721          * To be safe, use a whitelist and not a blacklist.
 722          * Before dereferencing sk_socket grab a lock on sk_callback_lock.
 723          *
 724          * Once again we need conditional code because the UID and GID
 725          * location changes between kernels.
 726          */
 727 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,28)
 728 /* use the current's real uid/gid */
 729 #define _CURR_UID f_uid
 730 #define _CURR_GID f_gid
 731 #else /* 2.6.29 and above */
 732 /* use the current's file access real uid/gid */
 733 #define _CURR_UID f_cred->fsuid
 734 #define _CURR_GID f_cred->fsgid
 735 #endif
 736
 737 #define GOOD_STATES (   \
 738         (1<<TCP_LISTEN) | (1<<TCP_SYN_RECV)   | (1<<TCP_SYN_SENT)   | \
 739         (1<<TCP_ESTABLISHED)  | (1<<TCP_FIN_WAIT1) | (1<<TCP_FIN_WAIT2) )
 740         // surely exclude TCP_CLOSE, TCP_TIME_WAIT, TCP_LAST_ACK
 741         // uncertain TCP_CLOSE_WAIT and TCP_CLOSING
 742
 743         if ((1<<st) & GOOD_STATES) {
 744                 read_lock_bh(&sk->sk_callback_lock);
 745                 if (sk->sk_socket && sk->sk_socket->file) {
 746                         //u->uid = sk->sk_socket->file->_CURR_UID;
 747                         //u->gid = sk->sk_socket->file->_CURR_GID;
 748                 }
 749                 read_unlock_bh(&sk->sk_callback_lock);
 750         } else {
 751                 u->uid = u->gid = 0;
 752         }
 753         if (!skb->sk) /* return the reference that came from the lookup */
 754                 sock_put(sk);
 755 #undef GOOD_STATES
 756 #undef _CURR_UID
 757 #undef _CURR_GID
 758         return ret;
 759
 760 #endif /* LINUX > 2.4 */
 761 }
 762
 763 /*
 764  * Now prepare to hook the various functions.
 765  * Linux 2.4 has a different API so we need some adaptation
 766  * for register and unregister hooks
 767  *
 768  * the unregister function changed arguments between 2.6.22 and 2.6.24
 769  */
 770 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
 771 struct nf_queue_handler ipfw2_queue_handler_desc = {
 772         .outfn = ipfw2_queue_handler,
 773 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,2)
 774         .name = "ipfw2 dummynet queue",
 775 #endif
 776 };
 777 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,2)
 778 #define REG_QH_ARG(pf, fn)      pf, &(fn ## _desc)
 779 #else
 780 #define REG_QH_ARG(pf, fn)      &(fn ## _desc)
 781 #endif
 782 #endif
 783
 784 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) /* XXX was 2.6.0 */
 785 static int
 786 nf_register_hooks(struct nf_hook_ops *ops, int n)
 787 {
 788         int i, ret = 0;
 789         for (i = 0; i < n; i++) {
 790                 ret = nf_register_hook(ops + i);
 791                 if (ret < 0)
 792                         break;
 793         }
 794         return ret;
 795 }
 796
 797 static void
 798 nf_unregister_hooks(struct nf_hook_ops *ops, int n)
 799 {
 800         int i;
 801         for (i = 0; i < n; i++) {
 802                 nf_unregister_hook(ops + i);
 803         }
 804 }
 805 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) /* XXX was 2.6.0 */
 806 #define REG_QH_ARG(pf, fn)      pf, fn, NULL
 807 #endif
 808 #define UNREG_QH_ARG(pf, fn) //fn       /* argument for nf_[un]register_queue_handler */
 809 #define SET_MOD_OWNER
 810
 811 #else /* linux > 2.6.17 */
 812
 813 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
 814 #define UNREG_QH_ARG(pf, fn) //fn
 815 #elif LINUX_VERSION_CODE < KERNEL_VERSION(3,8,2)
 816 #define UNREG_QH_ARG(pf, fn)    pf, &(fn ## _desc)
 817 #else
 818 #define UNREG_QH_ARG(pf, fn)
 819 #endif /* 2.6.0 < LINUX > 2.6.24 */
 820
 821 #define SET_MOD_OWNER   .owner = THIS_MODULE,
 822
 823 #endif  /* !LINUX < 2.6.0 */
 824
 825 static struct nf_hook_ops ipfw_ops[] __read_mostly = {
 826         {
 827                 .hook           = call_ipfw,
 828                 .pf             = PF_INET,
 829                 .hooknum        = IPFW_HOOK_IN,
 830                 .priority       = NF_IP_PRI_FILTER,
 831                 SET_MOD_OWNER
 832         },
 833         {
 834                 .hook           = call_ipfw,
 835                 .pf             = PF_INET,
 836                 .hooknum        = NF_IP_POST_ROUTING,
 837                 .priority       = NF_IP_PRI_FILTER,
 838                 SET_MOD_OWNER
 839         },
 840 };
 841 #endif /* __linux__ */
 842
 843 /* descriptors for the children, until i find a way for the
 844  * linker to produce them
 845  */
 846 extern moduledata_t *moddesc_ipfw;
 847 extern moduledata_t *moddesc_dummynet;
 848 extern moduledata_t *moddesc_dn_fifo;
 849 extern moduledata_t *moddesc_dn_wf2qp;
 850 extern moduledata_t *moddesc_dn_rr;
 851 extern moduledata_t *moddesc_dn_qfq;
 852 extern moduledata_t *moddesc_dn_prio;
 853 extern void *sysinit_ipfw_init;
 854 extern void *sysuninit_ipfw_destroy;
 855 extern void *sysinit_vnet_ipfw_init;
 856 extern void *sysuninit_vnet_ipfw_uninit;
 857
 858 /*
 859  * Module glue - init and exit function.
 860  */
 861 int __init
 862 ipfw_module_init(void)
 863 {
 864         int ret = 0;
 865 #ifdef _WIN32
 866         unsigned long resolution;
 867 #endif
 868
 869         rn_init(64);
 870         my_mod_register("ipfw",  1, moddesc_ipfw, NULL, NULL);
 871         my_mod_register("sy_ipfw",  2, NULL,
 872                 sysinit_ipfw_init, sysuninit_ipfw_destroy);
 873         my_mod_register("sy_Vnet_ipfw",  3, NULL,
 874                 sysinit_vnet_ipfw_init, sysuninit_vnet_ipfw_uninit);
 875         my_mod_register("dummynet",  4, moddesc_dummynet, NULL, NULL);
 876         my_mod_register("dn_fifo",  5, moddesc_dn_fifo, NULL, NULL);
 877         my_mod_register("dn_wf2qp",  6, moddesc_dn_wf2qp, NULL, NULL);
 878         my_mod_register("dn_rr",  7, moddesc_dn_rr, NULL, NULL);
 879         my_mod_register("dn_qfq",  8, moddesc_dn_qfq, NULL, NULL);
 880         my_mod_register("dn_prio",  9, moddesc_dn_prio, NULL, NULL);
 881         init_children();
 882
 883 #ifdef _WIN32
 884         resolution = ExSetTimerResolution(1, TRUE);
 885         printf("*** ExSetTimerResolution: resolution set to %d n-sec ***\n",resolution);
 886 #endif
 887 #ifdef EMULATE_SYSCTL
 888         keinit_GST();
 889 #endif
 890
 891 #ifdef __linux__
 892         /* sockopt register, in order to talk with user space */
 893         ret = nf_register_sockopt(&ipfw_sockopts);
 894         if (ret < 0) {
 895                 printf("error %d in nf_register_sockopt\n", ret);
 896                 goto clean_modules;
 897         }
 898
 899         /* queue handler registration, in order to get network
 900          * packet under a private queue */
 901 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,2)
 902         ret =
 903 #endif
 904             nf_register_queue_handler(REG_QH_ARG(PF_INET, ipfw2_queue_handler) );
 905         if (ret < 0)    /* queue busy */
 906                 goto unregister_sockopt;
 907
 908         ret = nf_register_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops));
 909         if (ret < 0)
 910                 goto unregister_sockopt;
 911
 912         printf("%s loaded\n", __FUNCTION__);
 913         return 0;
 914
 915
 916 /* handle errors on load */
 917 unregister_sockopt:
 918         nf_unregister_queue_handler(UNREG_QH_ARG(PF_INET, ipfw2_queue_handler) );
 919         nf_unregister_sockopt(&ipfw_sockopts);
 920
 921 clean_modules:
 922         fini_children();
 923         printf("%s error\n", __FUNCTION__);
 924
 925 #endif  /* __linux__ */
 926         return ret;
 927 }
 928
 929 /* module shutdown */
 930 void __exit
 931 ipfw_module_exit(void)
 932 {
 933 #ifdef EMULATE_SYSCTL
 934         keexit_GST();
 935 #endif
 936 #ifdef _WIN32
 937         ExSetTimerResolution(0,FALSE);
 938
 939 #else  /* linux hook */
 940         nf_unregister_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops));
 941         /* maybe drain the queue before unregistering ? */
 942         nf_unregister_queue_handler(UNREG_QH_ARG(PF_INET, ipfw2_queue_handler) );
 943         nf_unregister_sockopt(&ipfw_sockopts);
 944 #endif  /* __linux__ */
 945
 946         fini_children();
 947
 948         printf("%s unloaded\n", __FUNCTION__);
 949 }
 950
 951 #ifdef __linux__
 952 module_init(ipfw_module_init)
 953 module_exit(ipfw_module_exit)
 954 MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
 955 #endif