/* * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Id$ * * The main interface to build ipfw+dummynet as a linux module. * (and possibly as a windows module as well, though that part * is not complete yet). * * The control interface uses the sockopt mechanism * on a socket(AF_INET, SOCK_RAW, IPPROTO_RAW). * * The data interface uses the netfilter interface, at the moment * hooked to the PRE_ROUTING and POST_ROUTING hooks. * Unfortunately the netfilter interface is a moving target, * so we need a set of macros to adapt to the various cases. * * In the netfilter hook we just mark packet as 'QUEUE' and then * let the queue handler to do the whole work (filtering and * possibly emulation). * As we receive packets, we wrap them with an mbuf descriptor * so the existing ipfw+dummynet code runs unmodified. */ #include #include /* sizeof struct mbuf */ #include /* NGROUPS */ #ifdef __linux__ #include #include #include #include /* NF_IP_PRI_FILTER */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) #include /* nf_queue */ #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) #define __read_mostly #endif #endif /* !__linux__ */ #include /* in_addr */ #include /* ip_fw_ctl_t, ip_fw_chk_t */ #include /* ip_dn_ctl_t, ip_dn_io_t */ #include /* PFIL_IN, PFIL_OUT */ #include /* inet_lookup */ #include /* inet_iif */ /* * Here we allocate some global variables used in the firewall. */ ip_dn_ctl_t *ip_dn_ctl_ptr; ip_fw_ctl_t *ip_fw_ctl_ptr; ip_dn_io_t *ip_dn_io_ptr; ip_fw_chk_t *ip_fw_chk_ptr; void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /*--- * Glue code to implement the registration of children with the parent. * Each child should call my_mod_register() when linking, so that * module_init() and module_exit() can call init_children() and * fini_children() to provide the necessary initialization. */ #include struct mod_args { struct moduledata *mod; const char *name; int order; }; static unsigned int mod_idx; static struct mod_args mods[10]; /* hard limit to 10 modules */ /* * Data structure to cache our ucred related * information. This structure only gets used if * the user specified UID/GID based constraints in * a firewall rule. */ struct ip_fw_ugid { gid_t fw_groups[NGROUPS]; int fw_ngroups; uid_t fw_uid; int fw_prid; }; /* * my_mod_register should be called automatically as the init * functions in the submodules. Unfortunately this compiler/linker * trick is not supported yet so we call it manually. */ int my_mod_register(struct moduledata *mod, const char *name, int order) { struct mod_args m = { mod, name, order }; printf("%s %s called\n", __FUNCTION__, name); if (mod_idx < sizeof(mods) / sizeof(mods[0])) mods[mod_idx++] = m; return 0; } static void init_children(void) { unsigned int i; /* Call the functions registered at init time. */ printf("%s mod_idx value %d\n", __FUNCTION__, mod_idx); for (i = 0; i < mod_idx; i++) { printf("+++ start module %d %s %s at %p order 0x%x\n", i, mods[i].name, mods[i].mod->name, mods[i].mod, mods[i].order); mods[i].mod->evhand(NULL, MOD_LOAD, mods[i].mod->priv); } } static void fini_children(void) { int i; /* Call the functions registered at init time. */ for (i = mod_idx - 1; i >= 0; i--) { printf("+++ end module %d %s %s at %p order 0x%x\n", i, mods[i].name, mods[i].mod->name, mods[i].mod, mods[i].order); mods[i].mod->evhand(NULL, MOD_UNLOAD, mods[i].mod->priv); } } /*--- end of module bindinghelper functions ---*/ /*--- * Control hooks: * ipfw_ctl_h() is a wrapper for linux to FreeBSD sockopt call convention. * then call the ipfw handler in order to manage requests. * In turn this is called by the linux set/get handlers. */ static int ipfw_ctl_h(struct sockopt *s, int cmd, int dir, int len, void __user *user) { struct thread t; int ret = EINVAL; memset(s, 0, sizeof(s)); s->sopt_name = cmd; s->sopt_dir = dir; s->sopt_valsize = len; s->sopt_val = user; /* sopt_td is not used but it is referenced */ memset(&t, 0, sizeof(t)); s->sopt_td = &t; // printf("%s called with cmd %d len %d\n", __FUNCTION__, cmd, len); if (cmd < IP_DUMMYNET_CONFIGURE && ip_fw_ctl_ptr) ret = ip_fw_ctl_ptr(s); else if (cmd >= IP_DUMMYNET_CONFIGURE && ip_dn_ctl_ptr) ret = ip_dn_ctl_ptr(s); return -ret; /* errors are < 0 on linux */ } #ifdef _WIN32 void netisr_dispatch(int __unused num, struct mbuf *m) { } int ip_output(struct mbuf *m, struct mbuf __unused *opt, struct route __unused *ro, int __unused flags, struct ip_moptions __unused *imo, struct inpcb __unused *inp) { netisr_dispatch(0, m); return 0; } #else /* this is the linux glue */ /* * setsockopt hook has no return value other than the error code. */ static int do_ipfw_set_ctl(struct sock __unused *sk, int cmd, void __user *user, unsigned int len) { struct sockopt s; /* pass arguments */ return ipfw_ctl_h(&s, cmd, SOPT_SET, len, user); } /* * getsockopt can can return a block of data in response. */ static int do_ipfw_get_ctl(struct sock __unused *sk, int cmd, void __user *user, int *len) { struct sockopt s; /* pass arguments */ int ret = ipfw_ctl_h(&s, cmd, SOPT_GET, *len, user); *len = s.sopt_valsize; /* return lenght back to the caller */ return ret; } /* * declare our [get|set]sockopt hooks */ static struct nf_sockopt_ops ipfw_sockopts = { .pf = PF_INET, .set_optmin = _IPFW_SOCKOPT_BASE, .set_optmax = _IPFW_SOCKOPT_END, .set = do_ipfw_set_ctl, .get_optmin = _IPFW_SOCKOPT_BASE, .get_optmax = _IPFW_SOCKOPT_END, .get = do_ipfw_get_ctl, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24) .owner = THIS_MODULE, #endif }; /*---- * We need a number of macros to adapt to the various APIs in * different linux versions. Among them: * * - the hook names change between macros (NF_IP*) and enum NF_INET_* * * - the second argument to the netfilter hook is * struct sk_buff ** in kernels <= 2.6.22 * struct sk_buff * in kernels > 2.6.22 * * - NF_STOP is not defined before 2.6 so we remap it to NF_ACCEPT * * - the packet descriptor passed to the queue handler is * struct nf_info in kernels <= 2.6.24 * struct nf_queue_entry in kernels <= 2.6.24 * * - the arguments to the queue handler also change; */ /* * declare hook to grab packets from the netfilter interface. * The NF_* names change in different versions of linux, in some * cases they are #defines, in others they are enum, so we * need to adapt. */ #ifndef NF_IP_PRE_ROUTING #define NF_IP_PRE_ROUTING NF_INET_PRE_ROUTING #endif #ifndef NF_IP_POST_ROUTING #define NF_IP_POST_ROUTING NF_INET_POST_ROUTING #endif /* * ipfw hooks into the POST_ROUTING and the PRE_ROUTING chains. * PlanetLab sets skb_tag to the slice id in the LOCAL_INPUT and * POST_ROUTING chains, so if we want to use that information we * need to hook the LOCAL_INPUT chain instead of the PRE_ROUTING. * However at the moment the skb_tag info is not reliable so * we stay with the standard hooks. */ #if 0 // defined(IPFW_PLANETLAB) #define IPFW_HOOK_IN NF_IP_LOCAL_IN #else #define IPFW_HOOK_IN NF_IP_PRE_ROUTING #endif /* * The main netfilter hook. * To make life simple, we queue everything and then do all the * decision in the queue handler. * * XXX note that in 2.4 and up to 2.6.22 the skbuf is passed as sk_buff** * so we have an #ifdef to set the proper argument type. */ static unsigned int call_ipfw(unsigned int __unused hooknum, #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // in 2.6.22 we have ** struct sk_buff __unused **skb, #else struct sk_buff __unused *skb, #endif const struct net_device __unused *in, const struct net_device __unused *out, int __unused (*okfn)(struct sk_buff *)) { return NF_QUEUE; } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) #define NF_STOP NF_ACCEPT #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) /* * nf_queue_entry is a recent addition, in previous versions * of the code the struct is called nf_info. */ #define nf_queue_entry nf_info /* for simplicity */ /* also, 2.4 and perhaps something else have different arguments */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* unsure on the exact boundary */ /* on 2.4 we use nf_info */ #define QH_ARGS struct sk_buff *skb, struct nf_info *info, void *data #else /* 2.6.1.. 2.6.24 */ #define QH_ARGS struct sk_buff *skb, struct nf_info *info, unsigned int qnum, void *data #endif #define DEFINE_SKB /* nothing, already an argument */ #define REINJECT(_inf, _verd) nf_reinject(skb, _inf, _verd) #else /* 2.6.25 and above */ #define QH_ARGS struct nf_queue_entry *info, unsigned int queuenum #define DEFINE_SKB struct sk_buff *skb = info->skb; #define REINJECT(_inf, _verd) nf_reinject(_inf, _verd) #endif /* * used by dummynet when dropping packets * XXX use dummynet_send() */ void reinject_drop(struct mbuf* m) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) /* unsure on the exact boundary */ struct sk_buff *skb = (struct sk_buff *)m; #endif REINJECT(m->queue_entry, NF_DROP); } /* * The real call to the firewall. nf_queue_entry points to the skbuf, * and eventually we need to return both through nf_reinject(). */ static int ipfw2_queue_handler(QH_ARGS) { DEFINE_SKB /* no semicolon here, goes in the macro */ int ret = 0; /* return value */ struct mbuf *m; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) if (skb->nh.iph == NULL) { printf("null dp, len %d reinject now\n", skb->len); REINJECT(info, NF_ACCEPT); return 0; } #endif m = malloc(sizeof(*m), 0, 0); if (m == NULL) { printf("malloc fail, len %d reinject now\n", skb->len); REINJECT(info, NF_ACCEPT); return 0; } m->m_skb = skb; m->m_len = skb->len; /* len in this skbuf */ m->m_pkthdr.len = skb->len; /* total packet len */ m->m_pkthdr.rcvif = info->indev; m->queue_entry = info; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) m->m_data = skb->nh.iph; #else m->m_data = skb_network_header(skb); #endif /* XXX add the interface */ if (info->hook == IPFW_HOOK_IN) { ret = ipfw_check_in(NULL, &m, info->indev, PFIL_IN, NULL); } else { ret = ipfw_check_out(NULL, &m, info->outdev, PFIL_OUT, NULL); } if (m != NULL) { /* Accept. reinject and free the mbuf */ REINJECT(info, NF_STOP); m_freem(m); } else if (ret == 0) { /* dummynet has kept the packet, will reinject later. */ } else { /* * Packet dropped by ipfw or dummynet, reinject as NF_DROP * mbuf already released by ipfw itself */ REINJECT(info, NF_DROP); } return 0; } struct route; struct ip_moptions; struct inpcb; /* XXX should include prototypes for netisr_dispatch and ip_output */ /* * The reinjection routine after a packet comes out from dummynet. * We must update the skb timestamp so ping reports the right time. */ void netisr_dispatch(int num, struct mbuf *m) { struct nf_queue_entry *info = m->queue_entry; struct sk_buff *skb = m->m_skb; /* always used */ m_freem(m); KASSERT((info != NULL), ("%s info null!\n", __FUNCTION__)); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) // XXX above 2.6.x ? __net_timestamp(skb); /* update timestamp */ #endif /* XXX to obey one-pass, possibly call the queue handler here */ REINJECT(info, ((num == -1)?NF_DROP:NF_STOP)); /* accept but no more firewall */ } int ip_output(struct mbuf *m, struct mbuf __unused *opt, struct route __unused *ro, int __unused flags, struct ip_moptions __unused *imo, struct inpcb __unused *inp) { netisr_dispatch(0, m); return 0; } /* * socket lookup function for linux. * This code is used to associate uid, gid, jail/xid to packets, * and store the info in a cache *ugp where they can be accessed quickly. * The function returns 1 if the info is found, -1 otherwise. * * We do this only on selected protocols: TCP, ... * * The chain is the following * sk_buff* sock* socket* file* * skb -> sk ->sk_socket->file ->f_owner ->pid * skb -> sk ->sk_socket->file ->f_uid (direct) * skb -> sk ->sk_socket->file ->f_cred->fsuid (2.6.29+) * * Related headers: * linux/skbuff.h struct skbuff * net/sock.h struct sock * linux/net.h struct socket * linux/fs.h struct file * * With vserver we may have sk->sk_xid and sk->sk_nid that * which we store in fw_groups[1] (matches O_JAIL) and fw_groups[2] * (no matches yet) * * Note- for locally generated, outgoing packets we should not need * need a lookup because the sk_buff already points to the socket where * the info is. */ extern struct inet_hashinfo tcp_hashinfo; int linux_lookup(const int proto, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, struct sk_buff *skb, int dir, struct ip_fw_ugid *ugp) { struct sock *sk; int ret = -1; /* default return value */ int uid = -1; /* user id */ int st = -1; /* state */ if (proto != IPPROTO_TCP) return -1; if ((dir ? (void *)skb->dst : (void *)skb->dev) == NULL) { panic(" -- this should not happen\n"); return -1; } /* * inet_lookup above 2.6.24 has an additional 'net' parameter * so we use a macro to conditionally supply it. * Also we need to switch dst and src depending on the direction. */ #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24) #define _OPT_NET_ARG #else #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) /* there is no dev_net() on 2.6.25 */ #define _OPT_NET_ARG (skb->dev->nd_net), #else /* 2.6.26 and above */ #define _OPT_NET_ARG dev_net(skb->dev), #endif #endif if (0 && skb->sk) { sk=skb->sk; } else { sk = (dir) ? inet_lookup(_OPT_NET_ARG &tcp_hashinfo, daddr, dport, saddr, sport, // matches outgoing for server sockets inet_iif(skb)) : inet_lookup(_OPT_NET_ARG &tcp_hashinfo, saddr, sport, daddr, dport, // matches incoming for server sockets skb->dev->ifindex); } #undef _OPT_NET_ARG /* no match, nothing to be done */ if (sk == NULL) return -1; /* * On a match, sk is returned with a refcount. * In tcp some states reference a valid struct sock * which is what we want, otherwise the struct sock * referenced can be invalid, as in the case of the * TCP_TIME_WAIT state, when it references a * struct inet_timewait_sock which does not point to credentials. * To be safe we exclude TCP_CLOSE and TCP_LAST_ACK states too. * * Once again we need conditional code because the UID and GID * location changes between the two kernels. */ #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,28) /* use the current's real uid/gid */ #define _CURR_UID f_uid #define _CURR_GID f_gid #else /* 2.6.29 and above */ /* use the current's file access real uid/gid */ #define _CURR_UID f_cred->fsuid #define _CURR_GID f_cred->fsgid #endif st = sk->sk_state; if (st != TCP_TIME_WAIT && st != TCP_CLOSE && st != TCP_LAST_ACK && sk->sk_socket && sk->sk_socket->file) { ugp->fw_uid = sk->sk_socket->file->_CURR_UID; uid = ugp->fw_uid; ugp->fw_groups[0] = sk->sk_socket->file->_CURR_GID; #ifdef CONFIG_VSERVER ugp->fw_groups[1] = sk->sk_xid; ugp->fw_groups[2] = sk->sk_nid; #endif ret = 1; } if (1 || !skb->sk) /* the reference came from the lookup */ sock_put(sk); #undef _CURR_UID #undef _CURR_GID //printf("%s dir %d sb>dst %p sb>dev %p ret %d id %d st%d\n", __FUNCTION__, dir, skb->dst, skb->dev, ret, uid, st); return ret; } /* * Now prepare to hook the various functions. * Linux 2.4 has a different API so we need some adaptation * for register and unregister hooks * * the unregister function changed arguments between 2.6.22 and 2.6.24 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) static int nf_register_hooks(struct nf_hook_ops *ops, int n) { int i, ret = 0; for (i = 0; i < n; i++) { ret = nf_register_hook(ops + i); if (ret < 0) break; } return ret; } static void nf_unregister_hooks(struct nf_hook_ops *ops, int n) { int i; for (i = 0; i < n; i++) { nf_unregister_hook(ops + i); } } #define REG_QH_ARG(fn) fn, NULL /* argument for nf_[un]register_queue_handler */ #define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ #define SET_MOD_OWNER #else /* linux >= 2.6.0 */ struct nf_queue_handler ipfw2_queue_handler_desc = { .outfn = ipfw2_queue_handler, .name = "ipfw2 dummynet queue", }; #define REG_QH_ARG(fn) &(fn ## _desc) #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) #define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ #else #define UNREG_QH_ARG(fn) , &(fn ## _desc) #endif /* 2.6.0 < LINUX > 2.6.24 */ #define SET_MOD_OWNER .owner = THIS_MODULE, #endif /* !LINUX < 2.6.0 */ static struct nf_hook_ops ipfw_ops[] __read_mostly = { { .hook = call_ipfw, .pf = PF_INET, .hooknum = IPFW_HOOK_IN, .priority = NF_IP_PRI_FILTER, SET_MOD_OWNER }, { .hook = call_ipfw, .pf = PF_INET, .hooknum = NF_IP_POST_ROUTING, .priority = NF_IP_PRI_FILTER, SET_MOD_OWNER }, }; #endif /* !__linux__ */ /* descriptors for the children */ extern moduledata_t *moddesc_ipfw; extern moduledata_t *moddesc_dummynet; /* * Module glue - init and exit function. */ static int __init ipfw_module_init(void) { int ret = 0; printf("%s in-hook %d svn id %s\n", __FUNCTION__, IPFW_HOOK_IN, "$Id$"); my_mod_register(moddesc_ipfw, "ipfw", 1); my_mod_register(moddesc_dummynet, "dummynet", 2); init_children(); #ifdef _WIN32 return ret; #else /* linux hook */ /* sockopt register, in order to talk with user space */ ret = nf_register_sockopt(&ipfw_sockopts); if (ret < 0) { printf("error %d in nf_register_sockopt\n", ret); goto clean_modules; } /* queue handler registration, in order to get network * packet under a private queue */ ret = nf_register_queue_handler(PF_INET, REG_QH_ARG(ipfw2_queue_handler) ); if (ret < 0) /* queue busy */ goto unregister_sockopt; ret = nf_register_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); if (ret < 0) goto unregister_sockopt; printf("%s loaded\n", __FUNCTION__); return 0; /* handle errors on load */ unregister_sockopt: nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); nf_unregister_sockopt(&ipfw_sockopts); clean_modules: fini_children(); printf("%s error\n", __FUNCTION__); return ret; #endif /* linux */ } /* module shutdown */ static void __exit ipfw_module_exit(void) { #ifdef _WIN32 #else /* linux hook */ nf_unregister_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); /* maybe drain the queue before unregistering ? */ nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); nf_unregister_sockopt(&ipfw_sockopts); #endif /* linux */ fini_children(); printf("%s unloaded\n", __FUNCTION__); } #ifdef __linux__ module_init(ipfw_module_init) module_exit(ipfw_module_exit) MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */ #endif