X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fnetlink%2Faf_netlink.c;h=524d3e8c7800ccd20b43b7267cc5ab43d89f9a78;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=118c172c48ca9069ae6120f158fa84cb39f39f86;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 118c172c4..524d3e8c7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -13,15 +13,20 @@ * added netlink_proto_exit * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo * use nlk_sk, as sk->protinfo is on a diet 8) - * + * Fri Jul 22 19:51:12 MEST 2005 Harald Welte + * - inc module use count of module that owns + * the kernel socket in case userspace opens + * socket of same protocol + * - remove all module support, since netlink is + * mandatory if CONFIG_NET=y these days */ #include #include +#include #include #include -#include #include #include #include @@ -50,33 +55,43 @@ #include #include #include +#include +#include #include #include #include + #include #include +#include -#define Nprintk(a...) - -#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) -#define NL_EMULATE_DEV -#endif +#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) -struct netlink_opt -{ +struct netlink_sock { + /* struct sock has to be the first member of netlink_sock */ + struct sock sk; u32 pid; - unsigned int groups; u32 dst_pid; - unsigned int dst_groups; + u32 dst_group; + u32 flags; + u32 subscriptions; + u32 ngroups; + unsigned long *groups; unsigned long state; - int (*handler)(int unit, struct sk_buff *skb); wait_queue_head_t wait; struct netlink_callback *cb; spinlock_t cb_lock; void (*data_ready)(struct sock *sk, int bytes); + struct module *module; }; -#define nlk_sk(__sk) ((struct netlink_opt *)(__sk)->sk_protinfo) +#define NETLINK_KERNEL_SOCKET 0x1 +#define NETLINK_RECV_PKTINFO 0x2 + +static inline struct netlink_sock *nlk_sk(struct sock *sk) +{ + return (struct netlink_sock *)sk; +} struct nl_pid_hash { struct hlist_head *table; @@ -94,7 +109,11 @@ struct nl_pid_hash { struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; + unsigned long *listeners; unsigned int nl_nonroot; + unsigned int groups; + struct module *module; + int registered; }; static struct netlink_table *nl_table; @@ -107,7 +126,12 @@ static void netlink_destroy_callback(struct netlink_callback *cb); static DEFINE_RWLOCK(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); -static struct notifier_block *netlink_chain; +static ATOMIC_NOTIFIER_HEAD(netlink_chain); + +static u32 netlink_group_mask(u32 group) +{ + return group ? 1 << (group - 1) : 0; +} static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) { @@ -125,8 +149,7 @@ static void netlink_sock_destruct(struct sock *sk) BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); BUG_TRAP(!nlk_sk(sk)->cb); - - kfree(nlk_sk(sk)); + BUG_TRAP(!nlk_sk(sk)->groups); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP. @@ -275,7 +298,25 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) return 0; } -static struct proto_ops netlink_ops; +static const struct proto_ops netlink_ops; + +static void +netlink_update_listeners(struct sock *sk) +{ + struct netlink_table *tbl = &nl_table[sk->sk_protocol]; + struct hlist_node *node; + unsigned long mask; + unsigned int i; + + for (i = 0; i < NLGRPSZ(tbl->groups)/sizeof(unsigned long); i++) { + mask = 0; + sk_for_each_bound(sk, node, &tbl->mc_list) + mask |= nlk_sk(sk)->groups[i]; + tbl->listeners[i] = mask; + } + /* this function is only called with the netlink table "grabbed", which + * makes sure updates are visible before bind or setsockopt return. */ +} static int netlink_insert(struct sock *sk, u32 pid) { @@ -320,54 +361,87 @@ err: static void netlink_remove(struct sock *sk) { netlink_table_grab(); - nl_table[sk->sk_protocol].hash.entries--; - sk_del_node_init(sk); - if (nlk_sk(sk)->groups) + if (sk_del_node_init(sk)) + nl_table[sk->sk_protocol].hash.entries--; + if (nlk_sk(sk)->subscriptions) __sk_del_bind_node(sk); netlink_table_ungrab(); } -static int netlink_create(struct socket *sock, int protocol) +static struct proto netlink_proto = { + .name = "NETLINK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct netlink_sock), +}; + +static int __netlink_create(struct socket *sock, int protocol) { struct sock *sk; - struct netlink_opt *nlk; - - sock->state = SS_UNCONNECTED; - - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) - return -ESOCKTNOSUPPORT; - - if (protocol<0 || protocol >= MAX_LINKS) - return -EPROTONOSUPPORT; + struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1, NULL); + sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) return -ENOMEM; - sock_init_data(sock,sk); - sk_set_owner(sk, THIS_MODULE); - - nlk = sk->sk_protinfo = kmalloc(sizeof(*nlk), GFP_KERNEL); - if (!nlk) { - sk_free(sk); - return -ENOMEM; - } - memset(nlk, 0, sizeof(*nlk)); + sock_init_data(sock, sk); + nlk = nlk_sk(sk); spin_lock_init(&nlk->cb_lock); init_waitqueue_head(&nlk->wait); - sk->sk_destruct = netlink_sock_destruct; + sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; return 0; } +static int netlink_create(struct socket *sock, int protocol) +{ + struct module *module = NULL; + struct netlink_sock *nlk; + unsigned int groups; + int err = 0; + + sock->state = SS_UNCONNECTED; + + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; + + if (protocol<0 || protocol >= MAX_LINKS) + return -EPROTONOSUPPORT; + + netlink_lock_table(); +#ifdef CONFIG_KMOD + if (!nl_table[protocol].registered) { + netlink_unlock_table(); + request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); + netlink_lock_table(); + } +#endif + if (nl_table[protocol].registered && + try_module_get(nl_table[protocol].module)) + module = nl_table[protocol].module; + groups = nl_table[protocol].groups; + netlink_unlock_table(); + + if ((err = __netlink_create(sock, protocol)) < 0) + goto out_module; + + nlk = nlk_sk(sock->sk); + nlk->module = module; +out: + return err; + +out_module: + module_put(module); + goto out; +} + static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; - struct netlink_opt *nlk; + struct netlink_sock *nlk; if (!sk) return 0; @@ -377,10 +451,10 @@ static int netlink_release(struct socket *sock) spin_lock(&nlk->cb_lock); if (nlk->cb) { - nlk->cb->done(nlk->cb); + if (nlk->cb->done) + nlk->cb->done(nlk->cb); netlink_destroy_callback(nlk->cb); nlk->cb = NULL; - __sock_put(sk); } spin_unlock(&nlk->cb_lock); @@ -393,14 +467,30 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid && !nlk->groups) { + if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { .protocol = sk->sk_protocol, .pid = nlk->pid, }; - notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); + atomic_notifier_call_chain(&netlink_chain, + NETLINK_URELEASE, &n); } - + + if (nlk->module) + module_put(nlk->module); + + netlink_table_grab(); + if (nlk->flags & NETLINK_KERNEL_SOCKET) { + kfree(nl_table[sk->sk_protocol].listeners); + nl_table[sk->sk_protocol].module = NULL; + nl_table[sk->sk_protocol].registered = 0; + } else if (nlk->subscriptions) + netlink_update_listeners(sk); + netlink_table_ungrab(); + + kfree(nlk->groups); + nlk->groups = NULL; + sock_put(sk); return 0; } @@ -412,7 +502,7 @@ static int netlink_autobind(struct socket *sock) struct hlist_head *head; struct sock *osk; struct hlist_node *node; - s32 pid = current->pid; + s32 pid = current->tgid; int err; static s32 rover = -4097; @@ -435,8 +525,12 @@ retry: err = netlink_insert(sk, pid); if (err == -EADDRINUSE) goto retry; - nlk_sk(sk)->groups = 0; - return 0; + + /* If 2 threads race to autobind, that is fine. */ + if (err == -EBUSY) + err = 0; + + return err; } static inline int netlink_capable(struct socket *sock, unsigned int flag) @@ -445,10 +539,45 @@ static inline int netlink_capable(struct socket *sock, unsigned int flag) capable(CAP_NET_ADMIN); } +static void +netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->subscriptions && !subscriptions) + __sk_del_bind_node(sk); + else if (!nlk->subscriptions && subscriptions) + sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); + nlk->subscriptions = subscriptions; +} + +static int netlink_alloc_groups(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + unsigned int groups; + int err = 0; + + netlink_lock_table(); + groups = nl_table[sk->sk_protocol].groups; + if (!nl_table[sk->sk_protocol].registered) + err = -ENOENT; + netlink_unlock_table(); + + if (err) + return err; + + nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL); + if (nlk->groups == NULL) + return -ENOMEM; + memset(nlk->groups, 0, NLGRPSZ(groups)); + nlk->ngroups = groups; + return 0; +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -456,8 +585,15 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_RECV)) - return -EPERM; + if (nladdr->nl_groups) { + if (!netlink_capable(sock, NL_NONROOT_RECV)) + return -EPERM; + if (nlk->groups == NULL) { + err = netlink_alloc_groups(sk); + if (err) + return err; + } + } if (nlk->pid) { if (nladdr->nl_pid != nlk->pid) @@ -470,15 +606,15 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return err; } - if (!nladdr->nl_groups && !nlk->groups) + if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); - if (nlk->groups && !nladdr->nl_groups) - __sk_del_bind_node(sk); - else if (!nlk->groups && nladdr->nl_groups) - sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); - nlk->groups = nladdr->nl_groups; + netlink_update_subscriptions(sk, nlk->subscriptions + + hweight32(nladdr->nl_groups) - + hweight32(nlk->groups[0])); + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; + netlink_update_listeners(sk); netlink_table_ungrab(); return 0; @@ -489,13 +625,13 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, { int err = 0; struct sock *sk = sock->sk; - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr; if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; nlk->dst_pid = 0; - nlk->dst_groups = 0; + nlk->dst_group = 0; return 0; } if (addr->sa_family != AF_NETLINK) @@ -511,7 +647,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (err == 0) { sk->sk_state = NETLINK_CONNECTED; nlk->dst_pid = nladdr->nl_pid; - nlk->dst_groups = nladdr->nl_groups; + nlk->dst_group = ffs(nladdr->nl_groups); } return err; @@ -520,7 +656,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer) { struct sock *sk = sock->sk; - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr; nladdr->nl_family = AF_NETLINK; @@ -529,10 +665,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr if (peer) { nladdr->nl_pid = nlk->dst_pid; - nladdr->nl_groups = nlk->dst_groups; + nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { nladdr->nl_pid = nlk->pid; - nladdr->nl_groups = nlk->groups; + nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; } return 0; } @@ -549,7 +685,7 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { int protocol = ssk->sk_protocol; struct sock *sock; - struct netlink_opt *nlk; + struct netlink_sock *nlk; sock = netlink_lookup(protocol, pid); if (!sock) @@ -569,13 +705,12 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) struct sock *netlink_getsockbyfilp(struct file *filp) { struct inode *inode = filp->f_dentry->d_inode; - struct socket *socket; struct sock *sock; - if (!inode->i_sock || !(socket = SOCKET_I(inode))) + if (!S_ISSOCK(inode->i_mode)) return ERR_PTR(-ENOTSOCK); - sock = socket->sk; + sock = SOCKET_I(inode)->sk; if (sock->sk_family != AF_NETLINK) return ERR_PTR(-EINVAL); @@ -593,21 +728,18 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. */ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo) +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + long timeo, struct sock *ssk) { - struct netlink_opt *nlk; + struct netlink_sock *nlk; nlk = nlk_sk(sk); -#ifdef NL_EMULATE_DEV - if (nlk->handler) - return 0; -#endif if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); if (!timeo) { - if (!nlk->pid) + if (!ssk || nlk_sk(ssk)->pid == 0) netlink_overrun(sk); sock_put(sk); kfree_skb(skb); @@ -638,19 +770,8 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol) { - struct netlink_opt *nlk; int len = skb->len; - nlk = nlk_sk(sk); -#ifdef NL_EMULATE_DEV - if (nlk->handler) { - skb_orphan(skb); - len = nlk->handler(protocol, skb); - sock_put(sk); - return len; - } -#endif - skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); sock_put(sk); @@ -663,7 +784,8 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb) sock_put(sk); } -static inline struct sk_buff *netlink_trim(struct sk_buff *skb, int allocation) +static inline struct sk_buff *netlink_trim(struct sk_buff *skb, + gfp_t allocation) { int delta; @@ -702,7 +824,7 @@ retry: kfree_skb(skb); return PTR_ERR(sk); } - err = netlink_attachskb(sk, skb, nonblock, timeo); + err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); if (err == 1) goto retry; if (err) @@ -711,15 +833,21 @@ retry: return netlink_sendskb(sk, skb, ssk->sk_protocol); } +int netlink_has_listeners(struct sock *sk, unsigned int group) +{ + int res = 0; + + BUG_ON(!(nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET)); + if (group - 1 < nl_table[sk->sk_protocol].groups) + res = test_bit(group - 1, nl_table[sk->sk_protocol].listeners); + return res; +} +EXPORT_SYMBOL_GPL(netlink_has_listeners); + static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { - struct netlink_opt *nlk = nlk_sk(sk); -#ifdef NL_EMULATE_DEV - if (nlk->handler) { - nlk->handler(sk->sk_protocol, skb); - return 0; - } else -#endif + struct netlink_sock *nlk = nlk_sk(sk); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !test_bit(0, &nlk->state)) { skb_set_owner_r(skb, sk); @@ -737,20 +865,21 @@ struct netlink_broadcast_data { int failure; int congested; int delivered; - int allocation; + gfp_t allocation; struct sk_buff *skb, *skb2; }; static inline int do_one_broadcast(struct sock *sk, struct netlink_broadcast_data *p) { - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); int val; if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & p->group)) + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + !test_bit(p->group - 1, nlk->groups)) goto out; if (p->failure) { @@ -760,11 +889,15 @@ static inline int do_one_broadcast(struct sock *sk, sock_hold(sk); if (p->skb2 == NULL) { - if (atomic_read(&p->skb->users) != 1) { + if (skb_shared(p->skb)) { p->skb2 = skb_clone(p->skb, p->allocation); } else { - p->skb2 = p->skb; - atomic_inc(&p->skb->users); + p->skb2 = skb_get(p->skb); + /* + * skb ownership may have been set when + * delivered to a previous socket. + */ + skb_orphan(p->skb2); } } if (p->skb2 == NULL) { @@ -785,7 +918,7 @@ out: } int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, int allocation) + u32 group, gfp_t allocation) { struct netlink_broadcast_data info; struct hlist_node *node; @@ -810,11 +943,12 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); + kfree_skb(skb); + netlink_unlock_table(); if (info.skb2) kfree_skb(info.skb2); - kfree_skb(skb); if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) @@ -836,12 +970,13 @@ struct netlink_set_err_data { static inline int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) { - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); if (sk == p->exclude_sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & p->group)) + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + !test_bit(p->group - 1, nlk->groups)) goto out; sk->sk_err = p->code; @@ -869,11 +1004,105 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +static int netlink_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + int val = 0, err; + + if (level != SOL_NETLINK) + return -ENOPROTOOPT; + + if (optlen >= sizeof(int) && + get_user(val, (int __user *)optval)) + return -EFAULT; + + switch (optname) { + case NETLINK_PKTINFO: + if (val) + nlk->flags |= NETLINK_RECV_PKTINFO; + else + nlk->flags &= ~NETLINK_RECV_PKTINFO; + err = 0; + break; + case NETLINK_ADD_MEMBERSHIP: + case NETLINK_DROP_MEMBERSHIP: { + unsigned int subscriptions; + int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0; + + if (!netlink_capable(sock, NL_NONROOT_RECV)) + return -EPERM; + if (nlk->groups == NULL) { + err = netlink_alloc_groups(sk); + if (err) + return err; + } + if (!val || val - 1 >= nlk->ngroups) + return -EINVAL; + netlink_table_grab(); + old = test_bit(val - 1, nlk->groups); + subscriptions = nlk->subscriptions - old + new; + if (new) + __set_bit(val - 1, nlk->groups); + else + __clear_bit(val - 1, nlk->groups); + netlink_update_subscriptions(sk, subscriptions); + netlink_update_listeners(sk); + netlink_table_ungrab(); + err = 0; + break; + } + default: + err = -ENOPROTOOPT; + } + return err; +} + +static int netlink_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + int len, val, err; + + if (level != SOL_NETLINK) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < 0) + return -EINVAL; + + switch (optname) { + case NETLINK_PKTINFO: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; + put_user(len, optlen); + put_user(val, optval); + err = 0; + break; + default: + err = -ENOPROTOOPT; + } + return err; +} + +static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) +{ + struct nl_pktinfo info; + + info.group = NETLINK_CB(skb).dst_group; + put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); +} + static inline void netlink_rcv_wake(struct sock *sk) { - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); - if (!skb_queue_len(&sk->sk_receive_queue)) + if (skb_queue_empty(&sk->sk_receive_queue)) clear_bit(0, &nlk->state); if (!test_bit(0, &nlk->state)) wake_up_interruptible(&nlk->wait); @@ -884,10 +1113,10 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, { struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr=msg->msg_name; u32 dst_pid; - u32 dst_groups; + u32 dst_group; struct sk_buff *skb; int err; struct scm_cookie scm; @@ -905,12 +1134,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (addr->nl_family != AF_NETLINK) return -EINVAL; dst_pid = addr->nl_pid; - dst_groups = addr->nl_groups; - if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND)) + dst_group = ffs(addr->nl_groups); + if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) return -EPERM; } else { dst_pid = nlk->dst_pid; - dst_groups = nlk->dst_groups; + dst_group = nlk->dst_group; } if (!nlk->pid) { @@ -928,9 +1157,10 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; NETLINK_CB(skb).pid = nlk->pid; - NETLINK_CB(skb).groups = nlk->groups; NETLINK_CB(skb).dst_pid = dst_pid; - NETLINK_CB(skb).dst_groups = dst_groups; + NETLINK_CB(skb).dst_group = dst_group; + NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); + selinux_get_task_sid(current, &(NETLINK_CB(skb).sid)); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); /* What can I do? Netlink is asynchronous, so that @@ -951,9 +1181,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (dst_groups) { + if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); } err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); @@ -968,7 +1198,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct scm_cookie scm; struct sock *sk = sock->sk; - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; struct sk_buff *skb; @@ -999,10 +1229,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, addr->nl_family = AF_NETLINK; addr->nl_pad = 0; addr->nl_pid = NETLINK_CB(skb).pid; - addr->nl_groups = NETLINK_CB(skb).dst_groups; + addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } + if (nlk->flags & NETLINK_RECV_PKTINFO) + netlink_cmsg_recv_pktinfo(msg, skb); + if (NULL == siocb->scm) { memset(&scm, 0, sizeof(scm)); siocb->scm = &scm; @@ -1022,7 +1255,7 @@ out: static void netlink_data_ready(struct sock *sk, int len) { - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); if (nlk->data_ready) nlk->data_ready(sk, len); @@ -1036,10 +1269,14 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) +netlink_kernel_create(int unit, unsigned int groups, + void (*input)(struct sock *sk, int len), + struct module *module) { struct socket *sock; struct sock *sk; + struct netlink_sock *nlk; + unsigned long *listeners = NULL; if (!nl_table) return NULL; @@ -1050,20 +1287,40 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (netlink_create(sock, unit) < 0) { - sock_release(sock); - return NULL; - } + if (__netlink_create(sock, unit) < 0) + goto out_sock_release; + + if (groups < 32) + groups = 32; + + listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL); + if (!listeners) + goto out_sock_release; + sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; - if (netlink_insert(sk, 0)) { - sock_release(sock); - return NULL; - } + if (netlink_insert(sk, 0)) + goto out_sock_release; + + nlk = nlk_sk(sk); + nlk->flags |= NETLINK_KERNEL_SOCKET; + + netlink_table_grab(); + nl_table[unit].groups = groups; + nl_table[unit].listeners = listeners; + nl_table[unit].module = module; + nl_table[unit].registered = 1; + netlink_table_ungrab(); + return sk; + +out_sock_release: + kfree(listeners); + sock_release(sock); + return NULL; } void netlink_set_nonroot(int protocol, unsigned int flags) @@ -1086,7 +1343,7 @@ static void netlink_destroy_callback(struct netlink_callback *cb) static int netlink_dump(struct sock *sk) { - struct netlink_opt *nlk = nlk_sk(sk); + struct netlink_sock *nlk = nlk_sk(sk); struct netlink_callback *cb; struct sk_buff *skb; struct nlmsghdr *nlh; @@ -1114,19 +1371,21 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int)); - nlh->nlmsg_flags |= NLM_F_MULTI; + nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); - cb->done(cb); + if (cb->done) + cb->done(cb); nlk->cb = NULL; spin_unlock(&nlk->cb_lock); netlink_destroy_callback(cb); - sock_put(sk); return 0; + +nlmsg_failure: + return -ENOBUFS; } int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, @@ -1136,7 +1395,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, { struct netlink_callback *cb; struct sock *sk; - struct netlink_opt *nlk; + struct netlink_sock *nlk; cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (cb == NULL) @@ -1167,6 +1426,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, spin_unlock(&nlk->cb_lock); netlink_dump(sk); + sock_put(sk); return 0; } @@ -1197,13 +1457,101 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) } rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, - NLMSG_ERROR, sizeof(struct nlmsgerr)); + NLMSG_ERROR, sizeof(struct nlmsgerr), 0); errmsg = NLMSG_DATA(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); } +static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, + struct nlmsghdr *, int *)) +{ + unsigned int total_len; + struct nlmsghdr *nlh; + int err; + + while (skb->len >= nlmsg_total_size(0)) { + nlh = (struct nlmsghdr *) skb->data; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) + return 0; + + total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); + + if (cb(skb, nlh, &err) < 0) { + /* Not an error, but we have to interrupt processing + * here. Note: that in this case we do not pull + * message from skb, it will be processed later. + */ + if (err == 0) + return -1; + netlink_ack(skb, nlh, err); + } else if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + + skb_pull(skb, total_len); + } + + return 0; +} + +/** + * nelink_run_queue - Process netlink receive queue. + * @sk: Netlink socket containing the queue + * @qlen: Place to store queue length upon entry + * @cb: Callback function invoked for each netlink message found + * + * Processes as much as there was in the queue upon entry and invokes + * a callback function for each netlink message found. The callback + * function may refuse a message by returning a negative error code + * but setting the error pointer to 0 in which case this function + * returns with a qlen != 0. + * + * qlen must be initialized to 0 before the initial entry, afterwards + * the function may be called repeatedly until qlen reaches 0. + */ +void netlink_run_queue(struct sock *sk, unsigned int *qlen, + int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)) +{ + struct sk_buff *skb; + + if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue)) + *qlen = skb_queue_len(&sk->sk_receive_queue); + + for (; *qlen; (*qlen)--) { + skb = skb_dequeue(&sk->sk_receive_queue); + if (netlink_rcv_skb(skb, cb)) { + if (skb->len) + skb_queue_head(&sk->sk_receive_queue, skb); + else { + kfree_skb(skb); + (*qlen)--; + } + break; + } + + kfree_skb(skb); + } +} + +/** + * netlink_queue_skip - Skip netlink message while processing queue. + * @nlh: Netlink message to be skipped + * @skb: Socket buffer containing the netlink messages. + * + * Pulls the given netlink message off the socket buffer so the next + * call to netlink_queue_run() will not reconsider the message. + */ +void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) +{ + int msglen = NLMSG_ALIGN(nlh->nlmsg_len); + + if (msglen > skb->len) + msglen = skb->len; + + skb_pull(skb, msglen); +} #ifdef CONFIG_PROC_FS struct nl_seq_iter { @@ -1293,13 +1641,13 @@ static int netlink_seq_show(struct seq_file *seq, void *v) "Rmem Wmem Dump Locks\n"); else { struct sock *s = v; - struct netlink_opt *nlk = nlk_sk(s); + struct netlink_sock *nlk = nlk_sk(s); seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n", s, s->sk_protocol, nlk->pid, - nlk->groups, + nlk->groups ? (u32)nlk->groups[0] : 0, atomic_read(&s->sk_rmem_alloc), atomic_read(&s->sk_wmem_alloc), nlk->cb, @@ -1352,15 +1700,15 @@ static struct file_operations netlink_seq_fops = { int netlink_register_notifier(struct notifier_block *nb) { - return notifier_chain_register(&netlink_chain, nb); + return atomic_notifier_chain_register(&netlink_chain, nb); } int netlink_unregister_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netlink_chain, nb); + return atomic_notifier_chain_unregister(&netlink_chain, nb); } -static struct proto_ops netlink_ops = { +static const struct proto_ops netlink_ops = { .family = PF_NETLINK, .owner = THIS_MODULE, .release = netlink_release, @@ -1373,8 +1721,8 @@ static struct proto_ops netlink_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = netlink_setsockopt, + .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, @@ -1395,6 +1743,10 @@ static int __init netlink_proto_init(void) int i; unsigned long max; unsigned int order; + int err = proto_register(&netlink_proto, 0); + + if (err != 0) + goto out; if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) netlink_skb_parms_too_large(); @@ -1441,25 +1793,15 @@ enomem: #endif /* The netlink device handler may be needed early. */ rtnetlink_init(); - return 0; -} - -static void __exit netlink_proto_exit(void) -{ - sock_unregister(PF_NETLINK); - proc_net_remove("netlink"); - kfree(nl_table); - nl_table = NULL; +out: + return err; } core_initcall(netlink_proto_init); -module_exit(netlink_proto_exit); - -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_NETPROTO(PF_NETLINK); EXPORT_SYMBOL(netlink_ack); +EXPORT_SYMBOL(netlink_run_queue); +EXPORT_SYMBOL(netlink_queue_skip); EXPORT_SYMBOL(netlink_broadcast); EXPORT_SYMBOL(netlink_dump_start); EXPORT_SYMBOL(netlink_kernel_create);