X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fraw.c;h=e5effedd1ac78a609f7abc4cc7b284af93b02213;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=d7b9238d1ea7093a012085ed78e8d9a60281eb6e;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index d7b9238d1..e5effedd1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -7,7 +7,7 @@ * * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, + * Authors: Ross Biro * Fred N. van Kempen, * * Fixes: @@ -38,14 +38,13 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include + +#include #include #include #include #include #include -#include #include #include #include @@ -59,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -71,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -81,7 +80,7 @@ #include struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; -rwlock_t raw_v4_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(raw_v4_lock); static void raw_v4_hash(struct sock *sk) { @@ -102,18 +101,40 @@ static void raw_v4_unhash(struct sock *sk) write_unlock_bh(&raw_v4_lock); } + +/* + * Check if a given address matches for a socket + * + * nxi: the socket's nx_info if any + * addr: to be verified address + * saddr/baddr: socket addresses + */ +static inline int raw_addr_match ( + struct nx_info *nxi, + uint32_t addr, + uint32_t saddr, + uint32_t baddr) +{ + if (addr && (saddr == addr || baddr == addr)) + return 1; + if (!saddr) + return addr_in_nx_info(nxi, addr); + return 0; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, - unsigned long raddr, unsigned long laddr, + __be32 raddr, __be32 laddr, int dif) { struct hlist_node *node; sk_for_each_from(sk, node) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + raw_addr_match(sk->sk_nx_info, laddr, + inet->rcv_saddr, inet->rcv_saddr2) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -130,9 +151,12 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) { int type; + if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + return 1; + type = skb->h.icmph->type; if (type < 32) { - __u32 data = raw4_sk(sk)->filter.data; + __u32 data = raw_sk(sk)->filter.data; return ((1 << type) & data) != 0; } @@ -147,10 +171,11 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ -void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) +int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; + int delivered = 0; read_lock(&raw_v4_lock); head = &raw_v4_htable[hash]; @@ -161,6 +186,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) skb->dev->ifindex); while (sk) { + delivered = 1; if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); @@ -174,11 +200,12 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) } out: read_unlock(&raw_v4_lock); + return delivered; } void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int type = skb->h.icmph->type; int code = skb->h.icmph->code; int err = 0; @@ -249,6 +276,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } + nf_reset(skb); skb_push(skb, skb->data - skb->nh.raw); @@ -256,11 +284,11 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) return 0; } -static int raw_send_hdrinc(struct sock *sk, void *from, int length, +static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, struct rtable *rt, unsigned int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); int hh_len; struct iphdr *iph; struct sk_buff *skb; @@ -295,7 +323,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length, goto error_fault; /* We don't modify invalid header */ - if (length >= sizeof(*iph) && iph->ihl * 4 <= length) { + if (length >= sizeof(*iph) && iph->ihl * 4U <= length) { if (!iph->saddr) iph->saddr = rt->rt_src; iph->check = 0; @@ -306,6 +334,11 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length, iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } + err = -EPERM; + if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) + && (!addr_in_nx_info(sk->sk_nx_info, iph->saddr))) + goto error_free; + err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -317,26 +350,74 @@ out: error_fault: err = -EFAULT; +error_free: kfree_skb(skb); error: - IP_INC_STATS(IpOutDiscards); + IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); return err; } +static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +{ + struct iovec *iov; + u8 __user *type = NULL; + u8 __user *code = NULL; + int probed = 0; + unsigned int i; + + if (!msg->msg_iov) + return 0; + + for (i = 0; i < msg->msg_iovlen; i++) { + iov = &msg->msg_iov[i]; + if (!iov) + continue; + + switch (fl->proto) { + case IPPROTO_ICMP: + /* check if one-byte field is readable or not. */ + if (iov->iov_base && iov->iov_len < 1) + break; + + if (!type) { + type = iov->iov_base; + /* check if code field is readable or not. */ + if (iov->iov_len > 1) + code = type + 1; + } else if (!code) + code = iov->iov_base; + + if (type && code) { + if (get_user(fl->fl_icmp_type, type) || + get_user(fl->fl_icmp_code, code)) + return -EFAULT; + probed = 1; + } + break; + default: + probed = 1; + break; + } + if (probed) + break; + } + return 0; +} + static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; - u32 daddr; - u32 saddr; + __be32 daddr; + __be32 saddr; u8 tos; int err; err = -EMSGSIZE; - if (len < 0 || len > 0xFFFF) + if (len > 0xFFFF) goto out; /* @@ -362,7 +443,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, printk(KERN_INFO "%s forgot to set AF_INET in " "raw sendmsg. Fix it!\n", current->comm); - err = -EINVAL; + err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } @@ -409,7 +490,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, daddr = ipc.opt->faddr; } } - tos = RT_TOS(inet->tos) | sk->sk_localroute; + tos = RT_CONN_FLAGS(sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; @@ -429,6 +510,19 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, }; + if (!inet->hdrincl) { + err = raw_probe_proto_opt(&fl, msg); + if (err) + goto done; + } + + security_sk_classify_flow(sk, &fl); + if (sk->sk_nx_info) { + err = ip_find_src(sk->sk_nx_info, &rt, &fl); + + if (err) + goto done; + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) @@ -463,7 +557,10 @@ done: kfree(ipc.opt); ip_rt_put(rt); -out: return err < 0 ? err : len; +out: + if (err < 0) + return err; + return len; do_confirm: dst_confirm(&rt->u.dst); @@ -480,13 +577,13 @@ static void raw_close(struct sock *sk, long timeout) */ ip_ra_control(sk, 0, NULL); - inet_sock_release(sk); + sk_common_release(sk); } /* This gets rid of all the nasties in af_inet. -DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; int ret = -EINVAL; int chk_addr_ret; @@ -511,10 +608,10 @@ out: return ret; * we return it, otherwise we block. */ -int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); size_t copied = 0; int err = -EOPNOTSUPP; struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; @@ -551,33 +648,40 @@ int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (sin) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = skb->nh.iph->saddr; + sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); + if (flags & MSG_TRUNC) + copied = skb->len; done: skb_free_datagram(sk, skb); -out: return err ? : copied; +out: + if (err) + return err; + return copied; } static int raw_init(struct sock *sk) { - struct raw_opt *tp = raw4_sk(sk); + struct raw_sock *rp = raw_sk(sk); + if (inet_sk(sk)->num == IPPROTO_ICMP) - memset(&tp->filter, 0, sizeof(tp->filter)); + memset(&rp->filter, 0, sizeof(rp->filter)); return 0; } -static int raw_seticmpfilter(struct sock *sk, char *optval, int optlen) +static int raw_seticmpfilter(struct sock *sk, char __user *optval, int optlen) { if (optlen > sizeof(struct icmp_filter)) optlen = sizeof(struct icmp_filter); - if (copy_from_user(&raw4_sk(sk)->filter, optval, optlen)) + if (copy_from_user(&raw_sk(sk)->filter, optval, optlen)) return -EFAULT; return 0; } -static int raw_geticmpfilter(struct sock *sk, char *optval, int *optlen) +static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen) { int len, ret = -EFAULT; @@ -590,18 +694,15 @@ static int raw_geticmpfilter(struct sock *sk, char *optval, int *optlen) len = sizeof(struct icmp_filter); ret = -EFAULT; if (put_user(len, optlen) || - copy_to_user(optval, &raw4_sk(sk)->filter, len)) + copy_to_user(optval, &raw_sk(sk)->filter, len)) goto out; ret = 0; out: return ret; } -static int raw_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) +static int do_raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_RAW) - return ip_setsockopt(sk, level, optname, optval, optlen); - if (optname == ICMP_FILTER) { if (inet_sk(sk)->num != IPPROTO_ICMP) return -EOPNOTSUPP; @@ -611,12 +712,27 @@ static int raw_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; } -static int raw_getsockopt(struct sock *sk, int level, int optname, - char *optval, int *optlen) +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { if (level != SOL_RAW) - return ip_getsockopt(sk, level, optname, optval, optlen); + return ip_setsockopt(sk, level, optname, optval, optlen); + return do_raw_setsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +static int compat_raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level != SOL_RAW) + return compat_ip_setsockopt(sk, level, optname, optval, optlen); + return do_raw_setsockopt(sk, level, optname, optval, optlen); +} +#endif +static int do_raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ if (optname == ICMP_FILTER) { if (inet_sk(sk)->num != IPPROTO_ICMP) return -EOPNOTSUPP; @@ -626,28 +742,46 @@ static int raw_getsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; } +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level != SOL_RAW) + return ip_getsockopt(sk, level, optname, optval, optlen); + return do_raw_getsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +static int compat_raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level != SOL_RAW) + return compat_ip_getsockopt(sk, level, optname, optval, optlen); + return do_raw_getsockopt(sk, level, optname, optval, optlen); +} +#endif + static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { case SIOCOUTQ: { int amount = atomic_read(&sk->sk_wmem_alloc); - return put_user(amount, (int *)arg); + return put_user(amount, (int __user *)arg); } case SIOCINQ: { struct sk_buff *skb; int amount = 0; - spin_lock_irq(&sk->sk_receive_queue.lock); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) amount = skb->len; - spin_unlock_irq(&sk->sk_receive_queue.lock); - return put_user(amount, (int *)arg); + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); } default: #ifdef CONFIG_IP_MROUTE - return ipmr_ioctl(sk, cmd, arg); + return ipmr_ioctl(sk, cmd, (void __user *)arg); #else return -ENOIOCTLCMD; #endif @@ -655,20 +789,26 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } struct proto raw_prot = { - .name = "RAW", - .close = raw_close, - .connect = udp_connect, - .disconnect = udp_disconnect, - .ioctl = raw_ioctl, - .init = raw_init, - .setsockopt = raw_setsockopt, - .getsockopt = raw_getsockopt, - .sendmsg = raw_sendmsg, - .recvmsg = raw_recvmsg, - .bind = raw_bind, - .backlog_rcv = raw_rcv_skb, - .hash = raw_v4_hash, - .unhash = raw_v4_unhash, + .name = "RAW", + .owner = THIS_MODULE, + .close = raw_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = raw_ioctl, + .init = raw_init, + .setsockopt = raw_setsockopt, + .getsockopt = raw_getsockopt, + .sendmsg = raw_sendmsg, + .recvmsg = raw_recvmsg, + .bind = raw_bind, + .backlog_rcv = raw_rcv_skb, + .hash = raw_v4_hash, + .unhash = raw_v4_unhash, + .obj_size = sizeof(struct raw_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_raw_setsockopt, + .compat_getsockopt = compat_raw_getsockopt, +#endif }; #ifdef CONFIG_PROC_FS @@ -687,7 +827,8 @@ static struct sock *raw_get_first(struct seq_file *seq) struct hlist_node *node; sk_for_each(sk, node, &raw_v4_htable[state->bucket]) - if (sk->sk_family == PF_INET) + if (sk->sk_family == PF_INET && + nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT)) goto found; } sk = NULL; @@ -703,7 +844,8 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != PF_INET); + } while (sk && (sk->sk_family != PF_INET || + !nx_check(sk->sk_nid, VS_WATCH_P|VS_IDENT))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); @@ -747,9 +889,9 @@ static void raw_seq_stop(struct seq_file *seq, void *v) static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) { - struct inet_opt *inet = inet_sk(sp); - unsigned int dest = inet->daddr, - src = inet->rcv_saddr; + struct inet_sock *inet = inet_sk(sp); + __be32 dest = inet->daddr, + src = inet->rcv_saddr; __u16 destp = 0, srcp = inet->num;