X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fudp.c;h=3f849b6d39c83d71507c394c9f442f3a6d4a4947;hb=987b0145d94eecf292d8b301228356f44611ab7c;hp=d956482447aea949436bd7d8220cd0747f366621;hpb=7100de193a1848d14a1221b9e83d6c2f2ad93f5a;p=linux-2.6.git diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d95648244..3f849b6d3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -7,7 +7,7 @@ * * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, + * Authors: Ross Biro * Fred N. van Kempen, * Arnt Gulbrandsen, * Alan Cox, @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include @@ -95,7 +96,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -112,10 +114,10 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); +DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(udp_hash_lock); /* Shared by v4/v6 udp. */ int udp_port_rover; @@ -124,7 +126,7 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) { struct hlist_node *node; struct sock *sk2; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -171,17 +173,15 @@ gotit: } else { sk_for_each(sk2, node, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_opt *inet2 = inet_sk(sk2); + struct inet_sock *inet2 = inet_sk(sk2); if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && + sk2 != sk && !ipv6_only_sock(sk2) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && + nx_addr_conflict(sk->sk_nx_info, + inet_rcv_saddr(sk), sk2) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } @@ -219,7 +219,8 @@ static void udp_v4_unhash(struct sock *sk) /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, + u32 daddr, u16 dport, int dif) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -227,7 +228,7 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i int badness = -1; sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 1 : 0); @@ -235,6 +236,11 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i if (inet->rcv_saddr != daddr) continue; score+=2; + } else if (sk->sk_nx_info) { + if (addr_in_nx_info(sk->sk_nx_info, daddr)) + score+=2; + else + continue; } if (inet->daddr) { if (inet->daddr != saddr) @@ -263,7 +269,8 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i return result; } -__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, + u32 daddr, u16 dport, int dif) { struct sock *sk; @@ -285,12 +292,13 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned short hnum = ntohs(loc_port); sk_for_each_from(s, node) { - struct inet_opt *inet = inet_sk(s); + struct inet_sock *inet = inet_sk(s); if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr && + inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -316,7 +324,7 @@ found: void udp_err(struct sk_buff *skb, u32 info) { - struct inet_opt *inet; + struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); int type = skb->h.icmph->type; @@ -384,7 +392,7 @@ out: */ static void udp_flush_pending_frames(struct sock *sk) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); if (up->pending) { up->len = 0; @@ -396,9 +404,9 @@ static void udp_flush_pending_frames(struct sock *sk) /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk, struct udp_opt *up) +static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; struct sk_buff *skb; struct udphdr *uh; @@ -480,8 +488,8 @@ static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { - struct inet_opt *inet = inet_sk(sk); - struct udp_opt *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct udp_sock *up = udp_sk(sk); int ulen = len; struct ipcm_cookie ipc; struct rtable *rt = NULL; @@ -531,7 +539,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EINVAL; if (usin->sin_family != AF_INET) { if (usin->sin_family != AF_UNSPEC) - return -EINVAL; + return -EAFNOSUPPORT; } daddr = usin->sin_addr.s_addr; @@ -572,7 +580,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, connected = 0; } tos = RT_TOS(inet->tos); - if (sk->sk_localroute || (msg->msg_flags & MSG_DONTROUTE) || + if (sock_flag(sk, SOCK_LOCALROUTE) || + (msg->msg_flags & MSG_DONTROUTE) || (ipc.opt && ipc.opt->is_strictroute)) { tos |= RTO_ONLINK; connected = 0; @@ -599,6 +608,19 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct nx_info *nxi = sk->sk_nx_info; + + if (nxi) { + err = ip_find_src(nxi, &rt, &fl); + if (err) + goto out; + if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + daddr = fl.fl4_dst = nxi->ipv4[0]; +#ifdef VSERVER_REMAP_SADDR + if (saddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + saddr = fl.fl4_src = nxi->ipv4[0]; +#endif + } err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -625,7 +647,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); err = -EINVAL; goto out; } @@ -667,9 +689,10 @@ do_confirm: goto out; } -int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) +static int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); int ret; if (!up->pending) { @@ -689,7 +712,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, in if (unlikely(!up->pending)) { release_sock(sk); - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); return -EINVAL; } @@ -734,7 +757,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) unsigned long amount; amount = 0; - spin_lock_irq(&sk->sk_receive_queue.lock); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { /* @@ -744,7 +767,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) */ amount = skb->len - sizeof(struct udphdr); } - spin_unlock_irq(&sk->sk_receive_queue.lock); + spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } @@ -756,7 +779,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) static __inline__ int __udp_checksum_complete(struct sk_buff *skb) { - return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + return __skb_checksum_complete(skb); } static __inline__ int udp_checksum_complete(struct sk_buff *skb) @@ -770,10 +793,10 @@ static __inline__ int udp_checksum_complete(struct sk_buff *skb) * return it, otherwise we block. */ -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; int copied, err; @@ -841,20 +864,7 @@ out: csum_copy_err: UDP_INC_STATS_BH(UDP_MIB_INERRORS); - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_irq(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_irq(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (noblock) return -EAGAIN; @@ -864,7 +874,7 @@ csum_copy_err: int udp_disconnect(struct sock *sk, int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); /* * 1003.1g - break association. */ @@ -899,24 +909,33 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) #ifndef CONFIG_XFRM return 1; #else - struct udp_opt *up = udp_sk(sk); - struct udphdr *uh = skb->h.uh; + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; struct iphdr *iph; int iphlen, len; - __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr); - __u32 *udpdata32 = (__u32 *)udpdata; + __u8 *udpdata; + __u32 *udpdata32; __u16 encap_type = up->encap_type; /* if we're overly short, let UDP handle it */ - if (udpdata > skb->tail) + len = skb->len - sizeof(struct udphdr); + if (len <= 0) return 1; /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; - len = skb->tail - udpdata; + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = skb->h.uh; + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__u32 *)udpdata; switch (encap_type) { default: @@ -951,6 +970,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ + if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + return 0; /* Now we can update and verify the packet length... */ iph = skb->nh.iph; @@ -985,7 +1006,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) */ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); /* * Charge it to the socket, dropping if the queue is full. @@ -994,6 +1015,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + nf_reset(skb); if (up->encap_type) { /* @@ -1087,24 +1109,20 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, * Otherwise, csum completion requires chacksumming packet body, * including udp header and folding it to skb->csum. */ -static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, +static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, unsigned short ulen, u32 saddr, u32 daddr) { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else if (skb->ip_summed == CHECKSUM_HW) { - skb->ip_summed = CHECKSUM_UNNECESSARY; if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) - return 0; - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n")); - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); /* Probably, we should checksum udp header (it should be in cache * in any case) and data in tiny packets (< rx copybreak). */ - return 0; } /* XXX (mef) need to generalize the IPOD stuff. Right now I am borrowing @@ -1203,11 +1221,10 @@ int udp_rcv(struct sk_buff *skb) if (ulen > len || ulen < sizeof(*uh)) goto short_packet; - if (pskb_trim(skb, ulen)) + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; - if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) - goto csum_error; + udp_checksum_init(skb, uh, ulen, saddr, daddr); if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return udp_v4_mcast_deliver(skb, uh, saddr, daddr); @@ -1232,16 +1249,22 @@ int udp_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_checksum_complete(skb)) goto csum_error; - /* VNET: Suppress ICMP Unreachable if the port was bound to a (presumably raw) socket */ - if (!skb->sk) { - UDP_INC_STATS_BH(UDP_MIB_NOPORTS); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE) + if (vnet_active && skb->sk) { + /* VNET: Suppress ICMP Unreachable if the port was bound to a (presumably raw) socket */ + kfree_skb(skb); + return 0; } +#endif + + UDP_INC_STATS_BH(UDP_MIB_NOPORTS); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* * Hmm. We got an UDP packet to a port to which we @@ -1251,14 +1274,13 @@ int udp_rcv(struct sk_buff *skb) return(0); short_packet: - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - len, - NIPQUAD(daddr), - ntohs(uh->dest))); + LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + len, + NIPQUAD(daddr), + ntohs(uh->dest)); no_header: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); @@ -1269,13 +1291,12 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen)); + LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen); drop: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); @@ -1296,7 +1317,7 @@ static int udp_destroy_sock(struct sock *sk) static int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); int val; int err = 0; @@ -1345,7 +1366,7 @@ static int udp_setsockopt(struct sock *sk, int level, int optname, static int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); int val, len; if (level != SOL_UDP) @@ -1379,9 +1400,56 @@ static int udp_getsockopt(struct sock *sk, int level, int optname, return 0; } +/** + * udp_poll - wait for a UDP event. + * @file - file struct + * @sock - socket + * @wait - poll table + * + * This is same as datagram poll, except for the special case of + * blocking sockets. If application is using a blocking fd + * and a packet with checksum error is in the queue; + * then it could get return from select indicating data available + * but then block when reading it. Add special case code + * to work around these arguably broken applications. + */ +unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + unsigned int mask = datagram_poll(file, sock, wait); + struct sock *sk = sock->sk; + + /* Check for false positives due to checksum errors */ + if ( (mask & POLLRDNORM) && + !(file->f_flags & O_NONBLOCK) && + !(sk->sk_shutdown & RCV_SHUTDOWN)){ + struct sk_buff_head *rcvq = &sk->sk_receive_queue; + struct sk_buff *skb; + + spin_lock_bh(&rcvq->lock); + while ((skb = skb_peek(rcvq)) != NULL) { + if (udp_checksum_complete(skb)) { + UDP_INC_STATS_BH(UDP_MIB_INERRORS); + __skb_unlink(skb, rcvq); + kfree_skb(skb); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + } + spin_unlock_bh(&rcvq->lock); + + /* nothing to see, move along */ + if (skb == NULL) + mask &= ~(POLLIN | POLLRDNORM); + } + + return mask; + +} struct proto udp_prot = { .name = "UDP", + .owner = THIS_MODULE, .close = udp_close, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, @@ -1396,6 +1464,7 @@ struct proto udp_prot = { .hash = udp_v4_hash, .unhash = udp_v4_unhash, .get_port = udp_v4_get_port, + .obj_size = sizeof(struct udp_sock), }; /* ------------------------------------------------------------------------ */ @@ -1411,7 +1480,7 @@ static struct sock *udp_get_first(struct seq_file *seq) sk_for_each(sk, node, &udp_hash[state->bucket]) { if (sk->sk_family == state->family && - vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)) + vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) goto found; } } @@ -1429,7 +1498,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) try_again: ; } while (sk && (sk->sk_family != state->family || - !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))); + !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(&udp_hash[state->bucket]); @@ -1534,7 +1603,7 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) { - struct inet_opt *inet = inet_sk(sp); + struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr; unsigned int src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); @@ -1594,6 +1663,7 @@ EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_port_rover); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); +EXPORT_SYMBOL(udp_poll); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register);