X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fudp.c;h=55b46f91d5cf768d258b126d990a926dcbe125bb;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=03559b37f0c2b69316476cb6f371d4c79fa07e42;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 03559b37f..55b46f91d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -115,18 +115,16 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(udp_hash_lock); /* Shared by v4/v6 udp. */ int udp_port_rover; -int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2); - static int udp_v4_get_port(struct sock *sk, unsigned short snum) { struct hlist_node *node; struct sock *sk2; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -173,15 +171,15 @@ gotit: } else { sk_for_each(sk2, node, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_opt *inet2 = inet_sk(sk2); + struct inet_sock *inet2 = inet_sk(sk2); if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && + sk2 != sk && !ipv6_only_sock(sk2) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - tcp_ipv4_addr_conflict(sk2, sk) && + nx_addr_conflict(sk->sk_nx_info, + tcp_v4_rcv_saddr(sk), sk2) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } @@ -230,7 +228,8 @@ static inline int udp_in_list(struct nx_info *nx_info, u32 addr) /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, + u32 daddr, u16 dport, int dif) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -238,7 +237,7 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i int badness = -1; sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 1 : 0); @@ -279,7 +278,8 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i return result; } -__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) +static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, + u32 daddr, u16 dport, int dif) { struct sock *sk; @@ -301,7 +301,7 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned short hnum = ntohs(loc_port); sk_for_each_from(s, node) { - struct inet_opt *inet = inet_sk(s); + struct inet_sock *inet = inet_sk(s); if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || @@ -333,7 +333,7 @@ found: void udp_err(struct sk_buff *skb, u32 info) { - struct inet_opt *inet; + struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); int type = skb->h.icmph->type; @@ -344,7 +344,7 @@ void udp_err(struct sk_buff *skb, u32 info) sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); if (sk == NULL) { - ICMP_INC_STATS_BH(IcmpInErrors); + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; /* No socket for error */ } @@ -401,7 +401,7 @@ out: */ static void udp_flush_pending_frames(struct sock *sk) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); if (up->pending) { up->len = 0; @@ -413,9 +413,9 @@ static void udp_flush_pending_frames(struct sock *sk) /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk, struct udp_opt *up) +static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; struct sk_buff *skb; struct udphdr *uh; @@ -497,8 +497,8 @@ static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { - struct inet_opt *inet = inet_sk(sk); - struct udp_opt *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct udp_sock *up = udp_sk(sk); int ulen = len; struct ipcm_cookie ipc; struct rtable *rt = NULL; @@ -548,7 +548,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EINVAL; if (usin->sin_family != AF_INET) { if (usin->sin_family != AF_UNSPEC) - return -EINVAL; + return -EAFNOSUPPORT; } daddr = usin->sin_addr.s_addr; @@ -680,7 +680,7 @@ out: if (free) kfree(ipc.opt); if (!err) { - UDP_INC_STATS_USER(UdpOutDatagrams); + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } return err; @@ -693,9 +693,10 @@ do_confirm: goto out; } -int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) +static int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); int ret; if (!up->pending) { @@ -796,10 +797,10 @@ static __inline__ int udp_checksum_complete(struct sk_buff *skb) * return it, otherwise we block. */ -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; int copied, err; @@ -854,7 +855,10 @@ try_again: } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); + err = copied; + if (flags & MSG_TRUNC) + err = skb->len - sizeof(struct udphdr); out_free: skb_free_datagram(sk, skb); @@ -862,7 +866,7 @@ out: return err; csum_copy_err: - UDP_INC_STATS_BH(UdpInErrors); + UDP_INC_STATS_BH(UDP_MIB_INERRORS); /* Clear queue. */ if (flags&MSG_PEEK) { @@ -884,58 +888,10 @@ csum_copy_err: goto try_again; } -int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) -{ - struct inet_opt *inet = inet_sk(sk); - struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; - struct rtable *rt; - u32 saddr; - int oif; - int err; - - - if (addr_len < sizeof(*usin)) - return -EINVAL; - - if (usin->sin_family != AF_INET) - return -EAFNOSUPPORT; - - sk_dst_reset(sk); - - oif = sk->sk_bound_dev_if; - saddr = inet->saddr; - if (MULTICAST(usin->sin_addr.s_addr)) { - if (!oif) - oif = inet->mc_index; - if (!saddr) - saddr = inet->mc_addr; - } - err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, - RT_CONN_FLAGS(sk), oif, - IPPROTO_UDP, - inet->sport, usin->sin_port, sk); - if (err) - return err; - if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { - ip_rt_put(rt); - return -EACCES; - } - if (!inet->saddr) - inet->saddr = rt->rt_src; /* Update source address */ - if (!inet->rcv_saddr) - inet->rcv_saddr = rt->rt_src; - inet->daddr = rt->rt_dst; - inet->dport = usin->sin_port; - sk->sk_state = TCP_ESTABLISHED; - inet->id = jiffies; - - sk_dst_set(sk, &rt->u.dst); - return(0); -} int udp_disconnect(struct sock *sk, int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); /* * 1003.1g - break association. */ @@ -957,7 +913,7 @@ int udp_disconnect(struct sock *sk, int flags) static void udp_close(struct sock *sk, long timeout) { - inet_sock_release(sk); + sk_common_release(sk); } /* return: @@ -970,7 +926,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) #ifndef CONFIG_XFRM return 1; #else - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); struct udphdr *uh = skb->h.uh; struct iphdr *iph; int iphlen, len; @@ -990,6 +946,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) len = skb->tail - udpdata; switch (encap_type) { + default: case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { @@ -1000,35 +957,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) } else /* Must be an IKE packet.. pass it through */ return 1; - - decaps: - /* At this point we are sure that this is an ESPinUDP packet, - * so we need to remove 'len' bytes from the packet (the UDP - * header and optional ESP marker bytes) and then modify the - * protocol to ESP, and then call into the transform receiver. - */ - - /* Now we can update and verify the packet length... */ - iph = skb->nh.iph; - iphlen = iph->ihl << 2; - iph->tot_len = htons(ntohs(iph->tot_len) - len); - if (skb->len < iphlen + len) { - /* packet is too small!?! */ - return 0; - } - - /* pull the data buffer up to the ESP header and set the - * transport header to point to ESP. Keep UDP on the stack - * for later. - */ - skb->h.raw = skb_pull(skb, len); - - /* modify the protocol (it's ESP!) */ - iph->protocol = IPPROTO_ESP; - - /* and let the caller know to send this into the ESP processor... */ - return -1; - + break; case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { @@ -1038,17 +967,38 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) /* ESP Packet with Non-IKE marker */ len = sizeof(struct udphdr) + 2 * sizeof(u32); - goto decaps; } else /* Must be an IKE packet.. pass it through */ return 1; + break; + } - default: - if (net_ratelimit()) - printk(KERN_INFO "udp_encap_rcv(): Unhandled UDP encap type: %u\n", - encap_type); - return 1; + /* At this point we are sure that this is an ESPinUDP packet, + * so we need to remove 'len' bytes from the packet (the UDP + * header and optional ESP marker bytes) and then modify the + * protocol to ESP, and then call into the transform receiver. + */ + + /* Now we can update and verify the packet length... */ + iph = skb->nh.iph; + iphlen = iph->ihl << 2; + iph->tot_len = htons(ntohs(iph->tot_len) - len); + if (skb->len < iphlen + len) { + /* packet is too small!?! */ + return 0; } + + /* pull the data buffer up to the ESP header and set the + * transport header to point to ESP. Keep UDP on the stack + * for later. + */ + skb->h.raw = skb_pull(skb, len); + + /* modify the protocol (it's ESP!) */ + iph->protocol = IPPROTO_ESP; + + /* and let the caller know to send this into the ESP processor... */ + return -1; #endif } @@ -1062,7 +1012,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) */ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); /* * Charge it to the socket, dropping if the queue is full. @@ -1093,7 +1043,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if (ret < 0) { /* process the ESP packet */ ret = xfrm4_rcv_encap(skb, up->encap_type); - UDP_INC_STATS_BH(UdpInDatagrams); + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); return -ret; } /* FALLTHROUGH -- it's a UDP Packet */ @@ -1101,7 +1051,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { if (__udp_checksum_complete(skb)) { - UDP_INC_STATS_BH(UdpInErrors); + UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return -1; } @@ -1109,11 +1059,11 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) } if (sock_queue_rcv_skb(sk,skb)<0) { - UDP_INC_STATS_BH(UdpInErrors); + UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return -1; } - UDP_INC_STATS_BH(UdpInDatagrams); + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); return 0; } @@ -1241,7 +1191,7 @@ int udp_rcv(struct sk_buff *skb) if (udp_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UdpNoPorts); + UDP_INC_STATS_BH(UDP_MIB_NOPORTS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1261,7 +1211,7 @@ short_packet: NIPQUAD(daddr), ntohs(uh->dest))); no_header: - UDP_INC_STATS_BH(UdpInErrors); + UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return(0); @@ -1278,7 +1228,7 @@ csum_error: ntohs(uh->dest), ulen)); drop: - UDP_INC_STATS_BH(UdpInErrors); + UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return(0); } @@ -1297,7 +1247,7 @@ static int udp_destroy_sock(struct sock *sk) static int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); int val; int err = 0; @@ -1323,7 +1273,16 @@ static int udp_setsockopt(struct sock *sk, int level, int optname, break; case UDP_ENCAP: - up->encap_type = val; + switch (val) { + case 0: + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + up->encap_type = val; + break; + default: + err = -ENOPROTOOPT; + break; + } break; default: @@ -1337,7 +1296,7 @@ static int udp_setsockopt(struct sock *sk, int level, int optname, static int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - struct udp_opt *up = udp_sk(sk); + struct udp_sock *up = udp_sk(sk); int val, len; if (level != SOL_UDP) @@ -1371,11 +1330,58 @@ static int udp_getsockopt(struct sock *sk, int level, int optname, return 0; } +/** + * udp_poll - wait for a UDP event. + * @file - file struct + * @sock - socket + * @wait - poll table + * + * This is same as datagram poll, except for the special case of + * blocking sockets. If application is using a blocking fd + * and a packet with checksum error is in the queue; + * then it could get return from select indicating data available + * but then block when reading it. Add special case code + * to work around these arguably broken applications. + */ +unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + unsigned int mask = datagram_poll(file, sock, wait); + struct sock *sk = sock->sk; + + /* Check for false positives due to checksum errors */ + if ( (mask & POLLRDNORM) && + !(file->f_flags & O_NONBLOCK) && + !(sk->sk_shutdown & RCV_SHUTDOWN)){ + struct sk_buff_head *rcvq = &sk->sk_receive_queue; + struct sk_buff *skb; + + spin_lock_irq(&rcvq->lock); + while ((skb = skb_peek(rcvq)) != NULL) { + if (udp_checksum_complete(skb)) { + UDP_INC_STATS_BH(UDP_MIB_INERRORS); + __skb_unlink(skb, rcvq); + kfree_skb(skb); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + } + spin_unlock_irq(&rcvq->lock); + + /* nothing to see, move along */ + if (skb == NULL) + mask &= ~(POLLIN | POLLRDNORM); + } + + return mask; + +} struct proto udp_prot = { .name = "UDP", + .owner = THIS_MODULE, .close = udp_close, - .connect = udp_connect, + .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .destroy = udp_destroy_sock, @@ -1388,6 +1394,7 @@ struct proto udp_prot = { .hash = udp_v4_hash, .unhash = udp_v4_unhash, .get_port = udp_v4_get_port, + .slab_obj_size = sizeof(struct udp_sock), }; /* ------------------------------------------------------------------------ */ @@ -1403,7 +1410,7 @@ static struct sock *udp_get_first(struct seq_file *seq) sk_for_each(sk, node, &udp_hash[state->bucket]) { if (sk->sk_family == state->family && - vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)) + vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) goto found; } } @@ -1421,7 +1428,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) try_again: ; } while (sk && (sk->sk_family != state->family || - !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))); + !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(&udp_hash[state->bucket]); @@ -1526,7 +1533,7 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) { - struct inet_opt *inet = inet_sk(sp); + struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr; unsigned int src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); @@ -1579,7 +1586,6 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -EXPORT_SYMBOL(udp_connect); EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); @@ -1587,6 +1593,7 @@ EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_port_rover); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); +EXPORT_SYMBOL(udp_poll); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register);