*
* Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
*
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
* Alan Cox, <Alan.Cox@linux.org>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sockios.h>
+#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/snmp.h>
-#include <net/tcp.h>
+#include <net/ip.h>
+#include <net/tcp_states.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <net/inet_common.h>
#include <net/checksum.h>
#include <net/xfrm.h>
-#include <linux/vs_base.h>
/*
* Snmp MIB for the UDP layer
*/
-DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
+DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
struct hlist_head udp_hash[UDP_HTABLE_SIZE];
-rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
+DEFINE_RWLOCK(udp_hash_lock);
/* Shared by v4/v6 udp. */
int udp_port_rover;
{
struct hlist_node *node;
struct sock *sk2;
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
write_lock_bh(&udp_hash_lock);
if (snum == 0) {
} else {
sk_for_each(sk2, node,
&udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
- struct inet_opt *inet2 = inet_sk(sk2);
+ struct inet_sock *inet2 = inet_sk(sk2);
if (inet2->num == snum &&
- sk2 != sk &&
- !ipv6_only_sock(sk2) &&
+ sk2 != sk && !ipv6_only_sock(sk2) &&
(!sk2->sk_bound_dev_if ||
!sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (!inet2->rcv_saddr ||
- !inet->rcv_saddr ||
- inet2->rcv_saddr == inet->rcv_saddr) &&
+ nx_addr_conflict(sk->sk_nx_info,
+ inet_rcv_saddr(sk), sk2) &&
(!sk2->sk_reuse || !sk->sk_reuse))
goto fail;
}
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
+static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport,
+ u32 daddr, u16 dport, int dif)
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
int badness = -1;
sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
if (inet->num == hnum && !ipv6_only_sock(sk)) {
int score = (sk->sk_family == PF_INET ? 1 : 0);
if (inet->rcv_saddr != daddr)
continue;
score+=2;
+ } else if (sk->sk_nx_info) {
+ if (addr_in_nx_info(sk->sk_nx_info, daddr))
+ score+=2;
+ else
+ continue;
}
if (inet->daddr) {
if (inet->daddr != saddr)
return result;
}
-__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
+static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport,
+ u32 daddr, u16 dport, int dif)
{
struct sock *sk;
unsigned short hnum = ntohs(loc_port);
sk_for_each_from(s, node) {
- struct inet_opt *inet = inet_sk(s);
+ struct inet_sock *inet = inet_sk(s);
if (inet->num != hnum ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
- (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
+ (inet->rcv_saddr && inet->rcv_saddr != loc_addr &&
+ inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) ||
ipv6_only_sock(s) ||
(s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
continue;
void udp_err(struct sk_buff *skb, u32 info)
{
- struct inet_opt *inet;
+ struct inet_sock *inet;
struct iphdr *iph = (struct iphdr*)skb->data;
struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
int type = skb->h.icmph->type;
*/
static void udp_flush_pending_frames(struct sock *sk)
{
- struct udp_opt *up = udp_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
if (up->pending) {
up->len = 0;
/*
* Push out all pending data as one UDP datagram. Socket is locked.
*/
-static int udp_push_pending_frames(struct sock *sk, struct udp_opt *up)
+static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct flowi *fl = &inet->cork.fl;
struct sk_buff *skb;
struct udphdr *uh;
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
- struct inet_opt *inet = inet_sk(sk);
- struct udp_opt *up = udp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
int ulen = len;
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
return -EINVAL;
if (usin->sin_family != AF_INET) {
if (usin->sin_family != AF_UNSPEC)
- return -EINVAL;
+ return -EAFNOSUPPORT;
}
daddr = usin->sin_addr.s_addr;
connected = 0;
}
tos = RT_TOS(inet->tos);
- if (sk->sk_localroute || (msg->msg_flags & MSG_DONTROUTE) ||
+ if (sock_flag(sk, SOCK_LOCALROUTE) ||
+ (msg->msg_flags & MSG_DONTROUTE) ||
(ipc.opt && ipc.opt->is_strictroute)) {
tos |= RTO_ONLINK;
connected = 0;
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
+ struct nx_info *nxi = sk->sk_nx_info;
+
+ if (nxi) {
+ err = ip_find_src(nxi, &rt, &fl);
+ if (err)
+ goto out;
+ if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+ daddr = fl.fl4_dst = nxi->ipv4[0];
+#ifdef VSERVER_REMAP_SADDR
+ if (saddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+ saddr = fl.fl4_src = nxi->ipv4[0];
+#endif
+ }
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
if (err)
goto out;
/* ... which is an evident application bug. --ANK */
release_sock(sk);
- NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
+ LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
err = -EINVAL;
goto out;
}
goto out;
}
-int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags)
+static int udp_sendpage(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
{
- struct udp_opt *up = udp_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
int ret;
if (!up->pending) {
if (unlikely(!up->pending)) {
release_sock(sk);
- NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n"));
+ LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
return -EINVAL;
}
unsigned long amount;
amount = 0;
- spin_lock_irq(&sk->sk_receive_queue.lock);
+ spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL) {
/*
*/
amount = skb->len - sizeof(struct udphdr);
}
- spin_unlock_irq(&sk->sk_receive_queue.lock);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
{
- return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+ return __skb_checksum_complete(skb);
}
static __inline__ int udp_checksum_complete(struct sk_buff *skb)
* return it, otherwise we block.
*/
-int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
int copied, err;
csum_copy_err:
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
- /* Clear queue. */
- if (flags&MSG_PEEK) {
- int clear = 0;
- spin_lock_irq(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- clear = 1;
- }
- spin_unlock_irq(&sk->sk_receive_queue.lock);
- if (clear)
- kfree_skb(skb);
- }
-
- skb_free_datagram(sk, skb);
+ skb_kill_datagram(sk, skb, flags);
if (noblock)
return -EAGAIN;
int udp_disconnect(struct sock *sk, int flags)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
/*
* 1003.1g - break association.
*/
#ifndef CONFIG_XFRM
return 1;
#else
- struct udp_opt *up = udp_sk(sk);
- struct udphdr *uh = skb->h.uh;
+ struct udp_sock *up = udp_sk(sk);
+ struct udphdr *uh;
struct iphdr *iph;
int iphlen, len;
- __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr);
- __u32 *udpdata32 = (__u32 *)udpdata;
+ __u8 *udpdata;
+ __u32 *udpdata32;
__u16 encap_type = up->encap_type;
/* if we're overly short, let UDP handle it */
- if (udpdata > skb->tail)
+ len = skb->len - sizeof(struct udphdr);
+ if (len <= 0)
return 1;
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
- len = skb->tail - udpdata;
+ /* If this is a paged skb, make sure we pull up
+ * whatever data we need to look at. */
+ if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
+ return 1;
+
+ /* Now we can get the pointers */
+ uh = skb->h.uh;
+ udpdata = (__u8 *)uh + sizeof(struct udphdr);
+ udpdata32 = (__u32 *)udpdata;
switch (encap_type) {
default:
* header and optional ESP marker bytes) and then modify the
* protocol to ESP, and then call into the transform receiver.
*/
+ if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ return 0;
/* Now we can update and verify the packet length... */
iph = skb->nh.iph;
*/
static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
- struct udp_opt *up = udp_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
/*
* Charge it to the socket, dropping if the queue is full.
kfree_skb(skb);
return -1;
}
+ nf_reset(skb);
if (up->encap_type) {
/*
* Otherwise, csum completion requires chacksumming packet body,
* including udp header and folding it to skb->csum.
*/
-static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
+static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
unsigned short ulen, u32 saddr, u32 daddr)
{
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_HW) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
- return 0;
- NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
- return 0;
}
+/* XXX (mef) need to generalize the IPOD stuff. Right now I am borrowing
+ from the ICMP infrastructure. */
+#ifdef CONFIG_ICMP_IPOD
+#include <linux/reboot.h>
+
+extern int sysctl_icmp_ipod_version;
+extern int sysctl_icmp_ipod_enabled;
+extern u32 sysctl_icmp_ipod_host;
+extern u32 sysctl_icmp_ipod_mask;
+extern char sysctl_icmp_ipod_key[32+1];
+#define IPOD_CHECK_KEY \
+ (sysctl_icmp_ipod_key[0] != 0)
+#define IPOD_VALID_KEY(d) \
+ (strncmp(sysctl_icmp_ipod_key, (char *)(d), strlen(sysctl_icmp_ipod_key)) == 0)
+
+/*
+ * udp_ping_of_death - force a reboot on an authorised UDP "IPOD" datagram.
+ * @skb:   received datagram.  The key is read at offset sizeof(struct udphdr)
+ *         from skb->data (NOTE(review): this assumes skb->data points at the
+ *         UDP header when we are called - confirm against the caller).
+ * @uh:    UDP header of @skb; only the destination port is read.
+ * @saddr: source IPv4 address, network byte order.
+ *
+ * Nothing happens unless sysctl_icmp_ipod_enabled is set and the datagram
+ * is addressed to port 664.  The request is honoured when the source
+ * address matches sysctl_icmp_ipod_host under sysctl_icmp_ipod_mask and,
+ * if a key is configured, the payload starts with that key.  The payload
+ * must then carry at least sizeof(sysctl_icmp_ipod_key) - 1 bytes.
+ *
+ * The skb is never modified: the key is copied out with skb_copy_bits()
+ * instead of being pulled into the linear area, so header pointers cached
+ * by the caller (such as @uh) remain valid after we return.
+ */
+static void udp_ping_of_death(struct sk_buff *skb, struct udphdr *uh, u32 saddr)
+{
+	char key[sizeof(sysctl_icmp_ipod_key) - 1];
+	int doit = 0;
+
+	/*
+	 * If IPOD not enabled or wrong UDP IPOD port, ignore.
+	 */
+	if (!sysctl_icmp_ipod_enabled || (ntohs(uh->dest) != 664))
+		return;
+
+	/*
+	 * First check the source address info.
+	 * A host of 0xffffffff means "not set": every request is rejected.
+	 */
+	if (sysctl_icmp_ipod_host != 0xffffffff &&
+	    (ntohl(saddr) & sysctl_icmp_ipod_mask) == sysctl_icmp_ipod_host) {
+		/*
+		 * Now check the key if enabled.
+		 * If packet doesn't contain enough data or key
+		 * is otherwise invalid, ignore.
+		 */
+		if (!IPOD_CHECK_KEY) {
+			doit = 1;
+		} else if (skb_copy_bits(skb, sizeof(struct udphdr),
+					 key, sizeof(key)) == 0 &&
+			   IPOD_VALID_KEY(key)) {
+			/*
+			 * IPOD_VALID_KEY compares at most
+			 * strlen(sysctl_icmp_ipod_key) <= sizeof(key) bytes,
+			 * so the copy needs no NUL terminator.
+			 */
+			doit = 1;
+		}
+	}
+
+	if (doit) {
+		/* Disarm first so that a second request cannot re-enter. */
+		sysctl_icmp_ipod_enabled = 0;
+		printk(KERN_CRIT "IPOD: reboot forced by %u.%u.%u.%u...\n",
+		       NIPQUAD(saddr));
+		machine_restart(NULL);
+	} else if (net_ratelimit()) {
+		/*
+		 * Rejections are triggered by arbitrary remote hosts;
+		 * throttle the message so it cannot flood the log.
+		 */
+		printk(KERN_WARNING "IPOD: from %u.%u.%u.%u rejected\n",
+		       NIPQUAD(saddr));
+	}
+}
+#endif
+
/*
* All we need to do is get the socket, and then do a checksum.
*/
if (ulen > len || ulen < sizeof(*uh))
goto short_packet;
- if (pskb_trim(skb, ulen))
+ if (pskb_trim_rcsum(skb, ulen))
goto short_packet;
- if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
- goto csum_error;
+ udp_checksum_init(skb, uh, ulen, saddr, daddr);
if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
+#ifdef CONFIG_ICMP_IPOD
+ udp_ping_of_death(skb, uh, saddr);
+#endif
+
sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
if (sk != NULL) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
+ nf_reset(skb);
/* No socket. Drop packet silently, if checksum is wrong */
if (udp_checksum_complete(skb))
goto csum_error;
+#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
+ if (vnet_active && skb->sk) {
+ /* VNET: Suppress ICMP Unreachable if the port was bound to a (presumably raw) socket */
+ kfree_skb(skb);
+ return 0;
+ }
+#endif
+
UDP_INC_STATS_BH(UDP_MIB_NOPORTS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
return(0);
short_packet:
- NETDEBUG(if (net_ratelimit())
- printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
- NIPQUAD(saddr),
- ntohs(uh->source),
- ulen,
- len,
- NIPQUAD(daddr),
- ntohs(uh->dest)));
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
+ NIPQUAD(saddr),
+ ntohs(uh->source),
+ ulen,
+ len,
+ NIPQUAD(daddr),
+ ntohs(uh->dest));
no_header:
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
kfree_skb(skb);
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- NETDEBUG(if (net_ratelimit())
- printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
- NIPQUAD(saddr),
- ntohs(uh->source),
- NIPQUAD(daddr),
- ntohs(uh->dest),
- ulen));
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
+ NIPQUAD(saddr),
+ ntohs(uh->source),
+ NIPQUAD(daddr),
+ ntohs(uh->dest),
+ ulen);
drop:
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
kfree_skb(skb);
static int udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
- struct udp_opt *up = udp_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
int val;
int err = 0;
static int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
- struct udp_opt *up = udp_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
int val, len;
if (level != SOL_UDP)
return 0;
}
+/**
+ *	udp_poll - poll a UDP socket, hiding datagrams with bad checksums
+ *	@file: file the poll was issued on
+ *	@sock: socket being polled
+ *	@wait: poll table
+ *
+ *	Starts from the mask computed by datagram_poll().  When that mask
+ *	reports readable data on a blocking file whose receive side is not
+ *	shut down, the head of the receive queue is verified first:
+ *	datagrams failing udp_checksum_complete() are counted as input
+ *	errors and freed, and the first good one is marked
+ *	CHECKSUM_UNNECESSARY.  If the queue ends up empty, POLLIN and
+ *	POLLRDNORM are cleared, so a blocking reader is not told that data
+ *	is available only to block in the following read.
+ *
+ *	Returns the resulting poll mask.
+ */
+unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	unsigned int mask = datagram_poll(file, sock, wait);
+	struct sock *sk = sock->sk;
+	struct sk_buff_head *queue;
+	struct sk_buff *head;
+
+	/* Only a "readable" report on a blocking, open receiver is suspect. */
+	if (!(mask & POLLRDNORM))
+		return mask;
+	if (file->f_flags & O_NONBLOCK)
+		return mask;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return mask;
+
+	queue = &sk->sk_receive_queue;
+
+	spin_lock_bh(&queue->lock);
+	for (;;) {
+		head = skb_peek(queue);
+		if (head == NULL)
+			break;
+
+		if (!udp_checksum_complete(head)) {
+			/* Good datagram: remember it has been verified. */
+			head->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+
+		/* Corrupt datagram: account for it and discard it. */
+		UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+		__skb_unlink(head, queue);
+		kfree_skb(head);
+	}
+	spin_unlock_bh(&queue->lock);
+
+	/* Everything queued was corrupt: there is nothing to read. */
+	if (head == NULL)
+		mask &= ~(POLLIN | POLLRDNORM);
+
+	return mask;
+}
struct proto udp_prot = {
.name = "UDP",
+ .owner = THIS_MODULE,
.close = udp_close,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.hash = udp_v4_hash,
.unhash = udp_v4_unhash,
.get_port = udp_v4_get_port,
+ .obj_size = sizeof(struct udp_sock),
};
/* ------------------------------------------------------------------------ */
sk_for_each(sk, node, &udp_hash[state->bucket]) {
if (sk->sk_family == state->family &&
- vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))
+ vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
goto found;
}
}
try_again:
;
} while (sk && (sk->sk_family != state->family ||
- !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)));
+ !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)));
if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
sk = sk_head(&udp_hash[state->bucket]);
/* ------------------------------------------------------------------------ */
static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
{
- struct inet_opt *inet = inet_sk(sp);
+ struct inet_sock *inet = inet_sk(sp);
unsigned int dest = inet->daddr;
unsigned int src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
EXPORT_SYMBOL(udp_port_rover);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
+EXPORT_SYMBOL(udp_poll);
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(udp_proc_register);