*
* Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
*
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Donald Becker, <becker@super.org>
* Alan Cox, <Alan.Cox@linux.org>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
-#include <linux/config.h>
+#include <linux/highmem.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
-#include <net/tcp.h>
-#include <net/udp.h>
+#include <net/xfrm.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
-#include <net/raw.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
+#include <linux/tcp.h>
-/*
- * Shall we try to damage output packets if routing dev changes?
- */
-
-int sysctl_ip_dynaddr;
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
BUG_TRAP(newskb->dst);
-
-#ifdef CONFIG_NETFILTER_DEBUG
- nf_debug_ip_loopback_xmit(newskb);
-#endif
netif_rx(newskb);
return 0;
}
-static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst)
+static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
int ttl = inet->uc_ttl;
*
*/
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
- u32 saddr, u32 daddr, struct ip_options *opt)
+ __be32 saddr, __be32 daddr, struct ip_options *opt)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)skb->dst;
struct iphdr *iph;
dst_output);
}
+EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
+
static inline int ip_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
- struct hh_cache *hh = dst->hh;
struct net_device *dev = dst->dev;
int hh_len = LL_RESERVED_SPACE(dev);
skb = skb2;
}
-#ifdef CONFIG_NETFILTER_DEBUG
- nf_debug_ip_finish_output2(skb);
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
- if (hh) {
- int hh_alen;
-
- read_lock_bh(&hh->hh_lock);
- hh_alen = HH_DATA_ALIGN(hh->hh_len);
- memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
- read_unlock_bh(&hh->hh_lock);
- skb_push(skb, hh->hh_len);
- return hh->hh_output(skb);
- } else if (dst->neighbour)
+ if (dst->hh)
+ return neigh_hh_output(dst->hh, skb);
+ else if (dst->neighbour)
return dst->neighbour->output(skb);
if (net_ratelimit())
return -EINVAL;
}
-int ip_finish_output(struct sk_buff *skb)
+static inline int ip_finish_output(struct sk_buff *skb)
{
- struct net_device *dev = skb->dst->dev;
-
- skb->dev = dev;
- skb->protocol = htons(ETH_P_IP);
-
- return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
- ip_finish_output2);
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+ /* Policy lookup after SNAT yielded a new policy */
+ if (skb->dst->xfrm != NULL) {
+ IPCB(skb)->flags |= IPSKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+ if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
+ return ip_fragment(skb, ip_finish_output2);
+ else
+ return ip_finish_output2(skb);
}
int ip_mc_output(struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_INC_STATS(IpOutRequests);
+ IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
newskb->dev, ip_dev_loopback_xmit);
}
- if (skb->len > dst_pmtu(&rt->u.dst) || skb_shinfo(skb)->frag_list)
- return ip_fragment(skb, ip_finish_output);
- else
- return ip_finish_output(skb);
+ return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+ ip_finish_output,
+ !(IPCB(skb)->flags & IPSKB_REROUTED));
}
int ip_output(struct sk_buff *skb)
{
- IP_INC_STATS(IpOutRequests);
+ struct net_device *dev = skb->dst->dev;
- if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list) &&
- !skb_shinfo(skb)->tso_size)
- return ip_fragment(skb, ip_finish_output);
- else
- return ip_finish_output(skb);
+ IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_IP);
+
+ return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+ ip_finish_output,
+ !(IPCB(skb)->flags & IPSKB_REROUTED));
}
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
struct sock *sk = skb->sk;
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = inet->opt;
struct rtable *rt;
struct iphdr *iph;
- u32 mtu;
/* Skip all of this if the packet is already routed,
* f.e. by something like SCTP.
/* Make sure we can route this packet. */
rt = (struct rtable *)__sk_dst_check(sk, 0);
if (rt == NULL) {
- u32 daddr;
+ __be32 daddr;
/* Use correct destination address if we have options. */
daddr = inet->daddr;
* keep trying until route appears or the connection times
* itself out.
*/
+ security_sk_classify_flow(sk, &fl);
if (ip_route_output_flow(&rt, &fl, sk, 0))
goto no_route;
}
- __sk_dst_set(sk, &rt->u.dst);
- tcp_v4_setup_caps(sk, &rt->u.dst);
+ sk_setup_caps(sk, &rt->u.dst);
}
skb->dst = dst_clone(&rt->u.dst);
/* OK, we know where to send it, allocate and build IP header. */
iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
- *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+ *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
iph->tot_len = htons(skb->len);
if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
iph->frag_off = htons(IP_DF);
skb->nh.iph = iph;
/* Transport layer set skb->h.foo itself. */
- if(opt && opt->optlen) {
+ if (opt && opt->optlen) {
iph->ihl += opt->optlen >> 2;
ip_options_build(skb, opt, inet->daddr, rt, 0);
}
- mtu = dst_pmtu(&rt->u.dst);
- if (skb->len > mtu && (sk->sk_route_caps & NETIF_F_TSO)) {
- unsigned int hlen;
-
- /* Hack zone: all this must be done by TCP. */
- hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
- skb_shinfo(skb)->tso_size = mtu - hlen;
- skb_shinfo(skb)->tso_segs =
- (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
- skb_shinfo(skb)->tso_size - 1;
- }
-
- ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
+ ip_select_ident_more(iph, &rt->u.dst, sk,
+ (skb_shinfo(skb)->gso_segs ?: 1) - 1);
/* Add an IP checksum. */
ip_send_check(iph);
dst_output);
no_route:
- IP_INC_STATS(IpOutNoRoutes);
+ IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EHOSTUNREACH;
}
to->pkt_type = from->pkt_type;
to->priority = from->priority;
to->protocol = from->protocol;
- to->security = from->security;
+ dst_release(to->dst);
to->dst = dst_clone(from->dst);
to->dev = from->dev;
+ to->mark = from->mark;
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
- to->nfmark = from->nfmark;
- to->nfcache = from->nfcache;
/* Connection association is same as pre-frag packet */
nf_conntrack_put(to->nfct);
to->nfct = from->nfct;
nf_conntrack_get(to->nfct);
+ to->nfctinfo = from->nfctinfo;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ to->ipvs_property = from->ipvs_property;
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(to->nf_bridge);
to->nf_bridge = from->nf_bridge;
nf_bridge_get(to->nf_bridge);
#endif
-#ifdef CONFIG_NETFILTER_DEBUG
- to->nf_debug = from->nf_debug;
-#endif
#endif
+ skb_copy_secmark(to, from);
}
/*
int ptr;
struct net_device *dev;
struct sk_buff *skb2;
- unsigned int mtu, hlen, left, len, ll_rs;
+ unsigned int mtu, hlen, left, len, ll_rs, pad;
int offset;
- int not_last_frag;
+ __be16 not_last_frag;
struct rtable *rt = (struct rtable*)skb->dst;
int err = 0;
iph = skb->nh.iph;
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(dst_pmtu(&rt->u.dst)));
+ htonl(dst_mtu(&rt->u.dst)));
kfree_skb(skb);
return -EMSGSIZE;
}
*/
hlen = iph->ihl * 4;
- mtu = dst_pmtu(&rt->u.dst) - hlen; /* Size of data space */
+ mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */
+ IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
* some transformers could create wrong frag_list or break existing
skb_headroom(frag) < hlen)
goto slow_path;
- /* Correct socket ownership. */
- if (frag->sk == NULL && skb->sk)
- goto slow_path;
-
/* Partially cloned skb? */
if (skb_shared(frag))
goto slow_path;
+
+ BUG_ON(frag->sk);
+ if (skb->sk) {
+ sock_hold(skb->sk);
+ frag->sk = skb->sk;
+ frag->destructor = sock_wfree;
+ skb->truesize -= frag->truesize;
+ }
}
/* Everything is OK. Generate! */
err = 0;
offset = 0;
frag = skb_shinfo(skb)->frag_list;
- skb_shinfo(skb)->frag_list = 0;
+ skb_shinfo(skb)->frag_list = NULL;
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
iph->tot_len = htons(first_len);
- iph->frag_off |= htons(IP_MF);
+ iph->frag_off = htons(IP_MF);
ip_send_check(iph);
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (frag) {
+ frag->ip_summed = CHECKSUM_NONE;
frag->h.raw = frag->data;
frag->nh.raw = __skb_push(frag, hlen);
memcpy(frag->nh.raw, iph, hlen);
err = output(skb);
+ if (!err)
+ IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
}
if (err == 0) {
- IP_INC_STATS(IpFragOKs);
+ IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
return 0;
}
kfree_skb(frag);
frag = skb;
}
- IP_INC_STATS(IpFragFails);
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
return err;
}
left = skb->len - hlen; /* Space per frame */
ptr = raw + hlen; /* Where to start from */
-#ifdef CONFIG_BRIDGE_NETFILTER
/* for bridged IP traffic encapsulated inside f.e. a vlan header,
- * we need to make room for the encapsulating header */
- ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev + nf_bridge_pad(skb));
- mtu -= nf_bridge_pad(skb);
-#else
- ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
-#endif
+ * we need to make room for the encapsulating header
+ */
+ pad = nf_bridge_pad(skb);
+ ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+ mtu -= pad;
+
/*
* Fragment the datagram.
*/
*/
if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
- NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
+ NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
err = -ENOMEM;
goto fail;
}
/*
* Put this fragment into the sending queue.
*/
-
- IP_INC_STATS(IpFragCreates);
-
iph->tot_len = htons(len + hlen);
ip_send_check(iph);
err = output(skb2);
if (err)
goto fail;
+
+ IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
}
kfree_skb(skb);
- IP_INC_STATS(IpFragOKs);
+ IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
return err;
fail:
kfree_skb(skb);
- IP_INC_STATS(IpFragFails);
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
return err;
}
+EXPORT_SYMBOL(ip_fragment);
+
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
struct iovec *iov = from;
- if (skb->ip_summed == CHECKSUM_HW) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (memcpy_fromiovecend(to, iov, offset, len) < 0)
return -EFAULT;
} else {
- unsigned int csum = 0;
+ __wsum csum = 0;
if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
return -EFAULT;
skb->csum = csum_block_add(skb->csum, csum, odd);
return 0;
}
-static inline int
-skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
-{
- if (i) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
- return page == frag->page &&
- off == frag->page_offset+frag->size;
- }
- return 0;
-}
-
-static inline unsigned int
+static inline __wsum
csum_page(struct page *page, int offset, int copy)
{
char *kaddr;
- unsigned int csum;
+ __wsum csum;
kaddr = kmap(page);
csum = csum_partial(kaddr + offset, copy, 0);
kunmap(page);
return csum;
}
+static inline int ip_ufo_append_data(struct sock *sk,
+ int getfrag(void *from, char *to, int offset, int len,
+ int odd, struct sk_buff *skb),
+ void *from, int length, int hh_len, int fragheaderlen,
+ int transhdrlen, int mtu,unsigned int flags)
+{
+ struct sk_buff *skb;
+ int err;
+
+ /* There is support for UDP fragmentation offload by network
+ * device, so create one single skb packet containing complete
+ * udp datagram
+ */
+ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+ skb = sock_alloc_send_skb(sk,
+ hh_len + fragheaderlen + transhdrlen + 20,
+ (flags & MSG_DONTWAIT), &err);
+
+ if (skb == NULL)
+ return err;
+
+ /* reserve space for Hardware header */
+ skb_reserve(skb, hh_len);
+
+ /* create space for UDP/IP header */
+ skb_put(skb,fragheaderlen + transhdrlen);
+
+ /* initialize network header pointer */
+ skb->nh.raw = skb->data;
+
+ /* initialize protocol header pointer */
+ skb->h.raw = skb->data + fragheaderlen;
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum = 0;
+ sk->sk_sndmsg_off = 0;
+ }
+
+ err = skb_append_datato_frags(sk,skb, getfrag, from,
+ (length - transhdrlen));
+ if (!err) {
+ /* specify the length of each IP datagram fragment*/
+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+
+ return 0;
+ }
+ /* There is not enough support do UFO ,
+ * so follow normal path
+ */
+ kfree_skb(skb);
+ return err;
+}
+
/*
* ip_append_data() and ip_append_page() can make one large IP datagram
* from many pieces of data. Each pieces will be holded on the socket
- * until ip_push_pending_frames() is called. Eache pieces can be a page
+ * until ip_push_pending_frames() is called. Each piece can be a page
* or non-page data.
*
* Not only UDP, other transport protocols - e.g. raw sockets - can use
struct ipcm_cookie *ipc, struct rtable *rt,
unsigned int flags)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
struct ip_options *opt = NULL;
inet->cork.addr = ipc->addr;
}
dst_hold(&rt->u.dst);
- inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
+ inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
inet->cork.rt = rt;
inet->cork.length = 0;
- inet->sndmsg_page = NULL;
- inet->sndmsg_off = 0;
+ sk->sk_sndmsg_page = NULL;
+ sk->sk_sndmsg_off = 0;
if ((exthdrlen = rt->u.dst.header_len) != 0) {
length += exthdrlen;
transhdrlen += exthdrlen;
hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
- maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
* it won't be fragmented in the future.
*/
if (transhdrlen &&
- length + fragheaderlen <= maxfraglen &&
- rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
+ length + fragheaderlen <= mtu &&
+ rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
!exthdrlen)
- csummode = CHECKSUM_HW;
+ csummode = CHECKSUM_PARTIAL;
inet->cork.length += length;
+ if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
+ (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+ err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
+ fragheaderlen, transhdrlen, mtu,
+ flags);
+ if (err)
+ goto error;
+ return 0;
+ }
/* So, what's going on in the loop below?
*
* We use calculated fragment length to generate chained skb,
* each of segments is IP fragment ready for sending to network after
* adding appropriate IP header.
- *
- * Mistake is:
- *
- * If mtu-fragheaderlen is not 0 modulo 8, we generate additional
- * small fragment of length (mtu-fragheaderlen)%8, even though
- * it is not necessary. Not a big bug, but needs a fix.
*/
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
goto alloc_new_skb;
while (length > 0) {
- if ((copy = maxfraglen - skb->len) <= 0) {
+ /* Check if the remaining data fits into current packet. */
+ copy = mtu - skb->len;
+ if (copy < length)
+ copy = maxfraglen - skb->len;
+ if (copy <= 0) {
char *data;
unsigned int datalen;
unsigned int fraglen;
+ unsigned int fraggap;
unsigned int alloclen;
- BUG_TRAP(copy == 0);
-
+ struct sk_buff *skb_prev;
alloc_new_skb:
- datalen = maxfraglen - fragheaderlen;
- if (datalen > length)
- datalen = length;
+ skb_prev = skb;
+ if (skb_prev)
+ fraggap = skb_prev->len - maxfraglen;
+ else
+ fraggap = 0;
+ /*
+ * If remaining data exceeds the mtu,
+ * we know we need more fragment(s).
+ */
+ datalen = length + fraggap;
+ if (datalen > mtu - fragheaderlen)
+ datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
+
if ((flags & MSG_MORE) &&
!(rt->u.dst.dev->features&NETIF_F_SG))
- alloclen = maxfraglen;
+ alloclen = mtu;
else
alloclen = datalen + fragheaderlen;
* because we have no idea what fragment will be
* the last.
*/
- if (datalen == length)
+ if (datalen == length + fraggap)
alloclen += rt->u.dst.trailer_len;
if (transhdrlen) {
data += fragheaderlen;
skb->h.raw = data + exthdrlen;
- copy = datalen - transhdrlen;
- if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
+ if (fraggap) {
+ skb->csum = skb_copy_and_csum_bits(
+ skb_prev, maxfraglen,
+ data + transhdrlen, fraggap, 0);
+ skb_prev->csum = csum_sub(skb_prev->csum,
+ skb->csum);
+ data += fraggap;
+ pskb_trim_unique(skb_prev, maxfraglen);
+ }
+
+ copy = datalen - transhdrlen - fraggap;
+ if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
}
offset += copy;
- length -= datalen;
+ length -= datalen - fraggap;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
} else {
int i = skb_shinfo(skb)->nr_frags;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
- struct page *page = inet->sndmsg_page;
- int off = inet->sndmsg_off;
+ struct page *page = sk->sk_sndmsg_page;
+ int off = sk->sk_sndmsg_off;
unsigned int left;
if (page && (left = PAGE_SIZE - off) > 0) {
goto error;
}
get_page(page);
- skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+ skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
} else if (i < MAX_SKB_FRAGS) {
err = -ENOMEM;
goto error;
}
- inet->sndmsg_page = page;
- inet->sndmsg_off = 0;
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
skb_fill_page_desc(skb, i, page, 0, 0);
frag = &skb_shinfo(skb)->frags[i];
err = -EFAULT;
goto error;
}
- inet->sndmsg_off += copy;
+ sk->sk_sndmsg_off += copy;
frag->size += copy;
skb->len += copy;
skb->data_len += copy;
error:
inet->cork.length -= length;
- IP_INC_STATS(IpOutDiscards);
+ IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
return err;
}
ssize_t ip_append_page(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
struct rtable *rt;
struct ip_options *opt = NULL;
int mtu;
int len;
int err;
- unsigned int maxfraglen, fragheaderlen;
+ unsigned int maxfraglen, fragheaderlen, fraggap;
if (inet->hdrincl)
return -EPERM;
mtu = inet->cork.fragsize;
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
- maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
return -EINVAL;
inet->cork.length += size;
+ if ((sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->u.dst.dev->features & NETIF_F_UFO)) {
+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ }
+
while (size > 0) {
int i;
- if ((len = maxfraglen - skb->len) <= 0) {
+
+ if (skb_is_gso(skb))
+ len = size;
+ else {
+
+ /* Check if the remaining data fits into current packet. */
+ len = mtu - skb->len;
+ if (len < size)
+ len = maxfraglen - skb->len;
+ }
+ if (len <= 0) {
+ struct sk_buff *skb_prev;
char *data;
struct iphdr *iph;
- BUG_TRAP(len == 0);
+ int alloclen;
- skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1,
- sk->sk_allocation);
+ skb_prev = skb;
+ fraggap = skb_prev->len - maxfraglen;
+
+ alloclen = fragheaderlen + hh_len + fraggap + 15;
+ skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
if (unlikely(!skb)) {
err = -ENOBUFS;
goto error;
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fragheaderlen);
+ data = skb_put(skb, fragheaderlen + fraggap);
skb->nh.iph = iph = (struct iphdr *)data;
data += fragheaderlen;
skb->h.raw = data;
+ if (fraggap) {
+ skb->csum = skb_copy_and_csum_bits(
+ skb_prev, maxfraglen,
+ data, fraggap, 0);
+ skb_prev->csum = csum_sub(skb_prev->csum,
+ skb->csum);
+ pskb_trim_unique(skb_prev, maxfraglen);
+ }
+
/*
* Put the packet on the pending queue.
*/
}
if (skb->ip_summed == CHECKSUM_NONE) {
- unsigned int csum;
+ __wsum csum;
csum = csum_page(page, offset, len);
skb->csum = csum_block_add(skb->csum, csum, skb->len);
}
error:
inet->cork.length -= size;
- IP_INC_STATS(IpOutDiscards);
+ IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
return err;
}
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = NULL;
struct rtable *rt = inet->cork.rt;
struct iphdr *iph;
- int df = 0;
+ __be16 df = 0;
__u8 ttl;
int err = 0;
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
skb->data_len += tmp_skb->len;
-#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
skb->truesize += tmp_skb->truesize;
__sock_put(tmp_skb->sk);
tmp_skb->destructor = NULL;
tmp_skb->sk = NULL;
-#endif
}
/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
* If local_df is set too, we still allow to fragment this frame
* locally. */
if (inet->pmtudisc == IP_PMTUDISC_DO ||
- (!skb_shinfo(skb)->frag_list && ip_dont_fragment(sk, &rt->u.dst)))
+ (skb->len <= dst_mtu(&rt->u.dst) &&
+ ip_dont_fragment(sk, &rt->u.dst)))
df = htons(IP_DF);
if (inet->cork.flags & IPCORK_OPT)
iph->tos = inet->tos;
iph->tot_len = htons(skb->len);
iph->frag_off = df;
- if (!df) {
- __ip_select_ident(iph, &rt->u.dst, 0);
- } else {
- iph->id = htons(inet->id++);
- }
+ ip_select_ident(iph, &rt->u.dst, sk);
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
iph->saddr = rt->rt_src;
out:
inet->cork.flags &= ~IPCORK_OPT;
- if (inet->cork.opt) {
- kfree(inet->cork.opt);
- inet->cork.opt = NULL;
- }
+ kfree(inet->cork.opt);
+ inet->cork.opt = NULL;
if (inet->cork.rt) {
ip_rt_put(inet->cork.rt);
inet->cork.rt = NULL;
return err;
error:
- IP_INC_STATS(IpOutDiscards);
+ IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
goto out;
}
*/
void ip_flush_pending_frames(struct sock *sk)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
kfree_skb(skb);
inet->cork.flags &= ~IPCORK_OPT;
- if (inet->cork.opt) {
- kfree(inet->cork.opt);
- inet->cork.opt = NULL;
- }
+ kfree(inet->cork.opt);
+ inet->cork.opt = NULL;
if (inet->cork.rt) {
ip_rt_put(inet->cork.rt);
inet->cork.rt = NULL;
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
int len, int odd, struct sk_buff *skb)
{
- unsigned int csum;
+ __wsum csum;
csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
skb->csum = csum_block_add(skb->csum, csum, odd);
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
unsigned int len)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct {
struct ip_options opt;
char data[40];
} replyopts;
struct ipcm_cookie ipc;
- u32 daddr;
+ __be32 daddr;
struct rtable *rt = (struct rtable*)skb->dst;
if (ip_options_echo(&replyopts.opt, skb))
{ .sport = skb->h.th->dest,
.dport = skb->h.th->source } },
.proto = sk->sk_protocol };
+ security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
return;
}
&ipc, rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
if (arg->csumoffset >= 0)
- *((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+ *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(sk);
}
ip_rt_put(rt);
}
-/*
- * IP protocol layer initialiser
- */
-
-static struct packet_type ip_packet_type = {
- .type = __constant_htons(ETH_P_IP),
- .func = ip_rcv,
-};
-
-/*
- * IP registers the packet type and then calls the subprotocol initialisers
- */
-
void __init ip_init(void)
{
- dev_add_pack(&ip_packet_type);
-
ip_rt_init();
inet_initpeers();
#endif
}
-EXPORT_SYMBOL(ip_finish_output);
-EXPORT_SYMBOL(ip_fragment);
EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);
-
-#ifdef CONFIG_SYSCTL
-EXPORT_SYMBOL(sysctl_ip_default_ttl);
-#endif