#include <net/raw.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
return 0;
}
-static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst)
+static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
int ttl = inet->uc_ttl;
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
u32 saddr, u32 daddr, struct ip_options *opt)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)skb->dst;
struct iphdr *iph;
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_INC_STATS(IpOutRequests);
+ IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
newskb->dev, ip_dev_loopback_xmit);
}
- if (skb->len > dst_pmtu(&rt->u.dst) || skb_shinfo(skb)->frag_list)
+ if (skb->len > dst_pmtu(&rt->u.dst))
return ip_fragment(skb, ip_finish_output);
else
return ip_finish_output(skb);
int ip_output(struct sk_buff *skb)
{
- IP_INC_STATS(IpOutRequests);
+ IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
- if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list) &&
- !skb_shinfo(skb)->tso_size)
+ if (skb->len > dst_pmtu(skb->dst) && !skb_shinfo(skb)->tso_size)
return ip_fragment(skb, ip_finish_output);
else
return ip_finish_output(skb);
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
struct sock *sk = skb->sk;
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = inet->opt;
struct rtable *rt;
struct iphdr *iph;
- u32 mtu;
/* Skip all of this if the packet is already routed,
* f.e. by something like SCTP.
skb->nh.iph = iph;
/* Transport layer set skb->h.foo itself. */
- if(opt && opt->optlen) {
+ if (opt && opt->optlen) {
iph->ihl += opt->optlen >> 2;
ip_options_build(skb, opt, inet->daddr, rt, 0);
}
- mtu = dst_pmtu(&rt->u.dst);
- if (skb->len > mtu && (sk->sk_route_caps & NETIF_F_TSO)) {
- unsigned int hlen;
-
- /* Hack zone: all this must be done by TCP. */
- hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
- skb_shinfo(skb)->tso_size = mtu - hlen;
- skb_shinfo(skb)->tso_segs =
- (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
- skb_shinfo(skb)->tso_size - 1;
- }
-
ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
/* Add an IP checksum. */
dst_output);
no_route:
- IP_INC_STATS(IpOutNoRoutes);
+ IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EHOSTUNREACH;
}
to->priority = from->priority;
to->protocol = from->protocol;
to->security = from->security;
+ dst_release(to->dst);
to->dst = dst_clone(from->dst);
to->dev = from->dev;
nf_conntrack_put(to->nfct);
to->nfct = from->nfct;
nf_conntrack_get(to->nfct);
+ to->nfctinfo = from->nfctinfo;
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(to->nf_bridge);
to->nf_bridge = from->nf_bridge;
skb_headroom(frag) < hlen)
goto slow_path;
- /* Correct socket ownership. */
- if (frag->sk == NULL && skb->sk)
- goto slow_path;
-
/* Partially cloned skb? */
if (skb_shared(frag))
goto slow_path;
err = 0;
offset = 0;
frag = skb_shinfo(skb)->frag_list;
- skb_shinfo(skb)->frag_list = 0;
+ skb_shinfo(skb)->frag_list = NULL;
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
iph->tot_len = htons(first_len);
/* Prepare header of the next frame,
* before previous one went down. */
if (frag) {
+ frag->ip_summed = CHECKSUM_NONE;
frag->h.raw = frag->data;
frag->nh.raw = __skb_push(frag, hlen);
memcpy(frag->nh.raw, iph, hlen);
}
if (err == 0) {
- IP_INC_STATS(IpFragOKs);
+ IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
return 0;
}
kfree_skb(frag);
frag = skb;
}
- IP_INC_STATS(IpFragFails);
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
return err;
}
#ifdef CONFIG_BRIDGE_NETFILTER
/* for bridged IP traffic encapsulated inside f.e. a vlan header,
* we need to make room for the encapsulating header */
- ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev + nf_bridge_pad(skb));
+ ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
mtu -= nf_bridge_pad(skb);
#else
ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
* Put this fragment into the sending queue.
*/
- IP_INC_STATS(IpFragCreates);
+ IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
iph->tot_len = htons(len + hlen);
goto fail;
}
kfree_skb(skb);
- IP_INC_STATS(IpFragOKs);
+ IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
return err;
fail:
kfree_skb(skb);
- IP_INC_STATS(IpFragFails);
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
return err;
}
return 0;
}
-static inline int
-skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
-{
- if (i) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
- return page == frag->page &&
- off == frag->page_offset+frag->size;
- }
- return 0;
-}
-
static inline unsigned int
csum_page(struct page *page, int offset, int copy)
{
/*
* ip_append_data() and ip_append_page() can make one large IP datagram
* from many pieces of data. Each pieces will be holded on the socket
- * until ip_push_pending_frames() is called. Eache pieces can be a page
+ * until ip_push_pending_frames() is called. Each piece can be a page
* or non-page data.
*
* Not only UDP, other transport protocols - e.g. raw sockets - can use
struct ipcm_cookie *ipc, struct rtable *rt,
unsigned int flags)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
struct ip_options *opt = NULL;
inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
inet->cork.rt = rt;
inet->cork.length = 0;
- inet->sndmsg_page = NULL;
- inet->sndmsg_off = 0;
+ sk->sk_sndmsg_page = NULL;
+ sk->sk_sndmsg_off = 0;
if ((exthdrlen = rt->u.dst.header_len) != 0) {
length += exthdrlen;
transhdrlen += exthdrlen;
hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
- maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
* it won't be fragmented in the future.
*/
if (transhdrlen &&
- length + fragheaderlen <= maxfraglen &&
+ length + fragheaderlen <= mtu &&
rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
!exthdrlen)
csummode = CHECKSUM_HW;
* We use calculated fragment length to generate chained skb,
* each of segments is IP fragment ready for sending to network after
* adding appropriate IP header.
- *
- * Mistake is:
- *
- * If mtu-fragheaderlen is not 0 modulo 8, we generate additional
- * small fragment of length (mtu-fragheaderlen)%8, even though
- * it is not necessary. Not a big bug, but needs a fix.
*/
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
goto alloc_new_skb;
while (length > 0) {
- if ((copy = maxfraglen - skb->len) <= 0) {
+ /* Check if the remaining data fits into current packet. */
+ copy = mtu - skb->len;
+ if (copy < length)
+ copy = maxfraglen - skb->len;
+ if (copy <= 0) {
char *data;
unsigned int datalen;
unsigned int fraglen;
+ unsigned int fraggap;
unsigned int alloclen;
- BUG_TRAP(copy == 0);
-
+ struct sk_buff *skb_prev;
alloc_new_skb:
- datalen = maxfraglen - fragheaderlen;
- if (datalen > length)
- datalen = length;
+ skb_prev = skb;
+ if (skb_prev)
+ fraggap = skb_prev->len - maxfraglen;
+ else
+ fraggap = 0;
+ /*
+ * If remaining data exceeds the mtu,
+ * we know we need more fragment(s).
+ */
+ datalen = length + fraggap;
+ if (datalen > mtu - fragheaderlen)
+ datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
+
if ((flags & MSG_MORE) &&
!(rt->u.dst.dev->features&NETIF_F_SG))
- alloclen = maxfraglen;
+ alloclen = mtu;
else
alloclen = datalen + fragheaderlen;
data += fragheaderlen;
skb->h.raw = data + exthdrlen;
- copy = datalen - transhdrlen;
- if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
+ if (fraggap) {
+ skb->csum = skb_copy_and_csum_bits(
+ skb_prev, maxfraglen,
+ data + transhdrlen, fraggap, 0);
+ skb_prev->csum = csum_sub(skb_prev->csum,
+ skb->csum);
+ data += fraggap;
+ skb_trim(skb_prev, maxfraglen);
+ }
+
+ copy = datalen - transhdrlen - fraggap;
+ if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
}
offset += copy;
- length -= datalen;
+ length -= datalen - fraggap;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
} else {
int i = skb_shinfo(skb)->nr_frags;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
- struct page *page = inet->sndmsg_page;
- int off = inet->sndmsg_off;
+ struct page *page = sk->sk_sndmsg_page;
+ int off = sk->sk_sndmsg_off;
unsigned int left;
if (page && (left = PAGE_SIZE - off) > 0) {
goto error;
}
get_page(page);
- skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
+ skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
} else if (i < MAX_SKB_FRAGS) {
err = -ENOMEM;
goto error;
}
- inet->sndmsg_page = page;
- inet->sndmsg_off = 0;
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
skb_fill_page_desc(skb, i, page, 0, 0);
frag = &skb_shinfo(skb)->frags[i];
err = -EFAULT;
goto error;
}
- inet->sndmsg_off += copy;
+ sk->sk_sndmsg_off += copy;
frag->size += copy;
skb->len += copy;
skb->data_len += copy;
error:
inet->cork.length -= length;
- IP_INC_STATS(IpOutDiscards);
+ IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
return err;
}
ssize_t ip_append_page(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
struct rtable *rt;
struct ip_options *opt = NULL;
int mtu;
int len;
int err;
- unsigned int maxfraglen, fragheaderlen;
+ unsigned int maxfraglen, fragheaderlen, fraggap;
if (inet->hdrincl)
return -EPERM;
mtu = inet->cork.fragsize;
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
- maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
while (size > 0) {
int i;
- if ((len = maxfraglen - skb->len) <= 0) {
+
+ /* Check if the remaining data fits into current packet. */
+ len = mtu - skb->len;
+ if (len < size)
+ len = maxfraglen - skb->len;
+ if (len <= 0) {
+ struct sk_buff *skb_prev;
char *data;
struct iphdr *iph;
- BUG_TRAP(len == 0);
+ int alloclen;
+
+ skb_prev = skb;
+ if (skb_prev)
+ fraggap = skb_prev->len - maxfraglen;
+ else
+ fraggap = 0;
- skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1,
- sk->sk_allocation);
+ alloclen = fragheaderlen + hh_len + fraggap + 15;
+ skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
if (unlikely(!skb)) {
err = -ENOBUFS;
goto error;
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fragheaderlen);
+ data = skb_put(skb, fragheaderlen + fraggap);
skb->nh.iph = iph = (struct iphdr *)data;
data += fragheaderlen;
skb->h.raw = data;
+ if (fraggap) {
+ skb->csum = skb_copy_and_csum_bits(
+ skb_prev, maxfraglen,
+ data, fraggap, 0);
+ skb_prev->csum = csum_sub(skb_prev->csum,
+ skb->csum);
+ skb_trim(skb_prev, maxfraglen);
+ }
+
/*
* Put the packet on the pending queue.
*/
error:
inet->cork.length -= size;
- IP_INC_STATS(IpOutDiscards);
+ IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
return err;
}
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = NULL;
struct rtable *rt = inet->cork.rt;
struct iphdr *iph;
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
skb->data_len += tmp_skb->len;
-#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
skb->truesize += tmp_skb->truesize;
__sock_put(tmp_skb->sk);
tmp_skb->destructor = NULL;
tmp_skb->sk = NULL;
-#endif
}
/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
return err;
error:
- IP_INC_STATS(IpOutDiscards);
+ IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
goto out;
}
*/
void ip_flush_pending_frames(struct sock *sk)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
unsigned int len)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct {
struct ip_options opt;
char data[40];