X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fip_fragment.c;h=8ce00d3703dacdeb46a387f27a8d5a609f100e7a;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=7f68e27eb4ea894c21f36e2d99f7f9423f1ba657;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7f68e27eb..8ce00d370 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -22,7 +22,7 @@ * Patrick McHardy : LRU queue of frag heads for evictor. */ -#include +#include #include #include #include @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -53,13 +54,15 @@ * even the most extreme cases without allowing an attacker to measurably * harm machine performance. */ -int sysctl_ipfrag_high_thresh = 256*1024; -int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; +int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; + +int sysctl_ipfrag_max_dist __read_mostly = 64; /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ -int sysctl_ipfrag_time = IP_FRAG_TIME; +int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; struct ipfrag_skb_cb { @@ -71,12 +74,12 @@ struct ipfrag_skb_cb /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { - struct ipq *next; /* linked list pointers */ + struct hlist_node list; struct list_head lru_list; /* lru list member */ u32 user; - u32 saddr; - u32 daddr; - u16 id; + __be32 saddr; + __be32 daddr; + __be16 id; u8 protocol; u8 last_in; #define COMPLETE 4 @@ -89,9 +92,10 @@ struct ipq { spinlock_t lock; atomic_t refcnt; struct timer_list timer; /* when will this queue expire? */ - struct ipq **pprev; - int iif; struct timeval stamp; + int iif; + unsigned int rid; + struct inet_peer *peer; }; /* Hash table. */ @@ -99,7 +103,7 @@ struct ipq { #define IPQ_HASHSZ 64 /* Per-bucket lock is easy to add now. */ -static struct ipq *ipq_hash[IPQ_HASHSZ]; +static struct hlist_head ipq_hash[IPQ_HASHSZ]; static DEFINE_RWLOCK(ipfrag_lock); static u32 ipfrag_hash_rnd; static LIST_HEAD(ipq_lru_list); @@ -107,9 +111,7 @@ int ip_frag_nqueues = 0; static __inline__ void __ipq_unlink(struct ipq *qp) { - if(qp->next) - qp->next->pprev = qp->pprev; - *qp->pprev = qp->next; + hlist_del(&qp->list); list_del(&qp->lru_list); ip_frag_nqueues--; } @@ -121,14 +123,15 @@ static __inline__ void ipq_unlink(struct ipq *ipq) write_unlock(&ipfrag_lock); } -static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot) +static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { - return jhash_3words((u32)id << 16 | prot, saddr, daddr, + return jhash_3words((__force u32)id << 16 | prot, + (__force u32)saddr, (__force u32)daddr, ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); } static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval = 10 * 60 * HZ; +int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; static void ipfrag_secret_rebuild(unsigned long dummy) { @@ -139,27 +142,18 @@ static void ipfrag_secret_rebuild(unsigned long dummy) get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); for (i = 0; i < IPQ_HASHSZ; i++) { struct ipq *q; + struct hlist_node *p, *n; - q = ipq_hash[i]; - while (q) { - struct ipq *next = q->next; + hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { unsigned int hval = ipqhashfn(q->id, q->saddr, q->daddr, q->protocol); if (hval != i) { - /* Unlink. */ - if (q->next) - q->next->pprev = q->pprev; - *q->pprev = q->next; + hlist_del(&q->list); /* Relink to new hash chain. */ - if ((q->next = ipq_hash[hval]) != NULL) - q->next->pprev = &q->next; - ipq_hash[hval] = q; - q->pprev = &ipq_hash[hval]; + hlist_add_head(&q->list, &ipq_hash[hval]); } - - q = next; } } write_unlock(&ipfrag_lock); @@ -207,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work) BUG_TRAP(qp->last_in&COMPLETE); BUG_TRAP(del_timer(&qp->timer) == 0); + if (qp->peer) + inet_putpeer(qp->peer); + /* Release all fragment data. */ fp = qp->fragments; while (fp) { @@ -307,17 +304,23 @@ out: /* Creation primitives. */ -static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) +static struct ipq *ip_frag_intern(struct ipq *qp_in) { struct ipq *qp; +#ifdef CONFIG_SMP + struct hlist_node *n; +#endif + unsigned int hash; write_lock(&ipfrag_lock); + hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, + qp_in->protocol); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we * promoted read lock to write lock. */ - for(qp = ipq_hash[hash]; qp; qp = qp->next) { + hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { if(qp->id == qp_in->id && qp->saddr == qp_in->saddr && qp->daddr == qp_in->daddr && @@ -337,10 +340,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - if((qp->next = ipq_hash[hash]) != NULL) - qp->next->pprev = &qp->next; - ipq_hash[hash] = qp; - qp->pprev = &ipq_hash[hash]; + hlist_add_head(&qp->list, &ipq_hash[hash]); INIT_LIST_HEAD(&qp->lru_list); list_add_tail(&qp->lru_list, &ipq_lru_list); ip_frag_nqueues++; @@ -349,7 +349,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) } /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) +static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) { struct ipq *qp; @@ -366,6 +366,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) qp->meat = 0; qp->fragments = NULL; qp->iif = 0; + qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -374,10 +375,10 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) spin_lock_init(&qp->lock); atomic_set(&qp->refcnt, 1); - return ip_frag_intern(hash, qp); + return ip_frag_intern(qp); out_nomem: - NETDEBUG(if (net_ratelimit()) printk(KERN_ERR "ip_frag_create: no memory left !\n")); + LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); return NULL; } @@ -386,15 +387,17 @@ out_nomem: */ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { - __u16 id = iph->id; - __u32 saddr = iph->saddr; - __u32 daddr = iph->daddr; + __be16 id = iph->id; + __be32 saddr = iph->saddr; + __be32 daddr = iph->daddr; __u8 protocol = iph->protocol; - unsigned int hash = ipqhashfn(id, saddr, daddr, protocol); + unsigned int hash; struct ipq *qp; + struct hlist_node *n; read_lock(&ipfrag_lock); - for(qp = ipq_hash[hash]; qp; qp = qp->next) { + hash = ipqhashfn(id, saddr, daddr, protocol); + hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { if(qp->id == id && qp->saddr == saddr && qp->daddr == daddr && @@ -407,7 +410,57 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) } read_unlock(&ipfrag_lock); - return ip_frag_create(hash, iph, user); + return ip_frag_create(iph, user); +} + +/* Is the fragment too far ahead to be part of ipq? */ +static inline int ip_frag_too_far(struct ipq *qp) +{ + struct inet_peer *peer = qp->peer; + unsigned int max = sysctl_ipfrag_max_dist; + unsigned int start, end; + + int rc; + + if (!peer || !max) + return 0; + + start = qp->rid; + end = atomic_inc_return(&peer->rid); + qp->rid = end; + + rc = qp->fragments && (end - start) > max; + + if (rc) { + IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + } + + return rc; +} + +static int ip_frag_reinit(struct ipq *qp) +{ + struct sk_buff *fp; + + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + atomic_inc(&qp->refcnt); + return -ETIMEDOUT; + } + + fp = qp->fragments; + do { + struct sk_buff *xp = fp->next; + frag_kfree_skb(fp, NULL); + fp = xp; + } while (fp); + + qp->last_in = 0; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; + qp->iif = 0; + + return 0; } /* Add new segment to existing queue. */ @@ -420,6 +473,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (qp->last_in & COMPLETE) goto err; + if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && + unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + ipq_kill(qp); + goto err; + } + offset = ntohs(skb->nh.iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; @@ -457,7 +516,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (pskb_pull(skb, ihl) == NULL) goto err; - if (pskb_trim(skb, end-offset)) + if (pskb_trim_rcsum(skb, end-offset)) goto err; /* Find out which fragments are in front and at the back of us @@ -506,7 +565,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } else { struct sk_buff *free_it = next; - /* Old fragmnet is completely overridden with + /* Old fragment is completely overridden with * new one drop it. */ next = next->next; @@ -533,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (skb->dev) qp->iif = skb->dev->ifindex; skb->dev = NULL; - qp->stamp = skb->stamp; + skb_get_timestamp(skb, &qp->stamp); qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); if (offset == 0) @@ -607,7 +666,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip_frag_mem); @@ -615,7 +674,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->next = NULL; head->dev = dev; - head->stamp = qp->stamp; + skb_set_timestamp(head, &qp->stamp); iph = head->nh.iph; iph->frag_off = 0; @@ -625,10 +684,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) return head; out_nomem: - NETDEBUG(if (net_ratelimit()) - printk(KERN_ERR - "IP: queue_glue: no memory for gluing queue %p\n", - qp)); + LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " + "queue %p\n", qp); goto out_fail; out_oversize: if (net_ratelimit())