X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp.c;h=01fda573983fcd63641f341428db6cd2ace2c98c;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=f28f40642ae55ea9d70207dfd7066efff0892f3c;hpb=16c70f8c1b54b61c3b951b6fb220df250fe09b32;p=linux-2.6.git

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f28f40642..01fda5739 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -258,6 +258,7 @@
 #include <linux/bootmem.h>
 #include <linux/cache.h>
 #include <linux/err.h>
+#include <linux/crypto.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -269,7 +270,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
-int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
 
@@ -463,11 +464,12 @@ static inline int forced_push(struct tcp_sock *tp)
 static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
 			      struct sk_buff *skb)
 {
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->seq = tp->write_seq;
-	TCP_SKB_CB(skb)->end_seq = tp->write_seq;
-	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
-	TCP_SKB_CB(skb)->sacked = 0;
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	skb->csum = 0;
+	tcb->seq = tcb->end_seq = tp->write_seq;
+	tcb->flags = TCPCB_FLAG_ACK;
+	tcb->sacked = 0;
 	skb_header_release(skb);
 	__skb_queue_tail(&sk->sk_write_queue, skb);
 	sk_charge_skb(sk, skb);
@@ -569,7 +571,7 @@ new_segment:
 		skb->truesize += copy;
 		sk->sk_wmem_queued += copy;
 		sk->sk_forward_alloc -= copy;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		tp->write_seq += copy;
 		TCP_SKB_CB(skb)->end_seq += copy;
 		skb_shinfo(skb)->gso_segs = 0;
@@ -724,7 +726,7 @@ new_segment:
 				 * Check whether we can use HW checksum.
 				 */
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
-					skb->ip_summed = CHECKSUM_HW;
+					skb->ip_summed = CHECKSUM_PARTIAL;
 
 				skb_entail(sk, tp, skb);
 				copy = size_goal;
@@ -956,8 +958,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		     * receive buffer and there was a small segment
 		     * in queue.
 		     */
-		    (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
-		     !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+		    (copied > 0 &&
+		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
+		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+		       !icsk->icsk_ack.pingpong)) &&
+		     !atomic_read(&sk->sk_rmem_alloc)))
 			time_to_ack = 1;
 	}
 
@@ -1940,6 +1945,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		}
 		break;
 
+#ifdef CONFIG_TCP_MD5SIG
+	case TCP_MD5SIG:
+		/* Read the IP->Key mappings from userspace */
+		err = tp->af_specific->md5_parse(sk, optval, optlen);
+		break;
+#endif
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2152,7 +2164,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	struct tcphdr *th;
 	unsigned thlen;
 	unsigned int seq;
-	unsigned int delta;
+	__be32 delta;
 	unsigned int oldlen;
 	unsigned int len;
 
@@ -2205,8 +2217,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	do {
 		th->fin = th->psh = 0;
 
-		th->check = ~csum_fold(th->check + delta);
-		if (skb->ip_summed != CHECKSUM_HW)
+		th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
+				       (__force u32)delta));
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
 			th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 							   skb->csum));
 
@@ -2219,8 +2232,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	} while (skb->next);
 
 	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
-	th->check = ~csum_fold(th->check + delta);
-	if (skb->ip_summed != CHECKSUM_HW)
+	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
+				(__force u32)delta));
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 						   skb->csum));
 
@@ -2229,6 +2243,136 @@ out:
 }
 EXPORT_SYMBOL(tcp_tso_segment);
 
+#ifdef CONFIG_TCP_MD5SIG
+static unsigned long tcp_md5sig_users;
+static struct tcp_md5sig_pool **tcp_md5sig_pool;
+static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
+
+static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
+{
+	int cpu;
+	for_each_possible_cpu(cpu) {
+		struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);
+		if (p) {
+			if (p->md5_desc.tfm)
+				crypto_free_hash(p->md5_desc.tfm);
+			kfree(p);
+			p = NULL;
+		}
+	}
+	free_percpu(pool);
+}
+
+void tcp_free_md5sig_pool(void)
+{
+	struct tcp_md5sig_pool **pool = NULL;
+
+	spin_lock_bh(&tcp_md5sig_pool_lock);
+	if (--tcp_md5sig_users == 0) {
+		pool = tcp_md5sig_pool;
+		tcp_md5sig_pool = NULL;
+	}
+	spin_unlock_bh(&tcp_md5sig_pool_lock);
+	if (pool)
+		__tcp_free_md5sig_pool(pool);
+}
+
+EXPORT_SYMBOL(tcp_free_md5sig_pool);
+
+static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
+{
+	int cpu;
+	struct tcp_md5sig_pool **pool;
+
+	pool = alloc_percpu(struct tcp_md5sig_pool *);
+	if (!pool)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		struct tcp_md5sig_pool *p;
+		struct crypto_hash *hash;
+
+		p = kzalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			goto out_free;
+		*per_cpu_ptr(pool, cpu) = p;
+
+		hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+		if (!hash || IS_ERR(hash))
+			goto out_free;
+
+		p->md5_desc.tfm = hash;
+	}
+	return pool;
+out_free:
+	__tcp_free_md5sig_pool(pool);
+	return NULL;
+}
+
+struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void)
+{
+	struct tcp_md5sig_pool **pool;
+	int alloc = 0;
+
+retry:
+	spin_lock_bh(&tcp_md5sig_pool_lock);
+	pool = tcp_md5sig_pool;
+	if (tcp_md5sig_users++ == 0) {
+		alloc = 1;
+		spin_unlock_bh(&tcp_md5sig_pool_lock);
+	} else if (!pool) {
+		tcp_md5sig_users--;
+		spin_unlock_bh(&tcp_md5sig_pool_lock);
+		cpu_relax();
+		goto retry;
+	} else
+		spin_unlock_bh(&tcp_md5sig_pool_lock);
+
+	if (alloc) {
+		/* we cannot hold spinlock here because this may sleep. */
+		struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool();
+		spin_lock_bh(&tcp_md5sig_pool_lock);
+		if (!p) {
+			tcp_md5sig_users--;
+			spin_unlock_bh(&tcp_md5sig_pool_lock);
+			return NULL;
+		}
+		pool = tcp_md5sig_pool;
+		if (pool) {
+			/* oops, it has already been assigned. */
+			spin_unlock_bh(&tcp_md5sig_pool_lock);
+			__tcp_free_md5sig_pool(p);
+		} else {
+			tcp_md5sig_pool = pool = p;
+			spin_unlock_bh(&tcp_md5sig_pool_lock);
+		}
+	}
+	return pool;
+}
+
+EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
+
+struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
+{
+	struct tcp_md5sig_pool **p;
+	spin_lock_bh(&tcp_md5sig_pool_lock);
+	p = tcp_md5sig_pool;
+	if (p)
+		tcp_md5sig_users++;
+	spin_unlock_bh(&tcp_md5sig_pool_lock);
+	return (p ? *per_cpu_ptr(p, cpu) : NULL);
+}
+
+EXPORT_SYMBOL(__tcp_get_md5sig_pool);
+
+void __tcp_put_md5sig_pool(void)
+{
+	tcp_free_md5sig_pool();
+}
+
+EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+#endif
+
 extern void __skb_cb_too_small_for_tcp(int, int);
 extern struct tcp_congestion_ops tcp_reno;
 
@@ -2255,9 +2399,7 @@ void __init tcp_init(void)
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
-				  SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (!tcp_hashinfo.bind_bucket_cachep)
-		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	/* Size and allocate the main established and bind bucket
 	 * hash tables.
@@ -2270,7 +2412,7 @@ void __init tcp_init(void)
 					thash_entries,
 					(num_physpages >= 128 * 1024) ?
 					13 : 15,
-					HASH_HIGHMEM,
+					0,
 					&tcp_hashinfo.ehash_size,
 					NULL,
 					0);
@@ -2286,7 +2428,7 @@ void __init tcp_init(void)
 					tcp_hashinfo.ehash_size,
 					(num_physpages >= 128 * 1024) ?
 					13 : 15,
-					HASH_HIGHMEM,
+					0,
 					&tcp_hashinfo.bhash_size,
 					NULL,
 					64 * 1024);
@@ -2316,10 +2458,18 @@ void __init tcp_init(void)
 		sysctl_max_syn_backlog = 128;
 	}
 
-	sysctl_tcp_mem[0] = 768 << order;
-	sysctl_tcp_mem[1] = 1024 << order;
-	sysctl_tcp_mem[2] = 1536 << order;
-
+	/* Set the pressure threshold to be a fraction of global memory that
+	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
+	 * memory, with a floor of 128 pages.
+	 */
+	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	limit = max(limit, 128UL);
+	sysctl_tcp_mem[0] = limit / 4 * 3;
+	sysctl_tcp_mem[1] = limit;
+	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+
+	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
 	max_share = min(4UL*1024*1024, limit);
 
@@ -2350,4 +2500,3 @@ EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
 EXPORT_SYMBOL(tcp_statistics);
-EXPORT_SYMBOL_GPL(tcp_cleanup_rbuf);