#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/vserver/debug.h>
extern int sysctl_ip_dynaddr;
int sysctl_tcp_tw_reuse;
static __inline__ int tcp_sk_hashfn(struct sock *sk)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
__u32 laddr = inet->rcv_saddr;
__u16 lport = inet->num;
__u32 faddr = inet->daddr;
tcp_sk(sk)->bind_hash = tb;
}
-/*
- Return 1 if addr match the socket IP list
- or the socket is INADDR_ANY
-*/
-static inline int tcp_in_list(struct sock *sk, u32 addr)
-{
- struct nx_info *nxi = sk->sk_nx_info;
-
- vxdprintk("tcp_in_list(%p) %p,%p;%lx\n",
- sk, nxi, sk->sk_socket,
- (sk->sk_socket?sk->sk_socket->flags:0));
-
- if (nxi) {
- int n = nxi->nbipv4;
- int i;
-
- for (i=0; i<n; i++)
- if (nxi->ipv4[i] == addr)
- return 1;
- }
- else if (!tcp_v4_rcv_saddr(sk) || tcp_v4_rcv_saddr(sk) == addr)
- return 1;
- return 0;
-}
-
-/*
- Check if the addresses in sk1 conflict with those in sk2
-*/
-int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2)
-{
- if (sk1 && sk2)
- nxdprintk("inet_bind(%p,%p) %p,%p;%lx %p,%p;%lx\n",
- sk1, sk2,
- sk1->sk_nx_info, sk1->sk_socket,
- (sk1->sk_socket?sk1->sk_socket->flags:0),
- sk2->sk_nx_info, sk2->sk_socket,
- (sk2->sk_socket?sk2->sk_socket->flags:0));
-
- if (tcp_v4_rcv_saddr(sk1)) {
- /* Bind to one address only */
- return tcp_in_list (sk2, tcp_v4_rcv_saddr(sk1));
- } else if (sk1->sk_nx_info) {
- /* A restricted bind(any) */
- struct nx_info *nxi = sk1->sk_nx_info;
- int n = nxi->nbipv4;
- int i;
-
- for (i=0; i<n; i++)
- if (tcp_in_list (sk2, nxi->ipv4[i]))
- return 1;
- } else /* A bind(any) do not allow other bind on the same port */
- return 1;
- return 0;
-}
-
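The two helpers removed above do not disappear: the bind-conflict hunk below replaces them with a single call to nx_addr_conflict(), which lives in the vserver core rather than in this file. Reconstructed from the removed code, its semantics are roughly the following userspace sketch; the stand-in nx_info and the helper names here are illustrative, not the real kernel definitions.

#include <stdint.h>

/* Minimal stand-in for the vserver network context; assumption:
 * only the IPv4 address list matters for this check. */
struct nx_info {
	int nbipv4;
	uint32_t ipv4[16];
};

/* Mirror of the removed tcp_in_list(): with a context the address
 * must be in its list; without one, an unbound or matching saddr
 * accepts. */
static int in_list(const struct nx_info *nxi, uint32_t saddr, uint32_t addr)
{
	int i;

	if (nxi) {
		for (i = 0; i < nxi->nbipv4; i++)
			if (nxi->ipv4[i] == addr)
				return 1;
		return 0;
	}
	return !saddr || saddr == addr;
}

/* Hypothetical shape of nx_addr_conflict(), re-derived from the
 * removed tcp_ipv4_addr_conflict(); the real helper lives in the
 * vserver core, outside this patch. */
static int addrs_conflict(const struct nx_info *nxi1, uint32_t saddr1,
			  const struct nx_info *nxi2, uint32_t saddr2)
{
	int i;

	if (saddr1)		/* bound to one address */
		return in_list(nxi2, saddr2, saddr1);
	if (nxi1) {		/* restricted bind(any) */
		for (i = 0; i < nxi1->nbipv4; i++)
			if (in_list(nxi2, saddr2, nxi1->ipv4[i]))
				return 1;
		return 0;
	}
	return 1;		/* plain bind(any) conflicts with all */
}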
static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
{
struct sock *sk2;
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
if (!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) {
- if (tcp_ipv4_addr_conflict(sk, sk2))
+ if (nx_addr_conflict(sk->sk_nx_info,
+ tcp_v4_rcv_saddr(sk), sk2))
break;
}
}
*/
static void __tcp_put_port(struct sock *sk)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
struct tcp_bind_bucket *tb;
wake_up(&tcp_lhash_wait);
}
+
/*
- Check if an address is in the list
-*/
-static inline int tcp_addr_in_list(
- u32 rcv_saddr,
- u32 daddr,
- struct nx_info *nx_info)
-{
- if (rcv_saddr == daddr)
+ * Check if a given address matches a TCP socket
+ *
+ * nxi:   the socket's nx_info, if any
+ * addr:  the address to be verified
+ * saddr: the socket's bound address
+ */
+static inline int tcp_addr_match (
+ struct nx_info *nxi,
+ uint32_t addr,
+ uint32_t saddr)
+{
+ if (addr && (saddr == addr))
return 1;
- else if (rcv_saddr == 0) {
- /* Accept any address or check the list */
- if (!nx_info)
- return 1;
- else {
- int n = nx_info->nbipv4;
- int i;
-
- for (i=0; i<n; i++)
- if (nx_info->ipv4[i] == daddr)
- return 1;
- }
- }
+ if (!saddr)
+ return addr_in_nx_info(nxi, addr);
return 0;
}
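tcp_addr_match() is the slimmed-down successor of tcp_addr_in_list(): an exact address match wins outright, and a socket bound to INADDR_ANY (saddr == 0) defers to the context's address list via addr_in_nx_info(). A minimal userspace rendition, with addr_in_nx_info() stubbed; that a NULL context accepts every address is an assumption carried over from the old code path:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Stub: the real addr_in_nx_info() walks nxi->ipv4[]. */
static int addr_in_nx_info(void *nxi, uint32_t addr)
{
	(void)addr;
	return nxi == NULL;	/* placeholder for the list walk */
}

/* Same decision procedure as the kernel function above. */
static int tcp_addr_match(void *nxi, uint32_t addr, uint32_t saddr)
{
	if (addr && (saddr == addr))
		return 1;
	if (!saddr)
		return addr_in_nx_info(nxi, addr);
	return 0;
}

int main(void)
{
	uint32_t a = 0x7f000001;	/* any test address */

	assert(tcp_addr_match(NULL, a, a));	  /* exact match */
	assert(tcp_addr_match(NULL, a, 0));	  /* INADDR_ANY, no context */
	assert(!tcp_addr_match(NULL, a, a + 1));  /* bound elsewhere */
	return 0;
}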
-
-
/* Don't inline this cruft. Here are some nice properties to
* exploit here. The BSD API does not allow a listening TCP
* to specify the remote port nor the remote address for the
hiscore=-1;
sk_for_each(sk, node, head) {
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
if (inet->num == hnum && !ipv6_only_sock(sk)) {
__u32 rcv_saddr = inet->rcv_saddr;
score = (sk->sk_family == PF_INET ? 1 : 0);
- if (tcp_addr_in_list(rcv_saddr, daddr, sk->sk_nx_info))
+ if (tcp_addr_match(sk->sk_nx_info, daddr, rcv_saddr))
score+=2;
else
continue;
}
/* Optimize the common listener case. */
-inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum,
- int dif)
+static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
+ unsigned short hnum, int dif)
{
struct sock *sk = NULL;
struct hlist_head *head;
read_lock(&tcp_lhash_lock);
head = &tcp_listening_hash[tcp_lhashfn(hnum)];
if (!hlist_empty(head)) {
- struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
+ struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
if (inet->num == hnum && !sk->sk_node.next &&
(sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
- tcp_addr_in_list(inet->rcv_saddr, daddr, sk->sk_nx_info) &&
+ tcp_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
!sk->sk_bound_dev_if)
goto sherry_cache;
sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
return sk;
}
+EXPORT_SYMBOL_GPL(tcp_v4_lookup);
+
static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
{
return secure_tcp_sequence_number(skb->nh.iph->daddr,
static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
struct tcp_tw_bucket **twp)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
u32 daddr = inet->rcv_saddr;
u32 saddr = inet->daddr;
int dif = sk->sk_bound_dev_if;
tw = (struct tcp_tw_bucket *)sk2;
if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
- struct tcp_opt *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
/* With PAWS, it is safe from the viewpoint
of data integrity. Even without PAWS it
if ((tp->write_seq =
tw->tw_snd_nxt + 65535 + 2) == 0)
tp->write_seq = 1;
- tp->ts_recent = tw->tw_ts_recent;
- tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
+ tp->rx_opt.ts_recent = tw->tw_ts_recent;
+ tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
sock_hold(sk2);
goto unique;
} else
if (twp) {
*twp = tw;
- NET_INC_STATS_BH(TimeWaitRecycled);
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) {
/* Silly. Should hash-dance instead... */
tcp_tw_deschedule(tw);
- NET_INC_STATS_BH(TimeWaitRecycled);
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
tcp_tw_put(tw);
}
return -EADDRNOTAVAIL;
}
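For the TIME-WAIT reuse path above: write_seq for the new incarnation is seeded at tw_snd_nxt + 65535 + 2, presumably a maximal unscaled window plus the FIN and SYN sequence slots, so the two incarnations cannot overlap in sequence space; 0 is reserved to mean "unset", hence the bump to 1. A toy check of the arithmetic, including the wrap-around case:

#include <assert.h>
#include <stdint.h>

/* Seed the new incarnation's sequence number past the old one,
 * mirroring the logic in __tcp_v4_check_established() above. */
static uint32_t seed_write_seq(uint32_t tw_snd_nxt)
{
	uint32_t write_seq = tw_snd_nxt + 65535 + 2;

	if (write_seq == 0)	/* 0 means "not set" elsewhere */
		write_seq = 1;
	return write_seq;
}

int main(void)
{
	/* Normal case: lands one window + FIN + SYN ahead. */
	assert(seed_write_seq(1000) == 1000 + 65535 + 2);
	/* Wrap-around: 2^32 - 65537 would yield 0, bumped to 1. */
	assert(seed_write_seq(0xFFFFFFFFu - 65536u) == 1);
	return 0;
}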
+static inline u32 connect_port_offset(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+
+ return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr,
+ inet->dport);
+}
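This replaces the global tcp_port_rover: each (rcv_saddr, daddr, dport) triple now starts its ephemeral-port search at its own secure offset, and the static hint in tcp_v4_hash_connect() advances by i after every successful bind (the hint += i in the ok: path below), so repeated connects keep moving through the range. The enumeration itself is plain modular arithmetic; an illustrative userspace walk, with made-up values:

#include <stdint.h>
#include <stdio.h>

/* Walk candidate ports the way tcp_v4_hash_connect() does below:
 * the search starts at a per-destination offset instead of a
 * shared rover, so concurrent connects to different hosts probe
 * different parts of the range. */
int main(void)
{
	int low = 32768, high = 61000;	/* default local_port_range */
	int range = high - low;
	uint32_t offset = 12345;	/* would come from the hash */
	int i;

	for (i = 1; i <= 5; i++)	/* real loop runs i <= range */
		printf("try port %d\n", low + (int)((i + offset) % range));
	return 0;
}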
+
/*
* Bind a port for a connect operation and hash it.
*/
-static int tcp_v4_hash_connect(struct sock *sk)
+static inline int tcp_v4_hash_connect(struct sock *sk)
{
unsigned short snum = inet_sk(sk)->num;
struct tcp_bind_hashbucket *head;
int ret;
if (!snum) {
- int rover;
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
- int remaining = (high - low) + 1;
+ int range = high - low;
+ int i;
+ int port;
+ static u32 hint;
+ u32 offset = hint + connect_port_offset(sk);
struct hlist_node *node;
struct tcp_tw_bucket *tw = NULL;
local_bh_disable();
-
- /* TODO. Actually it is not so bad idea to remove
- * tcp_portalloc_lock before next submission to Linus.
- * As soon as we touch this place at all it is time to think.
- *
- * Now it protects single _advisory_ variable tcp_port_rover,
- * hence it is mostly useless.
- * Code will work nicely if we just delete it, but
- * I am afraid in contented case it will work not better or
- * even worse: another cpu just will hit the same bucket
- * and spin there.
- * So some cpu salt could remove both contention and
- * memory pingpong. Any ideas how to do this in a nice way?
- */
- spin_lock(&tcp_portalloc_lock);
- rover = tcp_port_rover;
-
- do {
- rover++;
- if ((rover < low) || (rover > high))
- rover = low;
- head = &tcp_bhash[tcp_bhashfn(rover)];
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+ head = &tcp_bhash[tcp_bhashfn(port)];
spin_lock(&head->lock);
/* Does not bother with rcv_saddr checks,
* unique enough.
*/
tb_for_each(tb, node, &head->chain) {
- if (tb->port == rover) {
+ if (tb->port == port) {
BUG_TRAP(!hlist_empty(&tb->owners));
if (tb->fastreuse >= 0)
goto next_port;
if (!__tcp_v4_check_established(sk,
- rover,
+ port,
&tw))
goto ok;
goto next_port;
}
}
- tb = tcp_bucket_create(head, rover);
+ tb = tcp_bucket_create(head, port);
if (!tb) {
spin_unlock(&head->lock);
break;
next_port:
spin_unlock(&head->lock);
- } while (--remaining > 0);
- tcp_port_rover = rover;
- spin_unlock(&tcp_portalloc_lock);
-
+ }
local_bh_enable();
return -EADDRNOTAVAIL;
ok:
- /* All locks still held and bhs disabled */
- tcp_port_rover = rover;
- spin_unlock(&tcp_portalloc_lock);
+ hint += i;
- tcp_bind_hash(sk, tb, rover);
+ /* Head lock still held and bh's disabled */
+ tcp_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
- inet_sk(sk)->sport = htons(rover);
+ inet_sk(sk)->sport = htons(port);
__tcp_v4_hash(sk, 0);
}
spin_unlock(&head->lock);
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct inet_opt *inet = inet_sk(sk);
- struct tcp_opt *tp = tcp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct rtable *rt;
u32 daddr, nexthop;
inet->saddr = rt->rt_src;
inet->rcv_saddr = inet->saddr;
- if (tp->ts_recent_stamp && inet->daddr != daddr) {
+ if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
/* Reset inherited state */
- tp->ts_recent = 0;
- tp->ts_recent_stamp = 0;
- tp->write_seq = 0;
+ tp->rx_opt.ts_recent = 0;
+ tp->rx_opt.ts_recent_stamp = 0;
+ tp->write_seq = 0;
}
if (sysctl_tcp_tw_recycle &&
- !tp->ts_recent_stamp && rt->rt_dst == daddr) {
+ !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt);
/* VJ's idea. We save last timestamp seen from
* the destination in peer table, when entering state TIME-WAIT
- * and initialize ts_recent from it, when trying new connection.
+ * and initialize rx_opt.ts_recent from it when trying a new connection.
*/
if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
- tp->ts_recent_stamp = peer->tcp_ts_stamp;
- tp->ts_recent = peer->tcp_ts;
+ tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+ tp->rx_opt.ts_recent = peer->tcp_ts;
}
}
if (inet->opt)
tp->ext_header_len = inet->opt->optlen;
- tp->mss_clamp = 536;
+ tp->rx_opt.mss_clamp = 536;
/* Socket identity is still unknown (sport may be zero).
* However we set state to SYN-SENT and not releasing socket
return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
}
-static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
+static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
struct open_request ***prevp,
__u16 rport,
__u32 raddr, __u32 laddr)
static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
{
- struct tcp_opt *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct tcp_listen_opt *lopt = tp->listen_opt;
u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
u32 mtu)
{
struct dst_entry *dst;
- struct inet_opt *inet = inet_sk(sk);
- struct tcp_opt *tp = tcp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
* send out by Linux are always <576bytes so they should go through
{
struct iphdr *iph = (struct iphdr *)skb->data;
struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
- struct tcp_opt *tp;
- struct inet_opt *inet;
+ struct tcp_sock *tp;
+ struct inet_sock *inet;
int type = skb->h.icmph->type;
int code = skb->h.icmph->code;
struct sock *sk;
int err;
if (skb->len < (iph->ihl << 2) + 8) {
- ICMP_INC_STATS_BH(IcmpInErrors);
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
th->source, tcp_v4_iif(skb));
if (!sk) {
- ICMP_INC_STATS_BH(IcmpInErrors);
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state == TCP_TIME_WAIT) {
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(LockDroppedIcmps);
+ NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
!between(seq, tp->snd_una, tp->snd_nxt)) {
- NET_INC_STATS(OutOfWindowIcmps);
+ NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
switch (type) {
case ICMP_SOURCE_QUENCH:
- /* This is deprecated, but if someone generated it,
- * we have no reasons to ignore it.
- */
- if (!sock_owned_by_user(sk))
- tcp_enter_cwr(tp);
+ /* Just silently ignore these. */
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
BUG_TRAP(!req->sk);
if (seq != req->snt_isn) {
- NET_INC_STATS_BH(OutOfWindowIcmps);
+ NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
It can f.e. if SYNs crossed.
*/
if (!sock_owned_by_user(sk)) {
- TCP_INC_STATS_BH(TcpAttemptFails);
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
sk->sk_error_report(sk);
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
if (skb->ip_summed == CHECKSUM_HW) {
th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
- TCP_INC_STATS_BH(TcpOutSegs);
- TCP_INC_STATS_BH(TcpOutRsts);
+ TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(TcpOutSegs);
+ TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
.dport = req->rmt_port } } };
if (ip_route_output_flow(&rt, &fl, sk, 0)) {
- IP_INC_STATS_BH(IpOutNoRoutes);
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
ip_rt_put(rt);
- IP_INC_STATS_BH(IpOutNoRoutes);
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
return &rt->u.dst;
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_opt tp;
+ struct tcp_options_received tmp_opt;
struct open_request *req;
__u32 saddr = skb->nh.iph->saddr;
__u32 daddr = skb->nh.iph->daddr;
* clogging syn queue with openreqs with exponentially increasing
* timeout.
*/
- if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+ if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
goto drop;
req = tcp_openreq_alloc();
if (!req)
goto drop;
- tcp_clear_options(&tp);
- tp.mss_clamp = 536;
- tp.user_mss = tcp_sk(sk)->user_mss;
+ tcp_clear_options(&tmp_opt);
+ tmp_opt.mss_clamp = 536;
+ tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
- tcp_parse_options(skb, &tp, 0);
+ tcp_parse_options(skb, &tmp_opt, 0);
if (want_cookie) {
- tcp_clear_options(&tp);
- tp.saw_tstamp = 0;
+ tcp_clear_options(&tmp_opt);
+ tmp_opt.saw_tstamp = 0;
}
- if (tp.saw_tstamp && !tp.rcv_tsval) {
+ if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
/* Some OSes (unknown ones, but I see them on web server, which
* contains information interesting only for windows'
* users) do not send their stamp in SYN. It is easy case.
* We simply do not advertise TS support.
*/
- tp.saw_tstamp = 0;
- tp.tstamp_ok = 0;
+ tmp_opt.saw_tstamp = 0;
+ tmp_opt.tstamp_ok = 0;
}
- tp.tstamp_ok = tp.saw_tstamp;
+ tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
- tcp_openreq_init(req, &tp, skb);
+ tcp_openreq_init(req, &tmp_opt, skb);
req->af.v4_req.loc_addr = daddr;
req->af.v4_req.rmt_addr = saddr;
* timewait bucket, so that all the necessary checks
* are made in the function processing timewait state.
*/
- if (tp.saw_tstamp &&
+ if (tmp_opt.saw_tstamp &&
sysctl_tcp_tw_recycle &&
(dst = tcp_v4_route_req(sk, req)) != NULL &&
(peer = rt_get_peer((struct rtable *)dst)) != NULL &&
if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
- NET_INC_STATS_BH(PAWSPassiveRejected);
+ NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
dst_release(dst);
goto drop_and_free;
}
drop_and_free:
tcp_openreq_free(req);
drop:
- TCP_INC_STATS_BH(TcpAttemptFails);
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
return 0;
}
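The tmp_opt/rx_opt churn in this function reflects the underlying type split: the old struct tcp_opt was renamed struct tcp_sock, and the option state parsed off the wire moved into its own struct tcp_options_received, embedded in tcp_sock as rx_opt and usable standalone on the stack, as tcp_v4_conn_request() does with tmp_opt before committing it via tcp_openreq_init(). An abridged sketch of the shape, limited to fields this file touches; field types are simplified and the real struct uses bitfields:

#include <stdint.h>

/* Abridged sketch; not the real layout. */
struct tcp_options_received {
	uint32_t ts_recent;		/* timestamp to echo */
	long	 ts_recent_stamp;	/* when ts_recent was cached */
	uint32_t rcv_tsval;		/* peer's last timestamp value */
	uint16_t mss_clamp;		/* ceiling from the peer's MSS option */
	uint16_t user_mss;		/* MSS requested via setsockopt */
	uint8_t	 saw_tstamp;		/* SYN carried a timestamp */
	uint8_t	 tstamp_ok;		/* timestamps negotiated */
};

/* The connection's copy hangs off the (renamed) socket itself. */
struct tcp_sock_sketch {
	/* ... sequence numbers, timers, congestion state ... */
	struct tcp_options_received rx_opt;
	uint32_t write_seq;
};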
struct open_request *req,
struct dst_entry *dst)
{
- struct inet_opt *newinet;
- struct tcp_opt *newtp;
+ struct inet_sock *newinet;
+ struct tcp_sock *newtp;
struct sock *newsk;
- if (tcp_acceptq_is_full(sk))
+ if (sk_acceptq_is_full(sk))
goto exit_overflow;
if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
return newsk;
exit_overflow:
- NET_INC_STATS_BH(ListenOverflows);
+ NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
- NET_INC_STATS_BH(ListenDrops);
+ NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL;
}
{
struct tcphdr *th = skb->h.th;
struct iphdr *iph = skb->nh.iph;
- struct tcp_opt *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct sock *nsk;
struct open_request **prev;
/* Find possible connection requests. */
return 0;
csum_err:
- TCP_INC_STATS_BH(TcpInErrs);
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
goto discard;
}
goto discard_it;
/* Count it even if it's bad */
- TCP_INC_STATS_BH(TcpInSegs);
+ TCP_INC_STATS_BH(TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
- TCP_INC_STATS_BH(TcpInErrs);
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
} else {
tcp_v4_send_reset(skb);
}
}
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(TcpInErrs);
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
tcp_tw_put((struct tcp_tw_bucket *) sk);
goto discard_it;
}
static int tcp_v4_reselect_saddr(struct sock *sk)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
int err;
struct rtable *rt;
__u32 old_saddr = inet->saddr;
int tcp_v4_rebuild_header(struct sock *sk)
{
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
u32 daddr;
int err;
static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
- struct inet_opt *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = inet->daddr;
int tcp_v4_remember_stamp(struct sock *sk)
{
- struct inet_opt *inet = inet_sk(sk);
- struct tcp_opt *tp = tcp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
struct inet_peer *peer = NULL;
int release_it = 0;
}
if (peer) {
- if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
+ if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
(peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
- peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
- peer->tcp_ts_stamp = tp->ts_recent_stamp;
- peer->tcp_ts = tp->ts_recent;
+ peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
+ peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
+ peer->tcp_ts = tp->rx_opt.ts_recent;
}
if (release_it)
inet_putpeer(peer);
*/
static int tcp_v4_init_sock(struct sock *sk)
{
- struct tcp_opt *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
*/
tp->snd_ssthresh = 0x7fffffff; /* Infinity */
tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = 536;
+ tp->mss_cache_std = tp->mss_cache = 536;
tp->reordering = sysctl_tcp_reordering;
sk->sk_state = TCP_CLOSE;
- sk->sk_write_space = tcp_write_space;
+ sk->sk_write_space = sk_stream_write_space;
sk->sk_use_write_queue = 1;
tp->af_specific = &ipv4_specific;
return 0;
}
-static int tcp_v4_destroy_sock(struct sock *sk)
+int tcp_v4_destroy_sock(struct sock *sk)
{
- struct tcp_opt *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
tcp_clear_xmit_timers(sk);
/* Cleanup up the write buffer. */
- tcp_writequeue_purge(sk);
+ sk_stream_writequeue_purge(sk);
/* Cleans up our, hopefully empty, out_of_order_queue. */
__skb_queue_purge(&tp->out_of_order_queue);
if (tp->bind_hash)
tcp_put_port(sk);
- /* If sendmsg cached page exists, toss it. */
- if (inet_sk(sk)->sndmsg_page)
- __free_page(inet_sk(sk)->sndmsg_page);
+ /*
+ * If sendmsg cached page exists, toss it.
+ */
+ if (sk->sk_sndmsg_page) {
+ __free_page(sk->sk_sndmsg_page);
+ sk->sk_sndmsg_page = NULL;
+ }
atomic_dec(&tcp_sockets_allocated);
return 0;
}
+EXPORT_SYMBOL(tcp_v4_destroy_sock);
+
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_opt *tp;
+ struct tcp_sock *tp;
struct hlist_node *node;
struct sock *sk = cur;
struct tcp_iter_state* st = seq->private;
req = req->dl_next;
while (1) {
while (req) {
- if (!vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH))
+ vxdprintk(VXD_CBIT(net, 6),
+ "sk,req: %p [#%d] (from %d)", req->sk,
+ (req->sk)?req->sk->sk_xid:0, vx_current_xid());
+ if (req->sk &&
+ !vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH))
continue;
if (req->class->family == st->family) {
cur = req;
sk = sk_next(st->syn_wait_sk);
st->state = TCP_SEQ_STATE_LISTENING;
read_unlock_bh(&tp->syn_wait_lock);
- } else
+ } else {
+ tp = tcp_sk(sk);
+ read_lock_bh(&tp->syn_wait_lock);
+ if (tp->listen_opt && tp->listen_opt->qlen)
+ goto start_req;
+ read_unlock_bh(&tp->syn_wait_lock);
sk = sk_next(sk);
+ }
get_sk:
sk_for_each_from(sk, node) {
+ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
+ sk, sk->sk_xid, vx_current_xid());
if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
continue;
if (sk->sk_family == st->family) {
tp = tcp_sk(sk);
read_lock_bh(&tp->syn_wait_lock);
if (tp->listen_opt && tp->listen_opt->qlen) {
+start_req:
st->uid = sock_i_uid(sk);
st->syn_wait_sk = sk;
st->state = TCP_SEQ_STATE_OPENREQ;
struct sock *sk;
struct hlist_node *node;
struct tcp_tw_bucket *tw;
-
+
+ /* We can reschedule _before_ having picked the target: */
+ cond_resched_softirq();
+
read_lock(&tcp_ehash[st->bucket].lock);
sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
+ vxdprintk(VXD_CBIT(net, 6),
+ "sk,egf: %p [#%d] (from %d)",
+ sk, sk->sk_xid, vx_current_xid());
if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
continue;
if (sk->sk_family != st->family)
st->state = TCP_SEQ_STATE_TIME_WAIT;
tw_for_each(tw, node,
&tcp_ehash[st->bucket + tcp_ehash_size].chain) {
+ vxdprintk(VXD_CBIT(net, 6),
+ "tw: %p [#%d] (from %d)",
+ tw, tw->tw_xid, vx_current_xid());
if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
continue;
if (tw->tw_family != st->family)
tw = cur;
tw = tw_next(tw);
get_tw:
- while (tw && tw->tw_family != st->family &&
- !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) {
+ while (tw && (tw->tw_family != st->family ||
+ !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) {
tw = tw_next(tw);
}
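The get_tw change above is a genuine logic fix, not just a rename: to skip entries that fail either test, the negated conditions must be OR-ed (De Morgan), while the old && only skipped entries failing both, so a time-wait socket of the wrong family with a passing xid check leaked into the dump. Spelled out:

#include <assert.h>

/* Keep a time-wait entry only if (family matches && xid check
 * passes); therefore skip while !(match && pass), which De Morgan
 * turns into (!match || !pass). The old (!match && !pass) kept any
 * entry failing exactly one of the two tests. */
int main(void)
{
	int match = 0, pass = 1;	/* wrong family, xid ok */

	int skip_old = !match && !pass;	/* 0: entry wrongly kept      */
	int skip_new = !match || !pass;	/* 1: entry correctly skipped */

	assert(skip_old == 0 && skip_new == 1);
	return 0;
}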
if (tw) {
}
read_unlock(&tcp_ehash[st->bucket].lock);
st->state = TCP_SEQ_STATE_ESTABLISHED;
+
+ /* We can reschedule between buckets: */
+ cond_resched_softirq();
+
if (++st->bucket < tcp_ehash_size) {
read_lock(&tcp_ehash[st->bucket].lock);
sk = sk_head(&tcp_ehash[st->bucket].chain);
sk = sk_next(sk);
sk_for_each_from(sk, node) {
+ vxdprintk(VXD_CBIT(net, 6),
+ "sk,egn: %p [#%d] (from %d)",
+ sk, sk->sk_xid, vx_current_xid());
if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
continue;
if (sk->sk_family == st->family)
switch (st->state) {
case TCP_SEQ_STATE_OPENREQ:
if (v) {
- struct tcp_opt *tp = tcp_sk(st->syn_wait_sk);
+ struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
read_unlock_bh(&tp->syn_wait_lock);
}
case TCP_SEQ_STATE_LISTENING:
int ttd = req->expires - jiffies;
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
i,
req->af.v4_req.loc_addr,
ntohs(inet_sk(sk)->sport),
{
int timer_active;
unsigned long timer_expires;
- struct tcp_opt *tp = tcp_sk(sp);
- struct inet_opt *inet = inet_sk(sp);
+ struct tcp_sock *tp = tcp_sk(sp);
+ struct inet_sock *inet = inet_sk(sp);
unsigned int dest = inet->daddr;
unsigned int src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
srcp = ntohs(tw->tw_sport);
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
atomic_read(&tw->tw_refcnt), tw);
#endif /* CONFIG_PROC_FS */
struct proto tcp_prot = {
- .name = "TCP",
- .close = tcp_close,
- .connect = tcp_v4_connect,
- .disconnect = tcp_disconnect,
- .accept = tcp_accept,
- .ioctl = tcp_ioctl,
- .init = tcp_v4_init_sock,
- .destroy = tcp_v4_destroy_sock,
- .shutdown = tcp_shutdown,
- .setsockopt = tcp_setsockopt,
- .getsockopt = tcp_getsockopt,
- .sendmsg = tcp_sendmsg,
- .recvmsg = tcp_recvmsg,
- .backlog_rcv = tcp_v4_do_rcv,
- .hash = tcp_v4_hash,
- .unhash = tcp_unhash,
- .get_port = tcp_v4_get_port,
+ .name = "TCP",
+ .owner = THIS_MODULE,
+ .close = tcp_close,
+ .connect = tcp_v4_connect,
+ .disconnect = tcp_disconnect,
+ .accept = tcp_accept,
+ .ioctl = tcp_ioctl,
+ .init = tcp_v4_init_sock,
+ .destroy = tcp_v4_destroy_sock,
+ .shutdown = tcp_shutdown,
+ .setsockopt = tcp_setsockopt,
+ .getsockopt = tcp_getsockopt,
+ .sendmsg = tcp_sendmsg,
+ .recvmsg = tcp_recvmsg,
+ .backlog_rcv = tcp_v4_do_rcv,
+ .hash = tcp_v4_hash,
+ .unhash = tcp_unhash,
+ .get_port = tcp_v4_get_port,
+ .enter_memory_pressure = tcp_enter_memory_pressure,
+ .sockets_allocated = &tcp_sockets_allocated,
+ .memory_allocated = &tcp_memory_allocated,
+ .memory_pressure = &tcp_memory_pressure,
+ .sysctl_mem = sysctl_tcp_mem,
+ .sysctl_wmem = sysctl_tcp_wmem,
+ .sysctl_rmem = sysctl_tcp_rmem,
+ .max_header = MAX_TCP_HEADER,
+ .slab_obj_size = sizeof(struct tcp_sock),
};
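struct proto grows here from a pure ops table into the generic socket-accounting interface: memory-pressure state, sysctl limits, max_header and slab_obj_size let protocol-independent core code (the sk_stream_* helpers used elsewhere in this patch) manage buffers and carve a per-protocol slab. With those fields filled in, registration presumably reduces to a call of this shape; a hedged sketch, not the literal init path:

/* Sketch, assuming the proto_register() of this kernel generation:
 * the second argument asks the core to create a kmem cache of
 * slab_obj_size bytes for sockets of this protocol. */
static int __init tcp_like_register(void)
{
	return proto_register(&tcp_prot, 1 /* alloc_slab */);
}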
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
-EXPORT_SYMBOL(tcp_v4_lookup_listener);
EXPORT_SYMBOL(tcp_v4_rebuild_header);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
-#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_max_syn_backlog);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
-#endif
+