#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/vserver/debug.h>
extern int sysctl_ip_dynaddr;
int sysctl_tcp_tw_reuse;
static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
{
- const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
struct sock *sk2;
struct hlist_node *node;
int reuse = sk->sk_reuse;

	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    !tcp_v6_ipv6only(sk2) &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
if (!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) {
- const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
- if (!sk2_rcv_saddr || !sk_rcv_saddr ||
- sk2_rcv_saddr == sk_rcv_saddr)
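+			/* the address comparison is delegated to the
+			 * network context: a wildcard bind conflicts
+			 * only with addresses the guest can see */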
+ if (nx_addr_conflict(sk->sk_nx_info,
+ tcp_v4_rcv_saddr(sk), sk2))
break;
}
}
wake_up(&tcp_lhash_wait);
}
+
+/*
+ * Check if a given address matches a tcp socket's bound address
+ *
+ * nxi:	the socket's nx_info, if any
+ * addr:	the address to be verified
+ * saddr:	the socket's bound address
+ */
+static inline int tcp_addr_match(
+ struct nx_info *nxi,
+ uint32_t addr,
+ uint32_t saddr)
+{
+ if (addr && (saddr == addr))
+ return 1;
+ if (!saddr)
+ return addr_in_nx_info(nxi, addr);
+ return 0;
+}
+
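+/*
+ * Illustrative example (not part of the original code): assuming a
+ * guest nx_info that has been assigned only 10.0.0.1,
+ *
+ *	tcp_addr_match(nxi, 10.0.0.1, 10.0.0.1)	-> 1	exact match
+ *	tcp_addr_match(nxi, 10.0.0.1, 0)	-> 1	wildcard bind,
+ *							address in nx_info
+ *	tcp_addr_match(nxi, 10.0.0.2, 0)	-> 0	wildcard bind,
+ *							address not assigned
+ */
+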
/* Don't inline this cruft. Here are some nice properties to
* exploit here. The BSD API does not allow a listening TCP
* to specify the remote port nor the remote address for the
__u32 rcv_saddr = inet->rcv_saddr;
score = (sk->sk_family == PF_INET ? 1 : 0);
- if (rcv_saddr) {
- if (rcv_saddr != daddr)
- continue;
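+		/* a wildcard rcv_saddr now scores only for addresses
+		 * visible to the socket's network context */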
+ if (tcp_addr_match(sk->sk_nx_info, daddr, rcv_saddr))
score+=2;
- }
+ else
+ continue;
if (sk->sk_bound_dev_if) {
if (sk->sk_bound_dev_if != dif)
continue;
struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
if (inet->num == hnum && !sk->sk_node.next &&
- (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
(sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+ tcp_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
!sk->sk_bound_dev_if)
goto sherry_cache;
sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
if (twp) {
*twp = tw;
- NET_INC_STATS_BH(TimeWaitRecycled);
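+		/* statistics counters below use the LINUX_MIB_, TCP_MIB_,
+		 * ICMP_MIB_ and IPSTATS_MIB_ enum names from the SNMP
+		 * MIB rework */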
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) {
/* Silly. Should hash-dance instead... */
tcp_tw_deschedule(tw);
- NET_INC_STATS_BH(TimeWaitRecycled);
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
tcp_tw_put(tw);
}
int err;
if (skb->len < (iph->ihl << 2) + 8) {
- ICMP_INC_STATS_BH(IcmpInErrors);
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
th->source, tcp_v4_iif(skb));
if (!sk) {
- ICMP_INC_STATS_BH(IcmpInErrors);
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state == TCP_TIME_WAIT) {
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(LockDroppedIcmps);
+ NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
!between(seq, tp->snd_una, tp->snd_nxt)) {
- NET_INC_STATS(OutOfWindowIcmps);
+ NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
switch (type) {
case ICMP_SOURCE_QUENCH:
- /* This is deprecated, but if someone generated it,
- * we have no reasons to ignore it.
- */
- if (!sock_owned_by_user(sk))
- tcp_enter_cwr(tp);
+ /* Just silently ignore these. */
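+		/* source quench is trivially spoofable and would let an
+		 * attacker throttle the connection, hence the silence */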
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
BUG_TRAP(!req->sk);
if (seq != req->snt_isn) {
- NET_INC_STATS_BH(OutOfWindowIcmps);
+ NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
   It can happen, e.g., if SYNs crossed.
*/
if (!sock_owned_by_user(sk)) {
- TCP_INC_STATS_BH(TcpAttemptFails);
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
sk->sk_error_report(sk);
ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
- TCP_INC_STATS_BH(TcpOutSegs);
- TCP_INC_STATS_BH(TcpOutRsts);
+ TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(TcpOutSegs);
+ TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
.dport = req->rmt_port } } };
if (ip_route_output_flow(&rt, &fl, sk, 0)) {
- IP_INC_STATS_BH(IpOutNoRoutes);
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
ip_rt_put(rt);
- IP_INC_STATS_BH(IpOutNoRoutes);
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
return &rt->u.dst;
* clogging syn queue with openreqs with exponentially increasing
* timeout.
*/
- if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
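+	/* accept-queue limit checks are handled by the generic
+	 * sk_acceptq_is_full() helper on struct sock */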
+ if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
goto drop;
req = tcp_openreq_alloc();
if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
- NET_INC_STATS_BH(PAWSPassiveRejected);
+ NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
dst_release(dst);
goto drop_and_free;
}
drop_and_free:
tcp_openreq_free(req);
drop:
- TCP_INC_STATS_BH(TcpAttemptFails);
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
return 0;
}
struct tcp_opt *newtp;
struct sock *newsk;
- if (tcp_acceptq_is_full(sk))
+ if (sk_acceptq_is_full(sk))
goto exit_overflow;
if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
return newsk;
exit_overflow:
- NET_INC_STATS_BH(ListenOverflows);
+ NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
- NET_INC_STATS_BH(ListenDrops);
+ NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL;
}
return 0;
csum_err:
- TCP_INC_STATS_BH(TcpInErrs);
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
goto discard;
}
goto discard_it;
/* Count it even if it's bad */
- TCP_INC_STATS_BH(TcpInSegs);
+ TCP_INC_STATS_BH(TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
- TCP_INC_STATS_BH(TcpInErrs);
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
} else {
tcp_v4_send_reset(skb);
}
}
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(TcpInErrs);
+ TCP_INC_STATS_BH(TCP_MIB_INERRS);
tcp_tw_put((struct tcp_tw_bucket *) sk);
goto discard_it;
}
*/
tp->snd_ssthresh = 0x7fffffff; /* Infinity */
tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = 536;
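+	/* mss_cache_std keeps the mss without TSO scaling; mss_cache
+	 * may be inflated by the TSO path */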
+ tp->mss_cache_std = tp->mss_cache = 536;
tp->reordering = sysctl_tcp_reordering;
sk->sk_state = TCP_CLOSE;
- sk->sk_write_space = tcp_write_space;
+ sk->sk_write_space = sk_stream_write_space;
sk->sk_use_write_queue = 1;
tp->af_specific = &ipv4_specific;
return 0;
}
-static int tcp_v4_destroy_sock(struct sock *sk)
+int tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
tcp_clear_xmit_timers(sk);
	/* Clean up the write buffer. */
- tcp_writequeue_purge(sk);
+ sk_stream_writequeue_purge(sk);
/* Cleans up our, hopefully empty, out_of_order_queue. */
__skb_queue_purge(&tp->out_of_order_queue);
if (tp->bind_hash)
tcp_put_port(sk);
- /* If sendmsg cached page exists, toss it. */
- if (inet_sk(sk)->sndmsg_page)
- __free_page(inet_sk(sk)->sndmsg_page);
+ /*
+ * If sendmsg cached page exists, toss it.
+ */
+ if (sk->sk_sndmsg_page) {
+ __free_page(sk->sk_sndmsg_page);
+ sk->sk_sndmsg_page = NULL;
+ }
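+	/* the pointer is cleared so no stale reference to the freed
+	 * page remains */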
atomic_dec(&tcp_sockets_allocated);
return 0;
}
+EXPORT_SYMBOL(tcp_v4_destroy_sock);
+
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
req = req->dl_next;
while (1) {
while (req) {
+				vxdprintk(VXD_CBIT(net, 6),
+					"sk,req: %p [#%d] (from %d)", req->sk,
+					req->sk ? req->sk->sk_xid : 0, current->xid);
+				if (req->sk &&
+					!vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) {
+					/* foreign context: advance before
+					 * continuing, or the loop would spin
+					 * forever on this request */
+					req = req->dl_next;
+					continue;
+				}
if (req->class->family == st->family) {
cur = req;
goto out;
sk = sk_next(st->syn_wait_sk);
st->state = TCP_SEQ_STATE_LISTENING;
read_unlock_bh(&tp->syn_wait_lock);
- } else
+ } else {
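+			/* before advancing, dump any openreqs still
+			 * queued on this listener (see start_req) */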
+ tp = tcp_sk(sk);
+ read_lock_bh(&tp->syn_wait_lock);
+ if (tp->listen_opt && tp->listen_opt->qlen)
+ goto start_req;
+ read_unlock_bh(&tp->syn_wait_lock);
sk = sk_next(sk);
+ }
get_sk:
sk_for_each_from(sk, node) {
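+		/* only sockets visible to the current context
+		 * (matching xid) are reported */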
+ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
+ sk, sk->sk_xid, current->xid);
+ if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+ continue;
if (sk->sk_family == st->family) {
cur = sk;
goto out;
tp = tcp_sk(sk);
read_lock_bh(&tp->syn_wait_lock);
if (tp->listen_opt && tp->listen_opt->qlen) {
+start_req:
st->uid = sock_i_uid(sk);
st->syn_wait_sk = sk;
st->state = TCP_SEQ_STATE_OPENREQ;
read_lock(&tcp_ehash[st->bucket].lock);
sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
- if (sk->sk_family != st->family) {
+ vxdprintk(VXD_CBIT(net, 6),
+ "sk,egf: %p [#%d] (from %d)",
+ sk, sk->sk_xid, current->xid);
+ if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+ continue;
+ if (sk->sk_family != st->family)
continue;
- }
rc = sk;
goto out;
}
st->state = TCP_SEQ_STATE_TIME_WAIT;
tw_for_each(tw, node,
&tcp_ehash[st->bucket + tcp_ehash_size].chain) {
- if (tw->tw_family != st->family) {
+ vxdprintk(VXD_CBIT(net, 6),
+ "tw: %p [#%d] (from %d)",
+ tw, tw->tw_xid, current->xid);
+ if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
+ continue;
+ if (tw->tw_family != st->family)
continue;
- }
rc = tw;
goto out;
}
tw = cur;
tw = tw_next(tw);
get_tw:
- while (tw && tw->tw_family != st->family) {
+ while (tw && (tw->tw_family != st->family ||
+ !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) {
tw = tw_next(tw);
}
if (tw) {
sk = sk_next(sk);
sk_for_each_from(sk, node) {
+ vxdprintk(VXD_CBIT(net, 6),
+ "sk,egn: %p [#%d] (from %d)",
+ sk, sk->sk_xid, current->xid);
+ if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+ continue;
if (sk->sk_family == st->family)
goto found;
}
int ttd = req->expires - jiffies;
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
i,
req->af.v4_req.loc_addr,
ntohs(inet_sk(sk)->sport),
srcp = ntohs(tw->tw_sport);
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
atomic_read(&tw->tw_refcnt), tw);
#endif /* CONFIG_PROC_FS */
struct proto tcp_prot = {
- .name = "TCP",
- .close = tcp_close,
- .connect = tcp_v4_connect,
- .disconnect = tcp_disconnect,
- .accept = tcp_accept,
- .ioctl = tcp_ioctl,
- .init = tcp_v4_init_sock,
- .destroy = tcp_v4_destroy_sock,
- .shutdown = tcp_shutdown,
- .setsockopt = tcp_setsockopt,
- .getsockopt = tcp_getsockopt,
- .sendmsg = tcp_sendmsg,
- .recvmsg = tcp_recvmsg,
- .backlog_rcv = tcp_v4_do_rcv,
- .hash = tcp_v4_hash,
- .unhash = tcp_unhash,
- .get_port = tcp_v4_get_port,
+ .name = "TCP",
+ .close = tcp_close,
+ .connect = tcp_v4_connect,
+ .disconnect = tcp_disconnect,
+ .accept = tcp_accept,
+ .ioctl = tcp_ioctl,
+ .init = tcp_v4_init_sock,
+ .destroy = tcp_v4_destroy_sock,
+ .shutdown = tcp_shutdown,
+ .setsockopt = tcp_setsockopt,
+ .getsockopt = tcp_getsockopt,
+ .sendmsg = tcp_sendmsg,
+ .recvmsg = tcp_recvmsg,
+ .backlog_rcv = tcp_v4_do_rcv,
+ .hash = tcp_v4_hash,
+ .unhash = tcp_unhash,
+ .get_port = tcp_v4_get_port,
+ .enter_memory_pressure = tcp_enter_memory_pressure,
+ .sockets_allocated = &tcp_sockets_allocated,
+ .memory_allocated = &tcp_memory_allocated,
+ .memory_pressure = &tcp_memory_pressure,
+ .sysctl_mem = sysctl_tcp_mem,
+ .sysctl_wmem = sysctl_tcp_wmem,
+ .sysctl_rmem = sysctl_tcp_rmem,
+ .max_header = MAX_TCP_HEADER,
+ .slab_obj_size = sizeof(struct tcp_sock),
};
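
+/* the new proto fields wire TCP into the generic sk_stream memory
+ * accounting: core socket code can now consult the tcp sysctl limits
+ * and pressure state without tcp-specific hooks */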