X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_conntrack_proto_tcp.c;h=062b252b58ad2f7e581cb61a350cbd39b23ecf06;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=8fab05a58fb8d747e3161ea960fe030ff4b413ef;hpb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;p=linux-2.6.git diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 8fab05a58..062b252b5 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -32,11 +32,9 @@ #include -#include #include #include #include -#include #if 0 #define DEBUGP printk @@ -46,7 +44,7 @@ #endif /* Protects conntrack->proto.tcp */ -static DECLARE_RWLOCK(tcp_lock); +static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." @@ -86,21 +84,21 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned long ip_ct_tcp_timeout_established = 5 DAYS; -unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS; -unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS; -unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS; -unsigned long ip_ct_tcp_timeout_close = 10 SECS; +unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; +unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; +unsigned int ip_ct_tcp_timeout_established = 5 DAYS; +unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; +unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; +unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; +unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; +unsigned int ip_ct_tcp_timeout_close = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS; +unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; -static unsigned long * tcp_timeouts[] +static const unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ @@ -171,7 +169,7 @@ enum tcp_bit_set { * if they are invalid * or we do not support the request (simultaneous open) */ -static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { +static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ @@ -254,7 +252,7 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sSS -> sSR Standard open. * sSR -> sSR Retransmitted SYN/ACK. * sES -> sIG Late retransmitted SYN/ACK? - * sFW -> sIG + * sFW -> sIG Might be SYN/ACK answering ignored SYN * sCW -> sIG * sLA -> sIG * sTW -> sIG @@ -273,10 +271,10 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIV, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* - * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. - * sSR -> sIV Simultaneous open. + * sSS -> sIG Might be a half-open connection. + * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. * sCW -> sCW @@ -330,13 +328,66 @@ static int tcp_print_conntrack(struct seq_file *s, { enum tcp_conntrack state; - READ_LOCK(&tcp_lock); + read_lock_bh(&tcp_lock); state = conntrack->proto.tcp.state; - READ_UNLOCK(&tcp_lock); + read_unlock_bh(&tcp_lock); return seq_printf(s, "%s ", tcp_conntrack_names[state]); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct) +{ + struct nfattr *nest_parms; + + read_lock_bh(&tcp_lock); + nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + &ct->proto.tcp.state); + read_unlock_bh(&tcp_lock); + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + read_unlock_bh(&tcp_lock); + return -1; +} + +static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { + [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), +}; + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + /* updates could not contain anything about the private + * protocol info, in that case skip the parsing */ + if (!attr) + return 0; + + nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) + return -EINVAL; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; +} +#endif + static unsigned int get_conntrack_index(const struct tcphdr *tcph) { if (tcph->rst) return TCP_RST_SET; @@ -352,14 +403,19 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) http://www.nluug.nl/events/sane2000/papers.html http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz - The boundaries and the conditions are slightly changed: - + The boundaries and the conditions are changed according to RFC793: + the packet must intersect the window (i.e. segments may be + after the right or before the left edge) and thus receivers may ACK + segments after the right edge of the window. + td_maxend = max(sack + max(win,1)) seen in reply packets td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets + td_maxwin += seq + len - sender.td_maxend + if seq + len > sender.td_maxend td_end = max(seq + len) seen in sent packets - I. Upper bound for valid data: seq + len <= sender.td_maxend - II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin + I. Upper bound for valid data: seq <= sender.td_maxend + II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin III. Upper bound for valid ack: sack <= receiver.td_end IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW @@ -373,7 +429,7 @@ static inline __u32 segment_seq_plus_len(__u32 seq, size_t len, struct iphdr *iph, struct tcphdr *tcph) - { +{ return (seq + len - (iph->ihl + tcph->doff)*4 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); } @@ -436,7 +492,7 @@ static void tcp_options(const struct sk_buff *skb, state->td_scale = 14; } state->flags |= - IP_CT_TCP_STATE_FLAG_WINDOW_SCALE; + IP_CT_TCP_FLAG_WINDOW_SCALE; } ptr += opsize - 2; length -= opsize; @@ -444,22 +500,33 @@ static void tcp_options(const struct sk_buff *skb, } } -static void tcp_sack(struct tcphdr *tcph, __u32 *sack) +static void tcp_sack(const struct sk_buff *skb, + struct iphdr *iph, + struct tcphdr *tcph, + __u32 *sack) { - __u32 tmp; + unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; unsigned char *ptr; int length = (tcph->doff*4) - sizeof(struct tcphdr); - + __u32 tmp; + + if (!length) + return; + + ptr = skb_header_pointer(skb, + (iph->ihl * 4) + sizeof(struct tcphdr), + length, buff); + BUG_ON(ptr == NULL); + /* Fast path for timestamp-only option */ if (length == TCPOLEN_TSTAMP_ALIGNED*4 - && *(__u32 *)(tcph + 1) == + && *(__u32 *)ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) return; - ptr = (unsigned char *)(tcph + 1); while (length > 0) { int opcode=*ptr++; int opsize, i; @@ -500,7 +567,7 @@ static void tcp_sack(struct tcphdr *tcph, __u32 *sack) static int tcp_in_window(struct ip_ct_tcp *state, enum ip_conntrack_dir dir, - unsigned int *index, + unsigned int index, const struct sk_buff *skb, struct iphdr *iph, struct tcphdr *tcph) @@ -519,7 +586,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, end = segment_seq_plus_len(seq, skb->len, iph, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) - tcp_sack(tcph, &sack); + tcp_sack(skb, iph, tcph, &sack); DEBUGP("tcp_in_window: START\n"); DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " @@ -552,8 +619,8 @@ static int tcp_in_window(struct ip_ct_tcp *state, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE - && receiver->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE)) + if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE + && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; } else { @@ -566,9 +633,11 @@ static int tcp_in_window(struct ip_ct_tcp *state, sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; } - } else if (state->state == TCP_CONNTRACK_SYN_SENT - && dir == IP_CT_DIR_ORIGINAL - && after(end, sender->td_end)) { + } else if (((state->state == TCP_CONNTRACK_SYN_SENT + && dir == IP_CT_DIR_ORIGINAL) + || (state->state == TCP_CONNTRACK_SYN_RECV + && dir == IP_CT_DIR_REPLY)) + && after(end, sender->td_end)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence @@ -596,20 +665,23 @@ static int tcp_in_window(struct ip_ct_tcp *state, ack = sack = receiver->td_end; } - if (seq == end) + if (seq == end + && (!tcph->rst + || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) /* * Packets contains no data: we assume it is valid * and check the ack value only. + * However RST segments are always validated by their + * SEQ number, except when seq == 0 (reset sent answering + * SYN. */ seq = end = sender->td_end; DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n", + "seq=%u ack=%u sack =%u win=%u end=%u\n", NIPQUAD(iph->saddr), ntohs(tcph->source), NIPQUAD(iph->daddr), ntohs(tcph->dest), - seq, ack, sack, win, end, - after(end, sender->td_maxend) && before(seq, sender->td_maxend) - ? sender->td_maxend : end); + seq, ack, sack, win, end); DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -617,24 +689,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - /* Ignore data over the right edge of the receiver's window. */ - if (after(end, sender->td_maxend) && - before(seq, sender->td_maxend)) { - end = sender->td_maxend; - if (*index == TCP_FIN_SET) - *index = TCP_ACK_SET; - } DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", - before(end, sender->td_maxend + 1) - || before(seq, sender->td_maxend + 1), - after(seq, sender->td_end - receiver->td_maxwin - 1) - || after(end, sender->td_end - receiver->td_maxwin - 1), + before(seq, sender->td_maxend + 1), + after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); if (sender->loose || receiver->loose || - (before(end, sender->td_maxend + 1) && - after(seq, sender->td_end - receiver->td_maxwin - 1) && + (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { /* @@ -651,6 +714,11 @@ static int tcp_in_window(struct ip_ct_tcp *state, sender->td_maxwin = swin; if (after(end, sender->td_end)) sender->td_end = end; + /* + * Update receiver data. + */ + if (after(end, sender->td_maxend)) + receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) @@ -660,14 +728,16 @@ static int tcp_in_window(struct ip_ct_tcp *state, /* * Check retransmissions. */ - if (*index == TCP_ACK_SET) { + if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq + && state->last_ack == ack && state->last_end == end) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; + state->last_ack = ack; state->last_end = end; state->retrans = 0; } @@ -681,18 +751,18 @@ static int tcp_in_window(struct ip_ct_tcp *state, res = 1; } else { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", - before(end, sender->td_maxend + 1) ? - after(seq, sender->td_end - receiver->td_maxwin - 1) ? - before(ack, receiver->td_end + 1) ? + before(seq, sender->td_maxend + 1) ? + after(end, sender->td_end - receiver->td_maxwin - 1) ? + before(sack, receiver->td_end + 1) ? after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" - : "ACK is under the lower bound (possibly overly delayed ACK)" - : "ACK is over the upper bound (ACKed data has never seen yet)" - : "SEQ is under the lower bound (retransmitted already ACKed data)" + : "ACK is under the lower bound (possible overly delayed ACK)" + : "ACK is over the upper bound (ACKed data not seen yet)" + : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - res = ip_ct_tcp_be_liberal && !tcph->rst; + res = ip_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " @@ -705,9 +775,9 @@ static int tcp_in_window(struct ip_ct_tcp *state, #ifdef CONFIG_IP_NF_NAT_NEEDED /* Update sender->td_end after NAT successfully mangled the packet */ -int ip_conntrack_tcp_update(struct sk_buff *skb, - struct ip_conntrack *conntrack, - int dir) +void ip_conntrack_tcp_update(struct sk_buff *skb, + struct ip_conntrack *conntrack, + enum ip_conntrack_dir dir) { struct iphdr *iph = skb->nh.iph; struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4; @@ -719,25 +789,22 @@ int ip_conntrack_tcp_update(struct sk_buff *skb, end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph); - WRITE_LOCK(&tcp_lock); + write_lock_bh(&tcp_lock); /* * We have to worry for the ack in the reply packet only... */ if (after(end, conntrack->proto.tcp.seen[dir].td_end)) conntrack->proto.tcp.seen[dir].td_end = end; conntrack->proto.tcp.last_end = end; - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - - return 1; } -EXPORT_SYMBOL(ip_conntrack_tcp_update); #endif #define TH_FIN 0x01 @@ -750,10 +817,12 @@ EXPORT_SYMBOL(ip_conntrack_tcp_update); #define TH_CWR 0x80 /* table of valid flag combinations - ECE and CWR are always valid */ -static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = +static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = { [TH_SYN] = 1, [TH_SYN|TH_ACK] = 1, + [TH_SYN|TH_PUSH] = 1, + [TH_SYN|TH_ACK|TH_PUSH] = 1, [TH_RST] = 1, [TH_RST|TH_ACK] = 1, [TH_RST|TH_ACK|TH_PUSH] = 1, @@ -782,7 +851,7 @@ static int tcp_error(struct sk_buff *skb, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -790,7 +859,7 @@ static int tcp_error(struct sk_buff *skb, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -801,12 +870,10 @@ static int tcp_error(struct sk_buff *skb, * and moreover root might send raw packets. */ /* FIXME: Source route IP option packets --RR */ - if (hooknum == NF_IP_PRE_ROUTING - && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, - skb->ip_summed == CHECKSUM_HW ? skb->csum - : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { + if (hooknum == NF_IP_PRE_ROUTING && + nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -815,7 +882,7 @@ static int tcp_error(struct sk_buff *skb, tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -839,7 +906,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); - WRITE_LOCK(&tcp_lock); + write_lock_bh(&tcp_lock); old_state = conntrack->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); @@ -847,12 +914,17 @@ static int tcp_packet(struct ip_conntrack *conntrack, switch (new_state) { case TCP_CONNTRACK_IGNORE: - /* Either SYN in ORIGINAL, or SYN/ACK in REPLY direction. */ + /* Ignored packets: + * + * a) SYN in ORIGINAL + * b) SYN/ACK in REPLY + * c) ACK in reply direction after initial SYN in original. + */ if (index == TCP_SYNACK_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir - && after(ntohl(th->ack_seq), - conntrack->proto.tcp.last_seq)) { + && ntohl(th->ack_seq) == + conntrack->proto.tcp.last_end) { /* This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is @@ -860,10 +932,11 @@ static int tcp_packet(struct ip_conntrack *conntrack, * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: killing out of sync session "); + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + NULL, "ip_ct_tcp: " + "killing out of sync session "); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); @@ -872,63 +945,77 @@ static int tcp_packet(struct ip_conntrack *conntrack, conntrack->proto.tcp.last_index = index; conntrack->proto.tcp.last_dir = dir; conntrack->proto.tcp.last_seq = ntohl(th->seq); + conntrack->proto.tcp.last_end = + segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th); - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: invalid SYN (ignored) "); + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + "ip_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: - if (old_state >= TCP_CONNTRACK_TIME_WAIT) { + if (old_state < TCP_CONNTRACK_TIME_WAIT) + break; + if ((conntrack->proto.tcp.seen[dir].flags & + IP_CT_TCP_FLAG_CLOSE_INIT) + || after(ntohl(th->seq), + conntrack->proto.tcp.seen[dir].td_end)) { /* Attempt to reopen a closed connection. * Delete this connection and look up again. */ - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return -NF_REPEAT; + } else { + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + NULL, "ip_ct_tcp: invalid SYN"); + return -NF_ACCEPT; } - break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET - && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET - && after(ntohl(th->ack_seq), - conntrack->proto.tcp.last_seq)) { - /* Ignore RST closing down invalid SYN - we had let trough. */ - WRITE_UNLOCK(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: invalid RST (ignored) "); - return NF_ACCEPT; + && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_SYN_SET) + || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_ACK_SET)) + && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { + /* RST sent to invalid SYN or ACK we had let through + * at a) and c) above: + * + * a) SYN was in window then + * c) we hold a half-open connection. + * + * Delete our connection entry. + * We skip window checking, because packet might ACK + * segments we ignored. */ + goto in_window; } - /* Just fall trough */ + /* Just fall through */ default: /* Keep compilers happy. */ break; } - if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, + if (!tcp_in_window(&conntrack->proto.tcp, dir, index, skb, iph, th)) { - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); return -NF_ACCEPT; } - /* From now on we have got in-window packets */ - - /* If FIN was trimmed off, we don't change state. */ + in_window: + /* From now on we have got in-window packets */ conntrack->proto.tcp.last_index = index; - new_state = tcp_conntracks[dir][index][old_state]; DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", @@ -939,10 +1026,18 @@ static int tcp_packet(struct ip_conntrack *conntrack, old_state, new_state); conntrack->proto.tcp.state = new_state; + if (old_state != new_state + && (new_state == TCP_CONNTRACK_FIN_WAIT + || new_state == TCP_CONNTRACK_CLOSE)) + conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); + + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + if (new_state != old_state) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { /* If only reply is a RST, we can consider ourselves not to @@ -962,14 +1057,15 @@ static int tcp_packet(struct ip_conntrack *conntrack, /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); return NF_ACCEPT; } - /* Called when a new connection for this protocol found. */ +/* Called when a new connection for this protocol found. */ static int tcp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) { @@ -1056,22 +1152,6 @@ static int tcp_new(struct ip_conntrack *conntrack, return 1; } -static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp, - const struct sk_buff *skb) -{ - const struct iphdr *iph = skb->nh.iph; - struct tcphdr *th, _tcph; - unsigned int datalen; - - th = skb_header_pointer(skb, iph->ihl * 4, - sizeof(_tcph), &_tcph); - if (th == NULL) - return 0; - datalen = skb->len - iph->ihl*4 - th->doff*4; - - return between(exp->seq, ntohl(th->seq), ntohl(th->seq) + datalen); -} - struct ip_conntrack_protocol ip_conntrack_protocol_tcp = { .proto = IPPROTO_TCP, @@ -1082,6 +1162,12 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, .new = tcp_new, - .exp_matches_pkt = tcp_exp_matches_pkt, .error = tcp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif };