X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_conntrack_proto_tcp.c;h=062b252b58ad2f7e581cb61a350cbd39b23ecf06;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=677ea802872c7a0f5ab7cc83b6dd58cc23e8d1eb;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 677ea8028..062b252b5 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -32,11 +32,9 @@ #include -#include #include #include #include -#include #if 0 #define DEBUGP printk @@ -46,7 +44,7 @@ #endif /* Protects conntrack->proto.tcp */ -static DECLARE_RWLOCK(tcp_lock); +static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." @@ -86,21 +84,21 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned long ip_ct_tcp_timeout_established = 2 DAYS; -unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS; -unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS; -unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS; -unsigned long ip_ct_tcp_timeout_close = 10 SECS; +unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; +unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; +unsigned int ip_ct_tcp_timeout_established = 5 DAYS; +unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; +unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; +unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; +unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; +unsigned int ip_ct_tcp_timeout_close = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS; +unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; -static unsigned long * tcp_timeouts[] +static const unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ @@ -171,7 +169,7 @@ enum tcp_bit_set { * if they are invalid * or we do not support the request (simultaneous open) */ -static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { +static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ @@ -273,9 +271,9 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* - * sSS -> sIV Might be a half-open connection. + * sSS -> sIG Might be a half-open connection. * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -330,13 +328,66 @@ static int tcp_print_conntrack(struct seq_file *s, { enum tcp_conntrack state; - READ_LOCK(&tcp_lock); + read_lock_bh(&tcp_lock); state = conntrack->proto.tcp.state; - READ_UNLOCK(&tcp_lock); + read_unlock_bh(&tcp_lock); return seq_printf(s, "%s ", tcp_conntrack_names[state]); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct) +{ + struct nfattr *nest_parms; + + read_lock_bh(&tcp_lock); + nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + &ct->proto.tcp.state); + read_unlock_bh(&tcp_lock); + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + read_unlock_bh(&tcp_lock); + return -1; +} + +static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { + [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), +}; + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + /* updates could not contain anything about the private + * protocol info, in that case skip the parsing */ + if (!attr) + return 0; + + nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) + return -EINVAL; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; +} +#endif + static unsigned int get_conntrack_index(const struct tcphdr *tcph) { if (tcph->rst) return TCP_RST_SET; @@ -700,7 +751,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, res = 1; } else { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -738,14 +789,14 @@ void ip_conntrack_tcp_update(struct sk_buff *skb, end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph); - WRITE_LOCK(&tcp_lock); + write_lock_bh(&tcp_lock); /* * We have to worry for the ack in the reply packet only... */ if (after(end, conntrack->proto.tcp.seen[dir].td_end)) conntrack->proto.tcp.seen[dir].td_end = end; conntrack->proto.tcp.last_end = end; - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -766,10 +817,11 @@ void ip_conntrack_tcp_update(struct sk_buff *skb, #define TH_CWR 0x80 /* table of valid flag combinations - ECE and CWR are always valid */ -static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = +static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = { [TH_SYN] = 1, [TH_SYN|TH_ACK] = 1, + [TH_SYN|TH_PUSH] = 1, [TH_SYN|TH_ACK|TH_PUSH] = 1, [TH_RST] = 1, [TH_RST|TH_ACK] = 1, @@ -799,7 +851,7 @@ static int tcp_error(struct sk_buff *skb, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -807,7 +859,7 @@ static int tcp_error(struct sk_buff *skb, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -818,13 +870,10 @@ static int tcp_error(struct sk_buff *skb, * and moreover root might send raw packets. */ /* FIXME: Source route IP option packets --RR */ - if (hooknum == NF_IP_PRE_ROUTING - && skb->ip_summed != CHECKSUM_UNNECESSARY - && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, - skb->ip_summed == CHECKSUM_HW ? skb->csum - : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { + if (hooknum == NF_IP_PRE_ROUTING && + nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -833,7 +882,7 @@ static int tcp_error(struct sk_buff *skb, tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -857,7 +906,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); - WRITE_LOCK(&tcp_lock); + write_lock_bh(&tcp_lock); old_state = conntrack->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); @@ -865,8 +914,12 @@ static int tcp_packet(struct ip_conntrack *conntrack, switch (new_state) { case TCP_CONNTRACK_IGNORE: - /* Either SYN in ORIGINAL - * or SYN/ACK in REPLY. */ + /* Ignored packets: + * + * a) SYN in ORIGINAL + * b) SYN/ACK in REPLY + * c) ACK in reply direction after initial SYN in original. + */ if (index == TCP_SYNACK_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir @@ -879,10 +932,11 @@ static int tcp_packet(struct ip_conntrack *conntrack, * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: killing out of sync session "); + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + NULL, "ip_ct_tcp: " + "killing out of sync session "); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); @@ -894,9 +948,9 @@ static int tcp_packet(struct ip_conntrack *conntrack, conntrack->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th); - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: @@ -904,9 +958,9 @@ static int tcp_packet(struct ip_conntrack *conntrack, DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: @@ -918,30 +972,37 @@ static int tcp_packet(struct ip_conntrack *conntrack, conntrack->proto.tcp.seen[dir].td_end)) { /* Attempt to reopen a closed connection. * Delete this connection and look up again. */ - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); return -NF_REPEAT; } else { - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: invalid SYN"); + NULL, "ip_ct_tcp: invalid SYN"); return -NF_ACCEPT; } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET - && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index == TCP_SYN_SET + && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_SYN_SET) + || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { - /* RST sent to invalid SYN we had let trough - * SYN was in window then, tear down connection. + /* RST sent to invalid SYN or ACK we had let through + * at a) and c) above: + * + * a) SYN was in window then + * c) we hold a half-open connection. + * + * Delete our connection entry. * We skip window checking, because packet might ACK - * segments we ignored in the SYN. */ + * segments we ignored. */ goto in_window; } - /* Just fall trough */ + /* Just fall through */ default: /* Keep compilers happy. */ break; @@ -949,7 +1010,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, if (!tcp_in_window(&conntrack->proto.tcp, dir, index, skb, iph, th)) { - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); return -NF_ACCEPT; } in_window: @@ -972,7 +1033,11 @@ static int tcp_packet(struct ip_conntrack *conntrack, timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; - WRITE_UNLOCK(&tcp_lock); + write_unlock_bh(&tcp_lock); + + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + if (new_state != old_state) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { /* If only reply is a RST, we can consider ourselves not to @@ -992,7 +1057,8 @@ static int tcp_packet(struct ip_conntrack *conntrack, /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); @@ -1097,4 +1163,11 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = .packet = tcp_packet, .new = tcp_new, .error = tcp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif };