1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
9 #include <linux/types.h>
10 #include <linux/sched.h>
11 #include <linux/timer.h>
12 #include <linux/netfilter.h>
13 #include <linux/module.h>
16 #include <linux/tcp.h>
17 #include <linux/string.h>
21 #include <linux/netfilter_ipv4/ip_conntrack.h>
22 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
23 #include <linux/netfilter_ipv4/lockhelp.h>
/* Debug trace hook.  Defined with an empty expansion here, so every
   DEBUGP(...) call in this file compiles to nothing. */
28 #define DEBUGP(format, args...)
31 /* Protects conntrack->proto.tcp.  tcp_print_conntrack() takes it for
   reading; tcp_packet() takes it for writing around the state
   transition.  tcp_new() does not take it (see the comment there). */
32 static DECLARE_RWLOCK(tcp_lock);
34 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
35 closely. They're more complex. --RR */
37 /* Actually, I believe that neither ipmasq (where this code is stolen
38 from) nor ipfilter do it exactly right. A new conntrack machine taking
39 into account packet loss (which creates uncertainty as to exactly
40 the conntrack of the connection) is required. RSN. --RR */
/* Printable name for each TCP conntrack state.  tcp_print_conntrack()
   indexes this table directly with conntrack->proto.tcp.state, so the
   entries have to stay in enum tcp_conntrack order. */
42 static const char *tcp_conntrack_names[] = {
/* Postfix time-unit multipliers: written after a number, so that
   "2 MINS" expands to "2 * 60 SECS".  Each unit is defined in terms of
   the next smaller one.  SECS is expected to be defined before these
   lines -- TODO confirm its definition and the resulting unit. */
56 #define MINS * 60 SECS
57 #define HOURS * 60 MINS
58 #define DAYS * 24 HOURS
/* Timeout applied to a conntrack in each TCP state, written with the
   MINS/HOURS/DAYS unit macros.  tcp_timeouts[] stores pointers to
   these variables and tcp_packet() reads the value through the pointer
   each time it refreshes a conntrack, so a value changed at runtime is
   picked up on the next refresh.  They are not static -- presumably so
   that other files can tune them; confirm against the users. */
60 unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS;
61 unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS;
62 unsigned long ip_ct_tcp_timeout_established = 5 DAYS;
63 unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS;
64 unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS;
65 unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS;
66 unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS;
67 unsigned long ip_ct_tcp_timeout_close = 10 SECS;
/* Timeout lookup table indexed by TCP conntrack state; the trailing
   comment on each entry names the state it belongs to.  Entries are
   pointers so the current value of the timeout variable is used.
   tcp_packet() dereferences the entry for the new state without a null
   check, so the 0 entries (NONE and LISTEN) must never be selected.
   In tcp_conntracks[] no transition yields sNO, and sLI only appears
   in the sLI column, i.e. it is reachable only from sLI itself.
   NOTE(review): confirm nothing else puts a conntrack into LISTEN. */
69 static unsigned long * tcp_timeouts[]
70 = { 0, /* TCP_CONNTRACK_NONE */
71 &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
72 &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
73 &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
74 &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
75 &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
76 &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
77 &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
78 &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
79 0, /* TCP_CONNTRACK_LISTEN */
/* Three-character abbreviations for the TCP conntrack states, used so
   that each row of the tcp_conntracks[] transition table fits on one
   line.  sIV aliases TCP_CONNTRACK_MAX, the value that tcp_packet()
   and tcp_new() test for to detect an invalid transition. */
82 #define sNO TCP_CONNTRACK_NONE
83 #define sES TCP_CONNTRACK_ESTABLISHED
84 #define sSS TCP_CONNTRACK_SYN_SENT
85 #define sSR TCP_CONNTRACK_SYN_RECV
86 #define sFW TCP_CONNTRACK_FIN_WAIT
87 #define sTW TCP_CONNTRACK_TIME_WAIT
88 #define sCL TCP_CONNTRACK_CLOSE
89 #define sCW TCP_CONNTRACK_CLOSE_WAIT
90 #define sLA TCP_CONNTRACK_LAST_ACK
91 #define sLI TCP_CONNTRACK_LISTEN
92 #define sIV TCP_CONNTRACK_MAX
/* TCP state transition table, indexed as
   tcp_conntracks[direction][flag class][current state]; the entry is
   the new state.
   - direction: the first group of five rows is index 0, which
     tcp_new() uses for the first packet of a connection; the second
     group is index 1.
   - flag class: the row, in the order syn, fin, ack, rst, none
     (indices 0..4), matching the values get_conntrack_index() returns.
   - current state: the column, in the order given by the header
     comment above each group.
   sIV (TCP_CONNTRACK_MAX) marks a packet that is invalid for the
   current state; the whole "none" row is sIV. */
94 static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = {
97 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
98 /*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI },
99 /*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI },
100 /*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES },
101 /*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL },
102 /*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
106 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
107 /*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR },
108 /*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI },
109 /*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI },
110 /*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI },
111 /*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
/* Fill in the TCP-specific part of a tuple from a packet.
   skb:     packet to read from.
   dataoff: byte offset within skb at which the TCP header starts.
   tuple:   receives the header's source port in src.u.tcp.port and its
            destination port in dst.u.tcp.port, copied as found in the
            header (no byte-order conversion is applied).
   The header bytes are fetched with skb_copy_bits(), whose non-zero
   result is checked before the ports are read. */
115 static int tcp_pkt_to_tuple(const struct sk_buff *skb,
116 unsigned int dataoff,
117 struct ip_conntrack_tuple *tuple)
121 /* Actually only need first 8 bytes. */
122 if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0)
125 tuple->src.u.tcp.port = hdr.source;
126 tuple->dst.u.tcp.port = hdr.dest;
/* Build the TCP part of the tuple for the opposite direction: the
   source port of tuple is taken from orig's destination port and the
   destination port from orig's source port.  Only the TCP port fields
   are written here. */
131 static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
132 const struct ip_conntrack_tuple *orig)
134 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
135 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
139 /* Print out the per-protocol part of the tuple.
   Writes "sport=<n> dport=<n> " (note the trailing space) into buffer,
   converting both ports with ntohs() first, and returns sprintf()'s
   result.  sprintf() is unbounded, so the caller must supply a buffer
   large enough for the formatted text. */
140 static unsigned int tcp_print_tuple(char *buffer,
141 const struct ip_conntrack_tuple *tuple)
143 return sprintf(buffer, "sport=%hu dport=%hu ",
144 ntohs(tuple->src.u.tcp.port),
145 ntohs(tuple->dst.u.tcp.port));
148 /* Print out the private part of the conntrack.
   Takes a snapshot of conntrack->proto.tcp.state under the read side
   of tcp_lock, releases the lock, and then writes the matching entry
   of tcp_conntrack_names[] followed by a space into buffer.  Returns
   sprintf()'s result.  The state is used as the array index without a
   range check. */
149 static unsigned int tcp_print_conntrack(char *buffer,
150 const struct ip_conntrack *conntrack)
152 enum tcp_conntrack state;
154 READ_LOCK(&tcp_lock);
155 state = conntrack->proto.tcp.state;
156 READ_UNLOCK(&tcp_lock);
158 return sprintf(buffer, "%s ", tcp_conntrack_names[state]);
/* Classify a TCP header by its flags into a row index of
   tcp_conntracks[]: 0 = syn, 1 = fin, 2 = ack, 3 = rst.  The flags are
   tested in the order rst, syn, fin, ack and the first one set wins,
   so a SYN/ACK is classed as syn and a RST/ACK as rst.  A header with
   none of the four flags set should select the "none" row (index 4)
   -- TODO confirm. */
161 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
163 if (tcph->rst) return 3;
164 else if (tcph->syn) return 0;
165 else if (tcph->fin) return 1;
166 else if (tcph->ack) return 2;
170 /* Returns verdict for packet, or -1 for invalid.

   Advances the TCP state of an existing conntrack for one packet:
   1. Copy the TCP header out of the skb (it starts ihl * 4 bytes in)
      and check that skb->len covers the header length claimed by doff.
   2. If no reply has been seen yet (IPS_SEEN_REPLY_BIT clear) and this
      packet is a RST, call the conntrack's timeout handler directly,
      but only when del_timer() reports that a pending timer was
      removed (presumably so the handler is not run twice).
   3. Under the write side of tcp_lock, look up the new state in
      tcp_conntracks[] from the packet direction (CTINFO2DIR(ctinfo)
      -- confirm), the flag class from get_conntrack_index() and the
      current state.  TCP_CONNTRACK_MAX means the packet is invalid for
      this state; the lock is released on that path as well.
   4. Store the new state.  For a SYN/ACK in the reply direction while
      the old state was SYN_SENT, remember seq + 1 (kept in network
      byte order) in proto.tcp.handshake_ack.
   5. For an ACK without SYN in the original direction while the old
      state was SYN_RECV, set IPS_ASSURED_BIT if its ack_seq equals the
      remembered handshake_ack.
   6. Release the lock and refresh the conntrack with the timeout
      configured for the new state. */
171 static int tcp_packet(struct ip_conntrack *conntrack,
172 const struct sk_buff *skb,
173 enum ip_conntrack_info ctinfo)
175 enum tcp_conntrack newconntrack, oldtcpstate;
178 if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
180 if (skb->len < skb->nh.iph->ihl * 4 + tcph.doff * 4)
183 /* If only reply is a RST, we can consider ourselves not to
184 have an established connection: this is a fairly common
185 problem case, so we can delete the conntrack
187 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) {
188 if (del_timer(&conntrack->timeout))
189 conntrack->timeout.function((unsigned long)conntrack);
193 WRITE_LOCK(&tcp_lock);
194 oldtcpstate = conntrack->proto.tcp.state;
198 [get_conntrack_index(&tcph)][oldtcpstate];
201 if (newconntrack == TCP_CONNTRACK_MAX) {
202 DEBUGP("ip_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n",
203 CTINFO2DIR(ctinfo), get_conntrack_index(&tcph),
204 conntrack->proto.tcp.state);
205 WRITE_UNLOCK(&tcp_lock);
209 conntrack->proto.tcp.state = newconntrack;
211 /* Poor man's window tracking: record SYN/ACK for handshake check */
212 if (oldtcpstate == TCP_CONNTRACK_SYN_SENT
213 && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
214 && tcph.syn && tcph.ack) {
215 conntrack->proto.tcp.handshake_ack
216 = htonl(ntohl(tcph.seq) + 1);
220 /* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV */
221 if (oldtcpstate == TCP_CONNTRACK_SYN_RECV
222 && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL
223 && tcph.ack && !tcph.syn
224 && tcph.ack_seq == conntrack->proto.tcp.handshake_ack)
225 set_bit(IPS_ASSURED_BIT, &conntrack->status);
/* Common exit for valid packets: the timer is refreshed only after
   tcp_lock has been released, using the timeout for the new state. */
227 out: WRITE_UNLOCK(&tcp_lock);
228 ip_ct_refresh(conntrack, *tcp_timeouts[newconntrack]);
233 /* Called when a new connection for this protocol found.

   Chooses the initial TCP state of a conntrack from its first packet:
   the TCP header is copied out of the skb (it starts ihl * 4 bytes in)
   and the state is looked up in tcp_conntracks[0], using the packet's
   flag class from get_conntrack_index() and TCP_CONNTRACK_NONE as the
   current state.  TCP_CONNTRACK_MAX means this packet cannot start a
   connection; otherwise the state is stored in
   conntrack->proto.tcp.state. */
234 static int tcp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
236 enum tcp_conntrack newconntrack;
239 if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
242 /* Don't need lock here: this conntrack not in circulation yet */
244 = tcp_conntracks[0][get_conntrack_index(&tcph)]
245 [TCP_CONNTRACK_NONE];
247 /* Invalid: delete conntrack */
248 if (newconntrack == TCP_CONNTRACK_MAX) {
249 DEBUGP("ip_conntrack_tcp: invalid new deleting.\n");
253 conntrack->proto.tcp.state = newconntrack;
/* Decide whether a packet is the one an expectation refers to, by
   sequence number.  datalen is the TCP payload length (skb->len minus
   the IP header and minus the TCP header length from doff); the result
   is between(exp->seq, seq, seq + datalen), with seq being the
   packet's sequence number in host byte order.  between() is
   presumably a wrap-safe sequence-space range test -- confirm.
   NOTE(review): datalen is unsigned and, unlike tcp_packet(), no check
   that skb->len covers doff * 4 is visible here, so a bogus doff could
   make the subtraction wrap -- confirm callers rule that out. */
257 static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
258 const struct sk_buff *skb)
260 const struct iphdr *iph = skb->nh.iph;
262 unsigned int datalen;
264 if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
266 datalen = skb->len - iph->ihl*4 - tcph.doff*4;
268 return between(exp->seq, ntohl(tcph.seq), ntohl(tcph.seq) + datalen);
/* Protocol descriptor bundling the TCP handlers defined in this file.
   The initializer is positional, so its order has to match the field
   order of struct ip_conntrack_protocol (declared elsewhere -- confirm
   the field names there): a { NULL, NULL } pair (presumably list
   linkage), the protocol number IPPROTO_TCP, the name "tcp", and then
   the callbacks.  The two NULL entries among the callbacks are slots
   for which TCP supplies no handler. */
271 struct ip_conntrack_protocol ip_conntrack_protocol_tcp
272 = { { NULL, NULL }, IPPROTO_TCP, "tcp",
273 tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
274 tcp_packet, tcp_new, NULL, tcp_exp_matches_pkt, NULL };