/* This file contains all the functions required for the standalone ip_conntrack module. These are not required by the compatibility layer. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include #include #include #include #include #include #include #include #ifdef CONFIG_SYSCTL #include #endif #include #include #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) #include #include #include #include #include #if 0 #define DEBUGP printk #else #define DEBUGP(format, args...) #endif MODULE_LICENSE("GPL"); static int kill_proto(const struct ip_conntrack *i, void *data) { return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == *((u_int8_t *) data)); } static unsigned int print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple, struct ip_conntrack_protocol *proto) { int len; len = sprintf(buffer, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip)); len += proto->print_tuple(buffer + len, tuple); return len; } /* FIXME: Don't print source proto part. --RR */ static unsigned int print_expect(char *buffer, const struct ip_conntrack_expect *expect) { unsigned int len; if (expect->expectant->helper->timeout) len = sprintf(buffer, "EXPECTING: %lu ", timer_pending(&expect->timeout) ? (expect->timeout.expires - jiffies)/HZ : 0); else len = sprintf(buffer, "EXPECTING: - "); len += sprintf(buffer + len, "use=%u proto=%u ", atomic_read(&expect->use), expect->tuple.dst.protonum); len += print_tuple(buffer + len, &expect->tuple, __ip_ct_find_proto(expect->tuple.dst.protonum)); len += sprintf(buffer + len, "\n"); return len; } static unsigned int print_conntrack(char *buffer, struct ip_conntrack *conntrack) { unsigned int len; struct ip_conntrack_protocol *proto = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.protonum); len = sprintf(buffer, "%-8s %u %lu ", proto->name, conntrack->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.protonum, timer_pending(&conntrack->timeout) ? (conntrack->timeout.expires - jiffies)/HZ : 0); len += proto->print_conntrack(buffer + len, conntrack); len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, proto); if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) len += sprintf(buffer + len, "[UNREPLIED] "); len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, proto); if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) len += sprintf(buffer + len, "[ASSURED] "); len += sprintf(buffer + len, "use=%u ", atomic_read(&conntrack->ct_general.use)); len += sprintf(buffer + len, "\n"); return len; } /* Returns true when finished. */ static inline int conntrack_iterate(const struct ip_conntrack_tuple_hash *hash, char *buffer, off_t offset, off_t *upto, unsigned int *len, unsigned int maxlen) { unsigned int newlen; IP_NF_ASSERT(hash->ctrack); MUST_BE_READ_LOCKED(&ip_conntrack_lock); /* Only count originals */ if (DIRECTION(hash)) return 0; if ((*upto)++ < offset) return 0; newlen = print_conntrack(buffer + *len, hash->ctrack); if (*len + newlen > maxlen) return 1; else *len += newlen; return 0; } static int list_conntracks(char *buffer, char **start, off_t offset, int length) { unsigned int i; unsigned int len = 0; off_t upto = 0; struct list_head *e; READ_LOCK(&ip_conntrack_lock); /* Traverse hash; print originals then reply. */ for (i = 0; i < ip_conntrack_htable_size; i++) { if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate, struct ip_conntrack_tuple_hash *, buffer, offset, &upto, &len, length)) goto finished; } /* Now iterate through expecteds. */ READ_LOCK(&ip_conntrack_expect_tuple_lock); list_for_each(e, &ip_conntrack_expect_list) { unsigned int last_len; struct ip_conntrack_expect *expect = (struct ip_conntrack_expect *)e; if (upto++ < offset) continue; last_len = len; len += print_expect(buffer + len, expect); if (len > length) { len = last_len; goto finished_expects; } } finished_expects: READ_UNLOCK(&ip_conntrack_expect_tuple_lock); finished: READ_UNLOCK(&ip_conntrack_lock); /* `start' hack - see fs/proc/generic.c line ~165 */ *start = (char *)((unsigned int)upto - offset); return len; } static unsigned int ip_confirm(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* We've seen it coming out the other side: confirm it */ return ip_conntrack_confirm(*pskb); } static unsigned int ip_conntrack_defrag(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if ((*pskb)->nfct) return NF_ACCEPT; /* Gather fragments. */ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { *pskb = ip_ct_gather_frags(*pskb); if (!*pskb) return NF_STOLEN; } return NF_ACCEPT; } static unsigned int ip_refrag(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct rtable *rt = (struct rtable *)(*pskb)->dst; /* We've seen it coming out the other side: confirm */ if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT) return NF_DROP; /* Local packets are never produced too large for their interface. We degfragment them at LOCAL_OUT, however, so we have to refragment them here. */ if ((*pskb)->len > dst_pmtu(&rt->u.dst) && !skb_shinfo(*pskb)->tso_size) { /* No hook can be after us, so this should be OK. */ ip_fragment(*pskb, okfn); return NF_STOLEN; } return NF_ACCEPT; } static unsigned int ip_conntrack_local(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } return ip_conntrack_in(hooknum, pskb, in, out, okfn); } /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ip_conntrack_defrag_ops = { .hook = ip_conntrack_defrag, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, }; static struct nf_hook_ops ip_conntrack_in_ops = { .hook = ip_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }; static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = { .hook = ip_conntrack_defrag, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, }; static struct nf_hook_ops ip_conntrack_local_out_ops = { .hook = ip_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }; /* Refragmenter; last chance. */ static struct nf_hook_ops ip_conntrack_out_ops = { .hook = ip_refrag, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_POST_ROUTING, .priority = NF_IP_PRI_LAST, }; static struct nf_hook_ops ip_conntrack_local_in_ops = { .hook = ip_confirm, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_LOCAL_IN, .priority = NF_IP_PRI_LAST-1, }; /* Sysctl support */ #ifdef CONFIG_SYSCTL /* From ip_conntrack_core.c */ extern int ip_conntrack_max; extern unsigned int ip_conntrack_htable_size; /* From ip_conntrack_proto_tcp.c */ extern unsigned long ip_ct_tcp_timeout_syn_sent; extern unsigned long ip_ct_tcp_timeout_syn_recv; extern unsigned long ip_ct_tcp_timeout_established; extern unsigned long ip_ct_tcp_timeout_fin_wait; extern unsigned long ip_ct_tcp_timeout_close_wait; extern unsigned long ip_ct_tcp_timeout_last_ack; extern unsigned long ip_ct_tcp_timeout_time_wait; extern unsigned long ip_ct_tcp_timeout_close; /* From ip_conntrack_proto_udp.c */ extern unsigned long ip_ct_udp_timeout; extern unsigned long ip_ct_udp_timeout_stream; /* From ip_conntrack_proto_icmp.c */ extern unsigned long ip_ct_icmp_timeout; /* From ip_conntrack_proto_icmp.c */ extern unsigned long ip_ct_generic_timeout; static struct ctl_table_header *ip_ct_sysctl_header; static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, .procname = "ip_conntrack_max", .data = &ip_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, .procname = "ip_conntrack_buckets", .data = &ip_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = &proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, .procname = "ip_conntrack_tcp_timeout_syn_sent", .data = &ip_ct_tcp_timeout_syn_sent, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &ip_ct_tcp_timeout_syn_recv, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, .procname = "ip_conntrack_tcp_timeout_established", .data = &ip_ct_tcp_timeout_established, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, .procname = "ip_conntrack_tcp_timeout_fin_wait", .data = &ip_ct_tcp_timeout_fin_wait, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, .procname = "ip_conntrack_tcp_timeout_close_wait", .data = &ip_ct_tcp_timeout_close_wait, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, .procname = "ip_conntrack_tcp_timeout_last_ack", .data = &ip_ct_tcp_timeout_last_ack, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, .procname = "ip_conntrack_tcp_timeout_time_wait", .data = &ip_ct_tcp_timeout_time_wait, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, .procname = "ip_conntrack_tcp_timeout_close", .data = &ip_ct_tcp_timeout_close, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, .procname = "ip_conntrack_udp_timeout", .data = &ip_ct_udp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, .procname = "ip_conntrack_udp_timeout_stream", .data = &ip_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, .procname = "ip_conntrack_icmp_timeout", .data = &ip_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, .procname = "ip_conntrack_generic_timeout", .data = &ip_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = 0 } }; #define NET_IP_CONNTRACK_MAX 2089 static ctl_table ip_ct_netfilter_table[] = { { .ctl_name = NET_IPV4_NETFILTER, .procname = "netfilter", .mode = 0555, .child = ip_ct_sysctl_table, }, { .ctl_name = NET_IP_CONNTRACK_MAX, .procname = "ip_conntrack_max", .data = &ip_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, { .ctl_name = 0 } }; static ctl_table ip_ct_ipv4_table[] = { { .ctl_name = NET_IPV4, .procname = "ipv4", .mode = 0555, .child = ip_ct_netfilter_table, }, { .ctl_name = 0 } }; static ctl_table ip_ct_net_table[] = { { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, .child = ip_ct_ipv4_table, }, { .ctl_name = 0 } }; #endif static int init_or_cleanup(int init) { struct proc_dir_entry *proc; int ret = 0; if (!init) goto cleanup; ret = ip_conntrack_init(); if (ret < 0) goto cleanup_nothing; proc = proc_net_create("ip_conntrack", 0440, list_conntracks); if (!proc) goto cleanup_init; proc->owner = THIS_MODULE; ret = nf_register_hook(&ip_conntrack_defrag_ops); if (ret < 0) { printk("ip_conntrack: can't register pre-routing defrag hook.\n"); goto cleanup_proc; } ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops); if (ret < 0) { printk("ip_conntrack: can't register local_out defrag hook.\n"); goto cleanup_defragops; } ret = nf_register_hook(&ip_conntrack_in_ops); if (ret < 0) { printk("ip_conntrack: can't register pre-routing hook.\n"); goto cleanup_defraglocalops; } ret = nf_register_hook(&ip_conntrack_local_out_ops); if (ret < 0) { printk("ip_conntrack: can't register local out hook.\n"); goto cleanup_inops; } ret = nf_register_hook(&ip_conntrack_out_ops); if (ret < 0) { printk("ip_conntrack: can't register post-routing hook.\n"); goto cleanup_inandlocalops; } ret = nf_register_hook(&ip_conntrack_local_in_ops); if (ret < 0) { printk("ip_conntrack: can't register local in hook.\n"); goto cleanup_inoutandlocalops; } #ifdef CONFIG_SYSCTL ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0); if (ip_ct_sysctl_header == NULL) { printk("ip_conntrack: can't register to sysctl.\n"); goto cleanup; } #endif return ret; cleanup: #ifdef CONFIG_SYSCTL unregister_sysctl_table(ip_ct_sysctl_header); #endif nf_unregister_hook(&ip_conntrack_local_in_ops); cleanup_inoutandlocalops: nf_unregister_hook(&ip_conntrack_out_ops); cleanup_inandlocalops: nf_unregister_hook(&ip_conntrack_local_out_ops); cleanup_inops: nf_unregister_hook(&ip_conntrack_in_ops); cleanup_defraglocalops: nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); cleanup_defragops: nf_unregister_hook(&ip_conntrack_defrag_ops); cleanup_proc: proc_net_remove("ip_conntrack"); cleanup_init: ip_conntrack_cleanup(); cleanup_nothing: return ret; } /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) { int ret = 0; struct list_head *i; WRITE_LOCK(&ip_conntrack_lock); list_for_each(i, &protocol_list) { if (((struct ip_conntrack_protocol *)i)->proto == proto->proto) { ret = -EBUSY; goto out; } } list_prepend(&protocol_list, proto); out: WRITE_UNLOCK(&ip_conntrack_lock); return ret; } void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) { WRITE_LOCK(&ip_conntrack_lock); /* ip_ct_find_proto() returns proto_generic in case there is no protocol * helper. So this should be enough - HW */ LIST_DELETE(&protocol_list, proto); WRITE_UNLOCK(&ip_conntrack_lock); /* Somebody could be still looking at the proto in bh. */ synchronize_net(); /* Remove all contrack entries for this protocol */ ip_ct_selective_cleanup(kill_proto, &proto->proto); } static int __init init(void) { return init_or_cleanup(1); } static void __exit fini(void) { init_or_cleanup(0); } module_init(init); module_exit(fini); /* Some modules need us, but don't depend directly on any symbol. They should call this. */ void need_ip_conntrack(void) { } EXPORT_SYMBOL(ip_conntrack_protocol_register); EXPORT_SYMBOL(ip_conntrack_protocol_unregister); EXPORT_SYMBOL(invert_tuplepr); EXPORT_SYMBOL(ip_conntrack_alter_reply); EXPORT_SYMBOL(ip_conntrack_destroyed); EXPORT_SYMBOL(ip_conntrack_get); EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_selective_cleanup); EXPORT_SYMBOL(ip_ct_refresh); EXPORT_SYMBOL(ip_ct_find_proto); EXPORT_SYMBOL(__ip_ct_find_proto); EXPORT_SYMBOL(ip_ct_find_helper); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_change_expect); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_expect_put); EXPORT_SYMBOL(ip_conntrack_tuple_taken); EXPORT_SYMBOL(ip_ct_gather_frags); EXPORT_SYMBOL(ip_conntrack_htable_size); EXPORT_SYMBOL(ip_conntrack_expect_list); EXPORT_SYMBOL(ip_conntrack_lock); EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_put);