vserver 1.9.3
[linux-2.6.git] / net / ipv4 / netfilter / ip_conntrack_standalone.c
index 80edac9..9abaf01 100644 (file)
@@ -20,6 +20,8 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
 
 MODULE_LICENSE("GPL");
 
+extern atomic_t ip_conntrack_count;
+DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
 static int kill_proto(const struct ip_conntrack *i, void *data)
 {
        return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == 
                        *((u_int8_t *) data));
 }
 
-static unsigned int
-print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple,
+#ifdef CONFIG_PROC_FS
+static int
+print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
            struct ip_conntrack_protocol *proto)
 {
-       int len;
-
-       len = sprintf(buffer, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
-                     NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
-
-       len += proto->print_tuple(buffer + len, tuple);
-
-       return len;
+       seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
+                  NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
+       return proto->print_tuple(s, tuple);
 }
 
-/* FIXME: Don't print source proto part. --RR */
+#ifdef CONFIG_IP_NF_CT_ACCT
 static unsigned int
-print_expect(char *buffer, const struct ip_conntrack_expect *expect)
+seq_print_counters(struct seq_file *s, struct ip_conntrack_counter *counter)
 {
-       unsigned int len;
+       return seq_printf(s, "packets=%llu bytes=%llu ",
+                         (unsigned long long)counter->packets,
+                         (unsigned long long)counter->bytes);
+}
+#else
+#define seq_print_counters(x, y)       0
+#endif
 
-       if (expect->expectant->helper->timeout)
-               len = sprintf(buffer, "EXPECTING: %lu ",
-                             timer_pending(&expect->timeout)
-                             ? (expect->timeout.expires - jiffies)/HZ : 0);
-       else
-               len = sprintf(buffer, "EXPECTING: - ");
-       len += sprintf(buffer + len, "use=%u proto=%u ",
-                     atomic_read(&expect->use), expect->tuple.dst.protonum);
-       len += print_tuple(buffer + len, &expect->tuple,
-                          __ip_ct_find_proto(expect->tuple.dst.protonum));
-       len += sprintf(buffer + len, "\n");
-       return len;
+static void *ct_seq_start(struct seq_file *s, loff_t *pos)
+{
+       if (*pos >= ip_conntrack_htable_size)
+               return NULL;
+       return &ip_conntrack_hash[*pos];
+}
+  
+static void ct_seq_stop(struct seq_file *s, void *v)
+{
 }
 
-static unsigned int
-print_conntrack(char *buffer, struct ip_conntrack *conntrack)
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
-       unsigned int len;
-       struct ip_conntrack_protocol *proto
-               = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+       (*pos)++;
+       if (*pos >= ip_conntrack_htable_size)
+               return NULL;
+       return &ip_conntrack_hash[*pos];
+}
+  
+/* return 0 on success, 1 in case of error */
+static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
+                           struct seq_file *s)
+{
+       struct ip_conntrack *conntrack = hash->ctrack;
+       struct ip_conntrack_protocol *proto;
+
+       MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+
+       IP_NF_ASSERT(conntrack);
+
+       /* we only want to print DIR_ORIGINAL */
+       if (DIRECTION(hash))
+               return 0;
+
+       proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
                               .tuple.dst.protonum);
+       IP_NF_ASSERT(proto);
 
-       len = sprintf(buffer, "%-8s %u %lu ",
+       if (seq_printf(s, "%-8s %u %lu ",
                      proto->name,
-                     conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-                     .tuple.dst.protonum,
+                     conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
                      timer_pending(&conntrack->timeout)
-                     ? (conntrack->timeout.expires - jiffies)/HZ : 0);
+                     ? (conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
+               return 1;
 
-       len += proto->print_conntrack(buffer + len, conntrack);
-       len += print_tuple(buffer + len,
-                          &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-                          proto);
-       if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
-               len += sprintf(buffer + len, "[UNREPLIED] ");
-       len += print_tuple(buffer + len,
-                          &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
-                          proto);
-       if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
-               len += sprintf(buffer + len, "[ASSURED] ");
-       len += sprintf(buffer + len, "use=%u ",
-                      atomic_read(&conntrack->ct_general.use));
-       len += sprintf(buffer + len, "\n");
+       if (proto->print_conntrack(s, conntrack))
+               return 1;
+  
+       if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                       proto))
+               return 1;
 
-       return len;
-}
+       if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
+               return 1;
 
-/* Returns true when finished. */
-static inline int
-conntrack_iterate(const struct ip_conntrack_tuple_hash *hash,
-                 char *buffer, off_t offset, off_t *upto,
-                 unsigned int *len, unsigned int maxlen)
-{
-       unsigned int newlen;
-       IP_NF_ASSERT(hash->ctrack);
+       if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
+               if (seq_printf(s, "[UNREPLIED] "))
+                       return 1;
 
-       MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+       if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
+                       proto))
+               return 1;
 
-       /* Only count originals */
-       if (DIRECTION(hash))
-               return 0;
+       if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
+               return 1;
 
-       if ((*upto)++ < offset)
-               return 0;
+       if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
+               if (seq_printf(s, "[ASSURED] "))
+                       return 1;
 
-       newlen = print_conntrack(buffer + *len, hash->ctrack);
-       if (*len + newlen > maxlen)
+       if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
                return 1;
-       else *len += newlen;
 
        return 0;
 }
 
-static int
-list_conntracks(char *buffer, char **start, off_t offset, int length)
+static int ct_seq_show(struct seq_file *s, void *v)
 {
-       unsigned int i;
-       unsigned int len = 0;
-       off_t upto = 0;
-       struct list_head *e;
+       struct list_head *list = v;
+       int ret = 0;
 
+       /* FIXME: Simply truncates if hash chain too long. */
        READ_LOCK(&ip_conntrack_lock);
-       /* Traverse hash; print originals then reply. */
-       for (i = 0; i < ip_conntrack_htable_size; i++) {
-               if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate,
-                             struct ip_conntrack_tuple_hash *,
-                             buffer, offset, &upto, &len, length))
-                       goto finished;
-       }
+       if (LIST_FIND(list, ct_seq_real_show,
+                     struct ip_conntrack_tuple_hash *, s))
+               ret = -ENOSPC;
+       READ_UNLOCK(&ip_conntrack_lock);
+       return ret;
+}
+       
+static struct seq_operations ct_seq_ops = {
+       .start = ct_seq_start,
+       .next  = ct_seq_next,
+       .stop  = ct_seq_stop,
+       .show  = ct_seq_show
+};
+  
+static int ct_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &ct_seq_ops);
+}
+
+static struct file_operations ct_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = ct_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release
+};
+  
+/* expects */
+static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+{
+       struct list_head *e = &ip_conntrack_expect_list;
+       loff_t i;
 
-       /* Now iterate through expecteds. */
+       /* strange seq_file api calls stop even if we fail,
+        * thus we need to grab lock since stop unlocks */
+       READ_LOCK(&ip_conntrack_lock);
        READ_LOCK(&ip_conntrack_expect_tuple_lock);
-       list_for_each(e, &ip_conntrack_expect_list) {
-               unsigned int last_len;
-               struct ip_conntrack_expect *expect
-                       = (struct ip_conntrack_expect *)e;
-               if (upto++ < offset) continue;
-
-               last_len = len;
-               len += print_expect(buffer + len, expect);
-               if (len > length) {
-                       len = last_len;
-                       goto finished_expects;
-               }
+
+       if (list_empty(e))
+               return NULL;
+
+       for (i = 0; i <= *pos; i++) {
+               e = e->next;
+               if (e == &ip_conntrack_expect_list)
+                       return NULL;
        }
+       return e;
+}
 
- finished_expects:
+static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct list_head *e = v;
+
+       e = e->next;
+
+       if (e == &ip_conntrack_expect_list)
+               return NULL;
+
+       return e;
+}
+
+static void exp_seq_stop(struct seq_file *s, void *v)
+{
        READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
- finished:
        READ_UNLOCK(&ip_conntrack_lock);
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+       struct ip_conntrack_expect *expect = v;
 
-       /* `start' hack - see fs/proc/generic.c line ~165 */
-       *start = (char *)((unsigned int)upto - offset);
-       return len;
+       if (expect->expectant->helper->timeout)
+               seq_printf(s, "%lu ", timer_pending(&expect->timeout)
+                          ? (expect->timeout.expires - jiffies)/HZ : 0);
+       else
+               seq_printf(s, "- ");
+
+       seq_printf(s, "use=%u proto=%u ", atomic_read(&expect->use),
+                  expect->tuple.dst.protonum);
+
+       print_tuple(s, &expect->tuple,
+                   ip_ct_find_proto(expect->tuple.dst.protonum));
+       return seq_putc(s, '\n');
 }
 
+static struct seq_operations exp_seq_ops = {
+       .start = exp_seq_start,
+       .next = exp_seq_next,
+       .stop = exp_seq_stop,
+       .show = exp_seq_show
+};
+
+static int exp_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &exp_seq_ops);
+}
+  
+static struct file_operations exp_file_ops = {
+       .owner   = THIS_MODULE,
+       .open    = exp_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release
+};
+
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       int cpu;
+
+       if (*pos == 0)
+               return SEQ_START_TOKEN;
+
+       for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+               if (!cpu_possible(cpu))
+                       continue;
+               *pos = cpu+1;
+               return &per_cpu(ip_conntrack_stat, cpu);
+       }
+
+       return NULL;
+}
+
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       int cpu;
+
+       for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+               if (!cpu_possible(cpu))
+                       continue;
+               *pos = cpu+1;
+               return &per_cpu(ip_conntrack_stat, cpu);
+       }
+
+       return NULL;
+}
+
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+       unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+       struct ip_conntrack_stat *st = v;
+
+       if (v == SEQ_START_TOKEN) {
+               seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
+               return 0;
+       }
+
+       seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
+                       "%08x %08x %08x %08x %08x  %08x %08x %08x \n",
+                  nr_conntracks,
+                  st->searched,
+                  st->found,
+                  st->new,
+                  st->invalid,
+                  st->ignore,
+                  st->delete,
+                  st->delete_list,
+                  st->insert,
+                  st->insert_failed,
+                  st->drop,
+                  st->early_drop,
+                  st->error,
+
+                  st->expect_new,
+                  st->expect_create,
+                  st->expect_delete
+               );
+       return 0;
+}
+
+static struct seq_operations ct_cpu_seq_ops = {
+       .start  = ct_cpu_seq_start,
+       .next   = ct_cpu_seq_next,
+       .stop   = ct_cpu_seq_stop,
+       .show   = ct_cpu_seq_show,
+};
+
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &ct_cpu_seq_ops);
+}
+
+static struct file_operations ct_cpu_seq_fops = {
+       .owner   = THIS_MODULE,
+       .open    = ct_cpu_seq_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release_private,
+};
+#endif
+
 static unsigned int ip_confirm(unsigned int hooknum,
                               struct sk_buff **pskb,
                               const struct net_device *in,
@@ -323,6 +491,10 @@ extern unsigned long ip_ct_tcp_timeout_close_wait;
 extern unsigned long ip_ct_tcp_timeout_last_ack;
 extern unsigned long ip_ct_tcp_timeout_time_wait;
 extern unsigned long ip_ct_tcp_timeout_close;
+extern unsigned long ip_ct_tcp_timeout_max_retrans;
+extern int ip_ct_tcp_loose;
+extern int ip_ct_tcp_be_liberal;
+extern int ip_ct_tcp_max_retrans;
 
 /* From ip_conntrack_proto_udp.c */
 extern unsigned long ip_ct_udp_timeout;
@@ -334,6 +506,10 @@ extern unsigned long ip_ct_icmp_timeout;
 /* From ip_conntrack_proto_icmp.c */
 extern unsigned long ip_ct_generic_timeout;
 
+/* Log invalid packets of a given protocol */
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
 static struct ctl_table_header *ip_ct_sysctl_header;
 
 static ctl_table ip_ct_sysctl_table[] = {
@@ -345,6 +521,14 @@ static ctl_table ip_ct_sysctl_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .ctl_name       = NET_IPV4_NF_CONNTRACK_COUNT,
+               .procname       = "ip_conntrack_count",
+               .data           = &ip_conntrack_count,
+               .maxlen         = sizeof(int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+       },
        {
                .ctl_name       = NET_IPV4_NF_CONNTRACK_BUCKETS,
                .procname       = "ip_conntrack_buckets",
@@ -449,6 +633,49 @@ static ctl_table ip_ct_sysctl_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
+       {
+               .ctl_name       = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
+               .procname       = "ip_conntrack_log_invalid",
+               .data           = &ip_ct_log_invalid,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &log_invalid_proto_min,
+               .extra2         = &log_invalid_proto_max,
+       },
+       {
+               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
+               .procname       = "ip_conntrack_tcp_timeout_max_retrans",
+               .data           = &ip_ct_tcp_timeout_max_retrans,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+       },
+       {
+               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
+               .procname       = "ip_conntrack_tcp_loose",
+               .data           = &ip_ct_tcp_loose,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
+               .procname       = "ip_conntrack_tcp_be_liberal",
+               .data           = &ip_ct_tcp_be_liberal,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
+               .procname       = "ip_conntrack_tcp_max_retrans",
+               .data           = &ip_ct_tcp_max_retrans,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
        { .ctl_name = 0 }
 };
 
@@ -491,10 +718,15 @@ static ctl_table ip_ct_net_table[] = {
        },
        { .ctl_name = 0 }
 };
-#endif
+
+EXPORT_SYMBOL(ip_ct_log_invalid);
+#endif /* CONFIG_SYSCTL */
+
 static int init_or_cleanup(int init)
 {
-       struct proc_dir_entry *proc;
+#ifdef CONFIG_PROC_FS
+       struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+#endif
        int ret = 0;
 
        if (!init) goto cleanup;
@@ -503,14 +735,26 @@ static int init_or_cleanup(int init)
        if (ret < 0)
                goto cleanup_nothing;
 
-       proc = proc_net_create("ip_conntrack", 0440, list_conntracks);
+#ifdef CONFIG_PROC_FS
+       proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
        if (!proc) goto cleanup_init;
-       proc->owner = THIS_MODULE;
+
+       proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
+                                       &exp_file_ops);
+       if (!proc_exp) goto cleanup_proc;
+
+       proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+       if (!proc_stat)
+               goto cleanup_proc_exp;
+
+       proc_stat->proc_fops = &ct_cpu_seq_fops;
+       proc_stat->owner = THIS_MODULE;
+#endif
 
        ret = nf_register_hook(&ip_conntrack_defrag_ops);
        if (ret < 0) {
                printk("ip_conntrack: can't register pre-routing defrag hook.\n");
-               goto cleanup_proc;
+               goto cleanup_proc_stat;
        }
        ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
        if (ret < 0) {
@@ -561,10 +805,22 @@ static int init_or_cleanup(int init)
  cleanup_defraglocalops:
        nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
  cleanup_defragops:
+       /* Frag queues may hold fragments with skb->dst == NULL */
+       ip_ct_no_defrag = 1;
+       synchronize_net();
+       local_bh_disable();
+       ipfrag_flush();
+       local_bh_enable();
        nf_unregister_hook(&ip_conntrack_defrag_ops);
+ cleanup_proc_stat:
+#ifdef CONFIG_PROC_FS
+       proc_net_remove("ip_conntrack_stat");
+cleanup_proc_exp:
+       proc_net_remove("ip_conntrack_exp");
  cleanup_proc:
        proc_net_remove("ip_conntrack");
  cleanup_init:
+#endif /* CONFIG_PROC_FS */
        ip_conntrack_cleanup();
  cleanup_nothing:
        return ret;
@@ -575,19 +831,13 @@ static int init_or_cleanup(int init)
 int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
 {
        int ret = 0;
-       struct list_head *i;
 
        WRITE_LOCK(&ip_conntrack_lock);
-       list_for_each(i, &protocol_list) {
-               if (((struct ip_conntrack_protocol *)i)->proto
-                   == proto->proto) {
-                       ret = -EBUSY;
-                       goto out;
-               }
+       if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+               ret = -EBUSY;
+               goto out;
        }
-
-       list_prepend(&protocol_list, proto);
-
+       ip_ct_protos[proto->proto] = proto;
  out:
        WRITE_UNLOCK(&ip_conntrack_lock);
        return ret;
@@ -596,10 +846,7 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
 void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
 {
        WRITE_LOCK(&ip_conntrack_lock);
-
-       /* ip_ct_find_proto() returns proto_generic in case there is no protocol 
-        * helper. So this should be enough - HW */
-       LIST_DELETE(&protocol_list, proto);
+       ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
        WRITE_UNLOCK(&ip_conntrack_lock);
        
        /* Somebody could be still looking at the proto in bh. */
@@ -633,14 +880,13 @@ EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
 EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(ip_conntrack_get);
 EXPORT_SYMBOL(need_ip_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_selective_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh);
+EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(ip_ct_protos);
 EXPORT_SYMBOL(ip_ct_find_proto);
-EXPORT_SYMBOL(__ip_ct_find_proto);
 EXPORT_SYMBOL(ip_ct_find_helper);
 EXPORT_SYMBOL(ip_conntrack_expect_alloc);
 EXPORT_SYMBOL(ip_conntrack_expect_related);