* communicating with userspace via netlink.
*
* (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* Zander).
* 2000-08-01: Added Nick Williams' MAC support.
* 2002-06-25: Code cleanup.
+ * 2005-01-10: Added /proc counter for dropped packets; fixed so
+ * packets aren't delivered to user space if they're going
+ * to be dropped.
+ * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
*
*/
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
+#include <linux/mutex.h>
#include <net/sock.h>
#include <net/route.h>
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
-struct ipq_rt_info {
- __u8 tos;
- __u32 daddr;
- __u32 saddr;
-};
-
struct ipq_queue_entry {
struct list_head list;
struct nf_info *info;
struct sk_buff *skb;
- struct ipq_rt_info rt_info;
};
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
static unsigned char copy_mode = IPQ_COPY_NONE;
static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
-static rwlock_t queue_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(queue_lock);
static int peer_pid;
static unsigned int copy_range;
static unsigned int queue_total;
+static unsigned int queue_dropped = 0;
+static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl;
static LIST_HEAD(queue_list);
-static DECLARE_MUTEX(ipqnl_sem);
+static DEFINE_MUTEX(ipqnl_mutex);
static void
ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
{
+ /* TCP input path (and probably other bits) assume to be called
+ * from softirq context, not from syscall, like ipq_issue_verdict is
+ * called. TCP input path deadlocks with locks taken from timer
+ * softirq, e.g. We therefore emulate this by local_bh_disable() */
+
+ local_bh_disable();
nf_reinject(entry->skb, entry->info, verdict);
+ local_bh_enable();
+
kfree(entry);
}
-static inline int
+static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
- if (queue_total >= queue_maxlen) {
- if (net_ratelimit())
- printk(KERN_WARNING "ip_queue: full at %d entries, "
- "dropping packet(s).\n", queue_total);
- return -ENOSPC;
- }
list_add(&entry->list, &queue_list);
queue_total++;
- return 0;
}
/*
__ipq_reset(void)
{
peer_pid = 0;
+ net_disable_timestamp();
__ipq_set_mode(IPQ_COPY_NONE, 0);
__ipq_flush(NF_DROP);
}
break;
case IPQ_COPY_PACKET:
+ if (entry->skb->ip_summed == CHECKSUM_HW &&
+ (*errp = skb_checksum_help(entry->skb,
+ entry->info->outdev == NULL))) {
+ read_unlock_bh(&queue_lock);
+ return NULL;
+ }
if (copy_range == 0 || copy_range > entry->skb->len)
data_len = entry->skb->len;
else
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
- pmsg->timestamp_sec = entry->skb->stamp.tv_sec;
- pmsg->timestamp_usec = entry->skb->stamp.tv_usec;
+ pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
+ pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
pmsg->mark = entry->skb->nfmark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
}
if (data_len)
- memcpy(pmsg->payload, entry->skb->data, data_len);
+ if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
+ BUG();
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
}
static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
+ unsigned int queuenum, void *data)
{
int status = -EINVAL;
struct sk_buff *nskb;
entry->info = info;
entry->skb = skb;
- if (entry->info->hook == NF_IP_LOCAL_OUT) {
- struct iphdr *iph = skb->nh.iph;
-
- entry->rt_info.tos = iph->tos;
- entry->rt_info.daddr = iph->daddr;
- entry->rt_info.saddr = iph->saddr;
- }
-
nskb = ipq_build_packet_message(entry, &status);
if (nskb == NULL)
goto err_out_free;
if (!peer_pid)
goto err_out_free_nskb;
+ if (queue_total >= queue_maxlen) {
+ queue_dropped++;
+ status = -ENOSPC;
+ if (net_ratelimit())
+ printk (KERN_WARNING "ip_queue: full at %d entries, "
+ "dropping packets(s). Dropped: %d\n", queue_total,
+ queue_dropped);
+ goto err_out_free_nskb;
+ }
+
/* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0)
- goto err_out_unlock;
-
- status = __ipq_enqueue_entry(entry);
- if (status < 0)
+ if (status < 0) {
+ queue_user_dropped++;
goto err_out_unlock;
+ }
+
+ __ipq_enqueue_entry(entry);
write_unlock_bh(&queue_lock);
return status;
}
skb_put(e->skb, diff);
}
+ if (!skb_make_writable(&e->skb, v->data_len))
+ return -ENOMEM;
memcpy(e->skb->data, v->payload, v->data_len);
- e->skb->nfcache |= NFC_ALTERED;
-
- /*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
- if (e->info->hook == NF_IP_LOCAL_OUT) {
- struct iphdr *iph = e->skb->nh.iph;
-
- if (!(iph->tos == e->rt_info.tos
- && iph->daddr == e->rt_info.daddr
- && iph->saddr == e->rt_info.saddr))
- return ip_route_me_harder(&e->skb);
- }
+ e->skb->ip_summed = CHECKSUM_NONE;
+
return 0;
}
write_unlock_bh(&queue_lock);
RCV_SKB_FAIL(-EBUSY);
}
- }
- else
+ } else {
+ net_enable_timestamp();
peer_pid = pid;
+ }
write_unlock_bh(&queue_lock);
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
- skblen - NLMSG_LENGTH(0));
+ nlmsglen - NLMSG_LENGTH(0));
if (status < 0)
RCV_SKB_FAIL(status);
static void
ipq_rcv_sk(struct sock *sk, int len)
{
- do {
- struct sk_buff *skb;
+ struct sk_buff *skb;
+ unsigned int qlen;
- if (down_trylock(&ipqnl_sem))
- return;
+ mutex_lock(&ipqnl_mutex);
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- ipq_rcv_skb(skb);
- kfree_skb(skb);
- }
+ for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+ skb = skb_dequeue(&sk->sk_receive_queue);
+ ipq_rcv_skb(skb);
+ kfree_skb(skb);
+ }
- up(&ipqnl_sem);
-
- } while (ipqnl && ipqnl->sk_receive_queue.qlen);
+ mutex_unlock(&ipqnl_mutex);
}
static int
{ .ctl_name = 0 }
};
+#ifdef CONFIG_PROC_FS
static int
ipq_get_info(char *buffer, char **start, off_t offset, int length)
{
"Copy mode : %hu\n"
"Copy range : %u\n"
"Queue length : %u\n"
- "Queue max. length : %u\n",
+ "Queue max. length : %u\n"
+ "Queue dropped : %u\n"
+ "Netlink dropped : %u\n",
peer_pid,
copy_mode,
copy_range,
queue_total,
- queue_maxlen);
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
read_unlock_bh(&queue_lock);
len = 0;
return len;
}
+#endif /* CONFIG_PROC_FS */
-static int
-init_or_cleanup(int init)
+static struct nf_queue_handler nfqh = {
+ .name = "ip_queue",
+ .outfn = &ipq_enqueue_packet,
+};
+
+static int __init ip_queue_init(void)
{
int status = -ENOMEM;
struct proc_dir_entry *proc;
- if (!init)
- goto cleanup;
-
netlink_register_notifier(&ipq_nl_notifier);
- ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk);
+ ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
+ THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
register_netdevice_notifier(&ipq_dev_notifier);
ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
- status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL);
+ status = nf_register_queue_handler(PF_INET, &nfqh);
if (status < 0) {
printk(KERN_ERR "ip_queue: failed to register queue handler\n");
goto cleanup_sysctl;
}
return status;
-cleanup:
- nf_unregister_queue_handler(PF_INET);
- synchronize_net();
- ipq_flush(NF_DROP);
-
cleanup_sysctl:
unregister_sysctl_table(ipq_sysctl_header);
unregister_netdevice_notifier(&ipq_dev_notifier);
cleanup_ipqnl:
sock_release(ipqnl->sk_socket);
- down(&ipqnl_sem);
- up(&ipqnl_sem);
+ mutex_lock(&ipqnl_mutex);
+ mutex_unlock(&ipqnl_mutex);
cleanup_netlink_notifier:
netlink_unregister_notifier(&ipq_nl_notifier);
return status;
}
-static int __init init(void)
+static void __exit ip_queue_fini(void)
{
-
- return init_or_cleanup(1);
-}
+ nf_unregister_queue_handlers(&nfqh);
+ synchronize_net();
+ ipq_flush(NF_DROP);
-static void __exit fini(void)
-{
- init_or_cleanup(0);
+ unregister_sysctl_table(ipq_sysctl_header);
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+ proc_net_remove(IPQ_PROC_FS_NAME);
+
+ sock_release(ipqnl->sk_socket);
+ mutex_lock(&ipqnl_mutex);
+ mutex_unlock(&ipqnl_mutex);
+
+ netlink_unregister_notifier(&ipq_nl_notifier);
}
MODULE_DESCRIPTION("IPv4 packet queue handler");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
MODULE_LICENSE("GPL");
-module_init(init);
-module_exit(fini);
+module_init(ip_queue_init);
+module_exit(ip_queue_fini);