X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_queue.c;h=cd520df4dcf4c36552da82a3d6a37bef5131d0f3;hb=refs%2Fheads%2Fvserver;hp=a9aa12cc83bb40137acbd7410656b9da07724ed4;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a9aa12cc8..cd520df4d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -3,6 +3,7 @@ * communicating with userspace via netlink. * * (C) 2000-2002 James Morris + * (C) 2003-2005 Netfilter Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,6 +15,10 @@ * Zander). * 2000-08-01: Added Nick Williams' MAC support. * 2002-06-25: Code cleanup. + * 2005-01-10: Added /proc counter for dropped packets; fixed so + * packets aren't delivered to user space if they're going + * to be dropped. + * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte) * */ #include @@ -30,6 +35,7 @@ #include #include #include +#include #include #include @@ -38,50 +44,46 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" -struct ipq_rt_info { - __u8 tos; - __u32 daddr; - __u32 saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode = IPQ_COPY_NONE; -static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; +static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; +static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid; -static unsigned int copy_range; +static int peer_pid __read_mostly; +static unsigned int copy_range __read_mostly; static unsigned int queue_total; -static struct sock *ipqnl; +static unsigned int queue_dropped = 0; +static unsigned int queue_user_dropped = 0; +static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); -static DECLARE_MUTEX(ipqnl_sem); +static DEFINE_MUTEX(ipqnl_mutex); static void ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) { + /* TCP input path (and probably other bits) assume to be called + * from softirq context, not from syscall, like ipq_issue_verdict is + * called. TCP input path deadlocks with locks taken from timer + * softirq, e.g. We therefore emulate this by local_bh_disable() */ + + local_bh_disable(); nf_reinject(entry->skb, entry->info, verdict); + local_bh_enable(); + kfree(entry); } -static inline int +static inline void __ipq_enqueue_entry(struct ipq_queue_entry *entry) { - if (queue_total >= queue_maxlen) { - if (net_ratelimit()) - printk(KERN_WARNING "ip_queue: full at %d entries, " - "dropping packet(s).\n", queue_total); - return -ENOSPC; - } list_add(&entry->list, &queue_list); queue_total++; - return 0; } /* @@ -206,6 +208,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { + read_unlock_bh(&queue_lock); + return NULL; + } if (copy_range == 0 || copy_range > entry->skb->len) data_len = entry->skb->len; else @@ -233,9 +241,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->stamp.tv_sec; - pmsg->timestamp_usec = entry->skb->stamp.tv_usec; - pmsg->mark = entry->skb->nfmark; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; + pmsg->mark = entry->skb->mark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; @@ -273,7 +281,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; @@ -291,14 +300,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = skb->nh.iph; - - entry->rt_info.tos = iph->tos; - entry->rt_info.daddr = iph->daddr; - entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -308,14 +309,24 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) if (!peer_pid) goto err_out_free_nskb; + if (queue_total >= queue_maxlen) { + queue_dropped++; + status = -ENOSPC; + if (net_ratelimit()) + printk (KERN_WARNING "ip_queue: full at %d entries, " + "dropping packets(s). Dropped: %d\n", queue_total, + queue_dropped); + goto err_out_free_nskb; + } + /* netlink_unicast will either free the nskb or attach it to a socket */ status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); - if (status < 0) - goto err_out_unlock; - - status = __ipq_enqueue_entry(entry); - if (status < 0) + if (status < 0) { + queue_user_dropped++; goto err_out_unlock; + } + + __ipq_enqueue_entry(entry); write_unlock_bh(&queue_lock); return status; @@ -340,9 +351,10 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len < sizeof(*user_iph)) return 0; diff = v->data_len - e->skb->len; - if (diff < 0) - skb_trim(e->skb, v->data_len); - else if (diff > 0) { + if (diff < 0) { + if (pskb_trim(e->skb, v->data_len)) + return -ENOMEM; + } else if (diff > 0) { if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { @@ -364,23 +376,11 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); - e->skb->nfcache |= NFC_ALTERED; - - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = e->skb->nh.iph; - - if (!(iph->tos == e->rt_info.tos - && iph->daddr == e->rt_info.daddr - && iph->saddr == e->rt_info.saddr)) - return ip_route_me_harder(&e->skb); - } + e->skb->ip_summed = CHECKSUM_NONE; + return 0; } @@ -458,11 +458,19 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) if (entry->info->indev) if (entry->info->indev->ifindex == ifindex) return 1; - if (entry->info->outdev) if (entry->info->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } @@ -508,7 +516,7 @@ ipq_rcv_skb(struct sk_buff *skb) if (type <= IPQM_BASE) return; - if (security_netlink_recv(skb)) + if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); write_lock_bh(&queue_lock); @@ -526,7 +534,7 @@ ipq_rcv_skb(struct sk_buff *skb) write_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, - skblen - NLMSG_LENGTH(0)); + nlmsglen - NLMSG_LENGTH(0)); if (status < 0) RCV_SKB_FAIL(status); @@ -538,20 +546,18 @@ ipq_rcv_skb(struct sk_buff *skb) static void ipq_rcv_sk(struct sock *sk, int len) { - do { - struct sk_buff *skb; + struct sk_buff *skb; + unsigned int qlen; - if (down_trylock(&ipqnl_sem)) - return; + mutex_lock(&ipqnl_mutex); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - ipq_rcv_skb(skb); - kfree_skb(skb); - } + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); + ipq_rcv_skb(skb); + kfree_skb(skb); + } - up(&ipqnl_sem); - - } while (ipqnl && ipqnl->sk_receive_queue.qlen); + mutex_unlock(&ipqnl_mutex); } static int @@ -637,12 +643,16 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) "Copy mode : %hu\n" "Copy range : %u\n" "Queue length : %u\n" - "Queue max. length : %u\n", + "Queue max. length : %u\n" + "Queue dropped : %u\n" + "Netlink dropped : %u\n", peer_pid, copy_mode, copy_range, queue_total, - queue_maxlen); + queue_maxlen, + queue_dropped, + queue_user_dropped); read_unlock_bh(&queue_lock); @@ -656,17 +666,19 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) } #endif /* CONFIG_PROC_FS */ -static int -init_or_cleanup(int init) +static struct nf_queue_handler nfqh = { + .name = "ip_queue", + .outfn = &ipq_enqueue_packet, +}; + +static int __init ip_queue_init(void) { int status = -ENOMEM; struct proc_dir_entry *proc; - if (!init) - goto cleanup; - netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; @@ -683,18 +695,13 @@ init_or_cleanup(int init) register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL); + status = nf_register_queue_handler(PF_INET, &nfqh); if (status < 0) { printk(KERN_ERR "ip_queue: failed to register queue handler\n"); goto cleanup_sysctl; } return status; -cleanup: - nf_unregister_queue_handler(PF_INET); - synchronize_net(); - ipq_flush(NF_DROP); - cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); @@ -702,28 +709,34 @@ cleanup_sysctl: cleanup_ipqnl: sock_release(ipqnl->sk_socket); - down(&ipqnl_sem); - up(&ipqnl_sem); + mutex_lock(&ipqnl_mutex); + mutex_unlock(&ipqnl_mutex); cleanup_netlink_notifier: netlink_unregister_notifier(&ipq_nl_notifier); return status; } -static int __init init(void) +static void __exit ip_queue_fini(void) { - - return init_or_cleanup(1); -} + nf_unregister_queue_handlers(&nfqh); + synchronize_net(); + ipq_flush(NF_DROP); -static void __exit fini(void) -{ - init_or_cleanup(0); + unregister_sysctl_table(ipq_sysctl_header); + unregister_netdevice_notifier(&ipq_dev_notifier); + proc_net_remove(IPQ_PROC_FS_NAME); + + sock_release(ipqnl->sk_socket); + mutex_lock(&ipqnl_mutex); + mutex_unlock(&ipqnl_mutex); + + netlink_unregister_notifier(&ipq_nl_notifier); } MODULE_DESCRIPTION("IPv4 packet queue handler"); MODULE_AUTHOR("James Morris "); MODULE_LICENSE("GPL"); -module_init(init); -module_exit(fini); +module_init(ip_queue_init); +module_exit(ip_queue_fini);