2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@redhat.com>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall
25 * Carlos Picoto : PIMv1 Support
26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
27 * Relax this requirement to work with older peers.
31 #include <linux/config.h>
32 #include <asm/system.h>
33 #include <asm/uaccess.h>
34 #include <linux/types.h>
35 #include <linux/sched.h>
36 #include <linux/errno.h>
37 #include <linux/timer.h>
39 #include <linux/kernel.h>
40 #include <linux/fcntl.h>
41 #include <linux/stat.h>
42 #include <linux/socket.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/inetdevice.h>
47 #include <linux/igmp.h>
48 #include <linux/proc_fs.h>
49 #include <linux/seq_file.h>
50 #include <linux/mroute.h>
51 #include <linux/init.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
63 #include <net/checksum.h>
65 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
66 #define CONFIG_IP_PIMSM 1
69 static struct sock *mroute_socket;
72 /* Big lock, protecting vif table, mrt cache and mroute socket state.
73 Note that the changes are semaphored via rtnl_lock.
76 static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
79 * Multicast router control variables
82 static struct vif_device vif_table[MAXVIFS]; /* Devices */
85 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
87 static int mroute_do_assert; /* Set in PIM assert */
88 static int mroute_do_pim;
90 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
92 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
93 static atomic_t cache_resolve_queue_len; /* Size of unresolved */
95 /* Special spinlock for queue of unresolved entries */
96 static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
98 /* We return to original Alan's scheme. Hash table of resolved
99 entries is changed only in process context and protected
100 with weak lock mrt_lock. Queue of unresolved entries is protected
101 with strong spinlock mfc_unres_lock.
103 In this case data path is free of exclusive locks at all.
106 static kmem_cache_t *mrt_cachep;
108 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
109 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
110 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
112 static struct inet_protocol pim_protocol;
114 static struct timer_list ipmr_expire_timer;
116 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
119 struct net_device *ipmr_new_tunnel(struct vifctl *v)
121 struct net_device *dev;
123 dev = __dev_get_by_name("tunl0");
129 struct ip_tunnel_parm p;
130 struct in_device *in_dev;
132 memset(&p, 0, sizeof(p));
133 p.iph.daddr = v->vifc_rmt_addr.s_addr;
134 p.iph.saddr = v->vifc_lcl_addr.s_addr;
137 p.iph.protocol = IPPROTO_IPIP;
138 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
139 ifr.ifr_ifru.ifru_data = (void*)&p;
141 oldfs = get_fs(); set_fs(KERNEL_DS);
142 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
147 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
148 dev->flags |= IFF_MULTICAST;
150 in_dev = __in_dev_get(dev);
151 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
153 in_dev->cnf.rp_filter = 0;
162 /* allow the register to be completed before unregistering. */
166 unregister_netdevice(dev);
170 #ifdef CONFIG_IP_PIMSM
172 static int reg_vif_num = -1;
174 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
176 read_lock(&mrt_lock);
177 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
178 ((struct net_device_stats*)dev->priv)->tx_packets++;
179 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
180 read_unlock(&mrt_lock);
185 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
187 return (struct net_device_stats*)dev->priv;
190 static void reg_vif_setup(struct net_device *dev)
192 dev->type = ARPHRD_PIMREG;
193 dev->mtu = 1500 - sizeof(struct iphdr) - 8;
194 dev->flags = IFF_NOARP;
195 dev->hard_start_xmit = reg_vif_xmit;
196 dev->get_stats = reg_vif_get_stats;
197 dev->destructor = free_netdev;
200 static struct net_device *ipmr_reg_vif(void)
202 struct net_device *dev;
203 struct in_device *in_dev;
205 dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
211 if (register_netdevice(dev)) {
217 if ((in_dev = inetdev_init(dev)) == NULL)
220 in_dev->cnf.rp_filter = 0;
228 /* allow the register to be completed before unregistering. */
232 unregister_netdevice(dev);
241 static int vif_delete(int vifi)
243 struct vif_device *v;
244 struct net_device *dev;
245 struct in_device *in_dev;
247 if (vifi < 0 || vifi >= maxvif)
248 return -EADDRNOTAVAIL;
250 v = &vif_table[vifi];
252 write_lock_bh(&mrt_lock);
257 write_unlock_bh(&mrt_lock);
258 return -EADDRNOTAVAIL;
261 #ifdef CONFIG_IP_PIMSM
262 if (vifi == reg_vif_num)
266 if (vifi+1 == maxvif) {
268 for (tmp=vifi-1; tmp>=0; tmp--) {
275 write_unlock_bh(&mrt_lock);
277 dev_set_allmulti(dev, -1);
279 if ((in_dev = __in_dev_get(dev)) != NULL) {
280 in_dev->cnf.mc_forwarding--;
281 ip_rt_multicast_event(in_dev);
284 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
285 unregister_netdevice(dev);
291 /* Destroy an unresolved cache entry, killing queued skbs
292 and reporting error to netlink readers.
295 static void ipmr_destroy_unres(struct mfc_cache *c)
299 atomic_dec(&cache_resolve_queue_len);
301 while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
302 if (skb->nh.iph->version == 0) {
303 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
304 nlh->nlmsg_type = NLMSG_ERROR;
305 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
306 skb_trim(skb, nlh->nlmsg_len);
307 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
308 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
313 kmem_cache_free(mrt_cachep, c);
317 /* Single timer process for all the unresolved queue. */
319 static void ipmr_expire_process(unsigned long dummy)
322 unsigned long expires;
323 struct mfc_cache *c, **cp;
325 if (!spin_trylock(&mfc_unres_lock)) {
326 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
330 if (atomic_read(&cache_resolve_queue_len) == 0)
335 cp = &mfc_unres_queue;
337 while ((c=*cp) != NULL) {
338 if (time_after(c->mfc_un.unres.expires, now)) {
339 unsigned long interval = c->mfc_un.unres.expires - now;
340 if (interval < expires)
348 ipmr_destroy_unres(c);
351 if (atomic_read(&cache_resolve_queue_len))
352 mod_timer(&ipmr_expire_timer, jiffies + expires);
355 spin_unlock(&mfc_unres_lock);
358 /* Fill oifs list. It is called under write locked mrt_lock. */
360 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
364 cache->mfc_un.res.minvif = MAXVIFS;
365 cache->mfc_un.res.maxvif = 0;
366 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
368 for (vifi=0; vifi<maxvif; vifi++) {
369 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
370 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
371 if (cache->mfc_un.res.minvif > vifi)
372 cache->mfc_un.res.minvif = vifi;
373 if (cache->mfc_un.res.maxvif <= vifi)
374 cache->mfc_un.res.maxvif = vifi + 1;
379 static int vif_add(struct vifctl *vifc, int mrtsock)
381 int vifi = vifc->vifc_vifi;
382 struct vif_device *v = &vif_table[vifi];
383 struct net_device *dev;
384 struct in_device *in_dev;
387 if (VIF_EXISTS(vifi))
390 switch (vifc->vifc_flags) {
391 #ifdef CONFIG_IP_PIMSM
394 * Special Purpose VIF in PIM
395 * All the packets will be sent to the daemon
397 if (reg_vif_num >= 0)
399 dev = ipmr_reg_vif();
405 dev = ipmr_new_tunnel(vifc);
410 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
412 return -EADDRNOTAVAIL;
419 if ((in_dev = __in_dev_get(dev)) == NULL)
420 return -EADDRNOTAVAIL;
421 in_dev->cnf.mc_forwarding++;
422 dev_set_allmulti(dev, +1);
423 ip_rt_multicast_event(in_dev);
426 * Fill in the VIF structures
428 v->rate_limit=vifc->vifc_rate_limit;
429 v->local=vifc->vifc_lcl_addr.s_addr;
430 v->remote=vifc->vifc_rmt_addr.s_addr;
431 v->flags=vifc->vifc_flags;
433 v->flags |= VIFF_STATIC;
434 v->threshold=vifc->vifc_threshold;
439 v->link = dev->ifindex;
440 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
441 v->link = dev->iflink;
443 /* And finish update writing critical data */
444 write_lock_bh(&mrt_lock);
447 #ifdef CONFIG_IP_PIMSM
448 if (v->flags&VIFF_REGISTER)
453 write_unlock_bh(&mrt_lock);
457 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
459 int line=MFC_HASH(mcastgrp,origin);
462 for (c=mfc_cache_array[line]; c; c = c->next) {
463 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
470 * Allocate a multicast cache entry
472 static struct mfc_cache *ipmr_cache_alloc(void)
474 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
477 memset(c, 0, sizeof(*c));
478 c->mfc_un.res.minvif = MAXVIFS;
482 static struct mfc_cache *ipmr_cache_alloc_unres(void)
484 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
487 memset(c, 0, sizeof(*c));
488 skb_queue_head_init(&c->mfc_un.unres.unresolved);
489 c->mfc_un.unres.expires = jiffies + 10*HZ;
494 * A cache entry has gone into a resolved state from queued
497 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
502 * Play the pending entries through our router
505 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
506 if (skb->nh.iph->version == 0) {
508 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
510 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
511 nlh->nlmsg_len = skb->tail - (u8*)nlh;
513 nlh->nlmsg_type = NLMSG_ERROR;
514 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
515 skb_trim(skb, nlh->nlmsg_len);
516 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
518 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
520 ip_mr_forward(skb, c, 0);
525 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
526 * expects the following bizarre scheme.
528 * Called under mrt_lock.
531 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
534 int ihl = pkt->nh.iph->ihl<<2;
535 struct igmphdr *igmp;
539 #ifdef CONFIG_IP_PIMSM
540 if (assert == IGMPMSG_WHOLEPKT)
541 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
544 skb = alloc_skb(128, GFP_ATOMIC);
549 #ifdef CONFIG_IP_PIMSM
550 if (assert == IGMPMSG_WHOLEPKT) {
551 /* Ugly, but we have no choice with this interface.
552 Duplicate old header, fix ihl, length etc.
553 And all this only to mangle msg->im_msgtype and
554 to set msg->im_mbz to "mbz" :-)
556 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
557 skb->nh.raw = skb->h.raw = (u8*)msg;
558 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
559 msg->im_msgtype = IGMPMSG_WHOLEPKT;
561 msg->im_vif = reg_vif_num;
562 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
563 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
572 skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
573 memcpy(skb->data,pkt->data,ihl);
574 skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
575 msg = (struct igmpmsg*)skb->nh.iph;
577 skb->dst = dst_clone(pkt->dst);
583 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
585 msg->im_msgtype = assert;
587 skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
588 skb->h.raw = skb->nh.raw;
591 if (mroute_socket == NULL) {
599 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
601 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
609 * Queue a packet for resolution. It gets locked cache entry!
613 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
618 spin_lock_bh(&mfc_unres_lock);
619 for (c=mfc_unres_queue; c; c=c->next) {
620 if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
621 c->mfc_origin == skb->nh.iph->saddr)
627 * Create a new entry if allowable
630 if (atomic_read(&cache_resolve_queue_len)>=10 ||
631 (c=ipmr_cache_alloc_unres())==NULL) {
632 spin_unlock_bh(&mfc_unres_lock);
639 * Fill in the new cache entry
642 c->mfc_origin=skb->nh.iph->saddr;
643 c->mfc_mcastgrp=skb->nh.iph->daddr;
646 * Reflect first query at mrouted.
648 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
649 /* If the report failed throw the cache entry
652 spin_unlock_bh(&mfc_unres_lock);
654 kmem_cache_free(mrt_cachep, c);
659 atomic_inc(&cache_resolve_queue_len);
660 c->next = mfc_unres_queue;
663 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
667 * See if we can append the packet
669 if (c->mfc_un.unres.unresolved.qlen>3) {
673 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
677 spin_unlock_bh(&mfc_unres_lock);
682 * MFC cache manipulation by user space mroute daemon
685 static int ipmr_mfc_delete(struct mfcctl *mfc)
688 struct mfc_cache *c, **cp;
690 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
692 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
693 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
694 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
695 write_lock_bh(&mrt_lock);
697 write_unlock_bh(&mrt_lock);
699 kmem_cache_free(mrt_cachep, c);
706 static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
709 struct mfc_cache *uc, *c, **cp;
711 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
713 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
714 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
715 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
720 write_lock_bh(&mrt_lock);
721 c->mfc_parent = mfc->mfcc_parent;
722 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
724 c->mfc_flags |= MFC_STATIC;
725 write_unlock_bh(&mrt_lock);
729 if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
732 c=ipmr_cache_alloc();
736 c->mfc_origin=mfc->mfcc_origin.s_addr;
737 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
738 c->mfc_parent=mfc->mfcc_parent;
739 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
741 c->mfc_flags |= MFC_STATIC;
743 write_lock_bh(&mrt_lock);
744 c->next = mfc_cache_array[line];
745 mfc_cache_array[line] = c;
746 write_unlock_bh(&mrt_lock);
749 * Check to see if we resolved a queued list. If so we
750 * need to send on the frames and tidy up.
752 spin_lock_bh(&mfc_unres_lock);
753 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
755 if (uc->mfc_origin == c->mfc_origin &&
756 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
758 if (atomic_dec_and_test(&cache_resolve_queue_len))
759 del_timer(&ipmr_expire_timer);
763 spin_unlock_bh(&mfc_unres_lock);
766 ipmr_cache_resolve(uc, c);
767 kmem_cache_free(mrt_cachep, uc);
773 * Close the multicast socket, and clear the vif tables etc
776 static void mroute_clean_tables(struct sock *sk)
781 * Shut down all active vif entries
783 for(i=0; i<maxvif; i++) {
784 if (!(vif_table[i].flags&VIFF_STATIC))
791 for (i=0;i<MFC_LINES;i++) {
792 struct mfc_cache *c, **cp;
794 cp = &mfc_cache_array[i];
795 while ((c = *cp) != NULL) {
796 if (c->mfc_flags&MFC_STATIC) {
800 write_lock_bh(&mrt_lock);
802 write_unlock_bh(&mrt_lock);
804 kmem_cache_free(mrt_cachep, c);
808 if (atomic_read(&cache_resolve_queue_len) != 0) {
811 spin_lock_bh(&mfc_unres_lock);
812 while (mfc_unres_queue != NULL) {
814 mfc_unres_queue = c->next;
815 spin_unlock_bh(&mfc_unres_lock);
817 ipmr_destroy_unres(c);
819 spin_lock_bh(&mfc_unres_lock);
821 spin_unlock_bh(&mfc_unres_lock);
825 static void mrtsock_destruct(struct sock *sk)
828 if (sk == mroute_socket) {
829 ipv4_devconf.mc_forwarding--;
831 write_lock_bh(&mrt_lock);
833 write_unlock_bh(&mrt_lock);
835 mroute_clean_tables(sk);
841 * Socket options and virtual interface manipulation. The whole
842 * virtual interface system is a complete heap, but unfortunately
843 * that's how BSD mrouted happens to think. Maybe one day with a proper
844 * MOSPF/PIM router set up we can clean this up.
847 int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
853 if(optname!=MRT_INIT)
855 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
862 if (sk->sk_type != SOCK_RAW ||
863 inet_sk(sk)->num != IPPROTO_IGMP)
865 if(optlen!=sizeof(int))
874 ret = ip_ra_control(sk, 1, mrtsock_destruct);
876 write_lock_bh(&mrt_lock);
878 write_unlock_bh(&mrt_lock);
880 ipv4_devconf.mc_forwarding++;
885 if (sk!=mroute_socket)
887 return ip_ra_control(sk, 0, NULL);
890 if(optlen!=sizeof(vif))
892 if (copy_from_user(&vif,optval,sizeof(vif)))
894 if(vif.vifc_vifi >= MAXVIFS)
897 if (optname==MRT_ADD_VIF) {
898 ret = vif_add(&vif, sk==mroute_socket);
900 ret = vif_delete(vif.vifc_vifi);
906 * Manipulate the forwarding caches. These live
907 * in a sort of kernel/user symbiosis.
911 if(optlen!=sizeof(mfc))
913 if (copy_from_user(&mfc,optval, sizeof(mfc)))
916 if (optname==MRT_DEL_MFC)
917 ret = ipmr_mfc_delete(&mfc);
919 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
923 * Control PIM assert.
928 if(get_user(v,(int __user *)optval))
930 mroute_do_assert=(v)?1:0;
933 #ifdef CONFIG_IP_PIMSM
937 if(get_user(v,(int __user *)optval))
942 if (v != mroute_do_pim) {
944 mroute_do_assert = v;
945 #ifdef CONFIG_IP_PIMSM_V2
947 ret = inet_add_protocol(&pim_protocol,
950 ret = inet_del_protocol(&pim_protocol,
961 * Spurious command, or MRT_VERSION which you cannot
970 * Getsock opt support for the multicast routing system.
973 int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
978 if(optname!=MRT_VERSION &&
979 #ifdef CONFIG_IP_PIMSM
985 if (get_user(olr, optlen))
988 olr = min_t(unsigned int, olr, sizeof(int));
992 if(put_user(olr,optlen))
994 if(optname==MRT_VERSION)
996 #ifdef CONFIG_IP_PIMSM
997 else if(optname==MRT_PIM)
1001 val=mroute_do_assert;
1002 if(copy_to_user(optval,&val,olr))
1008 * The IP multicast ioctl support routines.
1011 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1013 struct sioc_sg_req sr;
1014 struct sioc_vif_req vr;
1015 struct vif_device *vif;
1016 struct mfc_cache *c;
1021 if (copy_from_user(&vr,arg,sizeof(vr)))
1025 read_lock(&mrt_lock);
1026 vif=&vif_table[vr.vifi];
1027 if(VIF_EXISTS(vr.vifi)) {
1028 vr.icount=vif->pkt_in;
1029 vr.ocount=vif->pkt_out;
1030 vr.ibytes=vif->bytes_in;
1031 vr.obytes=vif->bytes_out;
1032 read_unlock(&mrt_lock);
1034 if (copy_to_user(arg,&vr,sizeof(vr)))
1038 read_unlock(&mrt_lock);
1039 return -EADDRNOTAVAIL;
1041 if (copy_from_user(&sr,arg,sizeof(sr)))
1044 read_lock(&mrt_lock);
1045 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1047 sr.pktcnt = c->mfc_un.res.pkt;
1048 sr.bytecnt = c->mfc_un.res.bytes;
1049 sr.wrong_if = c->mfc_un.res.wrong_if;
1050 read_unlock(&mrt_lock);
1052 if (copy_to_user(arg,&sr,sizeof(sr)))
1056 read_unlock(&mrt_lock);
1057 return -EADDRNOTAVAIL;
1059 return -ENOIOCTLCMD;
1064 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1066 struct vif_device *v;
1068 if (event != NETDEV_UNREGISTER)
1071 for(ct=0;ct<maxvif;ct++,v++) {
1079 static struct notifier_block ip_mr_notifier={
1080 .notifier_call = ipmr_device_event,
1084 * Encapsulate a packet by attaching a valid IPIP header to it.
1085 * This avoids tunnel drivers and other mess and gives us the speed so
1086 * important for multicast video.
1089 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1091 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1094 iph->tos = skb->nh.iph->tos;
1095 iph->ttl = skb->nh.iph->ttl;
1099 iph->protocol = IPPROTO_IPIP;
1101 iph->tot_len = htons(skb->len);
1102 ip_select_ident(iph, skb->dst, NULL);
1105 skb->h.ipiph = skb->nh.iph;
1107 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1108 #ifdef CONFIG_NETFILTER
1109 nf_conntrack_put(skb->nfct);
1114 static inline int ipmr_forward_finish(struct sk_buff *skb)
1116 struct ip_options * opt = &(IPCB(skb)->opt);
1118 IP_INC_STATS_BH(OutForwDatagrams);
1120 if (unlikely(opt->optlen))
1121 ip_forward_options(skb);
1123 return dst_output(skb);
1127 * Processing handlers for ipmr_forward
1130 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1132 struct iphdr *iph = skb->nh.iph;
1133 struct vif_device *vif = &vif_table[vifi];
1134 struct net_device *dev;
1138 if (vif->dev == NULL)
1141 #ifdef CONFIG_IP_PIMSM
1142 if (vif->flags & VIFF_REGISTER) {
1144 vif->bytes_out+=skb->len;
1145 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1146 ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1147 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1153 if (vif->flags&VIFF_TUNNEL) {
1154 struct flowi fl = { .oif = vif->link,
1156 { .daddr = vif->remote,
1157 .saddr = vif->local,
1158 .tos = RT_TOS(iph->tos) } },
1159 .proto = IPPROTO_IPIP };
1160 if (ip_route_output_key(&rt, &fl))
1162 encap = sizeof(struct iphdr);
1164 struct flowi fl = { .oif = vif->link,
1166 { .daddr = iph->daddr,
1167 .tos = RT_TOS(iph->tos) } },
1168 .proto = IPPROTO_IPIP };
1169 if (ip_route_output_key(&rt, &fl))
1173 dev = rt->u.dst.dev;
1175 if (skb->len+encap > dst_pmtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1176 /* Do not fragment multicasts. Alas, IPv4 does not
1177 allow to send ICMP, so that packets will disappear
1181 IP_INC_STATS_BH(FragFails);
1186 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1188 if (skb_cow(skb, encap)) {
1194 vif->bytes_out+=skb->len;
1196 dst_release(skb->dst);
1197 skb->dst = &rt->u.dst;
1199 ip_decrease_ttl(iph);
1201 /* FIXME: forward and output firewalls used to be called here.
1202 * What do we do with netfilter? -- RR */
1203 if (vif->flags & VIFF_TUNNEL) {
1204 ip_encap(skb, vif->local, vif->remote);
1205 /* FIXME: extra output firewall step used to be here. --RR */
1206 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1207 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
1210 IPCB(skb)->flags |= IPSKB_FORWARDED;
1213 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1214 * not only before forwarding, but after forwarding on all output
1215 * interfaces. It is clear, if mrouter runs a multicasting
1216 * program, it should receive packets not depending to what interface
1217 * program is joined.
1218 * If we will not make it, the program will have to join on all
1219 * interfaces. On the other hand, multihoming host (or router, but
1220 * not mrouter) cannot join to more than one interface - it will
1221 * result in receiving multiple packets.
1223 NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
1224 ipmr_forward_finish);
1232 static int ipmr_find_vif(struct net_device *dev)
1235 for (ct=maxvif-1; ct>=0; ct--) {
1236 if (vif_table[ct].dev == dev)
1242 /* "local" means that we should preserve one skb (for local delivery) */
1244 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1249 vif = cache->mfc_parent;
1250 cache->mfc_un.res.pkt++;
1251 cache->mfc_un.res.bytes += skb->len;
1254 * Wrong interface: drop packet and (maybe) send PIM assert.
1256 if (vif_table[vif].dev != skb->dev) {
1259 if (((struct rtable*)skb->dst)->fl.iif == 0) {
1260 /* It is our own packet, looped back.
1261 Very complicated situation...
1263 The best workaround until routing daemons will be
1264 fixed is not to redistribute packet, if it was
1265 send through wrong interface. It means, that
1266 multicast applications WILL NOT work for
1267 (S,G), which have default multicast route pointing
1268 to wrong oif. In any case, it is not a good
1269 idea to use multicasting applications on router.
1274 cache->mfc_un.res.wrong_if++;
1275 true_vifi = ipmr_find_vif(skb->dev);
1277 if (true_vifi >= 0 && mroute_do_assert &&
1278 /* pimsm uses asserts, when switching from RPT to SPT,
1279 so that we cannot check that packet arrived on an oif.
1280 It is bad, but otherwise we would need to move pretty
1281 large chunk of pimd to kernel. Ough... --ANK
1283 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1285 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1286 cache->mfc_un.res.last_assert = jiffies;
1287 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1292 vif_table[vif].pkt_in++;
1293 vif_table[vif].bytes_in+=skb->len;
1298 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1299 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1301 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1303 ipmr_queue_xmit(skb2, cache, psend);
1310 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1312 ipmr_queue_xmit(skb2, cache, psend);
1314 ipmr_queue_xmit(skb, cache, psend);
1327 * Multicast packets for forwarding arrive here
1330 int ip_mr_input(struct sk_buff *skb)
1332 struct mfc_cache *cache;
1333 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1335 /* Packet is looped back after forward, it should not be
1336 forwarded second time, but still can be delivered locally.
1338 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1342 if (IPCB(skb)->opt.router_alert) {
1343 if (ip_call_ra_chain(skb))
1345 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1346 /* IGMPv1 (and broken IGMPv2 implementations sort of
1347 Cisco IOS <= 11.2(8)) do not put router alert
1348 option to IGMP packets destined to routable
1349 groups. It is very bad, because it means
1350 that we can forward NO IGMP messages.
1352 read_lock(&mrt_lock);
1353 if (mroute_socket) {
1354 raw_rcv(mroute_socket, skb);
1355 read_unlock(&mrt_lock);
1358 read_unlock(&mrt_lock);
1362 read_lock(&mrt_lock);
1363 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1366 * No usable cache entry
1372 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1373 ip_local_deliver(skb);
1375 read_unlock(&mrt_lock);
1381 vif = ipmr_find_vif(skb->dev);
1383 int err = ipmr_cache_unresolved(vif, skb);
1384 read_unlock(&mrt_lock);
1388 read_unlock(&mrt_lock);
1393 ip_mr_forward(skb, cache, local);
1395 read_unlock(&mrt_lock);
1398 return ip_local_deliver(skb);
1404 return ip_local_deliver(skb);
1409 #ifdef CONFIG_IP_PIMSM_V1
1411 * Handle IGMP messages of PIMv1
1414 int pim_rcv_v1(struct sk_buff * skb)
1416 struct igmphdr *pim;
1417 struct iphdr *encap;
1418 struct net_device *reg_dev = NULL;
1420 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1423 pim = (struct igmphdr*)skb->h.raw;
1425 if (!mroute_do_pim ||
1426 skb->len < sizeof(*pim) + sizeof(*encap) ||
1427 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1430 encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1433 a. packet is really destinted to a multicast group
1434 b. packet is not a NULL-REGISTER
1435 c. packet is not truncated
1437 if (!MULTICAST(encap->daddr) ||
1438 encap->tot_len == 0 ||
1439 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1442 read_lock(&mrt_lock);
1443 if (reg_vif_num >= 0)
1444 reg_dev = vif_table[reg_vif_num].dev;
1447 read_unlock(&mrt_lock);
1449 if (reg_dev == NULL)
1452 skb->mac.raw = skb->nh.raw;
1453 skb_pull(skb, (u8*)encap - skb->data);
1454 skb->nh.iph = (struct iphdr *)skb->data;
1456 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1457 skb->protocol = htons(ETH_P_IP);
1459 skb->pkt_type = PACKET_HOST;
1460 dst_release(skb->dst);
1462 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1463 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1464 #ifdef CONFIG_NETFILTER
1465 nf_conntrack_put(skb->nfct);
1477 #ifdef CONFIG_IP_PIMSM_V2
1478 static int pim_rcv(struct sk_buff * skb)
1480 struct pimreghdr *pim;
1481 struct iphdr *encap;
1482 struct net_device *reg_dev = NULL;
1484 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1487 pim = (struct pimreghdr*)skb->h.raw;
1488 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1489 (pim->flags&PIM_NULL_REGISTER) ||
1490 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1491 (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1494 /* check if the inner packet is destined to mcast group */
1495 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1496 if (!MULTICAST(encap->daddr) ||
1497 encap->tot_len == 0 ||
1498 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1501 read_lock(&mrt_lock);
1502 if (reg_vif_num >= 0)
1503 reg_dev = vif_table[reg_vif_num].dev;
1506 read_unlock(&mrt_lock);
1508 if (reg_dev == NULL)
1511 skb->mac.raw = skb->nh.raw;
1512 skb_pull(skb, (u8*)encap - skb->data);
1513 skb->nh.iph = (struct iphdr *)skb->data;
1515 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1516 skb->protocol = htons(ETH_P_IP);
1518 skb->pkt_type = PACKET_HOST;
1519 dst_release(skb->dst);
1520 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1521 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1523 #ifdef CONFIG_NETFILTER
1524 nf_conntrack_put(skb->nfct);
1537 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1540 struct rtnexthop *nhp;
1541 struct net_device *dev = vif_table[c->mfc_parent].dev;
1543 struct rtattr *mp_head;
1546 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1548 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1550 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1551 if (c->mfc_un.res.ttls[ct] < 255) {
1552 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1553 goto rtattr_failure;
1554 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1555 nhp->rtnh_flags = 0;
1556 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1557 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1558 nhp->rtnh_len = sizeof(*nhp);
1561 mp_head->rta_type = RTA_MULTIPATH;
1562 mp_head->rta_len = skb->tail - (u8*)mp_head;
1563 rtm->rtm_type = RTN_MULTICAST;
1567 skb_trim(skb, b - skb->data);
1571 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1574 struct mfc_cache *cache;
1575 struct rtable *rt = (struct rtable*)skb->dst;
1577 read_lock(&mrt_lock);
1578 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1581 struct net_device *dev;
1585 read_unlock(&mrt_lock);
1590 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1591 read_unlock(&mrt_lock);
1594 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1595 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1596 skb->nh.iph->saddr = rt->rt_src;
1597 skb->nh.iph->daddr = rt->rt_dst;
1598 skb->nh.iph->version = 0;
1599 err = ipmr_cache_unresolved(vif, skb);
1600 read_unlock(&mrt_lock);
1604 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1605 cache->mfc_flags |= MFC_NOTIFY;
1606 err = ipmr_fill_mroute(skb, cache, rtm);
1607 read_unlock(&mrt_lock);
1611 #ifdef CONFIG_PROC_FS
1613 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1615 struct ipmr_vif_iter {
1619 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1622 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1623 if(!VIF_EXISTS(iter->ct))
1626 return &vif_table[iter->ct];
1631 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1633 read_lock(&mrt_lock);
1634 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1638 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1640 struct ipmr_vif_iter *iter = seq->private;
1643 if (v == SEQ_START_TOKEN)
1644 return ipmr_vif_seq_idx(iter, 0);
1646 while (++iter->ct < maxvif) {
1647 if(!VIF_EXISTS(iter->ct))
1649 return &vif_table[iter->ct];
/* seq_file ->stop: release the mrt_lock taken in ipmr_vif_seq_start(). */
1654 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1656 read_unlock(&mrt_lock);
/*
 * seq_file ->show: print the column header for SEQ_START_TOKEN,
 * otherwise one formatted line of statistics for a single vif.
 * A vif whose device is gone is reported with the name "none".
 */
1659 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1661 if (v == SEQ_START_TOKEN) {
1663 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1665 const struct vif_device *vif = v;
1666 const char *name = vif->dev ? vif->dev->name : "none";
1669 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1671 name, vif->bytes_in, vif->pkt_in,
1672 vif->bytes_out, vif->pkt_out,
1673 vif->flags, vif->local, vif->remote);
/* seq_file iterator operations for /proc/net/ip_mr_vif. */
1678 static struct seq_operations ipmr_vif_seq_ops = {
1679 .start = ipmr_vif_seq_start,
1680 .next = ipmr_vif_seq_next,
1681 .stop = ipmr_vif_seq_stop,
1682 .show = ipmr_vif_seq_show,
/*
 * ->open for /proc/net/ip_mr_vif: allocate the per-walk iterator,
 * open the seq_file, and attach the iterator as its private data.
 * (kmalloc-failure and seq_open-failure cleanup paths are elided
 * from this excerpt.)
 */
1685 static int ipmr_vif_open(struct inode *inode, struct file *file)
1687 struct seq_file *seq;
1689 struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1694 rc = seq_open(file, &ipmr_vif_seq_ops);
1699 seq = file->private_data;
/* file_operations for /proc/net/ip_mr_vif (the .read = seq_read
 * initializer is elided in this excerpt). */
1710 static struct file_operations ipmr_vif_fops = {
1711 .owner = THIS_MODULE,
1712 .open = ipmr_vif_open,
1713 .llseek = seq_lseek,
1714 .release = seq_release,
/* Per-open cursor for /proc/net/ip_mr_cache.  `cache' records which
 * table the walk is in — mfc_cache_array (resolved, under mrt_lock)
 * or &mfc_unres_queue (unresolved, under mfc_unres_lock) — so that
 * ipmr_mfc_seq_stop() can drop the matching lock. */
1717 struct ipmr_mfc_iter {
1718 struct mfc_cache **cache;
/*
 * Position the iterator: first walk the resolved hash chains in
 * mfc_cache_array under mrt_lock, then the unresolved queue under
 * mfc_unres_lock.  The in-loop return statements (which leave the
 * corresponding lock held for seq_stop) are elided in this excerpt;
 * each lock visible here is dropped only when its table is exhausted.
 */
1723 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1725 struct mfc_cache *mfc;
1727 it->cache = mfc_cache_array;
1728 read_lock(&mrt_lock);
1729 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1730 for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1733 read_unlock(&mrt_lock);
/* Resolved cache exhausted — fall through to the unresolved queue. */
1735 it->cache = &mfc_unres_queue;
1736 spin_lock_bh(&mfc_unres_lock);
1737 for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1740 spin_unlock_bh(&mfc_unres_lock);
/*
 * seq_file ->start for /proc/net/ip_mr_cache.  Unlike the vif walk,
 * locking is done inside ipmr_mfc_seq_idx(), which leaves the lock
 * for whichever table it stops in held.  The *pos == 0 branch is
 * elided; ipmr_mfc_seq_show() expects SEQ_START_TOKEN there.
 */
1747 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1749 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
/*
 * seq_file ->next: advance within the current hash chain (that step
 * is elided here), then to the next non-empty bucket; when the
 * resolved array is exhausted, drop mrt_lock and switch the walk to
 * the unresolved queue under mfc_unres_lock.  If already in the
 * unresolved queue, the walk ends (return elided).
 */
1753 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1755 struct mfc_cache *mfc = v;
1756 struct ipmr_mfc_iter *it = seq->private;
1760 if (v == SEQ_START_TOKEN)
1761 return ipmr_mfc_seq_idx(seq->private, 0);
1766 if (it->cache == &mfc_unres_queue)
/* Must still be walking the resolved array at this point. */
1769 BUG_ON(it->cache != mfc_cache_array);
1771 while (++it->ct < MFC_LINES) {
1772 mfc = mfc_cache_array[it->ct];
1777 /* exhausted cache_array, show unresolved */
1778 read_unlock(&mrt_lock);
1779 it->cache = &mfc_unres_queue;
1782 spin_lock_bh(&mfc_unres_lock);
1783 mfc = mfc_unres_queue;
1788 spin_unlock_bh(&mfc_unres_lock);
/*
 * seq_file ->stop: release whichever lock the iterator still holds,
 * identified by which table it->cache points at.  If the walk never
 * entered either table (header-only read), neither branch fires.
 */
1794 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1796 struct ipmr_mfc_iter *it = seq->private;
1798 if (it->cache == &mfc_unres_queue)
1799 spin_unlock_bh(&mfc_unres_lock);
1800 else if (it->cache == mfc_cache_array)
1801 read_unlock(&mrt_lock);
/*
 * seq_file ->show: print the column header for SEQ_START_TOKEN,
 * otherwise one cache entry.  For resolved entries only, the per-vif
 * output interfaces are appended for the [minvif, maxvif) range; the
 * elided part of the condition at line 1827 presumably also checks
 * VIF_EXISTS(n) — confirm against the full source.
 */
1804 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1808 if (v == SEQ_START_TOKEN) {
1810 "Group Origin Iif Pkts Bytes Wrong Oifs\n")
1812 const struct mfc_cache *mfc = v;
1813 const struct ipmr_mfc_iter *it = seq->private;
1815 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1816 (unsigned long) mfc->mfc_mcastgrp,
1817 (unsigned long) mfc->mfc_origin,
1819 mfc->mfc_un.res.pkt,
1820 mfc->mfc_un.res.bytes,
1821 mfc->mfc_un.res.wrong_if);
/* Unresolved entries have no resolved oif list to print. */
1823 if (it->cache != &mfc_unres_queue) {
1824 for(n = mfc->mfc_un.res.minvif;
1825 n < mfc->mfc_un.res.maxvif; n++ ) {
1827 && mfc->mfc_un.res.ttls[n] < 255)
1830 n, mfc->mfc_un.res.ttls[n]);
1833 seq_putc(seq, '\n');
/* seq_file iterator operations for /proc/net/ip_mr_cache. */
1838 static struct seq_operations ipmr_mfc_seq_ops = {
1839 .start = ipmr_mfc_seq_start,
1840 .next = ipmr_mfc_seq_next,
1841 .stop = ipmr_mfc_seq_stop,
1842 .show = ipmr_mfc_seq_show,
/*
 * ->open for /proc/net/ip_mr_cache: allocate and zero the per-walk
 * iterator, open the seq_file, and attach the iterator as its private
 * data.  (Failure cleanup paths are elided from this excerpt.)
 */
1845 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1847 struct seq_file *seq;
1849 struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1854 rc = seq_open(file, &ipmr_mfc_seq_ops);
/* Zeroed so it->cache starts NULL — seq_stop then drops no lock. */
1858 memset(s, 0, sizeof(*s));
1859 seq = file->private_data;
/* file_operations for /proc/net/ip_mr_cache (the .read = seq_read
 * initializer is elided in this excerpt). */
1870 static struct file_operations ipmr_mfc_fops = {
1871 .owner = THIS_MODULE,
1872 .open = ipmr_mfc_open,
1873 .llseek = seq_lseek,
1874 .release = seq_release,
/* inet protocol hook registered when PIM-SM v2 support is built in;
 * handler field initializers are elided in this excerpt. */
1878 #ifdef CONFIG_IP_PIMSM_V2
1879 static struct inet_protocol pim_protocol = {
1886 * Setup for IP multicast routing
1889 void __init ip_mr_init(void)
1891 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1892 sizeof(struct mfc_cache),
1893 0, SLAB_HWCACHE_ALIGN,
1896 panic("cannot allocate ip_mrt_cache");
1898 init_timer(&ipmr_expire_timer);
1899 ipmr_expire_timer.function=ipmr_expire_process;
1900 register_netdevice_notifier(&ip_mr_notifier);
1901 #ifdef CONFIG_PROC_FS
1902 proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1903 proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);