Separate the PlanetLab modifications from util-vserver
[util-vserver-pl.git] / src / vip6-autod.c
1 /*
2  * $Id$
3  * Copyright (c) 2007 The Trustees of Princeton University
4  * Author: Daniel Hokka Zakrisson <daniel@hozac.com>
5  *
6  * Licensed under the terms of the GNU General Public License
7  * version 2 or later.
8  */
9
10 #ifdef HAVE_CONFIG_H
11 #  include <config.h>
12 #endif
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <stdint.h>
18 #include <string.h>
19 #include <time.h>
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/ioctl.h>
23 #include <arpa/inet.h>
24 #include <dirent.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <signal.h>
28 #include <syslog.h>
29
30 #include <asm/types.h>
31 #include <netlink/netlink.h>
32 #include <netlink/route/addr.h>
33
34 #include <vserver.h>
35
/* Bits stored in struct prefix.mask, recording which half of the record is filled in */
#define HAS_ADDRESS     0x01    /* set by add_address() */
#define HAS_PREFIX      0x02    /* set by add_prefix() */
38
/*
 * Singly linked list of network context ids, built by add_nid_to_list().
 * cleanup_nids() walks two such lists in parallel and expects ascending nids.
 */
struct nid_list {
        nid_t nid;
        struct nid_list *next;          /* NULL terminates the list */
};
/*
 * One autoconfiguration record: the prefix reported by a prefix message and
 * the matching address reported by an address message.
 */
struct prefix {
        uint32_t mask;                  /* HAS_ADDRESS / HAS_PREFIX bits */
        int ifindex;                    /* taken from prefixmsg.prefix_ifindex */
        /* filled in by add_prefix() */
        struct {
                struct in6_addr addr;
                int prefix_len;
                time_t valid_until;     /* time of last update + preferred_time */
        } prefix;
        /* filled in by add_address() */
        struct {
                struct in6_addr addr;
                int prefix_len;
                time_t valid_until;     /* time of last update + ifa_prefered */
        } address;
};
/*
 * A (prefix, nid) pair that is a member of two doubly linked lists at once.
 * The file-scope `map' object is used as the head of both lists; only its
 * `next' pointers are ever assigned by add_to_map().
 */
struct nid_prefix_map {
        /* links of the list add_to_map() orders by nid */
        struct {
                struct nid_prefix_map *prev;
                struct nid_prefix_map *next;
        } n;
        /* links of the list add_to_map() orders by prefix pointer */
        struct {
                struct nid_prefix_map *prev;
                struct nid_prefix_map *next;
        } p;
        struct prefix *prefix;
        nid_t nid;                      /* 0 for entries added via add_prefix_to_map() */
};
69
/* netlink handle: allocated in main(), used by the address add/remove helpers */
struct nl_handle *handle;
71
/*
 * Adapted from linux/include/net/ipv6.h.
 *
 * Compare the first `prefixlen' bits of two IPv6 addresses.
 *
 * Returns 1 when the leading bits are identical (always for prefixlen 0),
 * 0 when they differ or when prefixlen is outside 0..128.  The range check
 * matters because callers pass lengths taken straight from netlink messages.
 */
static inline int ipv6_prefix_equal(const struct in6_addr *prefix,
                                    const struct in6_addr *addr, int prefixlen)
{
        unsigned whole_bytes, tail_bits;

        if (prefixlen < 0 || prefixlen > 128)
                return 0;

        /* check the complete bytes of the prefix */
        whole_bytes = (unsigned) prefixlen >> 3;
        if (whole_bytes && memcmp(prefix->s6_addr, addr->s6_addr, whole_bytes))
                return 0;

        /* check the leading bits of the one incomplete byte, if any */
        tail_bits = (unsigned) prefixlen & 0x07;
        if (tail_bits) {
                uint8_t bitmask = (uint8_t) (0xff << (8 - tail_bits));

                if ((prefix->s6_addr[whole_bytes] ^ addr->s6_addr[whole_bytes]) & bitmask)
                        return 0;
        }

        return 1;
}
91
92 static int add_address_to_interface(int ifindex, struct in6_addr *address,
93                                     int prefix)
94 {
95         int err = -1;
96         struct rtnl_addr *rta;
97         struct nl_addr *nl;
98
99         nl = nl_addr_build(AF_INET6, address, sizeof(struct in6_addr));
100         rta = rtnl_addr_alloc();
101
102         rtnl_addr_set_family(rta, AF_INET6);
103         rtnl_addr_set_ifindex(rta, ifindex);
104         rtnl_addr_set_local(rta, nl);
105         rtnl_addr_set_prefixlen(rta, prefix);
106
107         if (rtnl_addr_add(handle, rta, NLM_F_REPLACE) != -1 || errno == EEXIST)
108                 err = 0;
109
110         rtnl_addr_free(rta);
111         nl_addr_destroy(nl);
112         return err;
113 }
114
115 static inline int remove_address_from_interface(struct nid_prefix_map *entry)
116 {
117         struct rtnl_addr *rta;
118         struct nl_addr *nl;
119         struct in6_addr a;
120         int ret;
121
122         memcpy(&a, &entry->prefix->address.addr, sizeof(a));
123         if (entry->nid != 0) {
124                 a.s6_addr[11] = (entry->nid & 0x7f80) >> 7;
125                 a.s6_addr[12] = (entry->nid & 0x7f) << 1;
126         }
127
128         nl = nl_addr_build(AF_INET6, &a, sizeof(a));
129         if (!nl)
130                 return -1;
131         rta = rtnl_addr_alloc();
132         if (!rta)
133                 return -1;
134
135         rtnl_addr_set_family(rta, AF_INET6);
136         rtnl_addr_set_ifindex(rta, entry->prefix->ifindex);
137         rtnl_addr_set_local(rta, nl);
138         rtnl_addr_set_prefixlen(rta, entry->prefix->address.prefix_len);
139
140         ret = rtnl_addr_delete(handle, rta, 0);
141
142         rtnl_addr_free(rta);
143         nl_addr_destroy(nl);
144
145         return ret;
146 }
147
148 static int add_to_map(struct nid_prefix_map *map, struct nid_prefix_map *new)
149 {
150         struct nid_prefix_map *i;
151 #define PUT_IT_IN_PLACE(node, member, om)                               \
152         /* find the correct location in the list */                     \
153         for (i = map->node.next; i->node.next && i->member <            \
154              new->member; i = i->node.next)                             \
155                 ;                                                       \
156         if (i && i->member == new->member && i->om == new->om)          \
157                 return 0;                                               \
158         /* first in the list */                                         \
159         if (!i || !i->node.prev) {                                      \
160                 new->node.prev = NULL;                                  \
161                 new->node.next = i;                                     \
162                 map->node.next = new;                                   \
163                 if (i)                                                  \
164                         i->node.prev = new;                             \
165         }                                                               \
166         /* last in the list */                                          \
167         else if (i->node.next == NULL) {                                \
168                 new->node.prev = i;                                     \
169                 new->node.next = NULL;                                  \
170                 i->node.next = new;                                     \
171         }                                                               \
172         /* somewhere in the middle */                                   \
173         else {                                                          \
174                 new->node.prev = i->node.prev;                          \
175                 new->node.next = i;                                     \
176                 i->node.prev->node.next = new;                          \
177                 i->node.prev = new;                                     \
178         }
179         PUT_IT_IN_PLACE(p, prefix, nid)
180         PUT_IT_IN_PLACE(n, nid, prefix)
181         return 1;
182 }
183
184 static inline void remove_from_map(struct nid_prefix_map *map,
185                                    struct nid_prefix_map *entry)
186 {
187         if (map->n.next == entry)
188                 map->n.next = entry->n.next;
189         if (map->n.prev == entry)
190                 map->n.prev = entry->n.prev;
191         if (map->p.next == entry)
192                 map->p.next = entry->p.next;
193         if (map->p.prev == entry)
194                 map->p.prev = entry->p.prev;
195 }
196
/*
 * Take `entry' out of `map' and release its memory.  The struct prefix the
 * entry points at is left alone.
 */
static inline void remove_from_map_and_free(struct nid_prefix_map *map,
                                            struct nid_prefix_map *entry)
{
        /* detach first: remove_from_map() still reads the entry's links */
        remove_from_map(map, entry);

        free(entry);
}
203
204 static int add_nid_to_map(struct nid_prefix_map *map, struct prefix *prefix,
205                           nid_t nid)
206 {
207         struct nid_prefix_map *new = calloc(1, sizeof(struct nid_prefix_map));
208         int ret;
209
210         if (!new)
211                 return -1;
212
213         new->prefix = prefix;
214         new->nid = nid;
215         ret = add_to_map(map, new);
216
217         if (ret == 0)
218                 free(new);
219
220         return ret;
221 }
222
223 static int add_prefix_to_map(struct nid_prefix_map *map, struct prefix *prefix)
224 {
225         return add_nid_to_map(map, prefix, 0);
226 }
227
228 static void cleanup_prefix(struct nid_prefix_map *map,
229                            struct nid_prefix_map *first)
230 {
231         struct nid_prefix_map *i, *p = NULL;
232
233         for (i = first; i && first->prefix == i->prefix; i = i->p.next) {
234                 if (p)
235                         remove_from_map_and_free(map, p);
236
237                 /* ignore errors */
238                 remove_address_from_interface(i);
239
240                 p = i;
241         }
242         if (p)
243                 remove_from_map_and_free(map, p);
244 }
245
246 static inline int add_nid_to_list(struct nid_list **head, nid_t nid)
247 {
248         struct nid_list *i, *new;
249
250         for (i = *head; i && i->next && i->next->nid < nid; i = i->next)
251                 ;
252         /* check if this nid is first in the list */
253         if (i && i->nid == nid)
254                 return 0;
255         /* check if it's already in the list */
256         if (i && i->next && i->next->nid == nid)
257                 return 0;
258
259         /* add it */
260         new = calloc(1, sizeof(struct nid_list));
261         if (!new)
262                 return -1;
263         new->nid = nid;
264
265         /* this is the lowest nid in the list */
266         if (i == *head) {
267                 *head = new;
268                 new->next = i;
269         }
270         /* in the middle/at the end */
271         else if (i) {
272                 new->next = i->next;
273                 i->next = new;
274         }
275         /* there was no list */
276         else
277                 *head = new;
278
279         return 1;
280 }
281
282 static inline void free_nid_list(struct nid_list *head)
283 {
284         struct nid_list *p;
285         for (p = NULL; head; head = head->next) {
286                 if (p)
287                         free(p);
288                 p = head;
289         }
290         if (p)
291                 free(p);
292 }
293
294 static inline void cleanup_nid(struct nid_prefix_map *map,
295                                nid_t nid)
296 {
297         struct nid_prefix_map *i, *p = NULL;
298         for (i = map->n.next; i->nid < nid; i = i->n.next)
299                 ;
300         /* this nid doesn't have any entries in the map */
301         if (i->nid != nid)
302                 return;
303         for (; i->nid == nid; i = i->n.next) {
304                 if (p)
305                         remove_from_map_and_free(map, p);
306                 remove_address_from_interface(i);
307                 p = i;
308         }
309         if (p)
310                 remove_from_map_and_free(map, p);
311 }
312
313 static inline void cleanup_nids(struct nid_prefix_map *map,
314                                 struct nid_list *previous,
315                                 struct nid_list *current)
316 {
317         struct nid_list *p, *pprev = NULL, *c;
318         for (p = previous, c = current; p; pprev = p, p = p->next) {
319                 if (pprev)
320                         free(pprev);
321                 while (c->nid < p->nid)
322                         c = c->next;
323                 if (c->nid == p->nid)
324                         continue;
325                 /* this context has disappeared */
326                 cleanup_nid(map, p->nid);
327         }
328         if (pprev)
329                 free(pprev);
330 }
331
332 static void do_slices_autoconf(struct nid_prefix_map *map)
333 {
334         DIR *dp;
335         struct dirent *de;
336         struct vc_net_addr addr;
337         struct nid_prefix_map *i;
338         struct nid_list *current = NULL, *n;
339         static struct nid_list *previous = NULL;
340
341         if ((dp = opendir("/proc/virtnet")) == NULL)
342                 return;
343         while ((de = readdir(dp)) != NULL) {
344                 nid_t nid;
345
346                 if (!isdigit(de->d_name[0]))
347                         continue;
348
349                 nid = strtoul(de->d_name, NULL, 10);
350                 addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ANY;
351                 if (vc_net_remove(nid, &addr) == -1) {
352                         syslog(LOG_ERR, "vc_net_remove(%u): %s", nid, strerror(errno));
353                         continue;
354                 }
355
356                 add_nid_to_list(&current, nid);
357         }
358         closedir(dp);
359
360         for (n = current; n; n = n->next) {
361                 for (i = map->p.next; i && i->nid == 0;) {
362                         /* expired */
363                         if (i->prefix->mask & HAS_PREFIX && i->prefix->prefix.valid_until < time(NULL)) {
364                                 struct nid_prefix_map *tmp;
365                                 char buf[64];
366
367                                 inet_ntop(AF_INET6, &i->prefix->address.addr, buf, sizeof(buf));
368                                 syslog(LOG_NOTICE, "Address %s timed out", buf);
369
370                                 tmp = i->p.next;
371
372                                 cleanup_prefix(map, i);
373
374                                 i = tmp;
375                                 continue;
376                         }
377                         if (i->prefix->mask != (HAS_ADDRESS|HAS_PREFIX))
378                                 goto next;
379
380                         addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ADDR;
381                         memcpy(&addr.vna_v6_ip, &i->prefix->address.addr, sizeof(struct in6_addr));
382                         addr.vna_prefix = i->prefix->prefix.prefix_len;
383                         if (addr.vna_prefix == 64) {
384                                 addr.vna_v6_mask.s6_addr32[0] = addr.vna_v6_mask.s6_addr32[1] = 0xffffffff;
385                                 addr.vna_v6_mask.s6_addr32[2] = addr.vna_v6_mask.s6_addr32[3] = 0;
386                         }
387                         addr.vna_v6_ip.s6_addr[11] = (n->nid & 0x7f80) >> 7;
388                         addr.vna_v6_ip.s6_addr[12] = (n->nid & 0x007f) << 1;
389                         if (vc_net_add(n->nid, &addr) == -1) {
390                                 syslog(LOG_ERR, "vc_net_add(%u): %s", n->nid, strerror(errno));
391                                 goto next;
392                         }
393                         if (add_address_to_interface(i->prefix->ifindex, &addr.vna_v6_ip, addr.vna_prefix) == -1) {
394                                 syslog(LOG_ERR, "add_address_to_interface: %s", strerror(errno));
395                                 goto next;
396                         }
397                         if (add_nid_to_map(map, i->prefix, n->nid) == -1) {
398                                 syslog(LOG_ERR, "add_nid_to_map: %s", strerror(errno));
399                                 goto next;
400                         }
401 next:
402                         i = i->p.next;
403                 }
404         }
405
406         cleanup_nids(map, previous, current);
407         previous = current;
408 }
409
410 /* XXX These two functions are very similar */
411 static int add_prefix(struct nid_prefix_map *map, struct prefixmsg *msg,
412                       struct in6_addr *prefix, struct prefix_cacheinfo *cache)
413 {
414         struct nid_prefix_map *i = map;
415         struct prefix *new;
416
417         if (!msg || !prefix || !cache)
418                 return -1;
419         /* XXX IF_PREFIX_AUTOCONF == 0x02 */
420         if (!(msg->prefix_flags & 0x02))
421                 return -1;
422
423         do {
424                 if (i->p.next != NULL)
425                         i = i->p.next;
426                 if (ipv6_prefix_equal(prefix, &i->prefix->prefix.addr, msg->prefix_len) ||
427                     ipv6_prefix_equal(prefix, &i->prefix->address.addr, msg->prefix_len)) {
428                         i->prefix->mask |= HAS_PREFIX;
429                         i->prefix->ifindex = msg->prefix_ifindex;
430                         memcpy(&i->prefix->prefix.addr, prefix, sizeof(*prefix));
431                         i->prefix->prefix.prefix_len = msg->prefix_len;
432                         i->prefix->prefix.valid_until = time(NULL) + cache->preferred_time;
433                         return 0;
434                 }
435         } while (i->p.next && i->nid == 0);
436
437         /* not yet in the map */
438         new = calloc(1, sizeof(*new));
439         if (!new)
440                 return -1;
441         new->mask = HAS_PREFIX;
442         memcpy(&new->prefix.addr, prefix, sizeof(*prefix));
443         new->prefix.prefix_len = msg->prefix_len;
444         new->prefix.valid_until = time(NULL) + cache->preferred_time;
445         if (add_prefix_to_map(map, new) == -1)
446                 return -1;
447
448         return 1;
449 }
450
451 static inline int add_address(struct nid_prefix_map *map, struct ifaddrmsg *msg,
452                               struct in6_addr *address, struct ifa_cacheinfo *cache)
453 {
454         struct nid_prefix_map *i = map;
455         struct prefix *new;
456
457         if (!msg || !address || !cache)
458                 return -1;
459
460         if (address->s6_addr[11] != 0xFF || address->s6_addr[12] != 0xFE)
461                 return -1;
462
463         do {
464                 if (i->p.next != NULL)
465                         i = i->p.next;
466                 if (ipv6_prefix_equal(address, &i->prefix->prefix.addr, msg->ifa_prefixlen) ||
467                     ipv6_prefix_equal(address, &i->prefix->address.addr, 128)) {
468                         i->prefix->mask |= HAS_ADDRESS;
469                         memcpy(&i->prefix->address.addr, address, sizeof(*address));
470                         i->prefix->address.prefix_len = msg->ifa_prefixlen;
471                         i->prefix->address.valid_until = time(NULL) + cache->ifa_prefered;
472                         return 0;
473                 }
474         } while (i->p.next && i->nid == 0);
475
476         new = calloc(1, sizeof(*new));
477         if (!new)
478                 return -1;
479         new->mask = HAS_ADDRESS;
480         memcpy(&new->address.addr, address, sizeof(*address));
481         new->address.prefix_len = msg->ifa_prefixlen;
482         new->address.valid_until = time(NULL) + cache->ifa_prefered;
483         if (add_prefix_to_map(map, new) == -1)
484                 return -1;
485
486         return 1;
487 }
488
/*
 * Attribute policies handed to nlmsg_parse() in handle_valid_msg(): the
 * address and cacheinfo attributes must be at least as large as the
 * structures they are later read as.
 */
static struct nla_policy addr_policy[IFA_MAX+1] = {
        [IFA_ADDRESS]   = { .minlen = sizeof(struct in6_addr) },
        [IFA_LABEL]     = { .type = NLA_STRING,
                            .maxlen = IFNAMSIZ },
        [IFA_CACHEINFO] = { .minlen = sizeof(struct ifa_cacheinfo) },
};
static struct nla_policy prefix_policy[PREFIX_MAX+1] = {
        [PREFIX_ADDRESS]   = { .minlen = sizeof(struct in6_addr) },
        [PREFIX_CACHEINFO] = { .minlen = sizeof(struct prefix_cacheinfo) },
};
499 int handle_valid_msg(struct nl_msg *msg, void *arg)
500 {
501         struct nlmsghdr *nlh = nlmsg_hdr(msg);
502         int ret = -1;
503         char *payload;
504         struct sockaddr_nl *source = nlmsg_get_src(msg);
505
506         payload = nlmsg_data(nlh);
507         if (source->nl_groups == RTMGRP_IPV6_PREFIX) {
508                 struct prefixmsg *prefixmsg;
509                 struct in6_addr *prefix = NULL;
510                 struct prefix_cacheinfo *cacheinfo = NULL;
511                 struct nlattr *tb[PREFIX_MAX+1];
512
513                 if (nlmsg_parse(nlh, sizeof(struct prefixmsg), tb, PREFIX_MAX, prefix_policy) < 0) {
514                         syslog(LOG_ERR, "Failed to parse prefixmsg");
515                         return -1;
516                 }
517
518                 prefixmsg = (struct prefixmsg *) payload;
519                 if (tb[PREFIX_ADDRESS])
520                         prefix = nl_data_get(nla_get_data(tb[PREFIX_ADDRESS]));
521                 if (tb[PREFIX_CACHEINFO])
522                         cacheinfo = nl_data_get(nla_get_data(tb[PREFIX_CACHEINFO]));
523                 ret = add_prefix(arg, prefixmsg, prefix, cacheinfo);
524         }       
525         else if (source->nl_groups == RTMGRP_IPV6_IFADDR) {
526                 struct ifaddrmsg *ifaddrmsg;
527                 struct in6_addr *address = NULL;
528                 struct ifa_cacheinfo *cacheinfo = NULL;
529                 struct nlattr *tb[IFA_MAX+1];
530
531                 if (nlmsg_parse(nlh, sizeof(struct ifaddrmsg), tb, IFA_MAX, addr_policy) < 0) {
532                         syslog(LOG_ERR, "Failed to parse ifaddrmsg");
533                         return -1;
534                 }
535
536                 ifaddrmsg = (struct ifaddrmsg *) payload;
537                 if (tb[IFA_ADDRESS])
538                         address = nl_data_get(nla_get_data(tb[IFA_ADDRESS]));
539                 if (tb[IFA_CACHEINFO])
540                         cacheinfo = nl_data_get(nla_get_data(tb[IFA_CACHEINFO]));
541                 ret = add_address(arg, ifaddrmsg, address, cacheinfo);
542         }
543         if (ret >= 0)
544                 do_slices_autoconf(arg);
545
546         return 0;
547 }
548
549 int handle_error_msg(struct sockaddr_nl *source, struct nlmsgerr *err,
550                      void *arg)
551 {
552         syslog(LOG_ERR, "%s", strerror(err->error));
553         return 0;
554 }
555
/* Callback that accepts any message without looking at it; always returns 0. */
int handle_no_op(struct nl_msg *msg, void *arg)
{
        (void) msg;
        (void) arg;

        return 0;
}
560
/*
 * only for access in the signal handler
 *
 * Head of both lists of the nid/prefix map; starts out empty.  main() also
 * passes its address to the netlink callbacks.
 */
struct nid_prefix_map map = {
        .n = {
                .next = NULL,
                .prev = NULL,
        },
        .p = {
                .next = NULL,
                .prev = NULL,
        },
};
572 void signal_handler(int signal)
573 {
574         switch (signal) {
575         case SIGUSR1:
576                 do_slices_autoconf(&map);
577                 break;
578         }
579 }
580
/*
 * Write the process id of the caller, followed by a newline, to `filename',
 * replacing any previous contents.
 *
 * Returns 0 on success and -1 when the file cannot be opened or the write
 * (including the flush done on close) fails.
 */
static int write_pidfile(const char *filename)
{
        FILE *fp;
        int failed;

        fp = fopen(filename, "w");
        if (!fp)
                return -1;

        /* pid_t has no printf conversion of its own; long is wide enough */
        failed = fprintf(fp, "%ld\n", (long) getpid()) < 0;

        /* buffered data is only written out here, so the result matters */
        if (fclose(fp) == EOF)
                failed = 1;

        return failed ? -1 : 0;
}
591
592 int main(int argc, char *argv[])
593 {
594         struct nl_cb *cbs;
595
596         openlog("vip6-autod", LOG_PERROR, LOG_DAEMON);
597
598         handle = nl_handle_alloc_nondefault(NL_CB_VERBOSE);
599         cbs = nl_handle_get_cb(handle);
600         nl_cb_set(cbs, NL_CB_VALID, NL_CB_CUSTOM, handle_valid_msg, &map);
601         nl_cb_set(cbs, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, handle_no_op, NULL);
602         nl_cb_err(cbs, NL_CB_CUSTOM, handle_error_msg, &map);
603         nl_disable_sequence_check(handle);
604
605         nl_join_groups(handle, RTMGRP_IPV6_PREFIX|RTMGRP_IPV6_IFADDR);
606         if (nl_connect(handle, NETLINK_ROUTE) == -1) {
607                 syslog(LOG_CRIT, "nl_connect: %s", strerror(errno));
608                 exit(1);
609         }
610
611         if (daemon(0, 0) == -1)
612                 return -1;
613
614         write_pidfile(LOCALSTATEDIR "/run/vip6-autod.pid");
615
616         signal(SIGUSR1, signal_handler);
617
618         while (nl_recvmsgs(handle, cbs) > 0);
619
620         nl_close(handle);
621         closelog();
622         return 0;
623 }