_Much_ more complex vip6-autod, now cleans up after a stopped guest too.
[util-vserver.git] / src / vip6-autod.c
1 /*
2  * $Id$
3  * Copyright (c) 2007 The Trustees of Princeton University
4  * Author: Daniel Hokka Zakrisson <daniel@hozac.com>
5  *
6  * Licensed under the terms of the GNU General Public License
7  * version 2 or later.
8  */
9
10 #ifdef HAVE_CONFIG_H
11 #  include <config.h>
12 #endif
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <stdint.h>
18 #include <string.h>
19 #include <time.h>
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/ioctl.h>
23 #include <arpa/inet.h>
24 #include <dirent.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <signal.h>
28 #include <syslog.h>
29
30 #include <asm/types.h>
31 #include <netlink/netlink.h>
32 #include <netlink/route/addr.h>
33
34 #include <vserver.h>
35 #include "pathconfig.h"
36
/* Bits for struct prefix.mask: which halves of the record have been filled
 * in (HAS_ADDRESS is set by add_address(), HAS_PREFIX by add_prefix()). */
37 #define HAS_ADDRESS     0x01
38 #define HAS_PREFIX      0x02
39
/* Node of a singly linked list of network context ids (nids), as built by
 * add_nid_to_list() while scanning /proc/virtnet. */
40 struct nid_list {
41         nid_t nid;
42         struct nid_list *next;
43 };
/* One advertised prefix together with the host address learned for it. */
44 struct prefix {
        /* combination of HAS_ADDRESS / HAS_PREFIX */
45         uint32_t mask;
        /* interface index, taken from the prefix message */
46         int ifindex;
        /* filled in by add_prefix(); valid_until is the time of reception
         * plus the advertised preferred time */
47         struct {
48                 struct in6_addr addr;
49                 int prefix_len;
50                 time_t valid_until;
51         } prefix;
        /* filled in by add_address(); valid_until is the time of reception
         * plus the address' preferred lifetime */
52         struct {
53                 struct in6_addr addr;
54                 int prefix_len;
55                 time_t valid_until;
56         } address;
57 };
/* Entry tying a prefix to a nid.  Each entry is a member of two doubly
 * linked lists at once; the list heads live in the n.next / p.next members
 * of the map object itself.  Entries created through add_prefix_to_map()
 * carry nid 0. */
58 struct nid_prefix_map {
        /* links of the list that add_to_map() keys on nid */
59         struct {
60                 struct nid_prefix_map *prev;
61                 struct nid_prefix_map *next;
62         } n;
        /* links of the list that add_to_map() keys on the prefix pointer */
63         struct {
64                 struct nid_prefix_map *prev;
65                 struct nid_prefix_map *next;
66         } p;
67         struct prefix *prefix;
68         nid_t nid;
69 };
70
/* Netlink handle; allocated and connected in main(), used by the
 * add/remove address helpers to talk to the kernel. */
71 struct nl_handle *handle;
72
/*
 * Compare the leading prefixlen bits of two IPv6 addresses.
 *
 * Adapted from linux/include/net/ipv6.h, but works byte-wise on the
 * standard s6_addr member instead of the non-portable s6_addr32, and
 * refuses prefix lengths outside 0..128 instead of reading past the end
 * of the address.
 *
 * Returns 1 if the first prefixlen bits of prefix and addr are identical,
 * 0 if they differ or prefixlen is out of range.
 */
static inline int ipv6_prefix_equal(const struct in6_addr *prefix,
                                    const struct in6_addr *addr, int prefixlen)
{
        unsigned whole, rest;

        if (prefixlen < 0 || prefixlen > 128)
                return 0;

        /* check the complete bytes of the prefix */
        whole = (unsigned) prefixlen >> 3;
        if (whole && memcmp(prefix->s6_addr, addr->s6_addr, whole))
                return 0;

        /* check the leading bits of the trailing, incomplete byte */
        rest = (unsigned) prefixlen & 0x07;
        if (rest) {
                unsigned char mask = (unsigned char) (0xff << (8 - rest));

                if ((prefix->s6_addr[whole] ^ addr->s6_addr[whole]) & mask)
                        return 0;
        }

        return 1;
}
92
93 static int add_address_to_interface(int ifindex, struct in6_addr *address,
94                                     int prefix)
95 {
96         int err = -1;
97         struct rtnl_addr *rta;
98         struct nl_addr *nl;
99
100         nl = nl_addr_build(AF_INET6, address, sizeof(struct in6_addr));
101         rta = rtnl_addr_alloc();
102
103         rtnl_addr_set_family(rta, AF_INET6);
104         rtnl_addr_set_ifindex(rta, ifindex);
105         rtnl_addr_set_local(rta, nl);
106         rtnl_addr_set_prefixlen(rta, prefix);
107
108         if (rtnl_addr_add(handle, rta, NLM_F_REPLACE) != -1 || errno == EEXIST)
109                 err = 0;
110
111         rtnl_addr_free(rta);
112         nl_addr_destroy(nl);
113         return err;
114 }
115
116 static inline int remove_address_from_interface(struct nid_prefix_map *entry)
117 {
118         struct rtnl_addr *rta;
119         struct nl_addr *nl;
120         struct in6_addr a;
121         int ret;
122
123         memcpy(&a, &entry->prefix->address.addr, sizeof(a));
124         if (entry->nid != 0) {
125                 a.s6_addr[11] = (entry->nid & 0x7f80) >> 7;
126                 a.s6_addr[12] = (entry->nid & 0x7f) << 1;
127         }
128
129         nl = nl_addr_build(AF_INET6, &a, sizeof(a));
130         if (!nl)
131                 return -1;
132         rta = rtnl_addr_alloc();
133         if (!rta)
134                 return -1;
135
136         rtnl_addr_set_family(rta, AF_INET6);
137         rtnl_addr_set_ifindex(rta, entry->prefix->ifindex);
138         rtnl_addr_set_local(rta, nl);
139         rtnl_addr_set_prefixlen(rta, entry->prefix->address.prefix_len);
140
141         ret = rtnl_addr_delete(handle, rta, 0);
142
143         rtnl_addr_free(rta);
144         nl_addr_destroy(nl);
145
146         return ret;
147 }
148
149 static int add_to_map(struct nid_prefix_map *map, struct nid_prefix_map *new)
150 {
151         struct nid_prefix_map *i;
152 #define PUT_IT_IN_PLACE(node, member, om)                               \
153         /* find the correct location in the list */                     \
154         for (i = map->node.next; i->node.next && i->member <            \
155              new->member; i = i->node.next)                             \
156                 ;                                                       \
157         if (i && i->member == new->member && i->om == new->om)          \
158                 return 0;                                               \
159         /* first in the list */                                         \
160         if (!i || !i->node.prev) {                                      \
161                 new->node.prev = NULL;                                  \
162                 new->node.next = i;                                     \
163                 map->node.next = new;                                   \
164                 if (i)                                                  \
165                         i->node.prev = new;                             \
166         }                                                               \
167         /* last in the list */                                          \
168         else if (i->node.next == NULL) {                                \
169                 new->node.prev = i;                                     \
170                 new->node.next = NULL;                                  \
171                 i->node.next = new;                                     \
172         }                                                               \
173         /* somewhere in the middle */                                   \
174         else {                                                          \
175                 new->node.prev = i->node.prev;                          \
176                 new->node.next = i;                                     \
177                 i->node.prev->node.next = new;                          \
178                 i->node.prev = new;                                     \
179         }
180         PUT_IT_IN_PLACE(p, prefix, nid)
181         PUT_IT_IN_PLACE(n, nid, prefix)
182         return 1;
183 }
184
185 static inline void remove_from_map(struct nid_prefix_map *map,
186                                    struct nid_prefix_map *entry)
187 {
188         if (map->n.next == entry)
189                 map->n.next = entry->n.next;
190         if (map->n.prev == entry)
191                 map->n.prev = entry->n.prev;
192         if (map->p.next == entry)
193                 map->p.next = entry->p.next;
194         if (map->p.prev == entry)
195                 map->p.prev = entry->p.prev;
196 }
197
/*
 * Take an entry out of the map and release its memory.  The struct prefix
 * the entry refers to is left alone.
 */
static inline void remove_from_map_and_free(struct nid_prefix_map *map,
                                            struct nid_prefix_map *entry)
{
        remove_from_map(map, entry);

        free(entry);
}
204
205 static int add_nid_to_map(struct nid_prefix_map *map, struct prefix *prefix,
206                           nid_t nid)
207 {
208         struct nid_prefix_map *new = calloc(1, sizeof(struct nid_prefix_map));
209         int ret;
210
211         if (!new)
212                 return -1;
213
214         new->prefix = prefix;
215         new->nid = nid;
216         ret = add_to_map(map, new);
217
218         if (ret == 0)
219                 free(new);
220
221         return ret;
222 }
223
224 static int add_prefix_to_map(struct nid_prefix_map *map, struct prefix *prefix)
225 {
226         return add_nid_to_map(map, prefix, 0);
227 }
228
229 static void cleanup_prefix(struct nid_prefix_map *map,
230                            struct nid_prefix_map *first)
231 {
232         struct nid_prefix_map *i, *p = NULL;
233
234         for (i = first; i && first->prefix == i->prefix; i = i->p.next) {
235                 if (p)
236                         remove_from_map_and_free(map, p);
237
238                 /* ignore errors */
239                 remove_address_from_interface(i);
240
241                 p = i;
242         }
243         if (p)
244                 remove_from_map_and_free(map, p);
245 }
246
247 static inline int add_nid_to_list(struct nid_list **head, nid_t nid)
248 {
249         struct nid_list *i, *new;
250
251         for (i = *head; i && i->next && i->next->nid < nid; i = i->next)
252                 ;
253         /* check if this nid is first in the list */
254         if (i && i->nid == nid)
255                 return 0;
256         /* check if it's already in the list */
257         if (i && i->next && i->next->nid == nid)
258                 return 0;
259
260         /* add it */
261         new = calloc(1, sizeof(struct nid_list));
262         if (!new)
263                 return -1;
264         new->nid = nid;
265
266         /* this is the lowest nid in the list */
267         if (i == *head) {
268                 *head = new;
269                 new->next = i;
270         }
271         /* in the middle/at the end */
272         else if (i) {
273                 new->next = i->next;
274                 i->next = new;
275         }
276         /* there was no list */
277         else
278                 *head = new;
279
280         return 1;
281 }
282
283 static inline void free_nid_list(struct nid_list *head)
284 {
285         struct nid_list *p;
286         for (p = NULL; head; head = head->next) {
287                 if (p)
288                         free(p);
289                 p = head;
290         }
291         if (p)
292                 free(p);
293 }
294
295 static inline void cleanup_nid(struct nid_prefix_map *map,
296                                nid_t nid)
297 {
298         struct nid_prefix_map *i, *p = NULL;
299         for (i = map->n.next; i->nid < nid; i = i->n.next)
300                 ;
301         /* this nid doesn't have any entries in the map */
302         if (i->nid != nid)
303                 return;
304         for (; i->nid == nid; i = i->n.next) {
305                 if (p)
306                         remove_from_map_and_free(map, p);
307                 remove_address_from_interface(i);
308                 p = i;
309         }
310         if (p)
311                 remove_from_map_and_free(map, p);
312 }
313
314 static inline void cleanup_nids(struct nid_prefix_map *map,
315                                 struct nid_list *previous,
316                                 struct nid_list *current)
317 {
318         struct nid_list *p, *pprev = NULL, *c;
319         for (p = previous, c = current; p; pprev = p, p = p->next) {
320                 if (pprev)
321                         free(pprev);
322                 while (c->nid < p->nid)
323                         c = c->next;
324                 if (c->nid == p->nid)
325                         continue;
326                 /* this context has disappeared */
327                 cleanup_nid(map, p->nid);
328         }
329         if (pprev)
330                 free(pprev);
331 }
332
333 static void do_slices_autoconf(struct nid_prefix_map *map)
334 {
335         DIR *dp;
336         struct dirent *de;
337         struct vc_net_addr addr;
338         struct nid_prefix_map *i;
339         struct nid_list *current = NULL, *n;
340         static struct nid_list *previous = NULL;
341
342         if ((dp = opendir("/proc/virtnet")) == NULL)
343                 return;
344         while ((de = readdir(dp)) != NULL) {
345                 nid_t nid;
346
347                 if (!isdigit(de->d_name[0]))
348                         continue;
349
350                 nid = strtoul(de->d_name, NULL, 10);
351                 addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ANY;
352                 if (vc_net_remove(nid, &addr) == -1) {
353                         syslog(LOG_ERR, "vc_net_remove(%u): %s", nid, strerror(errno));
354                         continue;
355                 }
356
357                 add_nid_to_list(&current, nid);
358         }
359         closedir(dp);
360
361         for (n = current; n; n = n->next) {
362                 for (i = map->p.next; i && i->nid == 0;) {
363                         /* expired */
364                         if (i->prefix->mask & HAS_PREFIX && i->prefix->prefix.valid_until < time(NULL)) {
365                                 struct nid_prefix_map *tmp;
366                                 char buf[64];
367
368                                 inet_ntop(AF_INET6, &i->prefix->address.addr, buf, sizeof(buf));
369                                 syslog(LOG_NOTICE, "Address %s timed out", buf);
370
371                                 tmp = i->p.next;
372
373                                 cleanup_prefix(map, i);
374
375                                 i = tmp;
376                                 continue;
377                         }
378                         if (i->prefix->mask != (HAS_ADDRESS|HAS_PREFIX))
379                                 goto next;
380
381                         addr.vna_type = VC_NXA_TYPE_IPV6 | VC_NXA_TYPE_ADDR;
382                         memcpy(&addr.vna_v6_ip, &i->prefix->address.addr, sizeof(struct in6_addr));
383                         addr.vna_prefix = i->prefix->prefix.prefix_len;
384                         if (addr.vna_prefix == 64) {
385                                 addr.vna_v6_mask.s6_addr32[0] = addr.vna_v6_mask.s6_addr32[1] = 0xffffffff;
386                                 addr.vna_v6_mask.s6_addr32[2] = addr.vna_v6_mask.s6_addr32[3] = 0;
387                         }
388                         addr.vna_v6_ip.s6_addr[11] = (n->nid & 0x7f80) >> 7;
389                         addr.vna_v6_ip.s6_addr[12] = (n->nid & 0x007f) << 1;
390                         if (vc_net_add(n->nid, &addr) == -1) {
391                                 syslog(LOG_ERR, "vc_net_add(%u): %s", n->nid, strerror(errno));
392                                 goto next;
393                         }
394                         if (add_address_to_interface(i->prefix->ifindex, &addr.vna_v6_ip, addr.vna_prefix) == -1) {
395                                 syslog(LOG_ERR, "add_address_to_interface: %s", strerror(errno));
396                                 goto next;
397                         }
398                         if (add_nid_to_map(map, i->prefix, n->nid) == -1) {
399                                 syslog(LOG_ERR, "add_nid_to_map: %s", strerror(errno));
400                                 goto next;
401                         }
402 next:
403                         i = i->p.next;
404                 }
405         }
406
407         cleanup_nids(map, previous, current);
408         previous = current;
409 }
410
411 /* XXX These two functions are very similar */
412 static int add_prefix(struct nid_prefix_map *map, struct prefixmsg *msg,
413                       struct in6_addr *prefix, struct prefix_cacheinfo *cache)
414 {
415         struct nid_prefix_map *i = map;
416         struct prefix *new;
417
418         if (!msg || !prefix || !cache)
419                 return -1;
420         /* XXX IF_PREFIX_AUTOCONF == 0x02 */
421         if (!(msg->prefix_flags & 0x02))
422                 return -1;
423
424         do {
425                 if (i->p.next != NULL)
426                         i = i->p.next;
427                 if (ipv6_prefix_equal(prefix, &i->prefix->prefix.addr, msg->prefix_len) ||
428                     ipv6_prefix_equal(prefix, &i->prefix->address.addr, msg->prefix_len)) {
429                         i->prefix->mask |= HAS_PREFIX;
430                         i->prefix->ifindex = msg->prefix_ifindex;
431                         memcpy(&i->prefix->prefix.addr, prefix, sizeof(*prefix));
432                         i->prefix->prefix.prefix_len = msg->prefix_len;
433                         i->prefix->prefix.valid_until = time(NULL) + cache->preferred_time;
434                         return 0;
435                 }
436         } while (i->p.next && i->nid == 0);
437
438         /* not yet in the map */
439         new = calloc(1, sizeof(*new));
440         if (!new)
441                 return -1;
442         new->mask = HAS_PREFIX;
443         memcpy(&new->prefix.addr, prefix, sizeof(*prefix));
444         new->prefix.prefix_len = msg->prefix_len;
445         new->prefix.valid_until = time(NULL) + cache->preferred_time;
446         if (add_prefix_to_map(map, new) == -1)
447                 return -1;
448
449         return 1;
450 }
451
452 static inline int add_address(struct nid_prefix_map *map, struct ifaddrmsg *msg,
453                               struct in6_addr *address, struct ifa_cacheinfo *cache)
454 {
455         struct nid_prefix_map *i = map;
456         struct prefix *new;
457
458         if (!msg || !address || !cache)
459                 return -1;
460
461         if (address->s6_addr[11] != 0xFF || address->s6_addr[12] != 0xFE)
462                 return -1;
463
464         do {
465                 if (i->p.next != NULL)
466                         i = i->p.next;
467                 if (ipv6_prefix_equal(address, &i->prefix->prefix.addr, msg->ifa_prefixlen) ||
468                     ipv6_prefix_equal(address, &i->prefix->address.addr, 128)) {
469                         i->prefix->mask |= HAS_ADDRESS;
470                         memcpy(&i->prefix->address.addr, address, sizeof(*address));
471                         i->prefix->address.prefix_len = msg->ifa_prefixlen;
472                         i->prefix->address.valid_until = time(NULL) + cache->ifa_prefered;
473                         return 0;
474                 }
475         } while (i->p.next && i->nid == 0);
476
477         new = calloc(1, sizeof(*new));
478         if (!new)
479                 return -1;
480         new->mask = HAS_ADDRESS;
481         memcpy(&new->address.addr, address, sizeof(*address));
482         new->address.prefix_len = msg->ifa_prefixlen;
483         new->address.valid_until = time(NULL) + cache->ifa_prefered;
484         if (add_prefix_to_map(map, new) == -1)
485                 return -1;
486
487         return 1;
488 }
489
/* Attribute policy handed to nlmsg_parse() for ifaddrmsg messages in
 * handle_valid_msg(). */
490 static struct nla_policy addr_policy[IFA_MAX+1] = {
491         [IFA_ADDRESS]   = { .minlen = sizeof(struct in6_addr) },
492         [IFA_LABEL]     = { .type = NLA_STRING,
493                             .maxlen = IFNAMSIZ },
494         [IFA_CACHEINFO] = { .minlen = sizeof(struct ifa_cacheinfo) },
495 };
/* Attribute policy handed to nlmsg_parse() for prefixmsg messages in
 * handle_valid_msg(). */
496 static struct nla_policy prefix_policy[PREFIX_MAX+1] = {
497         [PREFIX_ADDRESS]   = { .minlen = sizeof(struct in6_addr) },
498         [PREFIX_CACHEINFO] = { .minlen = sizeof(struct prefix_cacheinfo) },
499 };
500 int handle_valid_msg(struct nl_msg *msg, void *arg)
501 {
502         struct nlmsghdr *nlh = nlmsg_hdr(msg);
503         int ret = -1;
504         char *payload;
505         struct sockaddr_nl *source = nlmsg_get_src(msg);
506
507         payload = nlmsg_data(nlh);
508         if (source->nl_groups == RTMGRP_IPV6_PREFIX) {
509                 struct prefixmsg *prefixmsg;
510                 struct in6_addr *prefix = NULL;
511                 struct prefix_cacheinfo *cacheinfo = NULL;
512                 struct nlattr *tb[PREFIX_MAX+1];
513
514                 if (nlmsg_parse(nlh, sizeof(struct prefixmsg), tb, PREFIX_MAX, prefix_policy) < 0) {
515                         syslog(LOG_ERR, "Failed to parse prefixmsg");
516                         return -1;
517                 }
518
519                 prefixmsg = (struct prefixmsg *) payload;
520                 if (tb[PREFIX_ADDRESS])
521                         prefix = nl_data_get(nla_get_data(tb[PREFIX_ADDRESS]));
522                 if (tb[PREFIX_CACHEINFO])
523                         cacheinfo = nl_data_get(nla_get_data(tb[PREFIX_CACHEINFO]));
524                 ret = add_prefix(arg, prefixmsg, prefix, cacheinfo);
525         }       
526         else if (source->nl_groups == RTMGRP_IPV6_IFADDR) {
527                 struct ifaddrmsg *ifaddrmsg;
528                 struct in6_addr *address = NULL;
529                 struct ifa_cacheinfo *cacheinfo = NULL;
530                 struct nlattr *tb[IFA_MAX+1];
531
532                 if (nlmsg_parse(nlh, sizeof(struct ifaddrmsg), tb, IFA_MAX, addr_policy) < 0) {
533                         syslog(LOG_ERR, "Failed to parse ifaddrmsg");
534                         return -1;
535                 }
536
537                 ifaddrmsg = (struct ifaddrmsg *) payload;
538                 if (tb[IFA_ADDRESS])
539                         address = nl_data_get(nla_get_data(tb[IFA_ADDRESS]));
540                 if (tb[IFA_CACHEINFO])
541                         cacheinfo = nl_data_get(nla_get_data(tb[IFA_CACHEINFO]));
542                 ret = add_address(arg, ifaddrmsg, address, cacheinfo);
543         }
544         if (ret >= 0)
545                 do_slices_autoconf(arg);
546
547         return 0;
548 }
549
550 int handle_error_msg(struct sockaddr_nl *source, struct nlmsgerr *err,
551                      void *arg)
552 {
553         syslog(LOG_ERR, "%s", strerror(err->error));
554         return 0;
555 }
556
/* Netlink callback that accepts every message without doing anything;
 * main() installs it as the sequence check. */
int handle_no_op(struct nl_msg *msg, void *arg)
{
        (void) msg;
        (void) arg;

        return 0;
}
561
562 /* only for access in the signal handler */
/* Head of the nid/prefix map: its n.next and p.next members point to the
 * first entries of the two lists; main() passes its address to the netlink
 * callbacks. */
563 struct nid_prefix_map map = {
564         .n = {
565                 .next = NULL,
566                 .prev = NULL,
567         },
568         .p = {
569                 .next = NULL,
570                 .prev = NULL,
571         },
572 };
573 void signal_handler(int signal)
574 {
575         switch (signal) {
576         case SIGUSR1:
577                 do_slices_autoconf(&map);
578                 break;
579         }
580 }
581
/*
 * Write the pid of the calling process, followed by a newline, to
 * filename, replacing any previous content.
 *
 * Returns 0 on success, -1 if the file could not be opened, written or
 * closed.
 */
static int write_pidfile(const char *filename)
{
        FILE *fp;
        int failed;

        fp = fopen(filename, "w");
        if (!fp)
                return -1;

        /* pid_t is not necessarily int; convert explicitly for %d */
        failed = fprintf(fp, "%d\n", (int) getpid()) < 0;
        /* buffered data is only written out here, so check this as well */
        if (fclose(fp) == EOF)
                failed = 1;

        return failed ? -1 : 0;
}
592
593 int main(int argc, char *argv[])
594 {
595         struct nl_cb *cbs;
596
597         openlog("vip6-autod", LOG_PERROR, LOG_DAEMON);
598
599         handle = nl_handle_alloc_nondefault(NL_CB_VERBOSE);
600         cbs = nl_handle_get_cb(handle);
601         nl_cb_set(cbs, NL_CB_VALID, NL_CB_CUSTOM, handle_valid_msg, &map);
602         nl_cb_set(cbs, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, handle_no_op, NULL);
603         nl_cb_err(cbs, NL_CB_CUSTOM, handle_error_msg, &map);
604         nl_disable_sequence_check(handle);
605
606         nl_join_groups(handle, RTMGRP_IPV6_PREFIX|RTMGRP_IPV6_IFADDR);
607         if (nl_connect(handle, NETLINK_ROUTE) == -1) {
608                 syslog(LOG_CRIT, "nl_connect: %s", strerror(errno));
609                 exit(1);
610         }
611
612         if (daemon(0, 0) == -1)
613                 return -1;
614
615         /* XXX .. here is a hack */
616         write_pidfile(DEFAULT_PKGSTATEDIR "/../vip6-autod.pid");
617
618         signal(SIGUSR1, signal_handler);
619
620         while (nl_recvmsgs(handle, cbs) > 0);
621
622         nl_close(handle);
623         closelog();
624         return 0;
625 }