X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fdpif-linux.c;h=9a7231387a266114e714705dab9b1461d1b3730a;hb=6dc6ff914570fcbe770ed5a5e529229142c5b9c1;hp=2c8c7b094d37e66ed1433380b2aa9ddf1d058778;hpb=96fba48f52254c0cef942dcce130e33d290297da;p=sliver-openvswitch.git diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 2c8c7b094..9a7231387 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,8 @@ #include "dpif-provider.h" #include "ofpbuf.h" #include "poll-loop.h" +#include "rtnetlink.h" +#include "svec.h" #include "util.h" #include "vlog.h" @@ -41,15 +44,29 @@ struct dpif_linux { struct dpif dpif; int fd; + + /* Used by dpif_linux_get_all_names(). */ + char *local_ifname; + int minor; + + /* Change notification. */ + int local_ifindex; /* Ifindex of local port. */ + struct svec changed_ports; /* Ports that have changed. */ + struct rtnetlink_notifier port_notifier; + bool change_error; }; static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); static int do_ioctl(const struct dpif *, int cmd, const void *arg); static int lookup_minor(const char *name, int *minor); +static int finish_open(struct dpif *, const char *local_ifname); +static int get_openvswitch_major(void); static int create_minor(const char *name, int minor, struct dpif **dpifp); static int open_minor(int minor, struct dpif **dpifp); static int make_openvswitch_device(int minor, char **fnp); +static void dpif_linux_port_changed(const struct rtnetlink_change *, + void *dpif); static struct dpif_linux * dpif_linux_cast(const struct dpif *dpif) @@ -58,13 +75,45 @@ dpif_linux_cast(const struct dpif *dpif) return CONTAINER_OF(dpif, struct dpif_linux, dpif); } +static int +dpif_linux_enumerate(struct svec *all_dps) +{ + int major; + int error; + int i; + + /* Check that the Open vSwitch module is loaded. */ + major = get_openvswitch_major(); + if (major < 0) { + return -major; + } + + error = 0; + for (i = 0; i < ODP_MAX; i++) { + struct dpif *dpif; + char devname[16]; + int retval; + + sprintf(devname, "dp%d", i); + retval = dpif_open(devname, &dpif); + if (!retval) { + svec_add(all_dps, devname); + dpif_close(dpif); + } else if (retval != ENODEV && !error) { + error = retval; + } + } + return error; +} + static int dpif_linux_open(const char *name UNUSED, char *suffix, bool create, struct dpif **dpifp) { int minor; - minor = !strncmp(name, "dp", 2) && isdigit(name[2]) ? atoi(name + 2) : -1; + minor = !strncmp(name, "dp", 2) + && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1; if (create) { if (minor >= 0) { return create_minor(suffix, minor, dpifp); @@ -82,7 +131,7 @@ dpif_linux_open(const char *name UNUSED, char *suffix, bool create, } } else { struct dpif_linux *dpif; - int listen_mask; + struct odp_port port; int error; if (minor < 0) { @@ -98,19 +147,22 @@ dpif_linux_open(const char *name UNUSED, char *suffix, bool create, } dpif = dpif_linux_cast(*dpifp); - /* We can open the device, but that doesn't mean that it's been - * created. If it hasn't been, then any command other than - * ODP_DP_CREATE will return ENODEV. Try something innocuous. */ - listen_mask = 0; /* Make Valgrind happy. */ - error = do_ioctl(*dpifp, ODP_GET_LISTEN_MASK, &listen_mask); - if (error) { + /* We need the local port's ifindex for the poll function. Start by + * getting the local port's name. */ + memset(&port, 0, sizeof port); + port.port = ODPP_LOCAL; + if (ioctl(dpif->fd, ODP_PORT_QUERY, &port)) { + error = errno; if (error != ENODEV) { VLOG_WARN("%s: probe returned unexpected error: %s", dpif_name(*dpifp), strerror(error)); } dpif_close(*dpifp); + return error; } - return error; + + /* Then use that to finish up opening. */ + return finish_open(&dpif->dpif, port.devname); } } @@ -118,10 +170,23 @@ static void dpif_linux_close(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); + rtnetlink_notifier_unregister(&dpif->port_notifier); + svec_destroy(&dpif->changed_ports); + free(dpif->local_ifname); close(dpif->fd); free(dpif); } +static int +dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names) +{ + struct dpif_linux *dpif = dpif_linux_cast(dpif_); + + svec_add_nocopy(all_names, xasprintf("dp%d", dpif->minor)); + svec_add(all_names, dpif->local_ifname); + return 0; +} + static int dpif_linux_delete(struct dpif *dpif_) { @@ -214,6 +279,34 @@ dpif_linux_port_list(const struct dpif *dpif_, struct odp_port *ports, int n) return error ? -error : pv.n_ports; } +static int +dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) +{ + struct dpif_linux *dpif = dpif_linux_cast(dpif_); + + if (dpif->change_error) { + dpif->change_error = false; + svec_clear(&dpif->changed_ports); + return ENOBUFS; + } else if (dpif->changed_ports.n) { + *devnamep = dpif->changed_ports.names[--dpif->changed_ports.n]; + return 0; + } else { + return EAGAIN; + } +} + +static void +dpif_linux_port_poll_wait(const struct dpif *dpif_) +{ + struct dpif_linux *dpif = dpif_linux_cast(dpif_); + if (dpif->changed_ports.n || dpif->change_error) { + poll_immediate_wake(); + } else { + rtnetlink_notifier_wait(); + } +} + static int dpif_linux_port_group_get(const struct dpif *dpif_, int group, uint16_t ports[], int n) @@ -326,7 +419,7 @@ dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) return 0; } else { VLOG_WARN_RL(&error_rl, "%s: discarding message truncated " - "from %zu bytes to %d", + "from %"PRIu32" bytes to %d", dpif_name(dpif_), msg->length, retval); error = ERANGE; } @@ -355,8 +448,12 @@ dpif_linux_recv_wait(struct dpif *dpif_) const struct dpif_class dpif_linux_class = { "", /* This is the default class. */ "linux", + NULL, + NULL, + dpif_linux_enumerate, dpif_linux_open, dpif_linux_close, + dpif_linux_get_all_names, dpif_linux_delete, dpif_linux_get_stats, dpif_linux_get_drop_frags, @@ -366,6 +463,8 @@ const struct dpif_class dpif_linux_class = { dpif_linux_port_query_by_number, dpif_linux_port_query_by_name, dpif_linux_port_list, + dpif_linux_port_poll, + dpif_linux_port_poll_wait, dpif_linux_port_group_get, dpif_linux_port_group_set, dpif_linux_flow_get, @@ -381,7 +480,7 @@ const struct dpif_class dpif_linux_class = { }; static int get_openvswitch_major(void); -static int get_major(const char *target, int default_major); +static int get_major(const char *target); static int do_ioctl(const struct dpif *dpif_, int cmd, const void *arg) @@ -391,9 +490,10 @@ do_ioctl(const struct dpif *dpif_, int cmd, const void *arg) } static int -lookup_minor(const char *name, int *minor) +lookup_minor(const char *name, int *minorp) { struct ethtool_drvinfo drvinfo; + int minor, port_no; struct ifreq ifr; int error; int sock; @@ -423,14 +523,20 @@ lookup_minor(const char *name, int *minor) goto error_close_sock; } - if (!isdigit(drvinfo.bus_info[0])) { - VLOG_WARN("%s ethtool info does not contain an openvswitch minor", - name); + if (sscanf(drvinfo.bus_info, "%d.%d", &minor, &port_no) != 2) { + VLOG_WARN("%s ethtool bus_info has unexpected format", name); error = EPROTOTYPE; goto error_close_sock; + } else if (port_no != ODPP_LOCAL) { + /* This is an Open vSwitch device but not the local port. We + * intentionally support only using the name of the local port as the + * name of a datapath; otherwise, it would be too difficult to + * enumerate all the names of a datapath. */ + error = EOPNOTSUPP; + goto error_close_sock; } - *minor = atoi(drvinfo.bus_info); + *minorp = minor; close(sock); return 0; @@ -443,11 +549,18 @@ error: static int make_openvswitch_device(int minor, char **fnp) { - dev_t dev = makedev(get_openvswitch_major(), minor); const char dirname[] = "/dev/net"; + int major; + dev_t dev; struct stat s; char fn[128]; + major = get_openvswitch_major(); + if (major < 0) { + return -major; + } + dev = makedev(major, minor); + *fnp = NULL; sprintf(fn, "%s/dp%d", dirname, minor); if (!stat(fn, &s)) { @@ -456,7 +569,7 @@ make_openvswitch_device(int minor, char **fnp) fn); } else if (s.st_rdev != dev) { VLOG_WARN_RL(&error_rl, - "%s is device %u:%u instead of %u:%u, fixing", + "%s is device %u:%u but should be %u:%u, fixing", fn, major(s.st_rdev), minor(s.st_rdev), major(dev), minor(dev)); } else { @@ -499,20 +612,20 @@ success: return 0; } - +/* Return the major device number of the Open vSwitch device. If it + * cannot be determined, a negative errno is returned. */ static int get_openvswitch_major(void) { - static unsigned int openvswitch_major; - if (!openvswitch_major) { - enum { DEFAULT_MAJOR = 248 }; - openvswitch_major = get_major("openvswitch", DEFAULT_MAJOR); + static int openvswitch_major = -1; + if (openvswitch_major < 0) { + openvswitch_major = get_major("openvswitch"); } return openvswitch_major; } static int -get_major(const char *target, int default_major) +get_major(const char *target) { const char fn[] = "/proc/devices"; char line[128]; @@ -522,7 +635,7 @@ get_major(const char *target, int default_major) file = fopen(fn, "r"); if (!file) { VLOG_ERR("opening %s failed (%s)", fn, strerror(errno)); - goto error; + return -errno; } for (ln = 1; fgets(line, sizeof line, file); ln++) { @@ -548,11 +661,26 @@ get_major(const char *target, int default_major) } } - VLOG_ERR("%s: %s major not found (is the module loaded?), using " - "default major %d", fn, target, default_major); -error: - VLOG_INFO("using default major %d for %s", default_major, target); - return default_major; + fclose(file); + + VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target); + return -ENODEV; +} + +static int +finish_open(struct dpif *dpif_, const char *local_ifname) +{ + struct dpif_linux *dpif = dpif_linux_cast(dpif_); + dpif->local_ifname = strdup(local_ifname); + dpif->local_ifindex = if_nametoindex(local_ifname); + if (!dpif->local_ifindex) { + int error = errno; + dpif_close(dpif_); + VLOG_WARN("could not get ifindex of %s device: %s", + local_ifname, strerror(errno)); + return error; + } + return 0; } static int @@ -561,7 +689,9 @@ create_minor(const char *name, int minor, struct dpif **dpifp) int error = open_minor(minor, dpifp); if (!error) { error = do_ioctl(*dpifp, ODP_DP_CREATE, name); - if (error) { + if (!error) { + error = finish_open(*dpifp, name); + } else { dpif_close(*dpifp); } } @@ -582,17 +712,26 @@ open_minor(int minor, struct dpif **dpifp) fd = open(fn, O_RDONLY | O_NONBLOCK); if (fd >= 0) { - struct dpif_linux *dpif; - char *name; - - name = xasprintf("dp%d", minor); - - dpif = xmalloc(sizeof *dpif); - dpif_init(&dpif->dpif, &dpif_linux_class, name, minor, minor); - dpif->fd = fd; - *dpifp = &dpif->dpif; - - free(name); + struct dpif_linux *dpif = xmalloc(sizeof *dpif); + error = rtnetlink_notifier_register(&dpif->port_notifier, + dpif_linux_port_changed, dpif); + if (!error) { + char *name; + + name = xasprintf("dp%d", minor); + dpif_init(&dpif->dpif, &dpif_linux_class, name, minor, minor); + free(name); + + dpif->fd = fd; + dpif->local_ifname = NULL; + dpif->minor = minor; + dpif->local_ifindex = 0; + svec_init(&dpif->changed_ports); + dpif->change_error = false; + *dpifp = &dpif->dpif; + } else { + free(dpif); + } } else { error = errno; VLOG_WARN("%s: open failed (%s)", fn, strerror(error)); @@ -601,3 +740,25 @@ open_minor(int minor, struct dpif **dpifp) return error; } + +static void +dpif_linux_port_changed(const struct rtnetlink_change *change, void *dpif_) +{ + struct dpif_linux *dpif = dpif_; + + if (change) { + if (change->master_ifindex == dpif->local_ifindex + && (change->nlmsg_type == RTM_NEWLINK + || change->nlmsg_type == RTM_DELLINK)) + { + /* Our datapath changed, either adding a new port or deleting an + * existing one. */ + if (!svec_contains(&dpif->changed_ports, change->ifname)) { + svec_add(&dpif->changed_ports, change->ifname); + svec_sort(&dpif->changed_ports); + } + } + } else { + dpif->change_error = true; + } +}