X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif-linux.c;h=6aa333557aadd3b25fa443b846d77accdecc01a1;hb=cdee00fd635d1e0f1eeb5d9c009daeb59abd4777;hp=e075c8b02da0c25250b21f76145e6086753c4da1;hpb=8b61709d5ec6c4ef58a04fcaefde617ff63fa10d;p=sliver-openvswitch.git diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index e075c8b02..6aa333557 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,22 +23,28 @@ #include #include #include +#include #include +#include #include #include #include #include +#include #include #include "dpif-provider.h" -#include "netdev-linux.h" +#include "netdev.h" +#include "netdev-vport.h" #include "ofpbuf.h" #include "poll-loop.h" +#include "rtnetlink.h" +#include "shash.h" #include "svec.h" #include "util.h" - #include "vlog.h" -#define THIS_MODULE VLM_dpif_linux + +VLOG_DEFINE_THIS_MODULE(dpif_linux); /* Datapath interface for the openvswitch Linux kernel module. */ struct dpif_linux { @@ -51,8 +57,8 @@ struct dpif_linux { /* Change notification. */ int local_ifindex; /* Ifindex of local port. */ - struct svec changed_ports; /* Ports that have changed. */ - struct linux_netdev_notifier port_notifier; + struct shash changed_ports; /* Ports that have changed. */ + struct rtnetlink_notifier port_notifier; bool change_error; }; @@ -61,10 +67,11 @@ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); static int do_ioctl(const struct dpif *, int cmd, const void *arg); static int lookup_minor(const char *name, int *minor); static int finish_open(struct dpif *, const char *local_ifname); +static int get_openvswitch_major(void); static int create_minor(const char *name, int minor, struct dpif **dpifp); static int open_minor(int minor, struct dpif **dpifp); static int make_openvswitch_device(int minor, char **fnp); -static void dpif_linux_port_changed(const struct linux_netdev_change *, +static void dpif_linux_port_changed(const struct rtnetlink_change *, void *dpif); static struct dpif_linux * @@ -77,9 +84,16 @@ dpif_linux_cast(const struct dpif *dpif) static int dpif_linux_enumerate(struct svec *all_dps) { + int major; int error; int i; + /* Check that the Open vSwitch module is loaded. */ + major = get_openvswitch_major(); + if (major < 0) { + return -major; + } + error = 0; for (i = 0; i < ODP_MAX; i++) { struct dpif *dpif; @@ -87,10 +101,10 @@ dpif_linux_enumerate(struct svec *all_dps) int retval; sprintf(devname, "dp%d", i); - retval = dpif_open(devname, &dpif); + retval = dpif_open(devname, "system", &dpif); if (!retval) { svec_add(all_dps, devname); - dpif_close(dpif); + dpif_uninit(dpif, true); } else if (retval != ENODEV && !error) { error = retval; } @@ -99,19 +113,20 @@ dpif_linux_enumerate(struct svec *all_dps) } static int -dpif_linux_open(const char *name UNUSED, char *suffix, bool create, - struct dpif **dpifp) +dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name, + bool create, struct dpif **dpifp) { int minor; - minor = !strncmp(name, "dp", 2) && isdigit(name[2]) ? atoi(name + 2) : -1; + minor = !strncmp(name, "dp", 2) + && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1; if (create) { if (minor >= 0) { - return create_minor(suffix, minor, dpifp); + return create_minor(name, minor, dpifp); } else { /* Scan for unused minor number. */ for (minor = 0; minor < ODP_MAX; minor++) { - int error = create_minor(suffix, minor, dpifp); + int error = create_minor(name, minor, dpifp); if (error != EBUSY) { return error; } @@ -126,7 +141,7 @@ dpif_linux_open(const char *name UNUSED, char *suffix, bool create, int error; if (minor < 0) { - error = lookup_minor(suffix, &minor); + error = lookup_minor(name, &minor); if (error) { return error; } @@ -142,13 +157,13 @@ dpif_linux_open(const char *name UNUSED, char *suffix, bool create, * getting the local port's name. */ memset(&port, 0, sizeof port); port.port = ODPP_LOCAL; - if (ioctl(dpif->fd, ODP_PORT_QUERY, &port)) { + if (ioctl(dpif->fd, ODP_VPORT_QUERY, &port)) { error = errno; if (error != ENODEV) { VLOG_WARN("%s: probe returned unexpected error: %s", dpif_name(*dpifp), strerror(error)); } - dpif_close(*dpifp); + dpif_uninit(*dpifp, true); return error; } @@ -161,8 +176,8 @@ static void dpif_linux_close(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - linux_netdev_notifier_unregister(&dpif->port_notifier); - svec_destroy(&dpif->changed_ports); + rtnetlink_notifier_unregister(&dpif->port_notifier); + shash_destroy(&dpif->changed_ports); free(dpif->local_ifname); close(dpif->fd); free(dpif); @@ -179,7 +194,7 @@ dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names) } static int -dpif_linux_delete(struct dpif *dpif_) +dpif_linux_destroy(struct dpif *dpif_) { return do_ioctl(dpif_, ODP_DP_DESTROY, NULL); } @@ -187,6 +202,7 @@ dpif_linux_delete(struct dpif *dpif_) static int dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats) { + memset(stats, 0, sizeof *stats); return do_ioctl(dpif_, ODP_DP_STATS, stats); } @@ -210,46 +226,78 @@ dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags) return do_ioctl(dpif_, ODP_SET_DROP_FRAGS, &drop_frags_int); } +static void +translate_vport_type_to_netdev_type(char *type, size_t size) +{ + if (!strcmp(type, "netdev")) { + ovs_strlcpy(type, "system", size); + } +} + +static void +translate_netdev_type_to_vport_type(char *type, size_t size) +{ + if (!strcmp(type, "system")) { + ovs_strlcpy(type, "netdev", size); + } +} + static int -dpif_linux_port_add(struct dpif *dpif_, const char *devname, uint16_t flags, - uint16_t *port_no) +dpif_linux_port_add(struct dpif *dpif, struct netdev *netdev, + uint16_t *port_nop) { + const char *name = netdev_get_name(netdev); + const char *type = netdev_get_type(netdev); struct odp_port port; int error; memset(&port, 0, sizeof port); - strncpy(port.devname, devname, sizeof port.devname); - port.flags = flags; - error = do_ioctl(dpif_, ODP_PORT_ADD, &port); + strncpy(port.devname, name, sizeof port.devname); + strncpy(port.type, type, sizeof port.type); + translate_netdev_type_to_vport_type(port.type, sizeof port.type); + netdev_vport_get_config(netdev, port.config); + + error = do_ioctl(dpif, ODP_VPORT_ATTACH, &port); if (!error) { - *port_no = port.port; + *port_nop = port.port; } + return error; } static int -dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no) +dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no_) +{ + int port_no = port_no_; /* Kernel expects an "int". */ + return do_ioctl(dpif_, ODP_VPORT_DETACH, &port_no); +} + +static int +dpif_linux_port_query__(const struct dpif *dpif, struct odp_port *port) { - int tmp = port_no; - return do_ioctl(dpif_, ODP_PORT_DEL, &tmp); + int error = do_ioctl(dpif, ODP_VPORT_QUERY, port); + if (!error) { + translate_vport_type_to_netdev_type(port->type, sizeof port->type); + } + return error; } static int -dpif_linux_port_query_by_number(const struct dpif *dpif_, uint16_t port_no, - struct odp_port *port) +dpif_linux_port_query_by_number(const struct dpif *dpif, uint16_t port_no, + struct odp_port *port) { memset(port, 0, sizeof *port); port->port = port_no; - return do_ioctl(dpif_, ODP_PORT_QUERY, port); + return dpif_linux_port_query__(dpif, port); } static int -dpif_linux_port_query_by_name(const struct dpif *dpif_, const char *devname, +dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname, struct odp_port *port) { memset(port, 0, sizeof *port); strncpy(port->devname, devname, sizeof port->devname); - return do_ioctl(dpif_, ODP_PORT_QUERY, port); + return dpif_linux_port_query__(dpif, port); } static int @@ -262,12 +310,22 @@ static int dpif_linux_port_list(const struct dpif *dpif_, struct odp_port *ports, int n) { struct odp_portvec pv; + unsigned int i; int error; pv.ports = ports; pv.n_ports = n; - error = do_ioctl(dpif_, ODP_PORT_LIST, &pv); - return error ? -error : pv.n_ports; + error = do_ioctl(dpif_, ODP_VPORT_LIST, &pv); + if (error) { + return -error; + } + + for (i = 0; i < pv.n_ports; i++) { + struct odp_port *port = &pv.ports[i]; + + translate_vport_type_to_netdev_type(port->type, sizeof port->type); + } + return pv.n_ports; } static int @@ -277,10 +335,11 @@ dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) if (dpif->change_error) { dpif->change_error = false; - svec_clear(&dpif->changed_ports); + shash_clear(&dpif->changed_ports); return ENOBUFS; - } else if (dpif->changed_ports.n) { - *devnamep = dpif->changed_ports.names[--dpif->changed_ports.n]; + } else if (!shash_is_empty(&dpif->changed_ports)) { + struct shash_node *node = shash_first(&dpif->changed_ports); + *devnamep = shash_steal(&dpif->changed_ports, node); return 0; } else { return EAGAIN; @@ -291,41 +350,13 @@ static void dpif_linux_port_poll_wait(const struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - if (dpif->changed_ports.n || dpif->change_error) { + if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) { poll_immediate_wake(); } else { - linux_netdev_notifier_wait(); + rtnetlink_notifier_wait(); } } -static int -dpif_linux_port_group_get(const struct dpif *dpif_, int group, - uint16_t ports[], int n) -{ - struct odp_port_group pg; - int error; - - assert(n <= UINT16_MAX); - pg.group = group; - pg.ports = ports; - pg.n_ports = n; - error = do_ioctl(dpif_, ODP_PORT_GROUP_GET, &pg); - return error ? -error : pg.n_ports; -} - -static int -dpif_linux_port_group_set(struct dpif *dpif_, int group, - const uint16_t ports[], int n) -{ - struct odp_port_group pg; - - assert(n <= UINT16_MAX); - pg.group = group; - pg.ports = (uint16_t *) ports; - pg.n_ports = n; - return do_ioctl(dpif_, ODP_PORT_GROUP_SET, &pg); -} - static int dpif_linux_flow_get(const struct dpif *dpif_, struct odp_flow flows[], int n) { @@ -360,15 +391,14 @@ dpif_linux_flow_list(const struct dpif *dpif_, struct odp_flow flows[], int n) } static int -dpif_linux_execute(struct dpif *dpif_, uint16_t in_port, - const union odp_action actions[], int n_actions, +dpif_linux_execute(struct dpif *dpif_, + const struct nlattr *actions, unsigned int actions_len, const struct ofpbuf *buf) { struct odp_execute execute; memset(&execute, 0, sizeof execute); - execute.in_port = in_port; - execute.actions = (union odp_action *) actions; - execute.n_actions = n_actions; + execute.actions = (struct nlattr *) actions; + execute.actions_len = actions_len; execute.data = buf->data; execute.length = buf->size; return do_ioctl(dpif_, ODP_EXECUTE, &execute); @@ -386,6 +416,31 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask) return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask); } +static int +dpif_linux_get_sflow_probability(const struct dpif *dpif_, + uint32_t *probability) +{ + return do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability); +} + +static int +dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability) +{ + return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability); +} + +static int +dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, + uint32_t queue_id, uint32_t *priority) +{ + if (queue_id < 0xf000) { + *priority = TC_H_MAKE(1 << 16, queue_id + 1); + return 0; + } else { + return EINVAL; + } +} + static int dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) { @@ -394,7 +449,7 @@ dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) int retval; int error; - buf = ofpbuf_new(65536); + buf = ofpbuf_new_with_headroom(65536, DPIF_RECV_MSG_PADDING); retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); if (retval < 0) { error = errno; @@ -410,7 +465,7 @@ dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) return 0; } else { VLOG_WARN_RL(&error_rl, "%s: discarding message truncated " - "from %zu bytes to %d", + "from %"PRIu32" bytes to %d", dpif_name(dpif_), msg->length, retval); error = ERANGE; } @@ -437,15 +492,14 @@ dpif_linux_recv_wait(struct dpif *dpif_) } const struct dpif_class dpif_linux_class = { - "", /* This is the default class. */ - "linux", + "system", NULL, NULL, dpif_linux_enumerate, dpif_linux_open, dpif_linux_close, dpif_linux_get_all_names, - dpif_linux_delete, + dpif_linux_destroy, dpif_linux_get_stats, dpif_linux_get_drop_frags, dpif_linux_set_drop_frags, @@ -456,8 +510,6 @@ const struct dpif_class dpif_linux_class = { dpif_linux_port_list, dpif_linux_port_poll, dpif_linux_port_poll_wait, - dpif_linux_port_group_get, - dpif_linux_port_group_set, dpif_linux_flow_get, dpif_linux_flow_put, dpif_linux_flow_del, @@ -466,12 +518,15 @@ const struct dpif_class dpif_linux_class = { dpif_linux_execute, dpif_linux_recv_get_mask, dpif_linux_recv_set_mask, + dpif_linux_get_sflow_probability, + dpif_linux_set_sflow_probability, + dpif_linux_queue_to_priority, dpif_linux_recv, dpif_linux_recv_wait, }; static int get_openvswitch_major(void); -static int get_major(const char *target, int default_major); +static int get_major(const char *target); static int do_ioctl(const struct dpif *dpif_, int cmd, const void *arg) @@ -540,12 +595,20 @@ error: static int make_openvswitch_device(int minor, char **fnp) { - dev_t dev = makedev(get_openvswitch_major(), minor); const char dirname[] = "/dev/net"; + int major; + dev_t dev; struct stat s; char fn[128]; *fnp = NULL; + + major = get_openvswitch_major(); + if (major < 0) { + return -major; + } + dev = makedev(major, minor); + sprintf(fn, "%s/dp%d", dirname, minor); if (!stat(fn, &s)) { if (!S_ISCHR(s.st_mode)) { @@ -553,7 +616,7 @@ make_openvswitch_device(int minor, char **fnp) fn); } else if (s.st_rdev != dev) { VLOG_WARN_RL(&error_rl, - "%s is device %u:%u instead of %u:%u, fixing", + "%s is device %u:%u but should be %u:%u, fixing", fn, major(s.st_rdev), minor(s.st_rdev), major(dev), minor(dev)); } else { @@ -596,20 +659,20 @@ success: return 0; } - +/* Return the major device number of the Open vSwitch device. If it + * cannot be determined, a negative errno is returned. */ static int get_openvswitch_major(void) { - static unsigned int openvswitch_major; - if (!openvswitch_major) { - enum { DEFAULT_MAJOR = 248 }; - openvswitch_major = get_major("openvswitch", DEFAULT_MAJOR); + static int openvswitch_major = -1; + if (openvswitch_major < 0) { + openvswitch_major = get_major("openvswitch"); } return openvswitch_major; } static int -get_major(const char *target, int default_major) +get_major(const char *target) { const char fn[] = "/proc/devices"; char line[128]; @@ -619,7 +682,7 @@ get_major(const char *target, int default_major) file = fopen(fn, "r"); if (!file) { VLOG_ERR("opening %s failed (%s)", fn, strerror(errno)); - goto error; + return -errno; } for (ln = 1; fgets(line, sizeof line, file); ln++) { @@ -637,30 +700,25 @@ get_major(const char *target, int default_major) return major; } } else { - static bool warned; - if (!warned) { - VLOG_WARN("%s:%d: syntax error", fn, ln); - } - warned = true; + VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln); } } - VLOG_ERR("%s: %s major not found (is the module loaded?), using " - "default major %d", fn, target, default_major); -error: - VLOG_INFO("using default major %d for %s", default_major, target); - return default_major; + fclose(file); + + VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target); + return -ENODEV; } static int finish_open(struct dpif *dpif_, const char *local_ifname) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - dpif->local_ifname = strdup(local_ifname); + dpif->local_ifname = xstrdup(local_ifname); dpif->local_ifindex = if_nametoindex(local_ifname); if (!dpif->local_ifindex) { int error = errno; - dpif_close(dpif_); + dpif_uninit(dpif_, true); VLOG_WARN("could not get ifindex of %s device: %s", local_ifname, strerror(errno)); return error; @@ -677,7 +735,7 @@ create_minor(const char *name, int minor, struct dpif **dpifp) if (!error) { error = finish_open(*dpifp, name); } else { - dpif_close(*dpifp); + dpif_uninit(*dpifp, true); } } return error; @@ -698,8 +756,8 @@ open_minor(int minor, struct dpif **dpifp) fd = open(fn, O_RDONLY | O_NONBLOCK); if (fd >= 0) { struct dpif_linux *dpif = xmalloc(sizeof *dpif); - error = linux_netdev_notifier_register(&dpif->port_notifier, - dpif_linux_port_changed, dpif); + error = rtnetlink_notifier_register(&dpif->port_notifier, + dpif_linux_port_changed, dpif); if (!error) { char *name; @@ -711,7 +769,7 @@ open_minor(int minor, struct dpif **dpifp) dpif->local_ifname = NULL; dpif->minor = minor; dpif->local_ifindex = 0; - svec_init(&dpif->changed_ports); + shash_init(&dpif->changed_ports); dpif->change_error = false; *dpifp = &dpif->dpif; } else { @@ -727,7 +785,7 @@ open_minor(int minor, struct dpif **dpifp) } static void -dpif_linux_port_changed(const struct linux_netdev_change *change, void *dpif_) +dpif_linux_port_changed(const struct rtnetlink_change *change, void *dpif_) { struct dpif_linux *dpif = dpif_; @@ -738,10 +796,7 @@ dpif_linux_port_changed(const struct linux_netdev_change *change, void *dpif_) { /* Our datapath changed, either adding a new port or deleting an * existing one. */ - if (!svec_contains(&dpif->changed_ports, change->ifname)) { - svec_add(&dpif->changed_ports, change->ifname); - svec_sort(&dpif->changed_ports); - } + shash_add_once(&dpif->changed_ports, change->ifname, NULL); } } else { dpif->change_error = true;