X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif-linux.c;h=2eda329a98fa9932b13c5d3fb1468e174d992029;hb=44b4d050d4a2c966ed6f4aef2e523f70b6463648;hp=dc59fe07bed47ec85a5f97075b1e218f05095cb6;hpb=254f2dc8e3eb18debf4a8f238b9c87cf4d4dbd3f;p=sliver-openvswitch.git diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index dc59fe07b..2eda329a9 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -32,8 +32,11 @@ #include #include +#include "bitmap.h" #include "dpif-provider.h" +#include "dynamic-string.h" #include "netdev.h" +#include "netdev-linux.h" #include "netdev-vport.h" #include "netlink-socket.h" #include "netlink.h" @@ -45,13 +48,17 @@ #include "rtnetlink.h" #include "rtnetlink-link.h" #include "shash.h" -#include "svec.h" +#include "sset.h" #include "unaligned.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(dpif_linux); +enum { LRU_MAX_PORTS = 1024 }; +enum { LRU_MASK = LRU_MAX_PORTS - 1}; +BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS)); + struct dpif_linux_dp { /* Generic Netlink header. */ uint8_t cmd; @@ -89,7 +96,10 @@ struct dpif_linux_flow { * * The 'stats' and 'used' members point to 64-bit data that might only be * aligned on 32-bit boundaries, so get_unaligned_u64() should be used to - * access their values. */ + * access their values. + * + * If 'actions' is nonnull then ODP_FLOW_ATTR_ACTIONS will be included in + * the Netlink version of the command, even if actions_len is zero. */ const struct nlattr *key; /* ODP_FLOW_ATTR_KEY. */ size_t key_len; const struct nlattr *actions; /* ODP_FLOW_ATTR_ACTIONS. */ @@ -122,9 +132,15 @@ struct dpif_linux { unsigned int listen_mask; /* Change notification. */ - struct shash changed_ports; /* Ports that have changed. */ + struct sset changed_ports; /* Ports that have changed. */ struct rtnetlink_notifier port_notifier; bool change_error; + + /* Queue of unused ports. */ + unsigned long *lru_bitmap; + uint16_t lru_ports[LRU_MAX_PORTS]; + size_t lru_head; + size_t lru_tail; }; static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); @@ -155,8 +171,31 @@ dpif_linux_cast(const struct dpif *dpif) return CONTAINER_OF(dpif, struct dpif_linux, dpif); } +static void +dpif_linux_push_port(struct dpif_linux *dp, uint16_t port) +{ + if (port < LRU_MAX_PORTS && !bitmap_is_set(dp->lru_bitmap, port)) { + bitmap_set1(dp->lru_bitmap, port); + dp->lru_ports[dp->lru_head++ & LRU_MASK] = port; + } +} + +static uint32_t +dpif_linux_pop_port(struct dpif_linux *dp) +{ + uint16_t port; + + if (dp->lru_head == dp->lru_tail) { + return UINT32_MAX; + } + + port = dp->lru_ports[dp->lru_tail++ & LRU_MASK]; + bitmap_set0(dp->lru_bitmap, port); + return port; +} + static int -dpif_linux_enumerate(struct svec *all_dps) +dpif_linux_enumerate(struct sset *all_dps) { struct nl_dump dump; struct ofpbuf msg; @@ -172,7 +211,7 @@ dpif_linux_enumerate(struct svec *all_dps) struct dpif_linux_dp dp; if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) { - svec_add(all_dps, dp.name); + sset_add(all_dps, dp.name); } } return nl_dump_done(&dump); @@ -209,7 +248,6 @@ static int open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp) { struct dpif_linux *dpif; - char *name; int error; int i; @@ -222,7 +260,6 @@ open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp) dpif_init(&dpif->dpif, &dpif_linux_class, dp->name, dp->dp_ifindex, dp->dp_ifindex); - free(name); dpif->mc_sock = NULL; for (i = 0; i < DPIF_N_UC_TYPES; i++) { @@ -230,10 +267,16 @@ open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp) } dpif->listen_mask = 0; dpif->dp_ifindex = dp->dp_ifindex; - shash_init(&dpif->changed_ports); + sset_init(&dpif->changed_ports); dpif->change_error = false; *dpifp = &dpif->dpif; + dpif->lru_head = dpif->lru_tail = 0; + dpif->lru_bitmap = bitmap_allocate(LRU_MAX_PORTS); + bitmap_set1(dpif->lru_bitmap, ODPP_LOCAL); + for (i = 1; i < LRU_MAX_PORTS; i++) { + dpif_linux_push_port(dpif, i); + } return 0; error_free: @@ -246,7 +289,8 @@ dpif_linux_close(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); rtnetlink_link_notifier_unregister(&dpif->port_notifier); - shash_destroy(&dpif->changed_ports); + sset_destroy(&dpif->changed_ports); + free(dpif->lru_bitmap); free(dpif); } @@ -335,11 +379,17 @@ dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev, request.options_len = options->size; } - error = dpif_linux_vport_transact(&request, &reply, &buf); - if (!error) { - *port_nop = reply.port_no; + /* Loop until we find a port that isn't used. */ + do { + request.port_no = dpif_linux_pop_port(dpif); + error = dpif_linux_vport_transact(&request, &reply, &buf); + + if (!error) { + *port_nop = reply.port_no; + } ofpbuf_delete(buf); - } + } while (request.port_no != UINT32_MAX + && (error == EBUSY || error == EFBIG)); return error; } @@ -349,12 +399,18 @@ dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_vport vport; + int error; dpif_linux_vport_init(&vport); vport.cmd = ODP_VPORT_CMD_DEL; vport.dp_ifindex = dpif->dp_ifindex; vport.port_no = port_no; - return dpif_linux_vport_transact(&vport, NULL, NULL); + error = dpif_linux_vport_transact(&vport, NULL, NULL); + + if (!error) { + dpif_linux_push_port(dpif, port_no); + } + return error; } static int @@ -377,6 +433,12 @@ dpif_linux_port_query__(const struct dpif *dpif, uint32_t port_no, dpif_port->name = xstrdup(reply.name); dpif_port->type = xstrdup(netdev_vport_get_netdev_type(&reply)); dpif_port->port_no = reply.port_no; + if (reply.stats) { + netdev_stats_from_rtnl_link_stats64(&dpif_port->stats, + reply.stats); + } else { + memset(&dpif_port->stats, 0xff, sizeof dpif_port->stats); + } ofpbuf_delete(buf); } return error; @@ -418,6 +480,8 @@ dpif_linux_flow_flush(struct dpif *dpif_) struct dpif_linux_port_state { struct nl_dump dump; + unsigned long *port_bitmap; /* Ports in the datapath. */ + bool complete; /* Dump completed without error. */ }; static int @@ -429,6 +493,8 @@ dpif_linux_port_dump_start(const struct dpif *dpif_, void **statep) struct ofpbuf *buf; *statep = state = xmalloc(sizeof *state); + state->port_bitmap = bitmap_allocate(LRU_MAX_PORTS); + state->complete = false; dpif_linux_vport_init(&request); request.cmd = ODP_DP_CMD_GET; @@ -452,6 +518,7 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_, int error; if (!nl_dump_next(&state->dump, &buf)) { + state->complete = true; return EOF; } @@ -460,17 +527,39 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_, return error; } + if (vport.port_no < LRU_MAX_PORTS) { + bitmap_set1(state->port_bitmap, vport.port_no); + } + dpif_port->name = (char *) vport.name; dpif_port->type = (char *) netdev_vport_get_netdev_type(&vport); dpif_port->port_no = vport.port_no; + if (vport.stats) { + netdev_stats_from_rtnl_link_stats64(&dpif_port->stats, vport.stats); + } else { + memset(&dpif_port->stats, 0xff, sizeof dpif_port->stats); + } return 0; } static int -dpif_linux_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) +dpif_linux_port_dump_done(const struct dpif *dpif_, void *state_) { + struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_port_state *state = state_; int error = nl_dump_done(&state->dump); + + if (state->complete) { + uint16_t i; + + for (i = 0; i < LRU_MAX_PORTS; i++) { + if (!bitmap_is_set(state->port_bitmap, i)) { + dpif_linux_push_port(dpif, i); + } + } + } + + free(state->port_bitmap); free(state); return error; } @@ -482,11 +571,10 @@ dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) if (dpif->change_error) { dpif->change_error = false; - shash_clear(&dpif->changed_ports); + sset_clear(&dpif->changed_ports); return ENOBUFS; - } else if (!shash_is_empty(&dpif->changed_ports)) { - struct shash_node *node = shash_first(&dpif->changed_ports); - *devnamep = shash_steal(&dpif->changed_ports, node); + } else if (!sset_is_empty(&dpif->changed_ports)) { + *devnamep = sset_pop(&dpif->changed_ports); return 0; } else { return EAGAIN; @@ -497,7 +585,7 @@ static void dpif_linux_port_poll_wait(const struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) { + if (!sset_is_empty(&dpif->changed_ports) || dpif->change_error) { poll_immediate_wake(); } else { rtnetlink_link_notifier_wait(); @@ -505,21 +593,31 @@ dpif_linux_port_poll_wait(const struct dpif *dpif_) } static int -dpif_linux_flow_get(const struct dpif *dpif_, - const struct nlattr *key, size_t key_len, - struct ofpbuf **actionsp, struct dpif_flow_stats *stats) +dpif_linux_flow_get__(const struct dpif *dpif_, + const struct nlattr *key, size_t key_len, + struct dpif_linux_flow *reply, struct ofpbuf **bufp) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - struct dpif_linux_flow request, reply; - struct ofpbuf *buf; - int error; + struct dpif_linux_flow request; dpif_linux_flow_init(&request); request.cmd = ODP_FLOW_CMD_GET; request.dp_ifindex = dpif->dp_ifindex; request.key = key; request.key_len = key_len; - error = dpif_linux_flow_transact(&request, &reply, &buf); + return dpif_linux_flow_transact(&request, reply, bufp); +} + +static int +dpif_linux_flow_get(const struct dpif *dpif_, + const struct nlattr *key, size_t key_len, + struct ofpbuf **actionsp, struct dpif_flow_stats *stats) +{ + struct dpif_linux_flow reply; + struct ofpbuf *buf; + int error; + + error = dpif_linux_flow_get__(dpif_, key, key_len, &reply, &buf); if (!error) { if (stats) { dpif_linux_flow_get_stats(&reply, stats); @@ -543,6 +641,7 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags, { struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_flow request, reply; + struct nlattr dummy_action; struct ofpbuf *buf; int error; @@ -551,7 +650,8 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags, request.dp_ifindex = dpif->dp_ifindex; request.key = key; request.key_len = key_len; - request.actions = actions; + /* Ensure that ODP_FLOW_ATTR_ACTIONS will always be included. */ + request.actions = actions ? actions : &dummy_action; request.actions_len = actions_len; if (flags & DPIF_FP_ZERO_STATS) { request.clear = true; @@ -596,6 +696,7 @@ struct dpif_linux_flow_state { struct nl_dump dump; struct dpif_linux_flow flow; struct dpif_flow_stats stats; + struct ofpbuf *buf; }; static int @@ -617,6 +718,8 @@ dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep) nl_dump_start(&state->dump, genl_sock, buf); ofpbuf_delete(buf); + state->buf = NULL; + return 0; } @@ -630,24 +733,42 @@ dpif_linux_flow_dump_next(const struct dpif *dpif_ OVS_UNUSED, void *state_, struct ofpbuf buf; int error; - if (!nl_dump_next(&state->dump, &buf)) { - return EOF; - } + do { + ofpbuf_delete(state->buf); + state->buf = NULL; - error = dpif_linux_flow_from_ofpbuf(&state->flow, &buf); - if (!error) { - if (key) { - *key = state->flow.key; - *key_len = state->flow.key_len; + if (!nl_dump_next(&state->dump, &buf)) { + return EOF; } - if (actions) { - *actions = state->flow.actions; - *actions_len = state->flow.actions_len; + + error = dpif_linux_flow_from_ofpbuf(&state->flow, &buf); + if (error) { + return error; } - if (stats) { - dpif_linux_flow_get_stats(&state->flow, &state->stats); - *stats = &state->stats; + + if (actions && !state->flow.actions) { + error = dpif_linux_flow_get__(dpif_, state->flow.key, + state->flow.key_len, + &state->flow, &state->buf); + if (error == ENOENT) { + VLOG_DBG("dumped flow disappeared on get"); + } else if (error) { + VLOG_WARN("error fetching dumped flow: %s", strerror(error)); + } } + } while (error); + + if (actions) { + *actions = state->flow.actions; + *actions_len = state->flow.actions_len; + } + if (key) { + *key = state->flow.key; + *key_len = state->flow.key_len; + } + if (stats) { + dpif_linux_flow_get_stats(&state->flow, &state->stats); + *stats = &state->stats; } return error; } @@ -657,12 +778,14 @@ dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) { struct dpif_linux_flow_state *state = state_; int error = nl_dump_done(&state->dump); + ofpbuf_delete(state->buf); free(state); return error; } static int dpif_linux_execute(struct dpif *dpif_, + const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *packet) { @@ -680,6 +803,7 @@ dpif_linux_execute(struct dpif *dpif_, execute->dp_ifindex = dpif->dp_ifindex; nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, packet->data, packet->size); + nl_msg_put_unspec(buf, ODP_PACKET_ATTR_KEY, key, key_len); nl_msg_put_unspec(buf, ODP_PACKET_ATTR_ACTIONS, actions, actions_len); error = nl_sock_transact(genl_sock, buf, NULL); @@ -907,13 +1031,12 @@ dpif_linux_recv_purge(struct dpif *dpif_) const struct dpif_class dpif_linux_class = { "system", - NULL, /* run */ - NULL, /* wait */ dpif_linux_enumerate, dpif_linux_open, dpif_linux_close, - NULL, /* get_all_names */ dpif_linux_destroy, + NULL, /* run */ + NULL, /* wait */ dpif_linux_get_stats, dpif_linux_get_drop_frags, dpif_linux_set_drop_frags, @@ -986,7 +1109,7 @@ dpif_linux_is_internal_device(const char *name) error = dpif_linux_vport_get(name, &reply, &buf); if (!error) { ofpbuf_delete(buf); - } else if (error != ENODEV) { + } else if (error != ENODEV && error != ENOENT) { VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)", name, strerror(error)); } @@ -994,6 +1117,34 @@ dpif_linux_is_internal_device(const char *name) return reply.type == ODP_VPORT_TYPE_INTERNAL; } +int +dpif_linux_vport_send(int dp_ifindex, uint32_t port_no, + const void *data, size_t size) +{ + struct odp_header *execute; + struct ofpbuf *buf; + size_t actions_ofs; + int error; + + buf = ofpbuf_new(128 + size); + + nl_msg_put_genlmsghdr(buf, 0, odp_packet_family, NLM_F_REQUEST, + ODP_PACKET_CMD_EXECUTE, 1); + + execute = ofpbuf_put_uninit(buf, sizeof *execute); + execute->dp_ifindex = dp_ifindex; + + nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, data, size); + + actions_ofs = nl_msg_start_nested(buf, ODP_PACKET_ATTR_ACTIONS); + nl_msg_put_u32(buf, ODP_ACTION_ATTR_OUTPUT, port_no); + nl_msg_end_nested(buf, actions_ofs); + + error = nl_sock_transact(genl_sock, buf, NULL); + ofpbuf_delete(buf); + return error; +} + static void dpif_linux_port_changed(const struct rtnetlink_link_change *change, void *dpif_) @@ -1007,7 +1158,7 @@ dpif_linux_port_changed(const struct rtnetlink_link_change *change, { /* Our datapath changed, either adding a new port or deleting an * existing one. */ - shash_add_once(&dpif->changed_ports, change->ifname, NULL); + sset_add(&dpif->changed_ports, change->ifname); } } else { dpif->change_error = true; @@ -1074,6 +1225,8 @@ dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport, } if (a[ODP_VPORT_ATTR_MTU]) { vport->mtu = nl_attr_get_u32(a[ODP_VPORT_ATTR_MTU]); + } else { + vport->mtu = INT_MAX; } if (a[ODP_VPORT_ATTR_OPTIONS]) { vport->options = nl_attr_get(a[ODP_VPORT_ATTR_OPTIONS]); @@ -1124,7 +1277,7 @@ dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport, vport->address, ETH_ADDR_LEN); } - if (vport->mtu) { + if (vport->mtu && vport->mtu != INT_MAX) { nl_msg_put_u32(buf, ODP_VPORT_ATTR_MTU, vport->mtu); } @@ -1166,6 +1319,15 @@ dpif_linux_vport_transact(const struct dpif_linux_vport *request, assert((reply != NULL) == (bufp != NULL)); + error = dpif_linux_init(); + if (error) { + if (reply) { + *bufp = NULL; + dpif_linux_vport_init(reply); + } + return error; + } + request_buf = ofpbuf_new(1024); dpif_linux_vport_to_ofpbuf(request, request_buf); error = nl_sock_transact(genl_sock, request_buf, bufp); @@ -1314,7 +1476,7 @@ dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) } /* Clears 'dp' to "empty" values. */ -void +static void dpif_linux_dp_init(struct dpif_linux_dp *dp) { memset(dp, 0, sizeof *dp); @@ -1341,7 +1503,7 @@ dpif_linux_dp_dump_start(struct nl_dump *dump) * result of the command is expected to be of the same form, which is decoded * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the * reply is no longer needed ('reply' will contain pointers into '*bufp'). */ -int +static int dpif_linux_dp_transact(const struct dpif_linux_dp *request, struct dpif_linux_dp *reply, struct ofpbuf **bufp) { @@ -1371,7 +1533,7 @@ dpif_linux_dp_transact(const struct dpif_linux_dp *request, /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'. * The caller must free '*bufp' when the reply is no longer needed ('reply' * will contain pointers into '*bufp'). */ -int +static int dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply, struct ofpbuf **bufp) { @@ -1440,6 +1602,9 @@ dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, if (a[ODP_FLOW_ATTR_TCP_FLAGS]) { flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]); } + if (a[ODP_FLOW_ATTR_USED]) { + flow->used = nl_attr_get(a[ODP_FLOW_ATTR_USED]); + } return 0; } @@ -1452,7 +1617,8 @@ dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, struct odp_header *odp_header; nl_msg_put_genlmsghdr(buf, 0, odp_flow_family, - NLM_F_REQUEST | flow->nlmsg_flags, flow->cmd, 1); + NLM_F_REQUEST | NLM_F_ECHO | flow->nlmsg_flags, + flow->cmd, 1); odp_header = ofpbuf_put_uninit(buf, sizeof *odp_header); odp_header->dp_ifindex = flow->dp_ifindex; @@ -1461,7 +1627,7 @@ dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len); } - if (flow->actions_len) { + if (flow->actions || flow->actions_len) { nl_msg_put_unspec(buf, ODP_FLOW_ATTR_ACTIONS, flow->actions, flow->actions_len); } @@ -1477,7 +1643,7 @@ dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, } /* Clears 'flow' to "empty" values. */ -void +static void dpif_linux_flow_init(struct dpif_linux_flow *flow) { memset(flow, 0, sizeof *flow); @@ -1489,7 +1655,7 @@ dpif_linux_flow_init(struct dpif_linux_flow *flow) * result of the command is expected to be a flow also, which is decoded and * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply * is no longer needed ('reply' will contain pointers into '*bufp'). */ -int +static int dpif_linux_flow_transact(const struct dpif_linux_flow *request, struct dpif_linux_flow *reply, struct ofpbuf **bufp) {