X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif-netdev.c;h=926464e266ff202f6f6c740b2c381b0631cc0633;hb=6ff686f2bc2afcfb0a9ad9793d834979dc9870d6;hp=acc14a8b251b0c8244596ef92007c5449ca0bfbc;hpb=6767a2cce9a6412b3a41a927c4d56b9f0e1ec36f;p=sliver-openvswitch.git diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index acc14a8b2..926464e26 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -91,7 +91,6 @@ struct dp_netdev { long long int n_lost; /* Number of misses not passed to client. */ /* Ports. */ - int n_ports; struct dp_netdev_port *ports[MAX_PORTS]; struct list port_list; unsigned int serial; @@ -114,7 +113,7 @@ struct dp_netdev_flow { long long int used; /* Last used time, in monotonic msecs. */ long long int packet_count; /* Number of packets matched. */ long long int byte_count; /* Number of bytes matched. */ - uint16_t tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */ + ovs_be16 tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */ /* Actions. */ struct nlattr *actions; @@ -146,7 +145,7 @@ static int do_add_port(struct dp_netdev *, const char *devname, static int do_del_port(struct dp_netdev *, uint16_t port_no); static int dpif_netdev_open(const struct dpif_class *, const char *name, bool create, struct dpif **); -static int dp_netdev_output_control(struct dp_netdev *, const struct ofpbuf *, +static int dp_netdev_output_userspace(struct dp_netdev *, const struct ofpbuf *, int queue_no, const struct flow *, uint64_t arg); static int dp_netdev_execute_actions(struct dp_netdev *, @@ -204,7 +203,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class, } hmap_init(&dp->flow_table); list_init(&dp->port_list); - error = do_add_port(dp, name, "internal", ODPP_LOCAL); + error = do_add_port(dp, name, "internal", OVSP_LOCAL); if (error) { dp_netdev_free(dp); return error; @@ -265,10 +264,10 @@ dp_netdev_purge_queues(struct dp_netdev *dp) static void dp_netdev_free(struct dp_netdev *dp) { + struct dp_netdev_port *port, *next; + 
dp_netdev_flow_flush(dp); - while (dp->n_ports > 0) { - struct dp_netdev_port *port = CONTAINER_OF( - dp->port_list.next, struct dp_netdev_port, node); + LIST_FOR_EACH_SAFE (port, next, node, &dp->port_list) { do_del_port(dp, port->port_no); } dp_netdev_purge_queues(dp); @@ -298,10 +297,11 @@ dpif_netdev_destroy(struct dpif *dpif) } static int -dpif_netdev_get_stats(const struct dpif *dpif, struct odp_stats *stats) +dpif_netdev_get_stats(const struct dpif *dpif, struct ovs_dp_stats *stats) { struct dp_netdev *dp = get_dp_netdev(dpif); memset(stats, 0, sizeof *stats); + stats->n_flows = hmap_count(&dp->flow_table); stats->n_frags = dp->n_frags; stats->n_hit = dp->n_hit; stats->n_missed = dp->n_missed; @@ -330,14 +330,13 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, uint16_t port_no) { struct dp_netdev_port *port; - struct netdev_options netdev_options; struct netdev *netdev; bool internal; int mtu; int error; /* XXX reject devices already in some dp_netdev. */ - if (type[0] == '\0' || !strcmp(type, "system")) { + if (type[0] == '\0' || !strcmp(type, "system") || !strcmp(type, "dummy")) { internal = false; } else if (!strcmp(type, "internal")) { internal = true; @@ -347,22 +346,27 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, } /* Open and validate network device. 
*/ - memset(&netdev_options, 0, sizeof netdev_options); - netdev_options.name = devname; - netdev_options.ethertype = NETDEV_ETH_TYPE_ANY; if (dp->class == &dpif_dummy_class) { - netdev_options.type = "dummy"; + type = "dummy"; } else if (internal) { - netdev_options.type = "tap"; + type = "tap"; } - error = netdev_open(&netdev_options, &netdev); + error = netdev_open(devname, type, &netdev); if (error) { return error; } /* XXX reject loopback devices */ /* XXX reject non-Ethernet devices */ + error = netdev_listen(netdev); + if (error) { + VLOG_ERR("%s: cannot receive packets on this network device (%s)", + devname, strerror(errno)); + netdev_close(netdev); + return error; + } + error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false); if (error) { netdev_close(netdev); @@ -374,14 +378,13 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, port->netdev = netdev; port->internal = internal; - netdev_get_mtu(netdev, &mtu); - if (mtu > max_mtu) { + error = netdev_get_mtu(netdev, &mtu); + if (!error) { max_mtu = mtu; } list_push_back(&dp->port_list, &port->node); dp->ports[port_no] = port; - dp->n_ports++; dp->serial++; return 0; @@ -408,7 +411,7 @@ static int dpif_netdev_port_del(struct dpif *dpif, uint16_t port_no) { struct dp_netdev *dp = get_dp_netdev(dpif); - return port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no); + return port_no == OVSP_LOCAL ? EINVAL : do_del_port(dp, port_no); } static bool @@ -459,7 +462,6 @@ do_del_port(struct dp_netdev *dp, uint16_t port_no) list_remove(&port->node); dp->ports[port->port_no] = NULL; - dp->n_ports--; dp->serial++; name = xstrdup(netdev_get_name(port->netdev)); @@ -652,6 +654,12 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len, return EINVAL; } + if (flow->in_port < OFPP_MAX + ? 
flow->in_port >= MAX_PORTS + : flow->in_port != OFPP_LOCAL && flow->in_port != OFPP_NONE) { + return EINVAL; + } + return 0; } @@ -701,43 +709,42 @@ dpif_netdev_validate_actions(const struct nlattr *actions, } switch (type) { - case ODP_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_OUTPUT: if (nl_attr_get_u32(a) >= MAX_PORTS) { return EINVAL; } break; - case ODP_ACTION_ATTR_CONTROLLER: - case ODP_ACTION_ATTR_DROP_SPOOFED_ARP: + case OVS_ACTION_ATTR_USERSPACE: break; - case ODP_ACTION_ATTR_SET_DL_TCI: + case OVS_ACTION_ATTR_PUSH_VLAN: *mutates = true; if (nl_attr_get_be16(a) & htons(VLAN_CFI)) { return EINVAL; } break; - case ODP_ACTION_ATTR_SET_NW_TOS: + case OVS_ACTION_ATTR_SET_NW_TOS: *mutates = true; if (nl_attr_get_u8(a) & IP_ECN_MASK) { return EINVAL; } break; - case ODP_ACTION_ATTR_STRIP_VLAN: - case ODP_ACTION_ATTR_SET_DL_SRC: - case ODP_ACTION_ATTR_SET_DL_DST: - case ODP_ACTION_ATTR_SET_NW_SRC: - case ODP_ACTION_ATTR_SET_NW_DST: - case ODP_ACTION_ATTR_SET_TP_SRC: - case ODP_ACTION_ATTR_SET_TP_DST: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_SET_DL_SRC: + case OVS_ACTION_ATTR_SET_DL_DST: + case OVS_ACTION_ATTR_SET_NW_SRC: + case OVS_ACTION_ATTR_SET_NW_DST: + case OVS_ACTION_ATTR_SET_TP_SRC: + case OVS_ACTION_ATTR_SET_TP_DST: *mutates = true; break; - case ODP_ACTION_ATTR_SET_TUNNEL: - case ODP_ACTION_ATTR_SET_PRIORITY: - case ODP_ACTION_ATTR_POP_PRIORITY: + case OVS_ACTION_ATTR_SET_TUNNEL: + case OVS_ACTION_ATTR_SET_PRIORITY: + case OVS_ACTION_ATTR_POP_PRIORITY: default: return EOPNOTSUPP; } @@ -872,7 +879,7 @@ struct dp_netdev_flow_state { uint32_t bucket; uint32_t offset; struct nlattr *actions; - uint32_t keybuf[ODPUTIL_FLOW_KEY_U32S]; + struct odputil_keybuf keybuf; struct dpif_flow_stats stats; }; @@ -909,9 +916,8 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_, if (key) { struct ofpbuf buf; - ofpbuf_use_stack(&buf, state->keybuf, sizeof state->keybuf); + ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf); 
odp_flow_key_from_flow(&buf, &flow->key); - assert(buf.base == state->keybuf); *key = buf.data; *key_len = buf.size; @@ -945,6 +951,7 @@ dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) static int dpif_netdev_execute(struct dpif *dpif, + const struct nlattr *key_attrs, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *packet) { @@ -976,8 +983,13 @@ dpif_netdev_execute(struct dpif *dpif, * if we don't. */ copy = *packet; } + flow_extract(&copy, 0, -1, &key); - error = dp_netdev_execute_actions(dp, &copy, &key, actions, actions_len); + error = dpif_netdev_flow_from_nlattrs(key_attrs, key_len, &key); + if (!error) { + error = dp_netdev_execute_actions(dp, &copy, &key, + actions, actions_len); + } if (mutates) { ofpbuf_uninit(&copy); } @@ -1086,89 +1098,51 @@ dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port, dp->n_hit++; } else { dp->n_missed++; - dp_netdev_output_control(dp, packet, DPIF_UC_MISS, &key, 0); + dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, 0); } } static void -dp_netdev_run(void) +dpif_netdev_run(struct dpif *dpif) { - struct shash_node *node; + struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_netdev_port *port; struct ofpbuf packet; ofpbuf_init(&packet, DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + max_mtu); - SHASH_FOR_EACH (node, &dp_netdevs) { - struct dp_netdev *dp = node->data; - struct dp_netdev_port *port; - LIST_FOR_EACH (port, node, &dp->port_list) { - int error; - - /* Reset packet contents.
*/ - ofpbuf_clear(&packet); - ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM); - - error = netdev_recv(port->netdev, &packet); - if (!error) { - dp_netdev_port_input(dp, port, &packet); - } else if (error != EAGAIN && error != EOPNOTSUPP) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_ERR_RL(&rl, "error receiving data from %s: %s", - netdev_get_name(port->netdev), strerror(error)); - } + LIST_FOR_EACH (port, node, &dp->port_list) { + int error; + + /* Reset packet contents. */ + ofpbuf_clear(&packet); + ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM); + + error = netdev_recv(port->netdev, &packet); + if (!error) { + dp_netdev_port_input(dp, port, &packet); + } else if (error != EAGAIN && error != EOPNOTSUPP) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_ERR_RL(&rl, "error receiving data from %s: %s", + netdev_get_name(port->netdev), strerror(error)); } } ofpbuf_uninit(&packet); } static void -dp_netdev_wait(void) +dpif_netdev_wait(struct dpif *dpif) { - struct shash_node *node; - - SHASH_FOR_EACH (node, &dp_netdevs) { - struct dp_netdev *dp = node->data; - struct dp_netdev_port *port; - - LIST_FOR_EACH (port, node, &dp->port_list) { - netdev_recv_wait(port->netdev); - } - } -} - - -/* Modify the TCI field of 'packet'. If a VLAN tag is present, its TCI field - * is replaced by 'tci'. If a VLAN tag is not present, one is added with the - * TCI field set to 'tci'. - */ -static void -dp_netdev_set_dl_tci(struct ofpbuf *packet, uint16_t tci) -{ - struct vlan_eth_header *veh; - struct eth_header *eh; + struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_netdev_port *port; - eh = packet->l2; - if (packet->size >= sizeof(struct vlan_eth_header) - && eh->eth_type == htons(ETH_TYPE_VLAN)) { - veh = packet->l2; - veh->veth_tci = tci; - } else { - /* Insert new 802.1Q header. 
*/ - struct vlan_eth_header tmp; - memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); - memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); - tmp.veth_type = htons(ETH_TYPE_VLAN); - tmp.veth_tci = tci; - tmp.veth_next_type = eh->eth_type; - - veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN); - memcpy(veh, &tmp, sizeof tmp); - packet->l2 = (char*)packet->l2 - VLAN_HEADER_LEN; + LIST_FOR_EACH (port, node, &dp->port_list) { + netdev_recv_wait(port->netdev); } } static void -dp_netdev_strip_vlan(struct ofpbuf *packet) +dp_netdev_pop_vlan(struct ofpbuf *packet) { struct vlan_eth_header *veh = packet->l2; if (packet->size >= sizeof *veh @@ -1213,9 +1187,9 @@ dp_netdev_set_nw_addr(struct ofpbuf *packet, const struct flow *key, struct ip_header *nh = packet->l3; ovs_be32 ip = nl_attr_get_be32(a); uint16_t type = nl_attr_type(a); - uint32_t *field; + ovs_be32 *field; - field = type == ODP_ACTION_ATTR_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst; + field = type == OVS_ACTION_ATTR_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst; if (key->nw_proto == IPPROTO_TCP && packet->l7) { struct tcp_header *th = packet->l4; th->tcp_csum = recalc_csum32(th->tcp_csum, *field, ip); @@ -1224,7 +1198,7 @@ dp_netdev_set_nw_addr(struct ofpbuf *packet, const struct flow *key, if (uh->udp_csum) { uh->udp_csum = recalc_csum32(uh->udp_csum, *field, ip); if (!uh->udp_csum) { - uh->udp_csum = 0xffff; + uh->udp_csum = htons(0xffff); } } } @@ -1257,17 +1231,17 @@ dp_netdev_set_tp_port(struct ofpbuf *packet, const struct flow *key, if (is_ip(packet, key)) { uint16_t type = nl_attr_type(a); ovs_be16 port = nl_attr_get_be16(a); - uint16_t *field; + ovs_be16 *field; if (key->nw_proto == IPPROTO_TCP && packet->l7) { struct tcp_header *th = packet->l4; - field = (type == ODP_ACTION_ATTR_SET_TP_SRC + field = (type == OVS_ACTION_ATTR_SET_TP_SRC ? 
&th->tcp_src : &th->tcp_dst); th->tcp_csum = recalc_csum16(th->tcp_csum, *field, port); *field = port; } else if (key->nw_proto == IPPROTO_UDP && packet->l7) { struct udp_header *uh = packet->l4; - field = (type == ODP_ACTION_ATTR_SET_TP_SRC + field = (type == OVS_ACTION_ATTR_SET_TP_SRC ? &uh->udp_src : &uh->udp_dst); uh->udp_csum = recalc_csum16(uh->udp_csum, *field, port); *field = port; @@ -1288,7 +1262,7 @@ dp_netdev_output_port(struct dp_netdev *dp, struct ofpbuf *packet, } static int -dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet, +dp_netdev_output_userspace(struct dp_netdev *dp, const struct ofpbuf *packet, int queue_no, const struct flow *flow, uint64_t arg) { struct dp_netdev_queue *q = &dp->queues[queue_no]; @@ -1315,39 +1289,11 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet, upcall->key_len = key_len; upcall->userdata = arg; - q->upcalls[++q->head & QUEUE_MASK] = upcall; + q->upcalls[q->head++ & QUEUE_MASK] = upcall; return 0; } -/* Returns true if 'packet' is an invalid Ethernet+IPv4 ARP packet: one with - * screwy or truncated header fields or one whose inner and outer Ethernet - * address differ. 
*/ -static bool -dp_netdev_is_spoofed_arp(struct ofpbuf *packet, const struct flow *key) -{ - struct arp_eth_header *arp; - struct eth_header *eth; - ptrdiff_t l3_size; - - if (key->dl_type != htons(ETH_TYPE_ARP)) { - return false; - } - - l3_size = (char *) ofpbuf_end(packet) - (char *) packet->l3; - if (l3_size < sizeof(struct arp_eth_header)) { - return true; - } - - eth = packet->l2; - arp = packet->l3; - return (arp->ar_hrd != htons(ARP_HRD_ETHERNET) - || arp->ar_pro != htons(ARP_PRO_IP) - || arp->ar_hln != ETH_HEADER_LEN - || arp->ar_pln != 4 - || !eth_addr_equals(arp->ar_sha, eth->eth_src)); -} - static int dp_netdev_execute_actions(struct dp_netdev *dp, struct ofpbuf *packet, struct flow *key, @@ -1359,49 +1305,44 @@ dp_netdev_execute_actions(struct dp_netdev *dp, NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) { switch (nl_attr_type(a)) { - case ODP_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_OUTPUT: dp_netdev_output_port(dp, packet, nl_attr_get_u32(a)); break; - case ODP_ACTION_ATTR_CONTROLLER: - dp_netdev_output_control(dp, packet, DPIF_UC_ACTION, + case OVS_ACTION_ATTR_USERSPACE: + dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, key, nl_attr_get_u64(a)); break; - case ODP_ACTION_ATTR_SET_DL_TCI: - dp_netdev_set_dl_tci(packet, nl_attr_get_be16(a)); + case OVS_ACTION_ATTR_PUSH_VLAN: + eth_push_vlan(packet, nl_attr_get_be16(a)); break; - case ODP_ACTION_ATTR_STRIP_VLAN: - dp_netdev_strip_vlan(packet); + case OVS_ACTION_ATTR_POP_VLAN: + dp_netdev_pop_vlan(packet); break; - case ODP_ACTION_ATTR_SET_DL_SRC: + case OVS_ACTION_ATTR_SET_DL_SRC: dp_netdev_set_dl_src(packet, nl_attr_get_unspec(a, ETH_ADDR_LEN)); break; - case ODP_ACTION_ATTR_SET_DL_DST: + case OVS_ACTION_ATTR_SET_DL_DST: dp_netdev_set_dl_dst(packet, nl_attr_get_unspec(a, ETH_ADDR_LEN)); break; - case ODP_ACTION_ATTR_SET_NW_SRC: - case ODP_ACTION_ATTR_SET_NW_DST: + case OVS_ACTION_ATTR_SET_NW_SRC: + case OVS_ACTION_ATTR_SET_NW_DST: dp_netdev_set_nw_addr(packet, key, a); break; - case 
ODP_ACTION_ATTR_SET_NW_TOS: + case OVS_ACTION_ATTR_SET_NW_TOS: dp_netdev_set_nw_tos(packet, key, nl_attr_get_u8(a)); break; - case ODP_ACTION_ATTR_SET_TP_SRC: - case ODP_ACTION_ATTR_SET_TP_DST: + case OVS_ACTION_ATTR_SET_TP_SRC: + case OVS_ACTION_ATTR_SET_TP_DST: dp_netdev_set_tp_port(packet, key, a); break; - - case ODP_ACTION_ATTR_DROP_SPOOFED_ARP: - if (dp_netdev_is_spoofed_arp(packet, key)) { - return 0; - } } } return 0; @@ -1409,12 +1350,12 @@ dp_netdev_execute_actions(struct dp_netdev *dp, const struct dpif_class dpif_netdev_class = { "netdev", - dp_netdev_run, - dp_netdev_wait, NULL, /* enumerate */ dpif_netdev_open, dpif_netdev_close, dpif_netdev_destroy, + dpif_netdev_run, + dpif_netdev_wait, dpif_netdev_get_stats, dpif_netdev_get_drop_frags, dpif_netdev_set_drop_frags, @@ -1438,8 +1379,6 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_execute, dpif_netdev_recv_get_mask, dpif_netdev_recv_set_mask, - NULL, /* get_sflow_probability */ - NULL, /* set_sflow_probability */ NULL, /* queue_to_priority */ dpif_netdev_recv, dpif_netdev_recv_wait,