X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif-netdev.c;h=96c5feb428bdaaa508013c917848a5236be476ed;hb=003ce655b7116d18c86a74c50391e54990346931;hp=d1097e9aebff5bd0ef35f1550920346e26f76ea6;hpb=55c955bd8aecc90273ffa93114c1c38f4f9b78ec;p=sliver-openvswitch.git diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d1097e9ae..96c5feb42 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -43,6 +43,7 @@ #include "list.h" #include "meta-flow.h" #include "netdev.h" +#include "netdev-dpdk.h" #include "netdev-vport.h" #include "netlink.h" #include "odp-execute.h" @@ -67,6 +68,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); #define NETDEV_RULE_PRIORITY 0x8000 #define NR_THREADS 1 +/* Use per thread recirc_depth to prevent recirculation loop. */ +#define MAX_RECIRC_DEPTH 5 +DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) /* Configuration parameters. */ enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ @@ -319,7 +323,6 @@ struct pmd_thread { pthread_t thread; int id; atomic_uint change_seq; - char *name; }; /* Interface to netdev-based datapath. */ @@ -349,10 +352,11 @@ static int dpif_netdev_open(const struct dpif_class *, const char *name, bool create, struct dpif **); static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *, int queue_no, int type, - const struct flow *, + const struct miniflow *, const struct nlattr *userdata); static void dp_netdev_execute_actions(struct dp_netdev *dp, - const struct flow *, struct ofpbuf *, bool may_steal, + const struct miniflow *, + struct ofpbuf *, bool may_steal, struct pkt_metadata *, const struct nlattr *actions, size_t actions_len); @@ -1059,13 +1063,15 @@ dp_netdev_flow_cast(const struct cls_rule *cr) } static struct dp_netdev_flow * -dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *flow) +dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key) OVS_EXCLUDED(dp->cls.rwlock) { struct dp_netdev_flow *netdev_flow; + struct cls_rule *rule; fat_rwlock_rdlock(&dp->cls.rwlock); - netdev_flow = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL)); + rule = classifier_lookup_miniflow_first(&dp->cls, key); + netdev_flow = dp_netdev_flow_cast(rule); fat_rwlock_unlock(&dp->cls.rwlock); return netdev_flow; @@ -1135,8 +1141,6 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len, return EINVAL; } - /* Force unwildcard the in_port. */ - mask->in_port.odp_port = u32_to_odp(UINT32_MAX); } else { enum mf_field_id id; /* No mask key, unwildcard everything except fields whose @@ -1155,6 +1159,14 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len, } } + /* Force unwildcard the in_port. + * + * We need to do this even in the case where we unwildcard "everything" + * above because "everything" only includes the 16-bit OpenFlow port number + * mask->in_port.ofp_port, which only covers half of the 32-bit datapath + * port number mask->in_port.odp_port. 
*/ + mask->in_port.odp_port = u32_to_odp(UINT32_MAX); + return 0; } @@ -1284,6 +1296,7 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; struct flow flow; + struct miniflow miniflow; struct flow_wildcards wc; int error; @@ -1297,9 +1310,10 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) if (error) { return error; } + miniflow_init(&miniflow, &flow); ovs_mutex_lock(&dp->flow_mutex); - netdev_flow = dp_netdev_lookup_flow(dp, &flow); + netdev_flow = dp_netdev_lookup_flow(dp, &miniflow); if (!netdev_flow) { if (put->flags & DPIF_FP_CREATE) { if (hmap_count(&dp->flow_table) < MAX_FLOWS) { @@ -1432,6 +1446,7 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, struct dp_netdev_flow_state *state = state_; struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; + struct flow_wildcards wc; int error; ovs_mutex_lock(&iter->mutex); @@ -1454,29 +1469,29 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, return error; } + minimask_expand(&netdev_flow->cr.match.mask, &wc); + if (key) { struct ofpbuf buf; ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf); - odp_flow_key_from_flow(&buf, &netdev_flow->flow, + odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks, netdev_flow->flow.in_port.odp_port); - *key = buf.data; - *key_len = buf.size; + *key = ofpbuf_data(&buf); + *key_len = ofpbuf_size(&buf); } if (key && mask) { struct ofpbuf buf; - struct flow_wildcards wc; ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf); - minimask_expand(&netdev_flow->cr.match.mask, &wc); odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow, odp_to_u32(wc.masks.in_port.odp_port), SIZE_MAX); - *mask = buf.data; - *mask_len = buf.size; + *mask = ofpbuf_data(&buf); + *mask_len = ofpbuf_size(&buf); } if (actions || stats) { @@ -1512,15 +1527,17 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) { struct dp_netdev *dp = get_dp_netdev(dpif); struct pkt_metadata *md = &execute->md; - struct flow key; + struct miniflow key; + uint32_t buf[FLOW_U32S]; - if (execute->packet->size < ETH_HEADER_LEN || - execute->packet->size > UINT16_MAX) { + if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN || + ofpbuf_size(execute->packet) > UINT16_MAX) { return EINVAL; } /* Extract flow key. */ - flow_extract(execute->packet, md, &key); + miniflow_initialize(&key, buf); + miniflow_extract(execute->packet, md, &key); ovs_rwlock_rdlock(&dp->port_rwlock); dp_netdev_execute_actions(dp, &key, execute->packet, false, md, @@ -1733,7 +1750,7 @@ dp_netdev_actions_free(struct dp_netdev_actions *actions) } -inline static void +static void dp_netdev_process_rxq_port(struct dp_netdev *dp, struct dp_netdev_port *port, struct netdev_rxq *rxq) @@ -1859,11 +1876,10 @@ pmd_thread_main(void *f_) int poll_cnt; int i; - f->name = xasprintf("pmd_%u", ovsthread_id_self()); - set_subprogram_name("%s", f->name); poll_cnt = 0; poll_list = NULL; + pmd_thread_setaffinity_cpu(f->id); reload: poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt); atomic_read(&f->change_seq, &port_seq); @@ -1899,7 +1915,6 @@ reload: } free(poll_list); - free(f->name); return NULL; } @@ -1936,7 +1951,7 @@ dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n) /* Each thread will distribute all devices rx-queues among * themselves. 
*/ - xpthread_create(&f->thread, NULL, pmd_thread_main, f); + f->thread = ovs_thread_create("pmd", pmd_thread_main, f); } } @@ -1952,9 +1967,9 @@ dp_netdev_flow_stats_new_cb(void) static void dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, const struct ofpbuf *packet, - const struct flow *key) + const struct miniflow *key) { - uint16_t tcp_flags = ntohs(key->tcp_flags); + uint16_t tcp_flags = miniflow_get_tcp_flags(key); long long int now = time_msec(); struct dp_netdev_flow_stats *bucket; @@ -1964,7 +1979,7 @@ dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, ovs_mutex_lock(&bucket->mutex); bucket->used = MAX(now, bucket->used); bucket->packet_count++; - bucket->byte_count += packet->size; + bucket->byte_count += ofpbuf_size(packet); bucket->tcp_flags |= tcp_flags; ovs_mutex_unlock(&bucket->mutex); } @@ -1989,17 +2004,21 @@ dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type) } static void -dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, - struct pkt_metadata *md) +dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet, + struct pkt_metadata *md) + OVS_REQ_RDLOCK(dp->port_rwlock) { struct dp_netdev_flow *netdev_flow; - struct flow key; + struct miniflow key; + uint32_t buf[FLOW_U32S]; - if (packet->size < ETH_HEADER_LEN) { + if (ofpbuf_size(packet) < ETH_HEADER_LEN) { ofpbuf_delete(packet); return; } - flow_extract(packet, md, &key); + miniflow_initialize(&key, buf); + miniflow_extract(packet, md, &key); + netdev_flow = dp_netdev_lookup_flow(dp, &key); if (netdev_flow) { struct dp_netdev_actions *actions; @@ -2013,15 +2032,27 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, } else if (dp->handler_queues) { dp_netdev_count_packet(dp, DP_STAT_MISS); dp_netdev_output_userspace(dp, packet, - flow_hash_5tuple(&key, 0) % dp->n_handlers, + miniflow_hash_5tuple(&key, 0) + % dp->n_handlers, DPIF_UC_MISS, &key, NULL); ofpbuf_delete(packet); } } +static void +dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, + struct pkt_metadata *md) + OVS_REQ_RDLOCK(dp->port_rwlock) +{ + uint32_t *recirc_depth = recirc_depth_get(); + + *recirc_depth = 0; + dp_netdev_input(dp, packet, md); +} + static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, - int queue_no, int type, const struct flow *flow, + int queue_no, int type, const struct miniflow *key, const struct nlattr *userdata) { struct dp_netdev_queue *q; @@ -2035,6 +2066,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, struct dpif_upcall *upcall = &u->upcall; struct ofpbuf *buf = &u->buf; size_t buf_size; + struct flow flow; upcall->type = type; @@ -2043,13 +2075,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, if (userdata) { buf_size += NLA_ALIGN(userdata->nla_len); } - buf_size += packet->size; + buf_size += ofpbuf_size(packet); ofpbuf_init(buf, buf_size); /* Put ODP flow. */ - odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port); - upcall->key = buf->data; - upcall->key_len = buf->size; + miniflow_expand(key, &flow); + odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port); + upcall->key = ofpbuf_data(buf); + upcall->key_len = ofpbuf_size(buf); /* Put userdata. 
*/ if (userdata) { @@ -2057,8 +2090,9 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, NLA_ALIGN(userdata->nla_len)); } - upcall->packet.data = ofpbuf_put(buf, packet->data, packet->size); - upcall->packet.size = packet->size; + ofpbuf_set_data(&upcall->packet, + ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet))); + ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet)); seq_change(q->seq); @@ -2075,18 +2109,19 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, struct dp_netdev_execute_aux { struct dp_netdev *dp; - const struct flow *key; + const struct miniflow *key; }; static void dp_execute_cb(void *aux_, struct ofpbuf *packet, - const struct pkt_metadata *md OVS_UNUSED, + struct pkt_metadata *md, const struct nlattr *a, bool may_steal) OVS_NO_THREAD_SAFETY_ANALYSIS { struct dp_netdev_execute_aux *aux = aux_; int type = nl_attr_type(a); struct dp_netdev_port *p; + uint32_t *depth = recirc_depth_get(); switch ((enum ovs_action_attr)type) { case OVS_ACTION_ATTR_OUTPUT: @@ -2102,7 +2137,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); dp_netdev_output_userspace(aux->dp, packet, - flow_hash_5tuple(aux->key, 0) + miniflow_hash_5tuple(aux->key, 0) % aux->dp->n_handlers, DPIF_UC_ACTION, aux->key, userdata); @@ -2112,6 +2147,47 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, } break; } + + case OVS_ACTION_ATTR_HASH: { + const struct ovs_action_hash *hash_act; + uint32_t hash; + + hash_act = nl_attr_get(a); + if (hash_act->hash_alg == OVS_HASH_ALG_L4) { + /* Hash need not be symmetric, nor does it need to include + * L2 fields. */ + hash = miniflow_hash_5tuple(aux->key, hash_act->hash_basis); + if (!hash) { + hash = 1; /* 0 is not valid */ + } + + } else { + VLOG_WARN("Unknown hash algorithm specified for the hash action."); + hash = 2; + } + + md->dp_hash = hash; + break; + } + + case OVS_ACTION_ATTR_RECIRC: + if (*depth < MAX_RECIRC_DEPTH) { + struct pkt_metadata recirc_md = *md; + struct ofpbuf *recirc_packet; + + recirc_packet = may_steal ? packet : ofpbuf_clone(packet); + recirc_md.recirc_id = nl_attr_get_u32(a); + + (*depth)++; + dp_netdev_input(aux->dp, recirc_packet, &recirc_md); + (*depth)--; + + break; + } else { + VLOG_WARN("Packet dropped. Max recirculation depth exceeded."); + } + break; + case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_MPLS: @@ -2122,11 +2198,10 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } - } static void -dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key, +dp_netdev_execute_actions(struct dp_netdev *dp, const struct miniflow *key, struct ofpbuf *packet, bool may_steal, struct pkt_metadata *md, const struct nlattr *actions, size_t actions_len)
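
The per-thread recirculation guard introduced in this diff (MAX_RECIRC_DEPTH, the recirc_depth per-thread counter, and the new OVS_ACTION_ATTR_RECIRC case in dp_execute_cb()) boils down to a small pattern: reset the counter when a packet first enters the datapath, increment it around the recursive call back into dp_netdev_input(), and drop the packet once the limit is reached. The standalone sketch below is illustrative only and is not part of the patch: it substitutes C11 _Thread_local for OVS's DEFINE_STATIC_PER_THREAD_DATA()/recirc_depth_get() helpers and a made-up struct fake_packet for struct ofpbuf, but the depth bookkeeping mirrors the diff.

/*
 * Illustrative sketch only -- NOT part of the patch above.  It mimics the
 * recirculation-depth guard added to dp_execute_cb(), using plain C11
 * _Thread_local storage instead of OVS's per-thread-data macros and a stub
 * packet type instead of struct ofpbuf.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_RECIRC_DEPTH 5          /* Same limit as the patch. */

static _Thread_local uint32_t recirc_depth;   /* One counter per thread. */

struct fake_packet { uint32_t recirc_id; };   /* Stand-in for struct ofpbuf. */

static void fake_input(struct fake_packet *);

/* Re-injects a packet into the pipeline unless the per-thread depth limit
 * has been reached, in which case the packet is dropped. */
static void
fake_recirc(struct fake_packet *pkt, uint32_t new_recirc_id)
{
    if (recirc_depth < MAX_RECIRC_DEPTH) {
        pkt->recirc_id = new_recirc_id;
        recirc_depth++;
        fake_input(pkt);            /* May recurse and recirculate again. */
        recirc_depth--;
    } else {
        printf("Packet dropped. Max recirculation depth exceeded.\n");
    }
}

/* Toy pipeline: keep recirculating until recirc_id reaches 10, so the
 * depth limit is what actually stops the loop. */
static void
fake_input(struct fake_packet *pkt)
{
    if (pkt->recirc_id < 10) {
        fake_recirc(pkt, pkt->recirc_id + 1);
    }
}

int
main(void)
{
    struct fake_packet pkt = { .recirc_id = 0 };

    recirc_depth = 0;               /* Analogous to dp_netdev_port_input()
                                     * zeroing the depth on fresh input. */
    fake_input(&pkt);               /* Drops after MAX_RECIRC_DEPTH levels. */
    return 0;
}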