X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif-netdev.c;h=55712dde1674beb117e1ea70db81d3aec82a2a07;hb=27bbe15dec4e1862396b5c4d265f0ced71b49930;hp=ceee233ad76d8a05c01fbbe9421ccbccb0413870;hpb=5794e276b48638c7e44a763481aa051111de1676;p=sliver-openvswitch.git diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ceee233ad..55712dde1 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -68,6 +68,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); #define NETDEV_RULE_PRIORITY 0x8000 #define NR_THREADS 1 +/* Use per thread recirc_depth to prevent recirculation loop. */ +#define MAX_RECIRC_DEPTH 5 +DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) /* Configuration parameters. */ enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ @@ -320,7 +323,6 @@ struct pmd_thread { pthread_t thread; int id; atomic_uint change_seq; - char *name; }; /* Interface to netdev-based datapath. */ @@ -350,10 +352,11 @@ static int dpif_netdev_open(const struct dpif_class *, const char *name, bool create, struct dpif **); static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *, int queue_no, int type, - const struct flow *, + const struct miniflow *, const struct nlattr *userdata); static void dp_netdev_execute_actions(struct dp_netdev *dp, - const struct flow *, struct ofpbuf *, bool may_steal, + const struct miniflow *, + struct ofpbuf *, bool may_steal, struct pkt_metadata *, const struct nlattr *actions, size_t actions_len); @@ -1060,13 +1063,15 @@ dp_netdev_flow_cast(const struct cls_rule *cr) } static struct dp_netdev_flow * -dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *flow) +dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key) OVS_EXCLUDED(dp->cls.rwlock) { struct dp_netdev_flow *netdev_flow; + struct cls_rule *rule; fat_rwlock_rdlock(&dp->cls.rwlock); - netdev_flow = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL)); + rule = classifier_lookup_miniflow_first(&dp->cls, key); + netdev_flow = dp_netdev_flow_cast(rule); fat_rwlock_unlock(&dp->cls.rwlock); return netdev_flow; @@ -1136,8 +1141,6 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len, return EINVAL; } - /* Force unwildcard the in_port. */ - mask->in_port.odp_port = u32_to_odp(UINT32_MAX); } else { enum mf_field_id id; /* No mask key, unwildcard everything except fields whose @@ -1156,6 +1159,14 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len, } } + /* Force unwildcard the in_port. + * + * We need to do this even in the case where we unwildcard "everything" + * above because "everything" only includes the 16-bit OpenFlow port number + * mask->in_port.ofp_port, which only covers half of the 32-bit datapath + * port number mask->in_port.odp_port. */ + mask->in_port.odp_port = u32_to_odp(UINT32_MAX); + return 0; } @@ -1285,6 +1296,7 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; struct flow flow; + struct miniflow miniflow; struct flow_wildcards wc; int error; @@ -1298,9 +1310,10 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) if (error) { return error; } + miniflow_init(&miniflow, &flow); ovs_mutex_lock(&dp->flow_mutex); - netdev_flow = dp_netdev_lookup_flow(dp, &flow); + netdev_flow = dp_netdev_lookup_flow(dp, &miniflow); if (!netdev_flow) { if (put->flags & DPIF_FP_CREATE) { if (hmap_count(&dp->flow_table) < MAX_FLOWS) { @@ -1433,6 +1446,7 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, struct dp_netdev_flow_state *state = state_; struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; + struct flow_wildcards wc; int error; ovs_mutex_lock(&iter->mutex); @@ -1455,29 +1469,29 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, return error; } + minimask_expand(&netdev_flow->cr.match.mask, &wc); + if (key) { struct ofpbuf buf; ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf); - odp_flow_key_from_flow(&buf, &netdev_flow->flow, + odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks, netdev_flow->flow.in_port.odp_port); - *key = buf.data; - *key_len = buf.size; + *key = ofpbuf_data(&buf); + *key_len = ofpbuf_size(&buf); } if (key && mask) { struct ofpbuf buf; - struct flow_wildcards wc; ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf); - minimask_expand(&netdev_flow->cr.match.mask, &wc); odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow, odp_to_u32(wc.masks.in_port.odp_port), SIZE_MAX); - *mask = buf.data; - *mask_len = buf.size; + *mask = ofpbuf_data(&buf); + *mask_len = ofpbuf_size(&buf); } if (actions || stats) { @@ -1513,18 +1527,22 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) { struct dp_netdev *dp = get_dp_netdev(dpif); struct pkt_metadata *md = &execute->md; - struct flow key; + struct { + struct miniflow flow; + uint32_t buf[FLOW_U32S]; + } key; - if (execute->packet->size < ETH_HEADER_LEN || - execute->packet->size > UINT16_MAX) { + if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN || + ofpbuf_size(execute->packet) > UINT16_MAX) { return EINVAL; } /* Extract flow key. */ - flow_extract(execute->packet, md, &key); + miniflow_initialize(&key.flow, key.buf); + miniflow_extract(execute->packet, md, &key.flow); ovs_rwlock_rdlock(&dp->port_rwlock); - dp_netdev_execute_actions(dp, &key, execute->packet, false, md, + dp_netdev_execute_actions(dp, &key.flow, execute->packet, false, md, execute->actions, execute->actions_len); ovs_rwlock_unlock(&dp->port_rwlock); @@ -1860,8 +1878,6 @@ pmd_thread_main(void *f_) int poll_cnt; int i; - f->name = xasprintf("pmd_%u", ovsthread_id_self()); - set_subprogram_name("%s", f->name); poll_cnt = 0; poll_list = NULL; @@ -1901,7 +1917,6 @@ reload: } free(poll_list); - free(f->name); return NULL; } @@ -1938,7 +1953,7 @@ dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n) /* Each thread will distribute all devices rx-queues among * themselves. */ - xpthread_create(&f->thread, NULL, pmd_thread_main, f); + f->thread = ovs_thread_create("pmd", pmd_thread_main, f); } } @@ -1954,9 +1969,9 @@ dp_netdev_flow_stats_new_cb(void) static void dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, const struct ofpbuf *packet, - const struct flow *key) + const struct miniflow *key) { - uint16_t tcp_flags = ntohs(key->tcp_flags); + uint16_t tcp_flags = miniflow_get_tcp_flags(key); long long int now = time_msec(); struct dp_netdev_flow_stats *bucket; @@ -1966,7 +1981,7 @@ dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, ovs_mutex_lock(&bucket->mutex); bucket->used = MAX(now, bucket->used); bucket->packet_count++; - bucket->byte_count += packet->size; + bucket->byte_count += ofpbuf_size(packet); bucket->tcp_flags |= tcp_flags; ovs_mutex_unlock(&bucket->mutex); } @@ -1991,39 +2006,57 @@ dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type) } static void -dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, - struct pkt_metadata *md) +dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet, + struct pkt_metadata *md) + OVS_REQ_RDLOCK(dp->port_rwlock) { struct dp_netdev_flow *netdev_flow; - struct flow key; + struct { + struct miniflow flow; + uint32_t buf[FLOW_U32S]; + } key; - if (packet->size < ETH_HEADER_LEN) { + if (ofpbuf_size(packet) < ETH_HEADER_LEN) { ofpbuf_delete(packet); return; } - flow_extract(packet, md, &key); - netdev_flow = dp_netdev_lookup_flow(dp, &key); + miniflow_initialize(&key.flow, key.buf); + miniflow_extract(packet, md, &key.flow); + + netdev_flow = dp_netdev_lookup_flow(dp, &key.flow); if (netdev_flow) { struct dp_netdev_actions *actions; - dp_netdev_flow_used(netdev_flow, packet, &key); + dp_netdev_flow_used(netdev_flow, packet, &key.flow); actions = dp_netdev_flow_get_actions(netdev_flow); - dp_netdev_execute_actions(dp, &key, packet, true, md, + dp_netdev_execute_actions(dp, &key.flow, packet, true, md, actions->actions, actions->size); dp_netdev_count_packet(dp, DP_STAT_HIT); } else if (dp->handler_queues) { dp_netdev_count_packet(dp, DP_STAT_MISS); dp_netdev_output_userspace(dp, packet, - flow_hash_5tuple(&key, 0) % dp->n_handlers, - DPIF_UC_MISS, &key, NULL); + miniflow_hash_5tuple(&key.flow, 0) + % dp->n_handlers, + DPIF_UC_MISS, &key.flow, NULL); ofpbuf_delete(packet); } } +static void +dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, + struct pkt_metadata *md) + OVS_REQ_RDLOCK(dp->port_rwlock) +{ + uint32_t *recirc_depth = recirc_depth_get(); + + *recirc_depth = 0; + dp_netdev_input(dp, packet, md); +} + static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, - int queue_no, int type, const struct flow *flow, + int queue_no, int type, const struct miniflow *key, const struct nlattr *userdata) { struct dp_netdev_queue *q; @@ -2037,6 +2070,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, struct dpif_upcall *upcall = &u->upcall; struct ofpbuf *buf = &u->buf; size_t buf_size; + struct flow flow; upcall->type = type; @@ -2045,13 +2079,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, if (userdata) { buf_size += NLA_ALIGN(userdata->nla_len); } - buf_size += packet->size; + buf_size += ofpbuf_size(packet); ofpbuf_init(buf, buf_size); /* Put ODP flow. */ - odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port); - upcall->key = buf->data; - upcall->key_len = buf->size; + miniflow_expand(key, &flow); + odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port); + upcall->key = ofpbuf_data(buf); + upcall->key_len = ofpbuf_size(buf); /* Put userdata. */ if (userdata) { @@ -2059,8 +2094,9 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, NLA_ALIGN(userdata->nla_len)); } - upcall->packet.data = ofpbuf_put(buf, packet->data, packet->size); - upcall->packet.size = packet->size; + ofpbuf_set_data(&upcall->packet, + ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet))); + ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet)); seq_change(q->seq); @@ -2077,7 +2113,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, struct dp_netdev_execute_aux { struct dp_netdev *dp; - const struct flow *key; + const struct miniflow *key; }; static void @@ -2089,6 +2125,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, struct dp_netdev_execute_aux *aux = aux_; int type = nl_attr_type(a); struct dp_netdev_port *p; + uint32_t *depth = recirc_depth_get(); switch ((enum ovs_action_attr)type) { case OVS_ACTION_ATTR_OUTPUT: @@ -2104,7 +2141,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); dp_netdev_output_userspace(aux->dp, packet, - flow_hash_5tuple(aux->key, 0) + miniflow_hash_5tuple(aux->key, 0) % aux->dp->n_handlers, DPIF_UC_ACTION, aux->key, userdata); @@ -2115,24 +2152,46 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, break; } - case OVS_ACTION_ATTR_RECIRC: { - const struct ovs_action_recirc *act; + case OVS_ACTION_ATTR_HASH: { + const struct ovs_action_hash *hash_act; + uint32_t hash; - act = nl_attr_get(a); - md->recirc_id = act->recirc_id; - md->dp_hash = 0; - - if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) { - struct flow flow; + hash_act = nl_attr_get(a); + if (hash_act->hash_alg == OVS_HASH_ALG_L4) { + /* Hash need not be symmetric, nor does it need to include + * L2 fields. */ + hash = miniflow_hash_5tuple(aux->key, hash_act->hash_basis); + if (!hash) { + hash = 1; /* 0 is not valid */ + } - flow_extract(packet, md, &flow); - md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias); + } else { + VLOG_WARN("Unknown hash algorithm specified for the hash action."); + hash = 2; } - dp_netdev_port_input(aux->dp, packet, md); + md->dp_hash = hash; break; } + case OVS_ACTION_ATTR_RECIRC: + if (*depth < MAX_RECIRC_DEPTH) { + struct pkt_metadata recirc_md = *md; + struct ofpbuf *recirc_packet; + + recirc_packet = may_steal ? packet : ofpbuf_clone(packet); + recirc_md.recirc_id = nl_attr_get_u32(a); + + (*depth)++; + dp_netdev_input(aux->dp, recirc_packet, &recirc_md); + (*depth)--; + + break; + } else { + VLOG_WARN("Packet dropped. Max recirculation depth exceeded."); + } + break; + case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_MPLS: @@ -2143,11 +2202,10 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } - } static void -dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key, +dp_netdev_execute_actions(struct dp_netdev *dp, const struct miniflow *key, struct ofpbuf *packet, bool may_steal, struct pkt_metadata *md, const struct nlattr *actions, size_t actions_len)