X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdpif-netdev.c;h=c17b52572ebd70eb19ecf5139de0fda88fe0f450;hb=991559357;hp=35724d9bf837be03b3f8b9eab9d33111c273b0af;hpb=d65349ea28bb67a0062a9b4b60ff97538206373b;p=sliver-openvswitch.git diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 35724d9bf..c17b52572 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,8 +22,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -44,9 +45,9 @@ #include "queue.h" #include "timeval.h" #include "util.h" - #include "vlog.h" -#define THIS_MODULE VLM_dpif_netdev + +VLOG_DEFINE_THIS_MODULE(dpif_netdev) /* Configuration parameters. */ enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */ @@ -64,7 +65,7 @@ struct dp_netdev { struct list node; int dp_idx; int open_cnt; - bool deleted; + bool destroyed; bool drop_frags; /* Drop all IP fragments, if true. */ struct ovs_queue queues[N_QUEUES]; /* Messages queued for dpif_recv(). */ @@ -98,10 +99,9 @@ struct dp_netdev_flow { flow_t key; /* Statistics. */ - struct timeval used; /* Last used time, in milliseconds. */ + struct timespec used; /* Last used time. */ long long int packet_count; /* Number of packets matched. */ long long int byte_count; /* Number of bytes matched. */ - uint8_t ip_tos; /* IP TOS value. */ uint16_t tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */ /* Actions. */ @@ -137,7 +137,7 @@ static int do_del_port(struct dp_netdev *, uint16_t port_no); static int dp_netdev_output_control(struct dp_netdev *, const struct ofpbuf *, int queue_no, int port_no, uint32_t arg); static int dp_netdev_execute_actions(struct dp_netdev *, - struct ofpbuf *, flow_t *, + struct ofpbuf *, const flow_t *, const union odp_action *, int n); static struct dpif_netdev * @@ -196,7 +196,7 @@ create_dpif_netdev(struct dp_netdev *dp) dp->open_cnt++; - dpname = xasprintf("netdev:dp%d", dp->dp_idx); + dpname = xasprintf("dp%d", dp->dp_idx); dpif = xmalloc(sizeof *dpif); dpif_init(&dpif->dpif, &dpif_netdev_class, dpname, dp->dp_idx, dp->dp_idx); dpif->dp = dp; @@ -219,7 +219,7 @@ create_dp_netdev(const char *name, int dp_idx, struct dpif **dpifp) } /* Create datapath. */ - dp_netdevs[dp_idx] = dp = xcalloc(1, sizeof *dp); + dp_netdevs[dp_idx] = dp = xzalloc(sizeof *dp); list_push_back(&dp_netdev_list, &dp->node); dp->dp_idx = dp_idx; dp->open_cnt = 0; @@ -237,7 +237,7 @@ create_dp_netdev(const char *name, int dp_idx, struct dpif **dpifp) error = do_add_port(dp, name, ODP_PORT_INTERNAL, ODPP_LOCAL); if (error) { dp_netdev_free(dp); - return error; + return ENODEV; } *dpifp = create_dpif_netdev(dp); @@ -245,20 +245,20 @@ create_dp_netdev(const char *name, int dp_idx, struct dpif **dpifp) } static int -dpif_netdev_open(const char *name UNUSED, char *suffix, bool create, +dpif_netdev_open(const char *name, const char *type OVS_UNUSED, bool create, struct dpif **dpifp) { if (create) { - if (find_dp_netdev(suffix)) { + if (find_dp_netdev(name)) { return EEXIST; } else { - int dp_idx = name_to_dp_idx(suffix); + int dp_idx = name_to_dp_idx(name); if (dp_idx >= 0) { - return create_dp_netdev(suffix, dp_idx, dpifp); + return create_dp_netdev(name, dp_idx, dpifp); } else { /* Scan for unused dp_idx number. */ for (dp_idx = 0; dp_idx < N_DP_NETDEVS; dp_idx++) { - int error = create_dp_netdev(suffix, dp_idx, dpifp); + int error = create_dp_netdev(name, dp_idx, dpifp); if (error != EBUSY) { return error; } @@ -269,7 +269,7 @@ dpif_netdev_open(const char *name UNUSED, char *suffix, bool create, } } } else { - struct dp_netdev *dp = find_dp_netdev(suffix); + struct dp_netdev *dp = find_dp_netdev(name); if (dp) { *dpifp = create_dpif_netdev(dp); return 0; @@ -307,17 +307,17 @@ dpif_netdev_close(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); assert(dp->open_cnt > 0); - if (--dp->open_cnt == 0 && dp->deleted) { + if (--dp->open_cnt == 0 && dp->destroyed) { dp_netdev_free(dp); } free(dpif); } static int -dpif_netdev_delete(struct dpif *dpif) +dpif_netdev_destroy(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); - dp->deleted = true; + dp->destroyed = true; return 0; } @@ -363,6 +363,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, uint16_t flags, { bool internal = (flags & ODP_PORT_INTERNAL) != 0; struct dp_netdev_port *port; + struct netdev_options netdev_options; struct netdev *netdev; int mtu; int error; @@ -370,13 +371,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, uint16_t flags, /* XXX reject devices already in some dp_netdev. */ /* Open and validate network device. */ - if (!internal) { - error = netdev_open(devname, NETDEV_ETH_TYPE_ANY, &netdev); - } else { - char *tapname = xasprintf("tap:%s", devname); - error = netdev_open(tapname, NETDEV_ETH_TYPE_ANY, &netdev); - free(tapname); + memset(&netdev_options, 0, sizeof netdev_options); + netdev_options.name = devname; + netdev_options.ethertype = NETDEV_ETH_TYPE_ANY; + if (internal) { + netdev_options.type = "tap"; } + + error = netdev_open(&netdev_options, &netdev); if (error) { return error; } @@ -468,6 +470,7 @@ static int do_del_port(struct dp_netdev *dp, uint16_t port_no) { struct dp_netdev_port *port; + char *name; int error; error = get_port_by_number(dp, port_no, &port); @@ -480,7 +483,10 @@ do_del_port(struct dp_netdev *dp, uint16_t port_no) dp->n_ports--; dp->serial++; + name = xstrdup(netdev_get_name(port->netdev)); netdev_close(port->netdev); + + free(name); free(port); return 0; @@ -573,7 +579,7 @@ dpif_netdev_port_list(const struct dpif *dpif, struct odp_port *ports, int n) } static int -dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep UNUSED) +dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED) { struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); if (dpif->dp_serial != dpif->dp->serial) { @@ -654,7 +660,7 @@ dp_netdev_lookup_flow(const struct dp_netdev *dp, const flow_t *key) { struct dp_netdev_flow *flow; - assert(key->reserved == 0); + assert(!key->reserved[0] && !key->reserved[1] && !key->reserved[2]); HMAP_FOR_EACH_WITH_HASH (flow, struct dp_netdev_flow, node, flow_hash(key, 0), &dp->flow_table) { if (flow_equal(&flow->key, key)) { @@ -673,9 +679,9 @@ answer_flow_query(struct dp_netdev_flow *flow, uint32_t query_flags, odp_flow->stats.n_packets = flow->packet_count; odp_flow->stats.n_bytes = flow->byte_count; odp_flow->stats.used_sec = flow->used.tv_sec; - odp_flow->stats.used_nsec = flow->used.tv_usec * 1000; + odp_flow->stats.used_nsec = flow->used.tv_nsec; odp_flow->stats.tcp_flags = TCP_FLAGS(flow->tcp_ctl); - odp_flow->stats.ip_tos = flow->ip_tos; + odp_flow->stats.reserved = 0; odp_flow->stats.error = 0; if (odp_flow->n_actions > 0) { unsigned int n = MIN(odp_flow->n_actions, flow->n_actions); @@ -711,41 +717,48 @@ static int dpif_netdev_validate_actions(const union odp_action *actions, int n_actions, bool *mutates) { - unsigned int i; + unsigned int i; *mutates = false; - for (i = 0; i < n_actions; i++) { - const union odp_action *a = &actions[i]; - switch (a->type) { - case ODPAT_OUTPUT: - if (a->output.port >= MAX_PORTS) { - return EINVAL; + for (i = 0; i < n_actions; i++) { + const union odp_action *a = &actions[i]; + switch (a->type) { + case ODPAT_OUTPUT: + if (a->output.port >= MAX_PORTS) { + return EINVAL; } - break; + break; - case ODPAT_OUTPUT_GROUP: + case ODPAT_OUTPUT_GROUP: *mutates = true; - if (a->output_group.group >= N_GROUPS) { - return EINVAL; + if (a->output_group.group >= N_GROUPS) { + return EINVAL; } - break; + break; case ODPAT_CONTROLLER: break; - case ODPAT_SET_VLAN_VID: + case ODPAT_SET_VLAN_VID: *mutates = true; - if (a->vlan_vid.vlan_vid & htons(~VLAN_VID_MASK)) { - return EINVAL; + if (a->vlan_vid.vlan_vid & htons(~VLAN_VID_MASK)) { + return EINVAL; } - break; + break; - case ODPAT_SET_VLAN_PCP: + case ODPAT_SET_VLAN_PCP: *mutates = true; - if (a->vlan_pcp.vlan_pcp & ~VLAN_PCP_MASK) { - return EINVAL; + if (a->vlan_pcp.vlan_pcp & ~(VLAN_PCP_MASK >> VLAN_PCP_SHIFT)) { + return EINVAL; } - break; + break; + + case ODPAT_SET_NW_TOS: + *mutates = true; + if (a->nw_tos.nw_tos & IP_ECN_MASK) { + return EINVAL; + } + break; case ODPAT_STRIP_VLAN: case ODPAT_SET_DL_SRC: @@ -757,11 +770,11 @@ dpif_netdev_validate_actions(const union odp_action *actions, int n_actions, *mutates = true; break; - default: + default: return EOPNOTSUPP; - } - } - return 0; + } + } + return 0; } static int @@ -794,9 +807,9 @@ add_flow(struct dpif *dpif, struct odp_flow *odp_flow) struct dp_netdev_flow *flow; int error; - flow = xcalloc(1, sizeof *flow); + flow = xzalloc(sizeof *flow); flow->key = odp_flow->key; - flow->key.reserved = 0; + memset(flow->key.reserved, 0, sizeof flow->key.reserved); error = set_flow_actions(flow, odp_flow); if (error) { @@ -812,10 +825,9 @@ static void clear_stats(struct dp_netdev_flow *flow) { flow->used.tv_sec = 0; - flow->used.tv_usec = 0; + flow->used.tv_nsec = 0; flow->packet_count = 0; flow->byte_count = 0; - flow->ip_tos = 0; flow->tcp_ctl = 0; } @@ -916,7 +928,7 @@ dpif_netdev_execute(struct dpif *dpif, uint16_t in_port, * if we don't. */ copy = *packet; } - flow_extract(©, in_port, &flow); + flow_extract(©, 0, in_port, &flow); error = dp_netdev_execute_actions(dp, ©, &flow, actions, n_actions); if (mutates) { ofpbuf_uninit(©); @@ -989,17 +1001,12 @@ static void dp_netdev_flow_used(struct dp_netdev_flow *flow, const flow_t *key, const struct ofpbuf *packet) { - time_timeval(&flow->used); + time_timespec(&flow->used); flow->packet_count++; flow->byte_count += packet->size; - if (key->dl_type == htons(ETH_TYPE_IP)) { - struct ip_header *nh = packet->l3; - flow->ip_tos = nh->ip_tos; - - if (key->nw_proto == IPPROTO_TCP) { - struct tcp_header *th = packet->l4; - flow->tcp_ctl |= th->tcp_ctl; - } + if (key->dl_type == htons(ETH_TYPE_IP) && key->nw_proto == IPPROTO_TCP) { + struct tcp_header *th = packet->l4; + flow->tcp_ctl |= th->tcp_ctl; } } @@ -1010,7 +1017,10 @@ dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port, struct dp_netdev_flow *flow; flow_t key; - if (flow_extract(packet, port->port_no, &key) && dp->drop_frags) { + if (packet->size < ETH_HEADER_LEN) { + return; + } + if (flow_extract(packet, 0, port->port_no, &key) && dp->drop_frags) { dp->n_frags++; return; } @@ -1070,14 +1080,25 @@ dp_netdev_wait(void) } } + +/* Modify the TCI field of 'packet'. If a VLAN tag is not present, one + * is added with the TCI field set to 'tci'. If a VLAN tag is present, + * then 'mask' bits are cleared before 'tci' is logically OR'd into the + * TCI field. + * + * Note that the function does not ensure that 'tci' does not affect + * bits outside of 'mask'. + */ static void -dp_netdev_modify_vlan_tci(struct ofpbuf *packet, flow_t *key, - uint16_t tci, uint16_t mask) +dp_netdev_modify_vlan_tci(struct ofpbuf *packet, uint16_t tci, uint16_t mask) { struct vlan_eth_header *veh; + struct eth_header *eh; - if (key->dl_vlan != htons(ODP_VLAN_NONE)) { - /* Modify 'mask' bits, but maintain other TCI bits. */ + eh = packet->l2; + if (packet->size >= sizeof(struct vlan_eth_header) + && eh->eth_type == htons(ETH_TYPE_VLAN)) { + /* Clear 'mask' bits, but maintain other TCI bits. */ veh = packet->l2; veh->veth_tci &= ~htons(mask); veh->veth_tci |= htons(tci); @@ -1095,15 +1116,14 @@ dp_netdev_modify_vlan_tci(struct ofpbuf *packet, flow_t *key, memcpy(veh, &tmp, sizeof tmp); packet->l2 = (char*)packet->l2 - VLAN_HEADER_LEN; } - - key->dl_vlan = veh->veth_tci & htons(VLAN_VID_MASK); } static void -dp_netdev_strip_vlan(struct ofpbuf *packet, flow_t *key) +dp_netdev_strip_vlan(struct ofpbuf *packet) { struct vlan_eth_header *veh = packet->l2; - if (veh->veth_type == htons(ETH_TYPE_VLAN)) { + if (packet->size >= sizeof *veh + && veh->veth_type == htons(ETH_TYPE_VLAN)) { struct eth_header tmp; memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); @@ -1114,29 +1134,25 @@ dp_netdev_strip_vlan(struct ofpbuf *packet, flow_t *key) packet->data = (char*)packet->data + VLAN_HEADER_LEN; packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; memcpy(packet->data, &tmp, sizeof tmp); - - key->dl_vlan = htons(ODP_VLAN_NONE); } } static void -dp_netdev_set_dl_src(struct ofpbuf *packet, - const uint8_t dl_addr[ETH_ADDR_LEN]) +dp_netdev_set_dl_src(struct ofpbuf *packet, const uint8_t dl_addr[ETH_ADDR_LEN]) { struct eth_header *eh = packet->l2; memcpy(eh->eth_src, dl_addr, sizeof eh->eth_src); } static void -dp_netdev_set_dl_dst(struct ofpbuf *packet, - const uint8_t dl_addr[ETH_ADDR_LEN]) +dp_netdev_set_dl_dst(struct ofpbuf *packet, const uint8_t dl_addr[ETH_ADDR_LEN]) { struct eth_header *eh = packet->l2; memcpy(eh->eth_dst, dl_addr, sizeof eh->eth_dst); } static void -dp_netdev_set_nw_addr(struct ofpbuf *packet, flow_t *key, +dp_netdev_set_nw_addr(struct ofpbuf *packet, const flow_t *key, const struct odp_action_nw_addr *a) { if (key->dl_type == htons(ETH_TYPE_IP)) { @@ -1162,7 +1178,24 @@ dp_netdev_set_nw_addr(struct ofpbuf *packet, flow_t *key, } static void -dp_netdev_set_tp_port(struct ofpbuf *packet, flow_t *key, +dp_netdev_set_nw_tos(struct ofpbuf *packet, const flow_t *key, + const struct odp_action_nw_tos *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + uint8_t *field = &nh->ip_tos; + + /* Set the DSCP bits and preserve the ECN bits. */ + uint8_t new = a->nw_tos | (nh->ip_tos & IP_ECN_MASK); + + nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field), + htons((uint16_t)a->nw_tos)); + *field = new; + } +} + +static void +dp_netdev_set_tp_port(struct ofpbuf *packet, const flow_t *key, const struct odp_action_tp_port *a) { if (key->dl_type == htons(ETH_TYPE_IP)) { @@ -1177,6 +1210,8 @@ dp_netdev_set_tp_port(struct ofpbuf *packet, flow_t *key, field = a->type == ODPAT_SET_TP_SRC ? &uh->udp_src : &uh->udp_dst; uh->udp_csum = recalc_csum16(uh->udp_csum, *field, a->tp_port); *field = a->tp_port; + } else { + return; } } } @@ -1221,7 +1256,8 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet, } msg_size = sizeof *header + packet->size; - msg = ofpbuf_new(msg_size); + msg = ofpbuf_new(msg_size + DPIF_RECV_MSG_PADDING); + ofpbuf_reserve(msg, DPIF_RECV_MSG_PADDING); header = ofpbuf_put_uninit(msg, sizeof *header); header->type = queue_no; header->length = msg_size; @@ -1235,7 +1271,7 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet, static int dp_netdev_execute_actions(struct dp_netdev *dp, - struct ofpbuf *packet, flow_t *key, + struct ofpbuf *packet, const flow_t *key, const union odp_action *actions, int n_actions) { int i; @@ -1258,17 +1294,18 @@ dp_netdev_execute_actions(struct dp_netdev *dp, break; case ODPAT_SET_VLAN_VID: - dp_netdev_modify_vlan_tci(packet, key, ntohs(a->vlan_vid.vlan_vid), + dp_netdev_modify_vlan_tci(packet, ntohs(a->vlan_vid.vlan_vid), VLAN_VID_MASK); break; case ODPAT_SET_VLAN_PCP: - dp_netdev_modify_vlan_tci(packet, key, a->vlan_pcp.vlan_pcp << 13, + dp_netdev_modify_vlan_tci(packet, + a->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT, VLAN_PCP_MASK); break; case ODPAT_STRIP_VLAN: - dp_netdev_strip_vlan(packet, key); + dp_netdev_strip_vlan(packet); break; case ODPAT_SET_DL_SRC: @@ -1284,6 +1321,10 @@ dp_netdev_execute_actions(struct dp_netdev *dp, dp_netdev_set_nw_addr(packet, key, &a->nw_addr); break; + case ODPAT_SET_NW_TOS: + dp_netdev_set_nw_tos(packet, key, &a->nw_tos); + break; + case ODPAT_SET_TP_SRC: case ODPAT_SET_TP_DST: dp_netdev_set_tp_port(packet, key, &a->tp_port); @@ -1294,7 +1335,6 @@ dp_netdev_execute_actions(struct dp_netdev *dp, } const struct dpif_class dpif_netdev_class = { - "netdev", "netdev", dp_netdev_run, dp_netdev_wait, @@ -1302,7 +1342,7 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_open, dpif_netdev_close, NULL, /* get_all_names */ - dpif_netdev_delete, + dpif_netdev_destroy, dpif_netdev_get_stats, dpif_netdev_get_drop_frags, dpif_netdev_set_drop_frags, @@ -1323,6 +1363,9 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_execute, dpif_netdev_recv_get_mask, dpif_netdev_recv_set_mask, + NULL, /* get_sflow_probability */ + NULL, /* set_sflow_probability */ + NULL, /* queue_to_priority */ dpif_netdev_recv, dpif_netdev_recv_wait, };