X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Flearning-switch.c;h=ca579116fea2a52bca481ea15feddc0e0ff272e7;hb=HEAD;hp=d53f147a3656952caa6568e2ed7314021f344a1e;hpb=002c3f1734454492d921de29f2d5be6428ed1a18;p=sliver-openvswitch.git diff --git a/lib/learning-switch.c b/lib/learning-switch.c index d53f147a3..ca579116f 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,12 +48,19 @@ VLOG_DEFINE_THIS_MODULE(learning_switch); struct lswitch_port { struct hmap_node hmap_node; /* Hash node for port number. */ - uint16_t port_no; /* OpenFlow port number, in host byte order. */ + ofp_port_t port_no; /* OpenFlow port number. */ uint32_t queue_id; /* OpenFlow queue number. */ }; +enum lswitch_state { + S_CONNECTING, /* Waiting for connection to complete. */ + S_FEATURES_REPLY, /* Waiting for features reply. */ + S_SWITCHING, /* Switching flows. */ +}; + struct lswitch { struct rconn *rconn; + enum lswitch_state state; /* If nonnegative, the switch sets up flows that expire after the given * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT). @@ -62,7 +69,6 @@ struct lswitch { enum ofputil_protocol protocol; unsigned long long int datapath_id; - time_t last_features_request; struct mac_learning *ml; /* NULL to act as hub instead of switch. */ struct flow_wildcards wc; /* Wildcards to apply to flows. */ bool action_normal; /* Use OFPP_NORMAL? */ @@ -78,6 +84,12 @@ struct lswitch { /* If true, do not reply to any messages from the switch (for debugging * fail-open mode). */ bool mute; + + /* Optional "flow mod" requests to send to the switch at connection time, + * to set up the flow table. */ + const struct ofputil_flow_mod *default_flows; + size_t n_default_flows; + enum ofputil_protocol usable_protocols; }; /* The log messages here could actually be useful in debugging, so keep the @@ -100,37 +112,39 @@ static void process_echo_request(struct lswitch *, const struct ofp_header *); struct lswitch * lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg) { - enum ofputil_protocol protocol; struct lswitch *sw; + uint32_t ofpfw; sw = xzalloc(sizeof *sw); sw->rconn = rconn; + sw->state = S_CONNECTING; sw->max_idle = cfg->max_idle; sw->datapath_id = 0; - sw->last_features_request = time_now() - 1; sw->ml = (cfg->mode == LSW_LEARN ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME) : NULL); sw->action_normal = cfg->mode == LSW_NORMAL; - flow_wildcards_init_exact(&sw->wc); - if (cfg->wildcards) { - uint32_t ofpfw; - - if (cfg->wildcards == UINT32_MAX) { - /* Try to wildcard as many fields as possible, but we cannot - * wildcard all fields. We need in_port to detect moves. We need - * Ethernet source and dest and VLAN VID to do L2 learning. */ - ofpfw = (OFPFW10_DL_TYPE | OFPFW10_DL_VLAN_PCP - | OFPFW10_NW_SRC_ALL | OFPFW10_NW_DST_ALL - | OFPFW10_NW_TOS | OFPFW10_NW_PROTO - | OFPFW10_TP_SRC | OFPFW10_TP_DST); - } else { - ofpfw = cfg->wildcards; - } + switch (cfg->wildcards) { + case 0: + ofpfw = 0; + break; - ofputil_wildcard_from_ofpfw10(ofpfw, &sw->wc); + case UINT32_MAX: + /* Try to wildcard as many fields as possible, but we cannot + * wildcard all fields. We need in_port to detect moves. We need + * Ethernet source and dest and VLAN VID to do L2 learning. */ + ofpfw = (OFPFW10_DL_TYPE | OFPFW10_DL_VLAN_PCP + | OFPFW10_NW_SRC_ALL | OFPFW10_NW_DST_ALL + | OFPFW10_NW_TOS | OFPFW10_NW_PROTO + | OFPFW10_TP_SRC | OFPFW10_TP_DST); + break; + + default: + ofpfw = cfg->wildcards; + break; } + ofputil_wildcard_from_ofpfw10(ofpfw, &sw->wc); sw->default_queue = cfg->default_queue; hmap_init(&sw->queue_numbers); @@ -146,12 +160,24 @@ lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg) } } + sw->default_flows = cfg->default_flows; + sw->n_default_flows = cfg->n_default_flows; + sw->usable_protocols = cfg->usable_protocols; + sw->queued = rconn_packet_counter_create(); + + return sw; +} + +static void +lswitch_handshake(struct lswitch *sw) +{ + enum ofputil_protocol protocol; + send_features_request(sw); - protocol = ofputil_protocol_from_ofp_version(rconn_get_version(rconn)); - if (cfg->default_flows) { - enum ofputil_protocol usable_protocols; + protocol = ofputil_protocol_from_ofp_version(rconn_get_version(sw->rconn)); + if (sw->default_flows) { struct ofpbuf *msg = NULL; int error = 0; size_t i; @@ -163,32 +189,32 @@ lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg) * This could be improved by actually negotiating a mutually acceptable * flow format with the switch, but that would require an asynchronous * state machine. This version ought to work fine in practice. */ - usable_protocols = ofputil_flow_mod_usable_protocols( - cfg->default_flows, cfg->n_default_flows); - if (!(protocol & usable_protocols)) { - enum ofputil_protocol want = rightmost_1bit(usable_protocols); + if (!(protocol & sw->usable_protocols)) { + enum ofputil_protocol want = rightmost_1bit(sw->usable_protocols); while (!error) { msg = ofputil_encode_set_protocol(protocol, want, &protocol); if (!msg) { break; } - error = rconn_send(rconn, msg, NULL); + error = rconn_send(sw->rconn, msg, NULL); } } + if (protocol & sw->usable_protocols) { + for (i = 0; !error && i < sw->n_default_flows; i++) { + msg = ofputil_encode_flow_mod(&sw->default_flows[i], protocol); + error = rconn_send(sw->rconn, msg, NULL); + } - for (i = 0; !error && i < cfg->n_default_flows; i++) { - msg = ofputil_encode_flow_mod(&cfg->default_flows[i], protocol); - error = rconn_send(rconn, msg, NULL); - } - - if (error) { - VLOG_INFO_RL(&rl, "%s: failed to queue default flows (%s)", - rconn_get_name(rconn), strerror(error)); + if (error) { + VLOG_INFO_RL(&rl, "%s: failed to queue default flows (%s)", + rconn_get_name(sw->rconn), ovs_strerror(error)); + } + } else { + VLOG_INFO_RL(&rl, "%s: failed to set usable protocol", + rconn_get_name(sw->rconn)); } } sw->protocol = protocol; - - return sw; } bool @@ -210,7 +236,7 @@ lswitch_destroy(struct lswitch *sw) free(node); } shash_destroy(&sw->queue_names); - mac_learning_destroy(sw->ml); + mac_learning_unref(sw->ml); rconn_packet_counter_destroy(sw->queued); free(sw); } @@ -224,11 +250,21 @@ lswitch_run(struct lswitch *sw) int i; if (sw->ml) { - mac_learning_run(sw->ml, NULL); + ovs_rwlock_wrlock(&sw->ml->rwlock); + mac_learning_run(sw->ml); + ovs_rwlock_unlock(&sw->ml->rwlock); } rconn_run(sw->rconn); + if (sw->state == S_CONNECTING) { + if (rconn_get_version(sw->rconn) != -1) { + lswitch_handshake(sw); + sw->state = S_FEATURES_REPLY; + } + return; + } + for (i = 0; i < 50; i++) { struct ofpbuf *msg; @@ -248,9 +284,11 @@ void lswitch_wait(struct lswitch *sw) { if (sw->ml) { + ovs_rwlock_rdlock(&sw->ml->rwlock); mac_learning_wait(sw->ml); + ovs_rwlock_unlock(&sw->ml->rwlock); } - rconn_run(sw->rconn); + rconn_run_wait(sw->rconn); rconn_recv_wait(sw->rconn); } @@ -269,24 +307,29 @@ lswitch_process_packet(struct lswitch *sw, const struct ofpbuf *msg) return; } - if (sw->datapath_id == 0 + if (sw->state == S_FEATURES_REPLY && type != OFPTYPE_ECHO_REQUEST && type != OFPTYPE_FEATURES_REPLY) { - send_features_request(sw); return; } switch (type) { case OFPTYPE_ECHO_REQUEST: - process_echo_request(sw, msg->data); + process_echo_request(sw, ofpbuf_data(msg)); break; case OFPTYPE_FEATURES_REPLY: - process_switch_features(sw, msg->data); + if (sw->state == S_FEATURES_REPLY) { + if (!process_switch_features(sw, ofpbuf_data(msg))) { + sw->state = S_SWITCHING; + } else { + rconn_disconnect(sw->rconn); + } + } break; case OFPTYPE_PACKET_IN: - process_packet_in(sw, msg->data); + process_packet_in(sw, ofpbuf_data(msg)); break; case OFPTYPE_FLOW_REMOVED: @@ -303,9 +346,13 @@ lswitch_process_packet(struct lswitch *sw, const struct ofpbuf *msg) case OFPTYPE_PORT_STATUS: case OFPTYPE_PACKET_OUT: case OFPTYPE_FLOW_MOD: + case OFPTYPE_GROUP_MOD: case OFPTYPE_PORT_MOD: + case OFPTYPE_TABLE_MOD: case OFPTYPE_BARRIER_REQUEST: case OFPTYPE_BARRIER_REPLY: + case OFPTYPE_QUEUE_GET_CONFIG_REQUEST: + case OFPTYPE_QUEUE_GET_CONFIG_REPLY: case OFPTYPE_DESC_STATS_REQUEST: case OFPTYPE_DESC_STATS_REPLY: case OFPTYPE_FLOW_STATS_REQUEST: @@ -322,20 +369,40 @@ lswitch_process_packet(struct lswitch *sw, const struct ofpbuf *msg) case OFPTYPE_PORT_DESC_STATS_REPLY: case OFPTYPE_ROLE_REQUEST: case OFPTYPE_ROLE_REPLY: + case OFPTYPE_ROLE_STATUS: case OFPTYPE_SET_FLOW_FORMAT: case OFPTYPE_FLOW_MOD_TABLE_ID: case OFPTYPE_SET_PACKET_IN_FORMAT: case OFPTYPE_FLOW_AGE: - case OFPTYPE_SET_ASYNC_CONFIG: case OFPTYPE_SET_CONTROLLER_ID: case OFPTYPE_FLOW_MONITOR_STATS_REQUEST: case OFPTYPE_FLOW_MONITOR_STATS_REPLY: case OFPTYPE_FLOW_MONITOR_CANCEL: case OFPTYPE_FLOW_MONITOR_PAUSED: case OFPTYPE_FLOW_MONITOR_RESUMED: + case OFPTYPE_GET_ASYNC_REQUEST: + case OFPTYPE_GET_ASYNC_REPLY: + case OFPTYPE_SET_ASYNC_CONFIG: + case OFPTYPE_METER_MOD: + case OFPTYPE_GROUP_STATS_REQUEST: + case OFPTYPE_GROUP_STATS_REPLY: + case OFPTYPE_GROUP_DESC_STATS_REQUEST: + case OFPTYPE_GROUP_DESC_STATS_REPLY: + case OFPTYPE_GROUP_FEATURES_STATS_REQUEST: + case OFPTYPE_GROUP_FEATURES_STATS_REPLY: + case OFPTYPE_METER_STATS_REQUEST: + case OFPTYPE_METER_STATS_REPLY: + case OFPTYPE_METER_CONFIG_STATS_REQUEST: + case OFPTYPE_METER_CONFIG_STATS_REPLY: + case OFPTYPE_METER_FEATURES_STATS_REQUEST: + case OFPTYPE_METER_FEATURES_STATS_REPLY: + case OFPTYPE_TABLE_FEATURES_STATS_REQUEST: + case OFPTYPE_TABLE_FEATURES_STATS_REPLY: + case OFPTYPE_BUNDLE_CONTROL: + case OFPTYPE_BUNDLE_ADD_MESSAGE: default: if (VLOG_IS_DBG_ENABLED()) { - char *s = ofp_to_string(msg->data, msg->size, 2); + char *s = ofp_to_string(ofpbuf_data(msg), ofpbuf_size(msg), 2); VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s", sw->datapath_id, s); free(s); @@ -346,26 +413,21 @@ lswitch_process_packet(struct lswitch *sw, const struct ofpbuf *msg) static void send_features_request(struct lswitch *sw) { - time_t now = time_now(); - if (now >= sw->last_features_request + 1) { - struct ofpbuf *b; - struct ofp_switch_config *osc; - int ofp_version = rconn_get_version(sw->rconn); - - assert(ofp_version > 0 && ofp_version < 0xff); + struct ofpbuf *b; + struct ofp_switch_config *osc; + int ofp_version = rconn_get_version(sw->rconn); - /* Send OFPT_FEATURES_REQUEST. */ - b = ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST, ofp_version, 0); - queue_tx(sw, b); + ovs_assert(ofp_version > 0 && ofp_version < 0xff); - /* Send OFPT_SET_CONFIG. */ - b = ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG, ofp_version, sizeof *osc); - osc = ofpbuf_put_zeros(b, sizeof *osc); - osc->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN); - queue_tx(sw, b); + /* Send OFPT_FEATURES_REQUEST. */ + b = ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST, ofp_version, 0); + queue_tx(sw, b); - sw->last_features_request = now; - } + /* Send OFPT_SET_CONFIG. */ + b = ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG, ofp_version, sizeof *osc); + osc = ofpbuf_put_zeros(b, sizeof *osc); + osc->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN); + queue_tx(sw, b); } static void @@ -379,7 +441,7 @@ queue_tx(struct lswitch *sw, struct ofpbuf *b) } else { VLOG_WARN_RL(&rl, "%016llx: %s: send: %s", sw->datapath_id, rconn_get_name(sw->rconn), - strerror(retval)); + ovs_strerror(retval)); } } } @@ -406,28 +468,34 @@ process_switch_features(struct lswitch *sw, struct ofp_header *oh) if (lp && hmap_node_is_null(&lp->hmap_node)) { lp->port_no = port.port_no; hmap_insert(&sw->queue_numbers, &lp->hmap_node, - hash_int(lp->port_no, 0)); + hash_ofp_port(lp->port_no)); } } return 0; } -static uint16_t +static ofp_port_t lswitch_choose_destination(struct lswitch *sw, const struct flow *flow) { - uint16_t out_port; + ofp_port_t out_port; /* Learn the source MAC. */ - if (mac_learning_may_learn(sw->ml, flow->dl_src, 0)) { - struct mac_entry *mac = mac_learning_insert(sw->ml, flow->dl_src, 0); - if (mac_entry_is_new(mac) || mac->port.i != flow->in_port) { - VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on " - "port %"PRIu16, sw->datapath_id, - ETH_ADDR_ARGS(flow->dl_src), flow->in_port); - - mac->port.i = flow->in_port; - mac_learning_changed(sw->ml, mac); + if (sw->ml) { + ovs_rwlock_wrlock(&sw->ml->rwlock); + if (mac_learning_may_learn(sw->ml, flow->dl_src, 0)) { + struct mac_entry *mac = mac_learning_insert(sw->ml, flow->dl_src, + 0); + if (mac->port.ofp_port != flow->in_port.ofp_port) { + VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on " + "port %"PRIu16, sw->datapath_id, + ETH_ADDR_ARGS(flow->dl_src), + flow->in_port.ofp_port); + + mac->port.ofp_port = flow->in_port.ofp_port; + mac_learning_changed(sw->ml); + } } + ovs_rwlock_unlock(&sw->ml->rwlock); } /* Drop frames for reserved multicast addresses. */ @@ -439,14 +507,17 @@ lswitch_choose_destination(struct lswitch *sw, const struct flow *flow) if (sw->ml) { struct mac_entry *mac; - mac = mac_learning_lookup(sw->ml, flow->dl_dst, 0, NULL); + ovs_rwlock_rdlock(&sw->ml->rwlock); + mac = mac_learning_lookup(sw->ml, flow->dl_dst, 0); if (mac) { - out_port = mac->port.i; - if (out_port == flow->in_port) { + out_port = mac->port.ofp_port; + if (out_port == flow->in_port.ofp_port) { /* Don't send a packet back out its input port. */ + ovs_rwlock_unlock(&sw->ml->rwlock); return OFPP_NONE; } } + ovs_rwlock_unlock(&sw->ml->rwlock); } /* Check if we need to use "NORMAL" action. */ @@ -458,11 +529,11 @@ lswitch_choose_destination(struct lswitch *sw, const struct flow *flow) } static uint32_t -get_queue_id(const struct lswitch *sw, uint16_t in_port) +get_queue_id(const struct lswitch *sw, ofp_port_t in_port) { const struct lswitch_port *port; - HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_int(in_port, 0), + HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_ofp_port(in_port), &sw->queue_numbers) { if (port->port_no == in_port) { return port->queue_id; @@ -477,7 +548,7 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) { struct ofputil_packet_in pi; uint32_t queue_id; - uint16_t out_port; + ofp_port_t out_port; uint64_t ofpacts_stub[64 / 8]; struct ofpbuf ofpacts; @@ -504,7 +575,9 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) /* Extract flow data from 'opi' into 'flow'. */ ofpbuf_use_const(&pkt, pi.packet, pi.packet_len); - flow_extract(&pkt, 0, pi.fmd.tun_id, pi.fmd.in_port, &flow); + flow_extract(&pkt, NULL, &flow); + flow.in_port.ofp_port = pi.fmd.in_port; + flow.tunnel.tun_id = pi.fmd.tun_id; /* Choose output port. */ out_port = lswitch_choose_destination(sw, &flow); @@ -514,7 +587,8 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); if (out_port == OFPP_NONE) { /* No actions. */ - } else if (queue_id == UINT32_MAX || out_port >= OFPP_MAX) { + } else if (queue_id == UINT32_MAX + || ofp_to_u16(out_port) >= ofp_to_u16(OFPP_MAX)) { ofpact_put_OUTPUT(&ofpacts)->port = out_port; } else { struct ofpact_enqueue *enqueue = ofpact_put_ENQUEUE(&ofpacts); @@ -526,15 +600,15 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) /* Prepare packet_out in case we need one. */ po.buffer_id = pi.buffer_id; if (po.buffer_id == UINT32_MAX) { - po.packet = pkt.data; - po.packet_len = pkt.size; + po.packet = ofpbuf_data(&pkt); + po.packet_len = ofpbuf_size(&pkt); } else { po.packet = NULL; po.packet_len = 0; } po.in_port = pi.fmd.in_port; - po.ofpacts = ofpacts.data; - po.ofpacts_len = ofpacts.size; + po.ofpacts = ofpbuf_data(&ofpacts); + po.ofpacts_len = ofpbuf_size(&ofpacts); /* Send the packet, and possibly the whole flow, to the output port. */ if (sw->max_idle >= 0 && (!sw->ml || out_port != OFPP_FLOOD)) { @@ -544,27 +618,29 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) /* The output port is known, or we always flood everything, so add a * new flow. */ memset(&fm, 0, sizeof fm); - cls_rule_init(&flow, &sw->wc, 0, &fm.cr); + match_init(&fm.match, &flow, &sw->wc); + ofputil_normalize_match_quiet(&fm.match); + fm.priority = 0; fm.table_id = 0xff; fm.command = OFPFC_ADD; fm.idle_timeout = sw->max_idle; fm.buffer_id = pi.buffer_id; fm.out_port = OFPP_NONE; - fm.ofpacts = ofpacts.data; - fm.ofpacts_len = ofpacts.size; + fm.ofpacts = ofpbuf_data(&ofpacts); + fm.ofpacts_len = ofpbuf_size(&ofpacts); buffer = ofputil_encode_flow_mod(&fm, sw->protocol); queue_tx(sw, buffer); /* If the switch didn't buffer the packet, we need to send a copy. */ if (pi.buffer_id == UINT32_MAX && out_port != OFPP_NONE) { - queue_tx(sw, ofputil_encode_packet_out(&po)); + queue_tx(sw, ofputil_encode_packet_out(&po, sw->protocol)); } } else { /* We don't know that MAC, or we don't set up flows. Send along the * packet without setting up a flow. */ if (pi.buffer_id != UINT32_MAX || out_port != OFPP_NONE) { - queue_tx(sw, ofputil_encode_packet_out(&po)); + queue_tx(sw, ofputil_encode_packet_out(&po, sw->protocol)); } } }