X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Flearning-switch.c;h=ca579116fea2a52bca481ea15feddc0e0ff272e7;hb=777af88d50b8271a8cc8f0a79d17022944481506;hp=ecfa87f6076853cb193117b83dfc59c37b591a83;hpb=9af9e2e8cff919c6053d862703ac228ce1a15bdd;p=sliver-openvswitch.git diff --git a/lib/learning-switch.c b/lib/learning-switch.c index ecfa87f60..ca579116f 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,120 +23,220 @@ #include #include +#include "byte-order.h" +#include "classifier.h" #include "flow.h" +#include "hmap.h" #include "mac-learning.h" #include "ofpbuf.h" +#include "ofp-actions.h" +#include "ofp-errors.h" +#include "ofp-msgs.h" +#include "ofp-parse.h" #include "ofp-print.h" +#include "ofp-util.h" #include "openflow/openflow.h" #include "poll-loop.h" -#include "queue.h" #include "rconn.h" -#include "stp.h" +#include "shash.h" +#include "simap.h" #include "timeval.h" #include "vconn.h" -#include "xtoxll.h" - -#define THIS_MODULE VLM_learning_switch #include "vlog.h" -enum port_state { - P_DISABLED = 1 << 0, - P_LISTENING = 1 << 1, - P_LEARNING = 1 << 2, - P_FORWARDING = 1 << 3, - P_BLOCKING = 1 << 4 +VLOG_DEFINE_THIS_MODULE(learning_switch); + +struct lswitch_port { + struct hmap_node hmap_node; /* Hash node for port number. */ + ofp_port_t port_no; /* OpenFlow port number. */ + uint32_t queue_id; /* OpenFlow queue number. */ +}; + +enum lswitch_state { + S_CONNECTING, /* Waiting for connection to complete. */ + S_FEATURES_REPLY, /* Waiting for features reply. */ + S_SWITCHING, /* Switching flows. */ }; struct lswitch { + struct rconn *rconn; + enum lswitch_state state; + /* If nonnegative, the switch sets up flows that expire after the given * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT). * Otherwise, the switch processes every packet. */ int max_idle; + enum ofputil_protocol protocol; unsigned long long int datapath_id; - uint32_t capabilities; - time_t last_features_request; struct mac_learning *ml; /* NULL to act as hub instead of switch. */ - bool exact_flows; /* Use exact-match flows? */ + struct flow_wildcards wc; /* Wildcards to apply to flows. */ bool action_normal; /* Use OFPP_NORMAL? */ + /* Queue distribution. */ + uint32_t default_queue; /* Default OpenFlow queue, or UINT32_MAX. */ + struct hmap queue_numbers; /* Map from port number to lswitch_port. */ + struct shash queue_names; /* Map from port name to lswitch_port. */ + /* Number of outgoing queued packets on the rconn. */ struct rconn_packet_counter *queued; - /* Spanning tree protocol implementation. - * - * We implement STP states by, whenever a port's STP state changes, - * querying all the flows on the switch and then deleting any of them that - * are inappropriate for a port's STP state. */ - long long int next_query; /* Next time at which to query all flows. */ - long long int last_query; /* Last time we sent a query. */ - long long int last_reply; /* Last time we received a query reply. */ - unsigned int port_states[STP_MAX_PORTS]; - uint32_t query_xid; /* XID used for query. */ - int n_flows, n_no_recv, n_no_send; + /* If true, do not reply to any messages from the switch (for debugging + * fail-open mode). */ + bool mute; + + /* Optional "flow mod" requests to send to the switch at connection time, + * to set up the flow table. */ + const struct ofputil_flow_mod *default_flows; + size_t n_default_flows; + enum ofputil_protocol usable_protocols; }; /* The log messages here could actually be useful in debugging, so keep the * rate limit relatively high. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); -static void queue_tx(struct lswitch *, struct rconn *, struct ofpbuf *); -static void send_features_request(struct lswitch *, struct rconn *); -static void schedule_query(struct lswitch *, long long int delay); -static bool may_learn(const struct lswitch *, uint16_t port_no); -static bool may_recv(const struct lswitch *, uint16_t port_no, - bool any_actions); -static bool may_send(const struct lswitch *, uint16_t port_no); - -typedef void packet_handler_func(struct lswitch *, struct rconn *, void *); -static packet_handler_func process_switch_features; -static packet_handler_func process_packet_in; -static packet_handler_func process_echo_request; -static packet_handler_func process_port_status; -static packet_handler_func process_phy_port; -static packet_handler_func process_stats_reply; - -/* Creates and returns a new learning switch. - * - * If 'learn_macs' is true, the new switch will learn the ports on which MAC - * addresses appear. Otherwise, the new switch will flood all packets. - * - * If 'max_idle' is nonnegative, the new switch will set up flows that expire - * after the given number of seconds (or never expire, if 'max_idle' is - * OFP_FLOW_PERMANENT). Otherwise, the new switch will process every packet. +static void queue_tx(struct lswitch *, struct ofpbuf *); +static void send_features_request(struct lswitch *); + +static void lswitch_process_packet(struct lswitch *, const struct ofpbuf *); +static enum ofperr process_switch_features(struct lswitch *, + struct ofp_header *); +static void process_packet_in(struct lswitch *, const struct ofp_header *); +static void process_echo_request(struct lswitch *, const struct ofp_header *); + +/* Creates and returns a new learning switch whose configuration is given by + * 'cfg'. * * 'rconn' is used to send out an OpenFlow features request. */ struct lswitch * -lswitch_create(struct rconn *rconn, bool learn_macs, - bool exact_flows, int max_idle, bool action_normal) +lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg) { struct lswitch *sw; - size_t i; + uint32_t ofpfw; - sw = xcalloc(1, sizeof *sw); - sw->max_idle = max_idle; + sw = xzalloc(sizeof *sw); + sw->rconn = rconn; + sw->state = S_CONNECTING; + sw->max_idle = cfg->max_idle; sw->datapath_id = 0; - sw->last_features_request = time_now() - 1; - sw->ml = learn_macs ? mac_learning_create() : NULL; - sw->action_normal = action_normal; - sw->exact_flows = exact_flows; - sw->queued = rconn_packet_counter_create(); - sw->next_query = LLONG_MIN; - sw->last_query = LLONG_MIN; - sw->last_reply = LLONG_MIN; - for (i = 0; i < STP_MAX_PORTS; i++) { - sw->port_states[i] = P_DISABLED; + sw->ml = (cfg->mode == LSW_LEARN + ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME) + : NULL); + sw->action_normal = cfg->mode == LSW_NORMAL; + + switch (cfg->wildcards) { + case 0: + ofpfw = 0; + break; + + case UINT32_MAX: + /* Try to wildcard as many fields as possible, but we cannot + * wildcard all fields. We need in_port to detect moves. We need + * Ethernet source and dest and VLAN VID to do L2 learning. */ + ofpfw = (OFPFW10_DL_TYPE | OFPFW10_DL_VLAN_PCP + | OFPFW10_NW_SRC_ALL | OFPFW10_NW_DST_ALL + | OFPFW10_NW_TOS | OFPFW10_NW_PROTO + | OFPFW10_TP_SRC | OFPFW10_TP_DST); + break; + + default: + ofpfw = cfg->wildcards; + break; } - send_features_request(sw, rconn); + ofputil_wildcard_from_ofpfw10(ofpfw, &sw->wc); + + sw->default_queue = cfg->default_queue; + hmap_init(&sw->queue_numbers); + shash_init(&sw->queue_names); + if (cfg->port_queues) { + struct simap_node *node; + + SIMAP_FOR_EACH (node, cfg->port_queues) { + struct lswitch_port *port = xmalloc(sizeof *port); + hmap_node_nullify(&port->hmap_node); + port->queue_id = node->data; + shash_add(&sw->queue_names, node->name, port); + } + } + + sw->default_flows = cfg->default_flows; + sw->n_default_flows = cfg->n_default_flows; + sw->usable_protocols = cfg->usable_protocols; + + sw->queued = rconn_packet_counter_create(); + return sw; } +static void +lswitch_handshake(struct lswitch *sw) +{ + enum ofputil_protocol protocol; + + send_features_request(sw); + + protocol = ofputil_protocol_from_ofp_version(rconn_get_version(sw->rconn)); + if (sw->default_flows) { + struct ofpbuf *msg = NULL; + int error = 0; + size_t i; + + /* If the initial protocol isn't good enough for default_flows, then + * pick one that will work and encode messages to set up that + * protocol. + * + * This could be improved by actually negotiating a mutually acceptable + * flow format with the switch, but that would require an asynchronous + * state machine. This version ought to work fine in practice. */ + if (!(protocol & sw->usable_protocols)) { + enum ofputil_protocol want = rightmost_1bit(sw->usable_protocols); + while (!error) { + msg = ofputil_encode_set_protocol(protocol, want, &protocol); + if (!msg) { + break; + } + error = rconn_send(sw->rconn, msg, NULL); + } + } + if (protocol & sw->usable_protocols) { + for (i = 0; !error && i < sw->n_default_flows; i++) { + msg = ofputil_encode_flow_mod(&sw->default_flows[i], protocol); + error = rconn_send(sw->rconn, msg, NULL); + } + + if (error) { + VLOG_INFO_RL(&rl, "%s: failed to queue default flows (%s)", + rconn_get_name(sw->rconn), ovs_strerror(error)); + } + } else { + VLOG_INFO_RL(&rl, "%s: failed to set usable protocol", + rconn_get_name(sw->rconn)); + } + } + sw->protocol = protocol; +} + +bool +lswitch_is_alive(const struct lswitch *sw) +{ + return rconn_is_alive(sw->rconn); +} + /* Destroys 'sw'. */ void lswitch_destroy(struct lswitch *sw) { if (sw) { - mac_learning_destroy(sw->ml); + struct lswitch_port *node, *next; + + rconn_destroy(sw->rconn); + HMAP_FOR_EACH_SAFE (node, next, hmap_node, &sw->queue_numbers) { + hmap_remove(&sw->queue_numbers, &node->hmap_node); + free(node); + } + shash_destroy(&sw->queue_names); + mac_learning_unref(sw->ml); rconn_packet_counter_destroy(sw->queued); free(sw); } @@ -145,87 +245,38 @@ lswitch_destroy(struct lswitch *sw) /* Takes care of necessary 'sw' activity, except for receiving packets (which * the caller must do). */ void -lswitch_run(struct lswitch *sw, struct rconn *rconn) +lswitch_run(struct lswitch *sw) { - long long int now = time_msec(); + int i; if (sw->ml) { - mac_learning_run(sw->ml, NULL); + ovs_rwlock_wrlock(&sw->ml->rwlock); + mac_learning_run(sw->ml); + ovs_rwlock_unlock(&sw->ml->rwlock); } - /* If we're waiting for more replies, keeping waiting for up to 10 s. */ - if (sw->last_reply != LLONG_MIN) { - if (now - sw->last_reply > 10000) { - VLOG_ERR_RL(&rl, "%012llx: No more flow stat replies last 10 s", - sw->datapath_id); - sw->last_reply = LLONG_MIN; - sw->last_query = LLONG_MIN; - schedule_query(sw, 0); - } else { - return; - } - } + rconn_run(sw->rconn); - /* If we're waiting for any reply at all, keep waiting for up to 10 s. */ - if (sw->last_query != LLONG_MIN) { - if (now - sw->last_query > 10000) { - VLOG_ERR_RL(&rl, "%012llx: No flow stat replies in last 10 s", - sw->datapath_id); - sw->last_query = LLONG_MIN; - schedule_query(sw, 0); - } else { - return; + if (sw->state == S_CONNECTING) { + if (rconn_get_version(sw->rconn) != -1) { + lswitch_handshake(sw); + sw->state = S_FEATURES_REPLY; } + return; } - /* If it's time to send another query, do so. */ - if (sw->next_query != LLONG_MIN && now >= sw->next_query) { - sw->next_query = LLONG_MIN; - if (!rconn_is_connected(rconn)) { - schedule_query(sw, 1000); - } else { - struct ofp_stats_request *osr; - struct ofp_flow_stats_request *ofsr; - struct ofpbuf *b; - int error; - - VLOG_DBG("%012llx: Sending flow stats request to implement STP", - sw->datapath_id); - - sw->last_query = now; - sw->query_xid = random_uint32(); - sw->n_flows = 0; - sw->n_no_recv = 0; - sw->n_no_send = 0; - osr = make_openflow_xid(sizeof *osr + sizeof *ofsr, - OFPT_STATS_REQUEST, sw->query_xid, &b); - osr->type = htons(OFPST_FLOW); - osr->flags = htons(0); - ofsr = (struct ofp_flow_stats_request *) osr->body; - ofsr->match.wildcards = htonl(OFPFW_ALL); - ofsr->table_id = 0xff; - ofsr->out_port = htons(OFPP_NONE); - - error = rconn_send(rconn, b, NULL); - if (error) { - VLOG_WARN_RL(&rl, "%012llx: sending flow stats request " - "failed: %s", sw->datapath_id, strerror(error)); - ofpbuf_delete(b); - schedule_query(sw, 1000); - } + for (i = 0; i < 50; i++) { + struct ofpbuf *msg; + + msg = rconn_recv(sw->rconn); + if (!msg) { + break; } - } -} -static void -wait_timeout(long long int started) -{ - long long int now = time_msec(); - long long int timeout = 10000 - (now - started); - if (timeout <= 0) { - poll_immediate_wake(); - } else { - poll_timer_wait(timeout); + if (!sw->mute) { + lswitch_process_packet(sw, msg); + } + ofpbuf_delete(msg); } } @@ -233,447 +284,369 @@ void lswitch_wait(struct lswitch *sw) { if (sw->ml) { + ovs_rwlock_rdlock(&sw->ml->rwlock); mac_learning_wait(sw->ml); + ovs_rwlock_unlock(&sw->ml->rwlock); } - - if (sw->last_reply != LLONG_MIN) { - wait_timeout(sw->last_reply); - } else if (sw->last_query != LLONG_MIN) { - wait_timeout(sw->last_query); - } + rconn_run_wait(sw->rconn); + rconn_recv_wait(sw->rconn); } /* Processes 'msg', which should be an OpenFlow received on 'rconn', according * to the learning switch state in 'sw'. The most likely result of processing * is that flow-setup and packet-out OpenFlow messages will be sent out on * 'rconn'. */ -void -lswitch_process_packet(struct lswitch *sw, struct rconn *rconn, - const struct ofpbuf *msg) +static void +lswitch_process_packet(struct lswitch *sw, const struct ofpbuf *msg) { - struct processor { - uint8_t type; - size_t min_size; - packet_handler_func *handler; - }; - static const struct processor processors[] = { - { - OFPT_ECHO_REQUEST, - sizeof(struct ofp_header), - process_echo_request - }, - { - OFPT_FEATURES_REPLY, - sizeof(struct ofp_switch_features), - process_switch_features - }, - { - OFPT_PACKET_IN, - offsetof(struct ofp_packet_in, data), - process_packet_in - }, - { - OFPT_PORT_STATUS, - sizeof(struct ofp_port_status), - process_port_status - }, - { - OFPT_STATS_REPLY, - offsetof(struct ofp_stats_reply, body), - process_stats_reply - }, - { - OFPT_FLOW_EXPIRED, - sizeof(struct ofp_flow_expired), - NULL - }, - }; - const size_t n_processors = ARRAY_SIZE(processors); - const struct processor *p; - struct ofp_header *oh; - - oh = msg->data; - if (sw->datapath_id == 0 - && oh->type != OFPT_ECHO_REQUEST - && oh->type != OFPT_FEATURES_REPLY) { - send_features_request(sw, rconn); + enum ofptype type; + struct ofpbuf b; + + b = *msg; + if (ofptype_pull(&type, &b)) { return; } - for (p = processors; p < &processors[n_processors]; p++) { - if (oh->type == p->type) { - if (msg->size < p->min_size) { - VLOG_WARN_RL(&rl, "%012llx: %s: too short (%zu bytes) for " - "type %"PRIu8" (min %zu)", sw->datapath_id, - rconn_get_name(rconn), msg->size, oh->type, - p->min_size); - return; - } - if (p->handler) { - (p->handler)(sw, rconn, msg->data); + if (sw->state == S_FEATURES_REPLY + && type != OFPTYPE_ECHO_REQUEST + && type != OFPTYPE_FEATURES_REPLY) { + return; + } + + switch (type) { + case OFPTYPE_ECHO_REQUEST: + process_echo_request(sw, ofpbuf_data(msg)); + break; + + case OFPTYPE_FEATURES_REPLY: + if (sw->state == S_FEATURES_REPLY) { + if (!process_switch_features(sw, ofpbuf_data(msg))) { + sw->state = S_SWITCHING; + } else { + rconn_disconnect(sw->rconn); } - return; } - } - if (VLOG_IS_DBG_ENABLED()) { - char *p = ofp_to_string(msg->data, msg->size, 2); - VLOG_DBG_RL(&rl, "%012llx: OpenFlow packet ignored: %s", - sw->datapath_id, p); - free(p); + break; + + case OFPTYPE_PACKET_IN: + process_packet_in(sw, ofpbuf_data(msg)); + break; + + case OFPTYPE_FLOW_REMOVED: + /* Nothing to do. */ + break; + + case OFPTYPE_HELLO: + case OFPTYPE_ERROR: + case OFPTYPE_ECHO_REPLY: + case OFPTYPE_FEATURES_REQUEST: + case OFPTYPE_GET_CONFIG_REQUEST: + case OFPTYPE_GET_CONFIG_REPLY: + case OFPTYPE_SET_CONFIG: + case OFPTYPE_PORT_STATUS: + case OFPTYPE_PACKET_OUT: + case OFPTYPE_FLOW_MOD: + case OFPTYPE_GROUP_MOD: + case OFPTYPE_PORT_MOD: + case OFPTYPE_TABLE_MOD: + case OFPTYPE_BARRIER_REQUEST: + case OFPTYPE_BARRIER_REPLY: + case OFPTYPE_QUEUE_GET_CONFIG_REQUEST: + case OFPTYPE_QUEUE_GET_CONFIG_REPLY: + case OFPTYPE_DESC_STATS_REQUEST: + case OFPTYPE_DESC_STATS_REPLY: + case OFPTYPE_FLOW_STATS_REQUEST: + case OFPTYPE_FLOW_STATS_REPLY: + case OFPTYPE_AGGREGATE_STATS_REQUEST: + case OFPTYPE_AGGREGATE_STATS_REPLY: + case OFPTYPE_TABLE_STATS_REQUEST: + case OFPTYPE_TABLE_STATS_REPLY: + case OFPTYPE_PORT_STATS_REQUEST: + case OFPTYPE_PORT_STATS_REPLY: + case OFPTYPE_QUEUE_STATS_REQUEST: + case OFPTYPE_QUEUE_STATS_REPLY: + case OFPTYPE_PORT_DESC_STATS_REQUEST: + case OFPTYPE_PORT_DESC_STATS_REPLY: + case OFPTYPE_ROLE_REQUEST: + case OFPTYPE_ROLE_REPLY: + case OFPTYPE_ROLE_STATUS: + case OFPTYPE_SET_FLOW_FORMAT: + case OFPTYPE_FLOW_MOD_TABLE_ID: + case OFPTYPE_SET_PACKET_IN_FORMAT: + case OFPTYPE_FLOW_AGE: + case OFPTYPE_SET_CONTROLLER_ID: + case OFPTYPE_FLOW_MONITOR_STATS_REQUEST: + case OFPTYPE_FLOW_MONITOR_STATS_REPLY: + case OFPTYPE_FLOW_MONITOR_CANCEL: + case OFPTYPE_FLOW_MONITOR_PAUSED: + case OFPTYPE_FLOW_MONITOR_RESUMED: + case OFPTYPE_GET_ASYNC_REQUEST: + case OFPTYPE_GET_ASYNC_REPLY: + case OFPTYPE_SET_ASYNC_CONFIG: + case OFPTYPE_METER_MOD: + case OFPTYPE_GROUP_STATS_REQUEST: + case OFPTYPE_GROUP_STATS_REPLY: + case OFPTYPE_GROUP_DESC_STATS_REQUEST: + case OFPTYPE_GROUP_DESC_STATS_REPLY: + case OFPTYPE_GROUP_FEATURES_STATS_REQUEST: + case OFPTYPE_GROUP_FEATURES_STATS_REPLY: + case OFPTYPE_METER_STATS_REQUEST: + case OFPTYPE_METER_STATS_REPLY: + case OFPTYPE_METER_CONFIG_STATS_REQUEST: + case OFPTYPE_METER_CONFIG_STATS_REPLY: + case OFPTYPE_METER_FEATURES_STATS_REQUEST: + case OFPTYPE_METER_FEATURES_STATS_REPLY: + case OFPTYPE_TABLE_FEATURES_STATS_REQUEST: + case OFPTYPE_TABLE_FEATURES_STATS_REPLY: + case OFPTYPE_BUNDLE_CONTROL: + case OFPTYPE_BUNDLE_ADD_MESSAGE: + default: + if (VLOG_IS_DBG_ENABLED()) { + char *s = ofp_to_string(ofpbuf_data(msg), ofpbuf_size(msg), 2); + VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s", + sw->datapath_id, s); + free(s); + } } } static void -send_features_request(struct lswitch *sw, struct rconn *rconn) +send_features_request(struct lswitch *sw) { - time_t now = time_now(); - if (now >= sw->last_features_request + 1) { - struct ofpbuf *b; - struct ofp_switch_config *osc; - - /* Send OFPT_FEATURES_REQUEST. */ - make_openflow(sizeof(struct ofp_header), OFPT_FEATURES_REQUEST, &b); - queue_tx(sw, rconn, b); - - /* Send OFPT_SET_CONFIG. */ - osc = make_openflow(sizeof *osc, OFPT_SET_CONFIG, &b); - osc->flags = htons(OFPC_SEND_FLOW_EXP); - osc->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN); - queue_tx(sw, rconn, b); - - sw->last_features_request = now; - } + struct ofpbuf *b; + struct ofp_switch_config *osc; + int ofp_version = rconn_get_version(sw->rconn); + + ovs_assert(ofp_version > 0 && ofp_version < 0xff); + + /* Send OFPT_FEATURES_REQUEST. */ + b = ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST, ofp_version, 0); + queue_tx(sw, b); + + /* Send OFPT_SET_CONFIG. */ + b = ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG, ofp_version, sizeof *osc); + osc = ofpbuf_put_zeros(b, sizeof *osc); + osc->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN); + queue_tx(sw, b); } static void -queue_tx(struct lswitch *sw, struct rconn *rconn, struct ofpbuf *b) +queue_tx(struct lswitch *sw, struct ofpbuf *b) { - int retval = rconn_send_with_limit(rconn, b, sw->queued, 10); + int retval = rconn_send_with_limit(sw->rconn, b, sw->queued, 10); if (retval && retval != ENOTCONN) { if (retval == EAGAIN) { - VLOG_INFO_RL(&rl, "%012llx: %s: tx queue overflow", - sw->datapath_id, rconn_get_name(rconn)); + VLOG_INFO_RL(&rl, "%016llx: %s: tx queue overflow", + sw->datapath_id, rconn_get_name(sw->rconn)); } else { - VLOG_WARN_RL(&rl, "%012llx: %s: send: %s", - sw->datapath_id, rconn_get_name(rconn), - strerror(retval)); + VLOG_WARN_RL(&rl, "%016llx: %s: send: %s", + sw->datapath_id, rconn_get_name(sw->rconn), + ovs_strerror(retval)); } } } -static void -schedule_query(struct lswitch *sw, long long int delay) +static enum ofperr +process_switch_features(struct lswitch *sw, struct ofp_header *oh) { - long long int now = time_msec(); - if (sw->next_query == LLONG_MIN || sw->next_query > now + delay) { - sw->next_query = now + delay; + struct ofputil_switch_features features; + struct ofputil_phy_port port; + enum ofperr error; + struct ofpbuf b; + + error = ofputil_decode_switch_features(oh, &features, &b); + if (error) { + VLOG_ERR("received invalid switch feature reply (%s)", + ofperr_to_string(error)); + return error; } -} -static void -process_switch_features(struct lswitch *sw, struct rconn *rconn, void *osf_) -{ - struct ofp_switch_features *osf = osf_; - size_t n_ports = ((ntohs(osf->header.length) - - offsetof(struct ofp_switch_features, ports)) - / sizeof *osf->ports); - size_t i; - - sw->datapath_id = ntohll(osf->datapath_id); - sw->capabilities = ntohl(osf->capabilities); - for (i = 0; i < n_ports; i++) { - process_phy_port(sw, rconn, &osf->ports[i]); - } - if (sw->capabilities & OFPC_STP) { - schedule_query(sw, 1000); + sw->datapath_id = features.datapath_id; + + while (!ofputil_pull_phy_port(oh->version, &b, &port)) { + struct lswitch_port *lp = shash_find_data(&sw->queue_names, port.name); + if (lp && hmap_node_is_null(&lp->hmap_node)) { + lp->port_no = port.port_no; + hmap_insert(&sw->queue_numbers, &lp->hmap_node, + hash_ofp_port(lp->port_no)); + } } + return 0; } -static void -process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_) +static ofp_port_t +lswitch_choose_destination(struct lswitch *sw, const struct flow *flow) { - struct ofp_packet_in *opi = opi_; - uint16_t in_port = ntohs(opi->in_port); - uint16_t out_port = OFPP_FLOOD; - - size_t pkt_ofs, pkt_len; - struct ofpbuf pkt; - flow_t flow; + ofp_port_t out_port; - /* Extract flow data from 'opi' into 'flow'. */ - pkt_ofs = offsetof(struct ofp_packet_in, data); - pkt_len = ntohs(opi->header.length) - pkt_ofs; - pkt.data = opi->data; - pkt.size = pkt_len; - flow_extract(&pkt, in_port, &flow); - - if (may_learn(sw, in_port) && sw->ml) { - if (mac_learning_learn(sw->ml, flow.dl_src, 0, in_port)) { - VLOG_DBG_RL(&rl, "%012llx: learned that "ETH_ADDR_FMT" is on " - "port %"PRIu16, sw->datapath_id, - ETH_ADDR_ARGS(flow.dl_src), in_port); + /* Learn the source MAC. */ + if (sw->ml) { + ovs_rwlock_wrlock(&sw->ml->rwlock); + if (mac_learning_may_learn(sw->ml, flow->dl_src, 0)) { + struct mac_entry *mac = mac_learning_insert(sw->ml, flow->dl_src, + 0); + if (mac->port.ofp_port != flow->in_port.ofp_port) { + VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on " + "port %"PRIu16, sw->datapath_id, + ETH_ADDR_ARGS(flow->dl_src), + flow->in_port.ofp_port); + + mac->port.ofp_port = flow->in_port.ofp_port; + mac_learning_changed(sw->ml); + } } + ovs_rwlock_unlock(&sw->ml->rwlock); } - if (eth_addr_is_reserved(flow.dl_src)) { - goto drop_it; - } - - if (!may_recv(sw, in_port, false)) { - /* STP prevents receiving anything on this port. */ - goto drop_it; + /* Drop frames for reserved multicast addresses. */ + if (eth_addr_is_reserved(flow->dl_dst)) { + return OFPP_NONE; } + out_port = OFPP_FLOOD; if (sw->ml) { - int learned_port = mac_learning_lookup(sw->ml, flow.dl_dst, 0); - if (learned_port >= 0 && may_send(sw, learned_port)) { - out_port = learned_port; - } - } - - if (in_port == out_port) { - /* Don't send out packets on their input ports. */ - goto drop_it; - } else if (sw->max_idle >= 0 && (!sw->ml || out_port != OFPP_FLOOD)) { - struct ofpbuf *buffer; - struct ofp_flow_mod *ofm; - uint32_t wildcards; - - /* Check if we need to wildcard the flows. */ - if (!sw->exact_flows) { - /* We can not wildcard all fields. - * We need in_port to detect moves. - * We need both SA and DA to do learning. */ - wildcards = (OFPFW_DL_TYPE | OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK - | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST); - } else { - /* Exact match */ - wildcards = 0; - } - - /* Check if we need to use "NORMAL" action. */ - if (sw->action_normal && out_port != OFPP_FLOOD) { - out_port = OFPP_NORMAL; - } - - /* The output port is known, or we always flood everything, so add a - * new flow. */ - buffer = make_add_simple_flow(&flow, ntohl(opi->buffer_id), - out_port, sw->max_idle); - ofm = buffer->data; - ofm->match.wildcards = htonl(wildcards); - queue_tx(sw, rconn, buffer); - - /* If the switch didn't buffer the packet, we need to send a copy. */ - if (ntohl(opi->buffer_id) == UINT32_MAX) { - queue_tx(sw, rconn, - make_unbuffered_packet_out(&pkt, in_port, out_port)); - } - } else { - struct ofpbuf *b; - - /* Check if we need to use "NORMAL" action. */ - if (sw->action_normal && out_port != OFPP_FLOOD) { - out_port = OFPP_NORMAL; - } - - /* We don't know that MAC, or we don't set up flows. Send along the - * packet without setting up a flow. */ - if (ntohl(opi->buffer_id) == UINT32_MAX) { - b = make_unbuffered_packet_out(&pkt, in_port, out_port); - } else { - b = make_buffered_packet_out(ntohl(opi->buffer_id), - in_port, out_port); + struct mac_entry *mac; + + ovs_rwlock_rdlock(&sw->ml->rwlock); + mac = mac_learning_lookup(sw->ml, flow->dl_dst, 0); + if (mac) { + out_port = mac->port.ofp_port; + if (out_port == flow->in_port.ofp_port) { + /* Don't send a packet back out its input port. */ + ovs_rwlock_unlock(&sw->ml->rwlock); + return OFPP_NONE; + } } - queue_tx(sw, rconn, b); + ovs_rwlock_unlock(&sw->ml->rwlock); } - return; -drop_it: - if (sw->max_idle >= 0) { - /* Set up a flow to drop packets. */ - queue_tx(sw, rconn, make_add_flow(&flow, ntohl(opi->buffer_id), - sw->max_idle, 0)); - } else { - /* Just drop the packet, since we don't set up flows at all. - * XXX we should send a packet_out with no actions if buffer_id != - * UINT32_MAX, to avoid clogging the kernel buffers. */ + /* Check if we need to use "NORMAL" action. */ + if (sw->action_normal && out_port != OFPP_FLOOD) { + return OFPP_NORMAL; } - return; -} -static void -process_echo_request(struct lswitch *sw, struct rconn *rconn, void *rq_) -{ - struct ofp_header *rq = rq_; - queue_tx(sw, rconn, make_echo_reply(rq)); + return out_port; } -static void -process_port_status(struct lswitch *sw, struct rconn *rconn, void *ops_) +static uint32_t +get_queue_id(const struct lswitch *sw, ofp_port_t in_port) { - struct ofp_port_status *ops = ops_; - process_phy_port(sw, rconn, &ops->desc); -} + const struct lswitch_port *port; -static void -process_phy_port(struct lswitch *sw, struct rconn *rconn UNUSED, void *opp_) -{ - const struct ofp_phy_port *opp = opp_; - uint16_t port_no = ntohs(opp->port_no); - if (sw->capabilities & OFPC_STP && port_no < STP_MAX_PORTS) { - uint32_t config = ntohl(opp->config); - uint32_t state = ntohl(opp->state); - unsigned int *port_state = &sw->port_states[port_no]; - unsigned int new_port_state; - - if (!(config & (OFPPC_NO_STP | OFPPC_PORT_DOWN)) - && !(state & OFPPS_LINK_DOWN)) - { - switch (state & OFPPS_STP_MASK) { - case OFPPS_STP_LISTEN: - new_port_state = P_LISTENING; - break; - case OFPPS_STP_LEARN: - new_port_state = P_LEARNING; - break; - case OFPPS_STP_FORWARD: - new_port_state = P_FORWARDING; - break; - case OFPPS_STP_BLOCK: - new_port_state = P_BLOCKING; - break; - default: - new_port_state = P_DISABLED; - break; - } - } else { - new_port_state = P_FORWARDING; - } - if (*port_state != new_port_state) { - *port_state = new_port_state; - schedule_query(sw, 1000); + HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_ofp_port(in_port), + &sw->queue_numbers) { + if (port->port_no == in_port) { + return port->queue_id; } } -} -static unsigned int -get_port_state(const struct lswitch *sw, uint16_t port_no) -{ - return (port_no >= STP_MAX_PORTS || !(sw->capabilities & OFPC_STP) - ? P_FORWARDING - : sw->port_states[port_no]); + return sw->default_queue; } -static bool -may_learn(const struct lswitch *sw, uint16_t port_no) +static void +process_packet_in(struct lswitch *sw, const struct ofp_header *oh) { - return get_port_state(sw, port_no) & (P_LEARNING | P_FORWARDING); -} + struct ofputil_packet_in pi; + uint32_t queue_id; + ofp_port_t out_port; -static bool -may_recv(const struct lswitch *sw, uint16_t port_no, bool any_actions) -{ - unsigned int state = get_port_state(sw, port_no); - return !(any_actions - ? state & (P_DISABLED | P_LISTENING | P_BLOCKING) - : state & (P_DISABLED | P_LISTENING | P_BLOCKING | P_LEARNING)); -} + uint64_t ofpacts_stub[64 / 8]; + struct ofpbuf ofpacts; -static bool -may_send(const struct lswitch *sw, uint16_t port_no) -{ - return get_port_state(sw, port_no) & P_FORWARDING; -} + struct ofputil_packet_out po; + enum ofperr error; -static void -process_flow_stats(struct lswitch *sw, struct rconn *rconn, - const struct ofp_flow_stats *ofs) -{ - const char *end = (char *) ofs + ntohs(ofs->length); - bool delete = false; - - /* Decide to delete the flow if it matches on an STP-disabled physical - * port. But don't delete it if the flow just drops all received packets, - * because that's a perfectly reasonable thing to do for disabled physical - * ports. */ - if (!(ofs->match.wildcards & htonl(OFPFW_IN_PORT))) { - if (!may_recv(sw, ntohs(ofs->match.in_port), - end > (char *) ofs->actions)) { - delete = true; - sw->n_no_recv++; - } - } + struct ofpbuf pkt; + struct flow flow; - /* Decide to delete the flow if it forwards to an STP-disabled physical - * port. */ - if (!delete) { - const struct ofp_action_header *a; - size_t len; - - for (a = ofs->actions; (char *) a < end; a += len / 8) { - len = ntohs(a->len); - if (len > end - (char *) a) { - VLOG_DBG_RL(&rl, "%012llx: action exceeds available space " - "(%zu > %td)", - sw->datapath_id, len, end - (char *) a); - break; - } else if (len % 8) { - VLOG_DBG_RL(&rl, "%012llx: action length (%zu) not multiple " - "of 8 bytes", sw->datapath_id, len); - break; - } + error = ofputil_decode_packet_in(&pi, oh); + if (error) { + VLOG_WARN_RL(&rl, "failed to decode packet-in: %s", + ofperr_to_string(error)); + return; + } - if (a->type == htons(OFPAT_OUTPUT)) { - struct ofp_action_output *oao = (struct ofp_action_output *) a; - if (!may_send(sw, ntohs(oao->port))) { - delete = true; - sw->n_no_send++; - break; - } - } - } + /* Ignore packets sent via output to OFPP_CONTROLLER. This library never + * uses such an action. You never know what experiments might be going on, + * though, and it seems best not to interfere with them. */ + if (pi.reason != OFPR_NO_MATCH) { + return; } - /* Delete the flow. */ - if (delete) { - struct ofp_flow_mod *ofm; - struct ofpbuf *b; + /* Extract flow data from 'opi' into 'flow'. */ + ofpbuf_use_const(&pkt, pi.packet, pi.packet_len); + flow_extract(&pkt, NULL, &flow); + flow.in_port.ofp_port = pi.fmd.in_port; + flow.tunnel.tun_id = pi.fmd.tun_id; + + /* Choose output port. */ + out_port = lswitch_choose_destination(sw, &flow); + + /* Make actions. */ + queue_id = get_queue_id(sw, pi.fmd.in_port); + ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); + if (out_port == OFPP_NONE) { + /* No actions. */ + } else if (queue_id == UINT32_MAX + || ofp_to_u16(out_port) >= ofp_to_u16(OFPP_MAX)) { + ofpact_put_OUTPUT(&ofpacts)->port = out_port; + } else { + struct ofpact_enqueue *enqueue = ofpact_put_ENQUEUE(&ofpacts); + enqueue->port = out_port; + enqueue->queue = queue_id; + } + ofpact_pad(&ofpacts); - ofm = make_openflow(offsetof(struct ofp_flow_mod, actions), - OFPT_FLOW_MOD, &b); - ofm->match = ofs->match; - ofm->command = OFPFC_DELETE_STRICT; - rconn_send(rconn, b, NULL); + /* Prepare packet_out in case we need one. */ + po.buffer_id = pi.buffer_id; + if (po.buffer_id == UINT32_MAX) { + po.packet = ofpbuf_data(&pkt); + po.packet_len = ofpbuf_size(&pkt); + } else { + po.packet = NULL; + po.packet_len = 0; } -} + po.in_port = pi.fmd.in_port; + po.ofpacts = ofpbuf_data(&ofpacts); + po.ofpacts_len = ofpbuf_size(&ofpacts); -static void -process_stats_reply(struct lswitch *sw, struct rconn *rconn, void *osr_) -{ - struct ofp_stats_reply *osr = osr_; - struct flow_stats_iterator i; - const struct ofp_flow_stats *fs; + /* Send the packet, and possibly the whole flow, to the output port. */ + if (sw->max_idle >= 0 && (!sw->ml || out_port != OFPP_FLOOD)) { + struct ofputil_flow_mod fm; + struct ofpbuf *buffer; - if (sw->last_query == LLONG_MIN - || osr->type != htons(OFPST_FLOW) - || osr->header.xid != sw->query_xid) { - return; - } - for (fs = flow_stats_first(&i, osr); fs; fs = flow_stats_next(&i)) { - sw->n_flows++; - process_flow_stats(sw, rconn, fs); - } - if (!(osr->flags & htons(OFPSF_REPLY_MORE))) { - VLOG_DBG("%012llx: Deleted %d of %d received flows to " - "implement STP, %d because of no-recv, %d because of " - "no-send", sw->datapath_id, - sw->n_no_recv + sw->n_no_send, sw->n_flows, - sw->n_no_recv, sw->n_no_send); - sw->last_query = LLONG_MIN; - sw->last_reply = LLONG_MIN; + /* The output port is known, or we always flood everything, so add a + * new flow. */ + memset(&fm, 0, sizeof fm); + match_init(&fm.match, &flow, &sw->wc); + ofputil_normalize_match_quiet(&fm.match); + fm.priority = 0; + fm.table_id = 0xff; + fm.command = OFPFC_ADD; + fm.idle_timeout = sw->max_idle; + fm.buffer_id = pi.buffer_id; + fm.out_port = OFPP_NONE; + fm.ofpacts = ofpbuf_data(&ofpacts); + fm.ofpacts_len = ofpbuf_size(&ofpacts); + buffer = ofputil_encode_flow_mod(&fm, sw->protocol); + + queue_tx(sw, buffer); + + /* If the switch didn't buffer the packet, we need to send a copy. */ + if (pi.buffer_id == UINT32_MAX && out_port != OFPP_NONE) { + queue_tx(sw, ofputil_encode_packet_out(&po, sw->protocol)); + } } else { - sw->last_reply = time_msec(); + /* We don't know that MAC, or we don't set up flows. Send along the + * packet without setting up a flow. */ + if (pi.buffer_id != UINT32_MAX || out_port != OFPP_NONE) { + queue_tx(sw, ofputil_encode_packet_out(&po, sw->protocol)); + } } } +static void +process_echo_request(struct lswitch *sw, const struct ofp_header *rq) +{ + queue_tx(sw, make_echo_reply(rq)); +}