+ enum nx_flow_monitor_flags update;
+
+ if (ofproto_rule_is_hidden(rule)) {
+ return;
+ }
+
+ if (!(rule->pending
+ ? ofoperation_has_out_port(rule->pending, m->out_port)
+ : ofproto_rule_has_out_port(rule, m->out_port))) {
+ return;
+ }
+
+ if (seqno) {
+ if (rule->add_seqno > seqno) {
+ update = NXFMF_ADD | NXFMF_MODIFY;
+ } else if (rule->modify_seqno > seqno) {
+ update = NXFMF_MODIFY;
+ } else {
+ return;
+ }
+
+ if (!(m->flags & update)) {
+ return;
+ }
+ } else {
+ update = NXFMF_INITIAL;
+ }
+
+ if (!rule->monitor_flags) {
+ list_push_back(rules, &rule->ofproto_node);
+ }
+ rule->monitor_flags |= update | (m->flags & NXFMF_ACTIONS);
+}
+
/* Collects into 'rules' every rule in the ofproto behind 'm''s connection
 * that matches monitor 'm' and changed since sequence number 'seqno' (all
 * matching rules when 'seqno' is 0).  Rules with pending deletions are no
 * longer in the classifier, so they are picked up separately from the
 * ofproto's 'deletions' map. */
static void
ofproto_collect_ofmonitor_refresh_rules(const struct ofmonitor *m,
                                        uint64_t seqno,
                                        struct list *rules)
{
    const struct ofproto *ofproto = ofconn_get_ofproto(m->ofconn);
    const struct ofoperation *op;
    const struct oftable *table;
    struct cls_rule target;

    cls_rule_init_from_minimatch(&target, &m->match, 0);
    FOR_EACH_MATCHING_TABLE (table, m->table_id, ofproto) {
        struct cls_cursor cursor;
        struct rule *rule;

        cls_cursor_init(&cursor, &table->cls, &target);
        CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
            ovs_assert(!rule->pending); /* XXX */
            ofproto_collect_ofmonitor_refresh_rule(m, rule, seqno, rules);
        }
    }

    /* Rules queued for deletion were already removed from the classifier;
     * scan them via the pending-deletion index instead. */
    HMAP_FOR_EACH (op, hmap_node, &ofproto->deletions) {
        struct rule *rule = op->rule;

        /* table_id 0xff means "every non-hidden table". */
        if (((m->table_id == 0xff
              ? !(ofproto->tables[rule->table_id].flags & OFTABLE_HIDDEN)
              : m->table_id == rule->table_id))
            && cls_rule_is_loose_match(&rule->cr, &target.match)) {
            ofproto_collect_ofmonitor_refresh_rule(m, rule, seqno, rules);
        }
    }
    cls_rule_destroy(&target);
}
+
+static void
+ofproto_collect_ofmonitor_initial_rules(struct ofmonitor *m,
+ struct list *rules)
+{
+ if (m->flags & NXFMF_INITIAL) {
+ ofproto_collect_ofmonitor_refresh_rules(m, 0, rules);
+ }
+}
+
/* Collects into 'rules' every rule that monitor 'm' must re-report when
 * monitoring resumes: all matching rules that changed since sequence number
 * 'seqno'. */
void
ofmonitor_collect_resume_rules(struct ofmonitor *m,
                               uint64_t seqno, struct list *rules)
{
    ofproto_collect_ofmonitor_refresh_rules(m, seqno, rules);
}
+
/* Handles an NXST_FLOW_MONITOR request on 'ofconn': decodes each monitor
 * sub-request in the message, creates the monitors, and replies with the
 * initial flow dump for those that asked for one.
 *
 * On any decode or creation error, destroys every monitor created so far
 * and returns the error. */
static enum ofperr
handle_flow_monitor_request(struct ofconn *ofconn, const struct ofp_header *oh)
{
    struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
    struct ofmonitor **monitors;
    size_t n_monitors, allocated_monitors;
    struct list replies;
    enum ofperr error;
    struct list rules;
    struct ofpbuf b;
    size_t i;

    error = 0;
    ofpbuf_use_const(&b, oh, ntohs(oh->length));
    monitors = NULL;
    n_monitors = allocated_monitors = 0;
    for (;;) {
        struct ofputil_flow_monitor_request request;
        struct ofmonitor *m;
        int retval;

        retval = ofputil_decode_flow_monitor_request(&request, &b);
        if (retval == EOF) {
            /* All sub-requests decoded successfully. */
            break;
        } else if (retval) {
            error = retval;
            goto error;
        }

        if (request.table_id != 0xff
            && request.table_id >= ofproto->n_tables) {
            error = OFPERR_OFPBRC_BAD_TABLE_ID;
            goto error;
        }

        error = ofmonitor_create(&request, ofconn, &m);
        if (error) {
            goto error;
        }

        /* Grow the monitor array as needed and remember 'm' so the error
         * path can destroy everything created so far. */
        if (n_monitors >= allocated_monitors) {
            monitors = x2nrealloc(monitors, &allocated_monitors,
                                  sizeof *monitors);
        }
        monitors[n_monitors++] = m;
    }

    /* Collect the rules for every monitor that requested an initial dump
     * and send them back as a multipart reply. */
    list_init(&rules);
    for (i = 0; i < n_monitors; i++) {
        ofproto_collect_ofmonitor_initial_rules(monitors[i], &rules);
    }

    ofpmp_init(&replies, oh);
    ofmonitor_compose_refresh_updates(&rules, &replies);
    ofconn_send_replies(ofconn, &replies);

    free(monitors);

    return 0;

error:
    /* Tear down every monitor created by this (partially processed)
     * request; only the array itself is freed on success above. */
    for (i = 0; i < n_monitors; i++) {
        ofmonitor_destroy(monitors[i]);
    }
    free(monitors);
    return error;
}
+
+static enum ofperr
+handle_flow_monitor_cancel(struct ofconn *ofconn, const struct ofp_header *oh)
+{
+ struct ofmonitor *m;
+ uint32_t id;
+
+ id = ofputil_decode_flow_monitor_cancel(oh);
+ m = ofmonitor_lookup(ofconn, id);
+ if (!m) {
+ return OFPERR_NXBRC_FM_BAD_ID;
+ }
+
+ ofmonitor_destroy(m);
+ return 0;
+}
+
/* Dispatches the OpenFlow message in 'msg', received on 'ofconn', to the
 * handler for its decoded type.  Returns 0 on success, otherwise an OpenFlow
 * error code (handlers may also return OFPROTO_POSTPONE, which the caller
 * treats specially; see handle_openflow()). */
static enum ofperr
handle_openflow__(struct ofconn *ofconn, const struct ofpbuf *msg)
{
    const struct ofp_header *oh = msg->data;
    enum ofptype type;
    enum ofperr error;

    error = ofptype_decode(&type, oh);
    if (error) {
        return error;
    }

    switch (type) {
        /* OpenFlow requests. */
    case OFPTYPE_ECHO_REQUEST:
        return handle_echo_request(ofconn, oh);

    case OFPTYPE_FEATURES_REQUEST:
        return handle_features_request(ofconn, oh);

    case OFPTYPE_GET_CONFIG_REQUEST:
        return handle_get_config_request(ofconn, oh);

    case OFPTYPE_SET_CONFIG:
        return handle_set_config(ofconn, oh);

    case OFPTYPE_PACKET_OUT:
        return handle_packet_out(ofconn, oh);

    case OFPTYPE_PORT_MOD:
        return handle_port_mod(ofconn, oh);

    case OFPTYPE_FLOW_MOD:
        return handle_flow_mod(ofconn, oh);

    case OFPTYPE_BARRIER_REQUEST:
        return handle_barrier_request(ofconn, oh);

    case OFPTYPE_ROLE_REQUEST:
        return handle_role_request(ofconn, oh);

        /* OpenFlow replies. */
    case OFPTYPE_ECHO_REPLY:
        return 0;

        /* Nicira extension requests. */
    case OFPTYPE_FLOW_MOD_TABLE_ID:
        return handle_nxt_flow_mod_table_id(ofconn, oh);

    case OFPTYPE_SET_FLOW_FORMAT:
        return handle_nxt_set_flow_format(ofconn, oh);

    case OFPTYPE_SET_PACKET_IN_FORMAT:
        return handle_nxt_set_packet_in_format(ofconn, oh);

    case OFPTYPE_SET_CONTROLLER_ID:
        return handle_nxt_set_controller_id(ofconn, oh);

    case OFPTYPE_FLOW_AGE:
        /* Nothing to do. */
        return 0;

    case OFPTYPE_FLOW_MONITOR_CANCEL:
        return handle_flow_monitor_cancel(ofconn, oh);

    case OFPTYPE_SET_ASYNC_CONFIG:
        return handle_nxt_set_async_config(ofconn, oh);

        /* Statistics requests. */
    case OFPTYPE_DESC_STATS_REQUEST:
        return handle_desc_stats_request(ofconn, oh);

    case OFPTYPE_FLOW_STATS_REQUEST:
        return handle_flow_stats_request(ofconn, oh);

    case OFPTYPE_AGGREGATE_STATS_REQUEST:
        return handle_aggregate_stats_request(ofconn, oh);

    case OFPTYPE_TABLE_STATS_REQUEST:
        return handle_table_stats_request(ofconn, oh);

    case OFPTYPE_PORT_STATS_REQUEST:
        return handle_port_stats_request(ofconn, oh);

    case OFPTYPE_QUEUE_STATS_REQUEST:
        return handle_queue_stats_request(ofconn, oh);

    case OFPTYPE_PORT_DESC_STATS_REQUEST:
        return handle_port_desc_stats_request(ofconn, oh);

    case OFPTYPE_FLOW_MONITOR_STATS_REQUEST:
        return handle_flow_monitor_request(ofconn, oh);

        /* FIXME: Change the following once they are implemented: */
    case OFPTYPE_QUEUE_GET_CONFIG_REQUEST:
    case OFPTYPE_GET_ASYNC_REQUEST:
    case OFPTYPE_METER_MOD:
    case OFPTYPE_GROUP_REQUEST:
    case OFPTYPE_GROUP_DESC_REQUEST:
    case OFPTYPE_GROUP_FEATURES_REQUEST:
    case OFPTYPE_METER_REQUEST:
    case OFPTYPE_METER_CONFIG_REQUEST:
    case OFPTYPE_METER_FEATURES_REQUEST:
    case OFPTYPE_TABLE_FEATURES_REQUEST:
        return OFPERR_OFPBRC_BAD_TYPE;

        /* Message types a switch should never receive (hellos and errors are
         * handled at a lower layer; the rest are replies we never request
         * here or asynchronous messages only a controller receives). */
    case OFPTYPE_HELLO:
    case OFPTYPE_ERROR:
    case OFPTYPE_FEATURES_REPLY:
    case OFPTYPE_GET_CONFIG_REPLY:
    case OFPTYPE_PACKET_IN:
    case OFPTYPE_FLOW_REMOVED:
    case OFPTYPE_PORT_STATUS:
    case OFPTYPE_BARRIER_REPLY:
    case OFPTYPE_QUEUE_GET_CONFIG_REPLY:
    case OFPTYPE_DESC_STATS_REPLY:
    case OFPTYPE_FLOW_STATS_REPLY:
    case OFPTYPE_QUEUE_STATS_REPLY:
    case OFPTYPE_PORT_STATS_REPLY:
    case OFPTYPE_TABLE_STATS_REPLY:
    case OFPTYPE_AGGREGATE_STATS_REPLY:
    case OFPTYPE_PORT_DESC_STATS_REPLY:
    case OFPTYPE_ROLE_REPLY:
    case OFPTYPE_FLOW_MONITOR_PAUSED:
    case OFPTYPE_FLOW_MONITOR_RESUMED:
    case OFPTYPE_FLOW_MONITOR_STATS_REPLY:
    case OFPTYPE_GET_ASYNC_REPLY:
    case OFPTYPE_GROUP_REPLY:
    case OFPTYPE_GROUP_DESC_REPLY:
    case OFPTYPE_GROUP_FEATURES_REPLY:
    case OFPTYPE_METER_REPLY:
    case OFPTYPE_METER_CONFIG_REPLY:
    case OFPTYPE_METER_FEATURES_REPLY:
    case OFPTYPE_TABLE_FEATURES_REPLY:
    default:
        return OFPERR_OFPBRC_BAD_TYPE;
    }
}
+
+static bool
+handle_openflow(struct ofconn *ofconn, const struct ofpbuf *ofp_msg)
+{
+ int error = handle_openflow__(ofconn, ofp_msg);
+ if (error && error != OFPROTO_POSTPONE) {
+ ofconn_send_error(ofconn, ofp_msg->data, error);
+ }
+ COVERAGE_INC(ofproto_recv_openflow);
+ return error != OFPROTO_POSTPONE;
+}
+\f
+/* Asynchronous operations. */
+
+/* Creates and returns a new ofopgroup that is not associated with any
+ * OpenFlow connection.
+ *
+ * The caller should add operations to the returned group with
+ * ofoperation_create() and then submit it with ofopgroup_submit(). */
+static struct ofopgroup *
+ofopgroup_create_unattached(struct ofproto *ofproto)
+{
+ struct ofopgroup *group = xzalloc(sizeof *group);
+ group->ofproto = ofproto;
+ list_init(&group->ofproto_node);
+ list_init(&group->ops);
+ list_init(&group->ofconn_node);
+ return group;
+}
+
+/* Creates and returns a new ofopgroup for 'ofproto'.
+ *
+ * If 'ofconn' is NULL, the new ofopgroup is not associated with any OpenFlow
+ * connection. The 'request' and 'buffer_id' arguments are ignored.
+ *
+ * If 'ofconn' is nonnull, then the new ofopgroup is associated with 'ofconn'.
+ * If the ofopgroup eventually fails, then the error reply will include
+ * 'request'. If the ofopgroup eventually succeeds, then the packet with
+ * buffer id 'buffer_id' on 'ofconn' will be sent by 'ofconn''s ofproto.
+ *
+ * The caller should add operations to the returned group with
+ * ofoperation_create() and then submit it with ofopgroup_submit(). */
+static struct ofopgroup *
+ofopgroup_create(struct ofproto *ofproto, struct ofconn *ofconn,
+ const struct ofp_header *request, uint32_t buffer_id)
+{
+ struct ofopgroup *group = ofopgroup_create_unattached(ofproto);
+ if (ofconn) {
+ size_t request_len = ntohs(request->length);
+
+ ovs_assert(ofconn_get_ofproto(ofconn) == ofproto);
+
+ ofconn_add_opgroup(ofconn, &group->ofconn_node);
+ group->ofconn = ofconn;
+ group->request = xmemdup(request, MIN(request_len, 64));
+ group->buffer_id = buffer_id;
+ }
+ return group;
+}
+
+/* Submits 'group' for processing.
+ *
+ * If 'group' contains no operations (e.g. none were ever added, or all of the
+ * ones that were added completed synchronously), then it is destroyed
+ * immediately. Otherwise it is added to the ofproto's list of pending
+ * groups. */
+static void
+ofopgroup_submit(struct ofopgroup *group)
+{
+ if (!group->n_running) {
+ ofopgroup_complete(group);
+ } else {
+ list_push_back(&group->ofproto->pending, &group->ofproto_node);
+ group->ofproto->n_pending++;
+ }
+}
+
/* Carries out the final steps for 'group', which must have no operations
 * still running: reports flow-monitor updates, commits or rolls back each
 * operation, sends any pending error reply and buffered packet, and frees
 * the group. */
static void
ofopgroup_complete(struct ofopgroup *group)
{
    struct ofproto *ofproto = group->ofproto;

    struct ofconn *abbrev_ofconn;
    ovs_be32 abbrev_xid;

    struct ofoperation *op, *next_op;
    int error;

    ovs_assert(!group->n_running);

    /* The group's overall error is the first per-operation error found. */
    error = 0;
    LIST_FOR_EACH (op, group_node, &group->ops) {
        if (op->error) {
            error = op->error;
            break;
        }
    }

    /* On success, execute the packet buffered on the controller (if any)
     * through the first non-delete rule in the group. */
    if (!error && group->ofconn && group->buffer_id != UINT32_MAX) {
        LIST_FOR_EACH (op, group_node, &group->ops) {
            if (op->type != OFOPERATION_DELETE) {
                struct ofpbuf *packet;
                uint16_t in_port;

                error = ofconn_pktbuf_retrieve(group->ofconn, group->buffer_id,
                                               &packet, &in_port);
                if (packet) {
                    ovs_assert(!error);
                    error = rule_execute(op->rule, in_port, packet);
                }
                break;
            }
        }
    }

    /* If the whole group succeeded on behalf of a connection, tag the
     * flow-monitor updates with that connection and request xid so the
     * requester's own updates can be abbreviated. */
    if (!error && !list_is_empty(&group->ofconn_node)) {
        abbrev_ofconn = group->ofconn;
        abbrev_xid = group->request->xid;
    } else {
        abbrev_ofconn = NULL;
        abbrev_xid = htonl(0);
    }
    LIST_FOR_EACH_SAFE (op, next_op, group_node, &group->ops) {
        struct rule *rule = op->rule;

        /* We generally want to report the change to active OpenFlow flow
           monitors (e.g. NXST_FLOW_MONITOR).  There are three exceptions:

              - The operation failed.

              - The affected rule is not visible to controllers.

              - The operation's only effect was to update rule->modified. */
        if (!(op->error
              || ofproto_rule_is_hidden(rule)
              || (op->type == OFOPERATION_MODIFY
                  && op->ofpacts
                  && rule->flow_cookie == op->flow_cookie))) {
            /* Check that we can just cast from ofoperation_type to
             * nx_flow_update_event. */
            BUILD_ASSERT_DECL((enum nx_flow_update_event) OFOPERATION_ADD
                              == NXFME_ADDED);
            BUILD_ASSERT_DECL((enum nx_flow_update_event) OFOPERATION_DELETE
                              == NXFME_DELETED);
            BUILD_ASSERT_DECL((enum nx_flow_update_event) OFOPERATION_MODIFY
                              == NXFME_MODIFIED);

            ofmonitor_report(ofproto->connmgr, rule,
                             (enum nx_flow_update_event) op->type,
                             op->reason, abbrev_ofconn, abbrev_xid);
        }

        rule->pending = NULL;

        switch (op->type) {
        case OFOPERATION_ADD:
            if (!op->error) {
                uint16_t vid_mask;

                /* The add succeeded, so the displaced rule (if any) is gone
                 * for good. */
                ofproto_rule_destroy__(op->victim);

                /* If the new rule matches a VLAN exactly, record that this
                 * VLAN is now in use. */
                vid_mask = minimask_get_vid_mask(&rule->cr.match.mask);
                if (vid_mask == VLAN_VID_MASK) {
                    if (ofproto->vlan_bitmap) {
                        uint16_t vid = miniflow_get_vid(&rule->cr.match.flow);
                        if (!bitmap_is_set(ofproto->vlan_bitmap, vid)) {
                            bitmap_set1(ofproto->vlan_bitmap, vid);
                            ofproto->vlans_changed = true;
                        }
                    } else {
                        ofproto->vlans_changed = true;
                    }
                }
            } else {
                /* The add failed: put the displaced rule back and destroy
                 * the new one. */
                oftable_substitute_rule(rule, op->victim);
                ofproto_rule_destroy__(rule);
            }
            break;

        case OFOPERATION_DELETE:
            /* Deletion is never allowed to fail. */
            ovs_assert(!op->error);
            ofproto_rule_destroy__(rule);
            op->rule = NULL;
            break;

        case OFOPERATION_MODIFY:
            if (!op->error) {
                rule->modified = time_msec();
            } else {
                /* Roll back the cookie and, if they were replaced, the
                 * actions. */
                rule->flow_cookie = op->flow_cookie;
                if (op->ofpacts) {
                    free(rule->ofpacts);
                    rule->ofpacts = op->ofpacts;
                    rule->ofpacts_len = op->ofpacts_len;
                    op->ofpacts = NULL;
                    op->ofpacts_len = 0;
                }
            }
            break;

        default:
            NOT_REACHED();
        }

        ofoperation_destroy(op);
    }

    ofmonitor_flush(ofproto->connmgr);

    /* Unlink the group from the ofproto's pending list, if it was there. */
    if (!list_is_empty(&group->ofproto_node)) {
        ovs_assert(ofproto->n_pending > 0);
        ofproto->n_pending--;
        list_remove(&group->ofproto_node);
    }
    if (!list_is_empty(&group->ofconn_node)) {
        list_remove(&group->ofconn_node);
        if (error) {
            ofconn_send_error(group->ofconn, group->request, error);
        }
        connmgr_retry(ofproto->connmgr);
    }
    free(group->request);
    free(group);
}
+
+/* Initiates a new operation on 'rule', of the specified 'type', within
+ * 'group'. Prior to calling, 'rule' must not have any pending operation.
+ *
+ * For a 'type' of OFOPERATION_DELETE, 'reason' should specify the reason that
+ * the flow is being deleted. For other 'type's, 'reason' is ignored (use 0).
+ *
+ * Returns the newly created ofoperation (which is also available as
+ * rule->pending). */
static struct ofoperation *
ofoperation_create(struct ofopgroup *group, struct rule *rule,
                   enum ofoperation_type type,
                   enum ofp_flow_removed_reason reason)
{
    struct ofproto *ofproto = group->ofproto;
    struct ofoperation *op;

    ovs_assert(!rule->pending);

    op = rule->pending = xzalloc(sizeof *op);
    op->group = group;
    list_push_back(&group->ops, &op->group_node);
    op->rule = rule;
    op->type = type;
    op->reason = reason;
    /* Save the current cookie so a failed modify can roll it back. */
    op->flow_cookie = rule->flow_cookie;

    group->n_running++;

    if (type == OFOPERATION_DELETE) {
        /* Index pending deletions so flow monitors can still find rules that
         * have already been removed from the classifier. */
        hmap_insert(&ofproto->deletions, &op->hmap_node,
                    cls_rule_hash(&rule->cr, rule->table_id));
    }

    return op;
}
+
+static void
+ofoperation_destroy(struct ofoperation *op)
+{
+ struct ofopgroup *group = op->group;
+
+ if (op->rule) {
+ op->rule->pending = NULL;
+ }
+ if (op->type == OFOPERATION_DELETE) {
+ hmap_remove(&group->ofproto->deletions, &op->hmap_node);
+ }
+ list_remove(&op->group_node);
+ free(op->ofpacts);
+ free(op);
+}
+
+/* Indicates that 'op' completed with status 'error', which is either 0 to
+ * indicate success or an OpenFlow error code on failure.
+ *
+ * If 'error' is 0, indicating success, the operation will be committed
+ * permanently to the flow table. There is one interesting subcase:
+ *
+ * - If 'op' is an "add flow" operation that is replacing an existing rule in
+ * the flow table (the "victim" rule) by a new one, then the caller must
+ * have uninitialized any derived state in the victim rule, as in step 5 in
+ * the "Life Cycle" in ofproto/ofproto-provider.h. ofoperation_complete()
+ * performs steps 6 and 7 for the victim rule, most notably by calling its
+ * ->rule_dealloc() function.
+ *
+ * If 'error' is nonzero, then generally the operation will be rolled back:
+ *
+ * - If 'op' is an "add flow" operation, ofproto removes the new rule or
+ * restores the original rule. The caller must have uninitialized any
+ * derived state in the new rule, as in step 5 of in the "Life Cycle" in
+ * ofproto/ofproto-provider.h. ofoperation_complete() performs steps 6 and
+ * and 7 for the new rule, calling its ->rule_dealloc() function.
+ *
+ * - If 'op' is a "modify flow" operation, ofproto restores the original
+ * actions.
+ *
+ * - 'op' must not be a "delete flow" operation. Removing a rule is not
+ * allowed to fail. It must always succeed.
+ *
+ * Please see the large comment in ofproto/ofproto-provider.h titled
+ * "Asynchronous Operation Support" for more information. */
+void
+ofoperation_complete(struct ofoperation *op, enum ofperr error)
+{
+ struct ofopgroup *group = op->group;
+
+ ovs_assert(op->rule->pending == op);
+ ovs_assert(group->n_running > 0);
+ ovs_assert(!error || op->type != OFOPERATION_DELETE);
+
+ op->error = error;
+ if (!--group->n_running && !list_is_empty(&group->ofproto_node)) {
+ ofopgroup_complete(group);
+ }
+}
+
/* Returns the rule that 'op', which must be an "add flow" operation,
 * displaced from the flow table, or NULL if it displaced none. */
struct rule *
ofoperation_get_victim(struct ofoperation *op)
{
    ovs_assert(op->type == OFOPERATION_ADD);
    return op->victim;
}
+\f
+static uint64_t
+pick_datapath_id(const struct ofproto *ofproto)
+{
+ const struct ofport *port;
+
+ port = ofproto_get_port(ofproto, OFPP_LOCAL);
+ if (port) {
+ uint8_t ea[ETH_ADDR_LEN];
+ int error;
+
+ error = netdev_get_etheraddr(port->netdev, ea);
+ if (!error) {
+ return eth_addr_to_uint64(ea);
+ }
+ VLOG_WARN("%s: could not get MAC address for %s (%s)",
+ ofproto->name, netdev_get_name(port->netdev),
+ strerror(error));
+ }
+ return ofproto->fallback_dpid;
+}
+
+static uint64_t
+pick_fallback_dpid(void)
+{
+ uint8_t ea[ETH_ADDR_LEN];
+ eth_addr_nicira_random(ea);
+ return eth_addr_to_uint64(ea);
+}
+\f
+/* Table overflow policy. */
+
+/* Chooses and returns a rule to evict from 'table'. Returns NULL if the table
+ * is not configured to evict rules or if the table contains no evictable
+ * rules. (Rules with 'evictable' set to false or with no timeouts are not
+ * evictable.) */
static struct rule *
choose_rule_to_evict(struct oftable *table)
{
    struct eviction_group *evg;

    if (!table->eviction_fields) {
        return NULL;
    }

    /* In the common case, the outer and inner loops here will each be entered
     * exactly once:
     *
     *   - The inner loop normally "return"s in its first iteration.  If the
     *     eviction group has any evictable rules, then it always returns in
     *     some iteration.
     *
     *   - The outer loop only iterates more than once if the largest eviction
     *     group has no evictable rules.
     *
     *   - The outer loop can exit only if table's 'max_flows' is all filled up
     *     by unevictable rules'. */
    /* Groups are visited from the head of a max-heap keyed on group size. */
    HEAP_FOR_EACH (evg, size_node, &table->eviction_groups_by_size) {
        struct rule *rule;

        HEAP_FOR_EACH (rule, evg_node, &evg->rules) {
            if (rule->evictable) {
                return rule;
            }
        }
    }

    return NULL;
}
+
+/* Searches 'ofproto' for tables that have more flows than their configured
+ * maximum and that have flow eviction enabled, and evicts as many flows as
+ * necessary and currently feasible from them.
+ *
+ * This triggers only when an OpenFlow table has N flows in it and then the
+ * client configures a maximum number of flows less than N. */
static void
ofproto_evict(struct ofproto *ofproto)
{
    struct ofopgroup *group;
    struct oftable *table;

    /* All evictions are batched into a single unattached operation group. */
    group = ofopgroup_create_unattached(ofproto);
    OFPROTO_FOR_EACH_TABLE (table, ofproto) {
        while (classifier_count(&table->cls) > table->max_flows
               && table->eviction_fields) {
            struct rule *rule;

            rule = choose_rule_to_evict(table);
            /* Stop when nothing is evictable or the candidate already has an
             * operation in flight. */
            if (!rule || rule->pending) {
                break;
            }

            ofoperation_create(group, rule,
                               OFOPERATION_DELETE, OFPRR_EVICTION);
            oftable_remove_rule(rule);
            ofproto->ofproto_class->rule_destruct(rule);
        }
    }
    ofopgroup_submit(group);
}
+\f
+/* Eviction groups. */
+
+/* Returns the priority to use for an eviction_group that contains 'n_rules'
+ * rules. The priority contains low-order random bits to ensure that eviction
+ * groups with the same number of rules are prioritized randomly. */
static uint32_t
eviction_group_priority(size_t n_rules)
{
    /* 'size' saturates at UINT16_MAX so huge groups still get the maximum
     * size component. */
    uint16_t size = MIN(UINT16_MAX, n_rules);

    /* Cast before shifting: 'size' would otherwise promote to (signed) int,
     * and left-shifting a value >= 0x8000 by 16 overflows int, which is
     * undefined behavior in C.  The cast makes the shift unsigned. */
    return ((uint32_t) size << 16) | random_uint16();
}
+
+/* Updates 'evg', an eviction_group within 'table', following a change that
+ * adds or removes rules in 'evg'. */
+static void
+eviction_group_resized(struct oftable *table, struct eviction_group *evg)
+{
+ heap_change(&table->eviction_groups_by_size, &evg->size_node,
+ eviction_group_priority(heap_count(&evg->rules)));
+}
+
+/* Destroys 'evg', an eviction_group within 'table':
+ *
+ * - Removes all the rules, if any, from 'evg'. (It doesn't destroy the
+ * rules themselves, just removes them from the eviction group.)
+ *
+ * - Removes 'evg' from 'table'.
+ *
+ * - Frees 'evg'. */
+static void
+eviction_group_destroy(struct oftable *table, struct eviction_group *evg)
+{
+ while (!heap_is_empty(&evg->rules)) {
+ struct rule *rule;
+
+ rule = CONTAINER_OF(heap_pop(&evg->rules), struct rule, evg_node);
+ rule->eviction_group = NULL;
+ }
+ hmap_remove(&table->eviction_groups_by_id, &evg->id_node);
+ heap_remove(&table->eviction_groups_by_size, &evg->size_node);
+ heap_destroy(&evg->rules);
+ free(evg);
+}
+
+/* Removes 'rule' from its eviction group, if any. */
+static void
+eviction_group_remove_rule(struct rule *rule)
+{
+ if (rule->eviction_group) {
+ struct oftable *table = &rule->ofproto->tables[rule->table_id];
+ struct eviction_group *evg = rule->eviction_group;
+
+ rule->eviction_group = NULL;
+ heap_remove(&evg->rules, &rule->evg_node);
+ if (heap_is_empty(&evg->rules)) {
+ eviction_group_destroy(table, evg);
+ } else {
+ eviction_group_resized(table, evg);
+ }
+ }
+}
+
+/* Hashes the 'rule''s values for the eviction_fields of 'rule''s table, and
+ * returns the hash value. */
static uint32_t
eviction_group_hash_rule(struct rule *rule)
{
    struct oftable *table = &rule->ofproto->tables[rule->table_id];
    const struct mf_subfield *sf;
    struct flow flow;
    uint32_t hash;

    /* Seed with a per-table random basis so hashes differ across tables. */
    hash = table->eviction_group_id_basis;
    miniflow_expand(&rule->cr.match.flow, &flow);
    for (sf = table->eviction_fields;
         sf < &table->eviction_fields[table->n_eviction_fields];
         sf++)
    {
        if (mf_are_prereqs_ok(sf->field, &flow)) {
            union mf_value value;

            mf_get_value(sf->field, &flow, &value);
            /* Zero out the bits outside [ofs, ofs + n_bits) so only the
             * selected subfield contributes to the hash. */
            if (sf->ofs) {
                bitwise_zero(&value, sf->field->n_bytes, 0, sf->ofs);
            }
            if (sf->ofs + sf->n_bits < sf->field->n_bytes * 8) {
                unsigned int start = sf->ofs + sf->n_bits;
                bitwise_zero(&value, sf->field->n_bytes, start,
                             sf->field->n_bytes * 8 - start);
            }
            hash = hash_bytes(&value, sf->field->n_bytes, hash);
        } else {
            /* Field prerequisites not met: still fold in a constant so the
             * hash stays deterministic for such rules. */
            hash = hash_int(hash, 0);
        }
    }

    return hash;
}
+
+/* Returns an eviction group within 'table' with the given 'id', creating one
+ * if necessary. */
+static struct eviction_group *
+eviction_group_find(struct oftable *table, uint32_t id)
+{
+ struct eviction_group *evg;
+
+ HMAP_FOR_EACH_WITH_HASH (evg, id_node, id, &table->eviction_groups_by_id) {
+ return evg;
+ }
+
+ evg = xmalloc(sizeof *evg);
+ hmap_insert(&table->eviction_groups_by_id, &evg->id_node, id);
+ heap_insert(&table->eviction_groups_by_size, &evg->size_node,
+ eviction_group_priority(0));
+ heap_init(&evg->rules);
+
+ return evg;
+}
+
+/* Returns an eviction priority for 'rule'. The return value should be
+ * interpreted so that higher priorities make a rule more attractive candidates
+ * for eviction. */
+static uint32_t
+rule_eviction_priority(struct rule *rule)
+{
+ long long int hard_expiration;
+ long long int idle_expiration;
+ long long int expiration;
+ uint32_t expiration_offset;
+
+ /* Calculate time of expiration. */
+ hard_expiration = (rule->hard_timeout
+ ? rule->modified + rule->hard_timeout * 1000
+ : LLONG_MAX);
+ idle_expiration = (rule->idle_timeout
+ ? rule->used + rule->idle_timeout * 1000
+ : LLONG_MAX);
+ expiration = MIN(hard_expiration, idle_expiration);
+ if (expiration == LLONG_MAX) {
+ return 0;
+ }
+
+ /* Calculate the time of expiration as a number of (approximate) seconds
+ * after program startup.
+ *
+ * This should work OK for program runs that last UINT32_MAX seconds or
+ * less. Therefore, please restart OVS at least once every 136 years. */
+ expiration_offset = (expiration >> 10) - (time_boot_msec() >> 10);
+
+ /* Invert the expiration offset because we're using a max-heap. */
+ return UINT32_MAX - expiration_offset;
+}
+
+/* Adds 'rule' to an appropriate eviction group for its oftable's
+ * configuration. Does nothing if 'rule''s oftable doesn't have eviction
+ * enabled, or if 'rule' is a permanent rule (one that will never expire on its
+ * own).
+ *
+ * The caller must ensure that 'rule' is not already in an eviction group. */
+static void
+eviction_group_add_rule(struct rule *rule)
+{
+ struct ofproto *ofproto = rule->ofproto;
+ struct oftable *table = &ofproto->tables[rule->table_id];
+
+ if (table->eviction_fields
+ && (rule->hard_timeout || rule->idle_timeout)) {
+ struct eviction_group *evg;
+
+ evg = eviction_group_find(table, eviction_group_hash_rule(rule));
+
+ rule->eviction_group = evg;
+ heap_insert(&evg->rules, &rule->evg_node,
+ rule_eviction_priority(rule));
+ eviction_group_resized(table, evg);
+ }
+}
+\f
+/* oftables. */
+
+/* Initializes 'table'. */
+static void
+oftable_init(struct oftable *table)
+{
+ memset(table, 0, sizeof *table);
+ classifier_init(&table->cls);
+ table->max_flows = UINT_MAX;
+}
+
+/* Destroys 'table', including its classifier and eviction groups.
+ *
+ * The caller is responsible for freeing 'table' itself. */
static void
oftable_destroy(struct oftable *table)
{
    /* The caller must already have removed every rule from the table. */
    ovs_assert(classifier_is_empty(&table->cls));
    oftable_disable_eviction(table);
    classifier_destroy(&table->cls);
    free(table->name);
}
+
+/* Changes the name of 'table' to 'name'. If 'name' is NULL or the empty
+ * string, then 'table' will use its default name.
+ *
+ * This only affects the name exposed for a table exposed through the OpenFlow
+ * OFPST_TABLE (as printed by "ovs-ofctl dump-tables"). */
+static void
+oftable_set_name(struct oftable *table, const char *name)
+{
+ if (name && name[0]) {
+ int len = strnlen(name, OFP_MAX_TABLE_NAME_LEN);
+ if (!table->name || strncmp(name, table->name, len)) {
+ free(table->name);
+ table->name = xmemdup0(name, len);
+ }
+ } else {
+ free(table->name);
+ table->name = NULL;
+ }
+}
+
+/* oftables support a choice of two policies when adding a rule would cause the
+ * number of flows in the table to exceed the configured maximum number: either
+ * they can refuse to add the new flow or they can evict some existing flow.
+ * This function configures the former policy on 'table'. */
static void
oftable_disable_eviction(struct oftable *table)
{
    if (table->eviction_fields) {
        struct eviction_group *evg, *next;

        /* Destroy every eviction group before tearing down the containers
         * that index them. */
        HMAP_FOR_EACH_SAFE (evg, next, id_node,
                            &table->eviction_groups_by_id) {
            eviction_group_destroy(table, evg);
        }
        hmap_destroy(&table->eviction_groups_by_id);
        heap_destroy(&table->eviction_groups_by_size);

        /* A null 'eviction_fields' is what marks eviction as disabled. */
        free(table->eviction_fields);
        table->eviction_fields = NULL;
        table->n_eviction_fields = 0;
    }
}
+
+/* oftables support a choice of two policies when adding a rule would cause the
+ * number of flows in the table to exceed the configured maximum number: either
+ * they can refuse to add the new flow or they can evict some existing flow.
+ * This function configures the latter policy on 'table', with fairness based
+ * on the values of the 'n_fields' fields specified in 'fields'. (Specifying
+ * 'n_fields' as 0 disables fairness.) */
static void
oftable_enable_eviction(struct oftable *table,
                        const struct mf_subfield *fields, size_t n_fields)
{
    struct cls_cursor cursor;
    struct rule *rule;

    /* If eviction is already enabled with identical fairness fields, skip
     * the expensive rebuild of all eviction groups. */
    if (table->eviction_fields
        && n_fields == table->n_eviction_fields
        && (!n_fields
            || !memcmp(fields, table->eviction_fields,
                       n_fields * sizeof *fields))) {
        /* No change. */
        return;
    }

    oftable_disable_eviction(table);

    table->n_eviction_fields = n_fields;
    table->eviction_fields = xmemdup(fields, n_fields * sizeof *fields);

    table->eviction_group_id_basis = random_uint32();
    hmap_init(&table->eviction_groups_by_id);
    heap_init(&table->eviction_groups_by_size);

    /* Re-bucket every existing rule into the new eviction groups. */
    cls_cursor_init(&cursor, &table->cls, NULL);
    CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
        eviction_group_add_rule(rule);
    }
}
+
+/* Removes 'rule' from the oftable that contains it. */
+static void
+oftable_remove_rule(struct rule *rule)
+{
+ struct ofproto *ofproto = rule->ofproto;
+ struct oftable *table = &ofproto->tables[rule->table_id];
+
+ classifier_remove(&table->cls, &rule->cr);
+ eviction_group_remove_rule(rule);
+ if (!list_is_empty(&rule->expirable)) {
+ list_remove(&rule->expirable);
+ }
+}
+
+/* Inserts 'rule' into its oftable. Removes any existing rule from 'rule''s
+ * oftable that has an identical cls_rule. Returns the rule that was removed,
+ * if any, and otherwise NULL. */
static struct rule *
oftable_replace_rule(struct rule *rule)
{
    struct ofproto *ofproto = rule->ofproto;
    struct oftable *table = &ofproto->tables[rule->table_id];
    struct rule *victim;
    /* Only rules with a hard or idle timeout can ever expire. */
    bool may_expire = rule->hard_timeout || rule->idle_timeout;

    if (may_expire) {
        list_insert(&ofproto->expirable, &rule->expirable);
    }

    victim = rule_from_cls_rule(classifier_replace(&table->cls, &rule->cr));
    if (victim) {
        /* Detach the displaced rule from expiry and eviction tracking; its
         * further destruction is the caller's responsibility. */
        if (!list_is_empty(&victim->expirable)) {
            list_remove(&victim->expirable);
        }
        eviction_group_remove_rule(victim);
    }
    eviction_group_add_rule(rule);
    return victim;
}
+
+/* Removes 'old' from its oftable then, if 'new' is nonnull, inserts 'new'. */
static void
oftable_substitute_rule(struct rule *old, struct rule *new)
{
    if (!new) {
        /* Nothing to put back: just take 'old' out. */
        oftable_remove_rule(old);
    } else {
        /* Inserting 'new' displaces 'old' (they share a cls_rule). */
        oftable_replace_rule(new);
    }
}
+\f