Merge branch 'mainstream'
[sliver-openvswitch.git] / ofproto / ofproto.c
index 34298d4..bbdb2d2 100644 (file)
@@ -152,7 +152,10 @@ static void oftable_enable_eviction(struct oftable *,
                                     const struct mf_subfield *fields,
                                     size_t n_fields);
 
-static void oftable_remove_rule(struct rule *);
+static void oftable_remove_rule(struct rule *rule) OVS_RELEASES(rule->evict);
+static void oftable_remove_rule__(struct ofproto *ofproto,
+                                  struct classifier *cls, struct rule *rule)
+    OVS_REQ_WRLOCK(cls->rwlock) OVS_RELEASES(rule->evict);
 static struct rule *oftable_replace_rule(struct rule *);
 static void oftable_substitute_rule(struct rule *old, struct rule *new);
 
@@ -178,7 +181,8 @@ struct eviction_group {
     struct heap rules;          /* Contains "struct rule"s. */
 };
 
-static struct rule *choose_rule_to_evict(struct oftable *);
+static bool choose_rule_to_evict(struct oftable *table, struct rule **rulep)
+    OVS_TRY_WRLOCK(true, (*rulep)->evict);
 static void ofproto_evict(struct ofproto *);
 static uint32_t rule_eviction_priority(struct rule *);
 
@@ -199,8 +203,9 @@ static bool rule_is_modifiable(const struct rule *);
 static enum ofperr add_flow(struct ofproto *, struct ofconn *,
                             struct ofputil_flow_mod *,
                             const struct ofp_header *);
-static void delete_flow__(struct rule *, struct ofopgroup *,
-                          enum ofp_flow_removed_reason);
+static void delete_flow__(struct rule *rule, struct ofopgroup *,
+                          enum ofp_flow_removed_reason)
+    OVS_RELEASES(rule->evict);
 static bool handle_openflow(struct ofconn *, const struct ofpbuf *);
 static enum ofperr handle_flow_mod__(struct ofproto *, struct ofconn *,
                                      struct ofputil_flow_mod *,
@@ -430,6 +435,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
     ofproto->n_tables = 0;
     hindex_init(&ofproto->cookies);
     list_init(&ofproto->expirable);
+    ovs_mutex_init(&ofproto->expirable_mutex, PTHREAD_MUTEX_RECURSIVE);
     ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
     ofproto->state = S_OPENFLOW;
     list_init(&ofproto->pending);
@@ -1017,6 +1023,7 @@ ofproto_configure_table(struct ofproto *ofproto, int table_id,
     }
 
     table->max_flows = s->max_flows;
+    ovs_rwlock_rdlock(&table->cls.rwlock);
     if (classifier_count(&table->cls) > table->max_flows
         && table->eviction_fields) {
         /* 'table' contains more flows than allowed.  We might not be able to
@@ -1032,6 +1039,7 @@ ofproto_configure_table(struct ofproto *ofproto, int table_id,
             break;
         }
     }
+    ovs_rwlock_unlock(&table->cls.rwlock);
 }
 \f
 bool
@@ -1065,15 +1073,18 @@ ofproto_flush__(struct ofproto *ofproto)
             continue;
         }
 
+        ovs_rwlock_wrlock(&table->cls.rwlock);
         cls_cursor_init(&cursor, &table->cls, NULL);
         CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, cr, &cursor) {
             if (!rule->pending) {
                 ofoperation_create(group, rule, OFOPERATION_DELETE,
                                    OFPRR_DELETE);
-                oftable_remove_rule(rule);
+                ovs_rwlock_wrlock(&rule->evict);
+                oftable_remove_rule__(ofproto, &table->cls, rule);
                 ofproto->ofproto_class->rule_destruct(rule);
             }
         }
+        ovs_rwlock_unlock(&table->cls.rwlock);
     }
     ofopgroup_submit(group);
 }
@@ -1115,6 +1126,7 @@ ofproto_destroy__(struct ofproto *ofproto)
 
     free(ofproto->vlan_bitmap);
 
+    ovs_mutex_destroy(&ofproto->expirable_mutex);
     ofproto->ofproto_class->dealloc(ofproto);
 }
 
@@ -1385,7 +1397,9 @@ ofproto_get_memory_usage(const struct ofproto *ofproto, struct simap *usage)
 
     n_rules = 0;
     OFPROTO_FOR_EACH_TABLE (table, ofproto) {
+        ovs_rwlock_rdlock(&table->cls.rwlock);
         n_rules += classifier_count(&table->cls);
+        ovs_rwlock_unlock(&table->cls.rwlock);
     }
     simap_increase(usage, "rules", n_rules);
 
@@ -1612,8 +1626,10 @@ ofproto_add_flow(struct ofproto *ofproto, const struct match *match,
 {
     const struct rule *rule;
 
+    ovs_rwlock_rdlock(&ofproto->tables[0].cls.rwlock);
     rule = rule_from_cls_rule(classifier_find_match_exactly(
                                   &ofproto->tables[0].cls, match, priority));
+    ovs_rwlock_unlock(&ofproto->tables[0].cls.rwlock);
     if (!rule || !ofpacts_equal(rule->ofpacts, rule->ofpacts_len,
                                 ofpacts, ofpacts_len)) {
         struct ofputil_flow_mod fm;
@@ -1650,8 +1666,10 @@ ofproto_delete_flow(struct ofproto *ofproto,
 {
     struct rule *rule;
 
+    ovs_rwlock_rdlock(&ofproto->tables[0].cls.rwlock);
     rule = rule_from_cls_rule(classifier_find_match_exactly(
                                   &ofproto->tables[0].cls, target, priority));
+    ovs_rwlock_unlock(&ofproto->tables[0].cls.rwlock);
     if (!rule) {
         /* No such rule -> success. */
         return true;
@@ -1663,6 +1681,7 @@ ofproto_delete_flow(struct ofproto *ofproto,
         /* Initiate deletion -> success. */
         struct ofopgroup *group = ofopgroup_create_unattached(ofproto);
         ofoperation_create(group, rule, OFOPERATION_DELETE, OFPRR_DELETE);
+        ovs_rwlock_wrlock(&rule->evict);
         oftable_remove_rule(rule);
         ofproto->ofproto_class->rule_destruct(rule);
         ofopgroup_submit(group);
@@ -2166,6 +2185,7 @@ ofproto_rule_destroy__(struct rule *rule)
         cls_rule_destroy(&rule->cr);
         free(rule->ofpacts);
         ovs_mutex_destroy(&rule->timeout_mutex);
+        ovs_rwlock_destroy(&rule->evict);
         rule->ofproto->ofproto_class->rule_dealloc(rule);
     }
 }
@@ -2179,10 +2199,15 @@ ofproto_rule_destroy__(struct rule *rule)
  * This function should only be called from an ofproto implementation's
  * ->destruct() function.  It is not suitable elsewhere. */
 void
-ofproto_rule_destroy(struct rule *rule)
+ofproto_rule_destroy(struct ofproto *ofproto, struct classifier *cls,
+                     struct rule *rule) OVS_REQ_WRLOCK(cls->rwlock)
 {
     ovs_assert(!rule->pending);
-    oftable_remove_rule(rule);
+    if (!ovs_rwlock_trywrlock(&rule->evict)) {
+        oftable_remove_rule__(ofproto, cls, rule);
+    } else {
+        NOT_REACHED();
+    }
     ofproto_rule_destroy__(rule);
 }
 
@@ -2614,7 +2639,9 @@ handle_table_stats_request(struct ofconn *ofconn,
         ots[i].instructions = htonl(OFPIT11_ALL);
         ots[i].config = htonl(OFPTC11_TABLE_MISS_MASK);
         ots[i].max_entries = htonl(1000000); /* An arbitrary big number. */
+        ovs_rwlock_rdlock(&p->tables[i].cls.rwlock);
         ots[i].active_count = htonl(classifier_count(&p->tables[i].cls));
+        ovs_rwlock_unlock(&p->tables[i].cls.rwlock);
     }
 
     p->ofproto_class->get_tables(p, ots);
@@ -2881,9 +2908,11 @@ collect_rules_loose(struct ofproto *ofproto, uint8_t table_id,
         struct cls_cursor cursor;
         struct rule *rule;
 
+        ovs_rwlock_rdlock(&table->cls.rwlock);
         cls_cursor_init(&cursor, &table->cls, &cr);
         CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
             if (rule->pending) {
+                ovs_rwlock_unlock(&table->cls.rwlock);
                 error = OFPROTO_POSTPONE;
                 goto exit;
             }
@@ -2893,6 +2922,7 @@ collect_rules_loose(struct ofproto *ofproto, uint8_t table_id,
                 list_push_back(rules, &rule->ofproto_node);
             }
         }
+        ovs_rwlock_unlock(&table->cls.rwlock);
     }
 
 exit:
@@ -2957,8 +2987,10 @@ collect_rules_strict(struct ofproto *ofproto, uint8_t table_id,
     FOR_EACH_MATCHING_TABLE (table, table_id, ofproto) {
         struct rule *rule;
 
+        ovs_rwlock_rdlock(&table->cls.rwlock);
         rule = rule_from_cls_rule(classifier_find_rule_exactly(&table->cls,
                                                                &cr));
+        ovs_rwlock_unlock(&table->cls.rwlock);
         if (rule) {
             if (rule->pending) {
                 error = OFPROTO_POSTPONE;
@@ -3078,10 +3110,12 @@ ofproto_get_all_flows(struct ofproto *p, struct ds *results)
         struct cls_cursor cursor;
         struct rule *rule;
 
+        ovs_rwlock_rdlock(&table->cls.rwlock);
         cls_cursor_init(&cursor, &table->cls, NULL);
         CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
             flow_stats_ds(rule, results);
         }
+        ovs_rwlock_unlock(&table->cls.rwlock);
     }
 }
 
@@ -3311,6 +3345,7 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     struct rule *victim;
     struct rule *rule;
     uint8_t table_id;
+    bool overlaps;
     int error;
 
     error = check_table_id(ofproto, fm->table_id);
@@ -3367,8 +3402,10 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     }
 
     /* Check for overlap, if requested. */
-    if (fm->flags & OFPFF_CHECK_OVERLAP
-        && classifier_rule_overlaps(&table->cls, &rule->cr)) {
+    ovs_rwlock_rdlock(&table->cls.rwlock);
+    overlaps = classifier_rule_overlaps(&table->cls, &rule->cr);
+    ovs_rwlock_unlock(&table->cls.rwlock);
+    if (fm->flags & OFPFF_CHECK_OVERLAP && overlaps) {
         cls_rule_destroy(&rule->cr);
         ofproto->ofproto_class->rule_dealloc(rule);
         return OFPERR_OFPFMFC_OVERLAP;
@@ -3395,12 +3432,12 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     rule->ofpacts_len = fm->ofpacts_len;
     rule->meter_id = find_meter(rule->ofpacts, rule->ofpacts_len);
     list_init(&rule->meter_list_node);
-    rule->evictable = true;
     rule->eviction_group = NULL;
     list_init(&rule->expirable);
     rule->monitor_flags = 0;
     rule->add_seqno = 0;
     rule->modify_seqno = 0;
+    ovs_rwlock_init(&rule->evict);
 
     /* Insert new rule. */
     victim = oftable_replace_rule(rule);
@@ -3411,21 +3448,24 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     } else {
         struct ofoperation *op;
         struct rule *evict;
-
-        if (classifier_count(&table->cls) > table->max_flows) {
-            bool was_evictable;
-
-            was_evictable = rule->evictable;
-            rule->evictable = false;
-            evict = choose_rule_to_evict(table);
-            rule->evictable = was_evictable;
-
-            if (!evict) {
+        size_t n_rules;
+
+        ovs_rwlock_rdlock(&table->cls.rwlock);
+        n_rules = classifier_count(&table->cls);
+        ovs_rwlock_unlock(&table->cls.rwlock);
+        if (n_rules > table->max_flows) {
+            ovs_rwlock_rdlock(&rule->evict);
+            if (choose_rule_to_evict(table, &evict)) {
+                ovs_rwlock_unlock(&rule->evict);
+                ovs_rwlock_unlock(&evict->evict);
+                if (evict->pending) {
+                    error = OFPROTO_POSTPONE;
+                    goto exit;
+                }
+            } else {
+                ovs_rwlock_unlock(&rule->evict);
                 error = OFPERR_OFPFMFC_TABLE_FULL;
                 goto exit;
-            } else if (evict->pending) {
-                error = OFPROTO_POSTPONE;
-                goto exit;
             }
         } else {
             evict = NULL;
@@ -3440,6 +3480,13 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
             op->group->n_running--;
             ofoperation_destroy(rule->pending);
         } else if (evict) {
+            /* It would be better if we maintained the lock we took in
+             * choose_rule_to_evict() earlier, but that confuses the thread
+             * safety analysis, and this code is fragile enough that we really
+             * need that analysis.  In the worst case, we'll have to block a
+             * little while before we perform the eviction, which doesn't seem
+             * like a big problem. */
+            ovs_rwlock_wrlock(&evict->evict);
             delete_flow__(evict, group, OFPRR_EVICTION);
         }
         ofopgroup_submit(group);
@@ -3610,6 +3657,7 @@ delete_flows__(struct ofproto *ofproto, struct ofconn *ofconn,
 
     group = ofopgroup_create(ofproto, ofconn, request, UINT32_MAX);
     LIST_FOR_EACH_SAFE (rule, next, ofproto_node, rules) {
+        ovs_rwlock_wrlock(&rule->evict);
         delete_flow__(rule, group, reason);
     }
     ofopgroup_submit(group);
@@ -4097,11 +4145,13 @@ ofproto_collect_ofmonitor_refresh_rules(const struct ofmonitor *m,
         struct cls_cursor cursor;
         struct rule *rule;
 
+        ovs_rwlock_rdlock(&table->cls.rwlock);
         cls_cursor_init(&cursor, &table->cls, &target);
         CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
             ovs_assert(!rule->pending); /* XXX */
             ofproto_collect_ofmonitor_refresh_rule(m, rule, seqno, rules);
         }
+        ovs_rwlock_unlock(&table->cls.rwlock);
     }
 
     HMAP_FOR_EACH (op, hmap_node, &ofproto->deletions) {
@@ -5031,17 +5081,18 @@ pick_fallback_dpid(void)
 \f
 /* Table overflow policy. */
 
-/* Chooses and returns a rule to evict from 'table'.  Returns NULL if the table
- * is not configured to evict rules or if the table contains no evictable
- * rules.  (Rules with 'evictable' set to false or with no timeouts are not
- * evictable.) */
-static struct rule *
-choose_rule_to_evict(struct oftable *table)
+/* Chooses a rule to evict from 'table'.  On success, stores it in '*rulep'
+ * with its 'evict' rwlock write-locked and returns true.  Returns false, with
+ * '*rulep' NULL, if 'table' has eviction disabled or no evictable rules.
+ * (Rules with 'evict' locked, or with no timeouts, are not evictable.) */
+static bool
+choose_rule_to_evict(struct oftable *table, struct rule **rulep)
 {
     struct eviction_group *evg;
 
+    *rulep = NULL;
     if (!table->eviction_fields) {
-        return NULL;
+        return false;
     }
 
     /* In the common case, the outer and inner loops here will each be entered
@@ -5060,13 +5111,14 @@ choose_rule_to_evict(struct oftable *table)
         struct rule *rule;
 
         HEAP_FOR_EACH (rule, evg_node, &evg->rules) {
-            if (rule->evictable) {
-                return rule;
+            if (!ovs_rwlock_trywrlock(&rule->evict)) {
+                *rulep = rule;
+                return true;
             }
         }
     }
 
-    return NULL;
+    return false;
 }
 
 /* Searches 'ofproto' for tables that have more flows than their configured
@@ -5083,12 +5135,24 @@ ofproto_evict(struct ofproto *ofproto)
 
     group = ofopgroup_create_unattached(ofproto);
     OFPROTO_FOR_EACH_TABLE (table, ofproto) {
-        while (classifier_count(&table->cls) > table->max_flows
-               && table->eviction_fields) {
+        while (table->eviction_fields) {
             struct rule *rule;
+            size_t n_rules;
 
-            rule = choose_rule_to_evict(table);
-            if (!rule || rule->pending) {
+            ovs_rwlock_rdlock(&table->cls.rwlock);
+            n_rules = classifier_count(&table->cls);
+            ovs_rwlock_unlock(&table->cls.rwlock);
+
+            if (n_rules <= table->max_flows) {
+                break;
+            }
+
+            if (!choose_rule_to_evict(table, &rule)) {
+                break;
+            }
+
+            if (rule->pending) {
+                ovs_rwlock_unlock(&rule->evict);
                 break;
             }
 
@@ -5302,7 +5366,9 @@ oftable_init(struct oftable *table)
 static void
 oftable_destroy(struct oftable *table)
 {
+    ovs_rwlock_rdlock(&table->cls.rwlock);
     ovs_assert(classifier_is_empty(&table->cls));
+    ovs_rwlock_unlock(&table->cls.rwlock);
     oftable_disable_eviction(table);
     classifier_destroy(&table->cls);
     free(table->name);
@@ -5382,31 +5448,46 @@ oftable_enable_eviction(struct oftable *table,
     hmap_init(&table->eviction_groups_by_id);
     heap_init(&table->eviction_groups_by_size);
 
+    ovs_rwlock_rdlock(&table->cls.rwlock);
     cls_cursor_init(&cursor, &table->cls, NULL);
     CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
         eviction_group_add_rule(rule);
     }
+    ovs_rwlock_unlock(&table->cls.rwlock);
 }
 
 /* Removes 'rule' from the oftable that contains it. */
 static void
-oftable_remove_rule(struct rule *rule)
+oftable_remove_rule__(struct ofproto *ofproto, struct classifier *cls,
+                      struct rule *rule)
+    OVS_REQ_WRLOCK(cls->rwlock) OVS_RELEASES(rule->evict)
 {
-    struct ofproto *ofproto = rule->ofproto;
-    struct oftable *table = &ofproto->tables[rule->table_id];
-
-    classifier_remove(&table->cls, &rule->cr);
+    classifier_remove(cls, &rule->cr);
     if (rule->meter_id) {
         list_remove(&rule->meter_list_node);
     }
     cookies_remove(ofproto, rule);
     eviction_group_remove_rule(rule);
+    ovs_mutex_lock(&ofproto->expirable_mutex);
     if (!list_is_empty(&rule->expirable)) {
         list_remove(&rule->expirable);
     }
+    ovs_mutex_unlock(&ofproto->expirable_mutex);
     if (!list_is_empty(&rule->meter_list_node)) {
         list_remove(&rule->meter_list_node);
     }
+    ovs_rwlock_unlock(&rule->evict);
+}
+
+static void
+oftable_remove_rule(struct rule *rule)
+{
+    struct ofproto *ofproto = rule->ofproto;
+    struct oftable *table = &ofproto->tables[rule->table_id];
+
+    ovs_rwlock_wrlock(&table->cls.rwlock);
+    oftable_remove_rule__(ofproto, &table->cls, rule);
+    ovs_rwlock_unlock(&table->cls.rwlock);
 }
 
 /* Inserts 'rule' into its oftable.  Removes any existing rule from 'rule''s
@@ -5425,23 +5506,29 @@ oftable_replace_rule(struct rule *rule)
     ovs_mutex_unlock(&rule->timeout_mutex);
 
     if (may_expire) {
+        ovs_mutex_lock(&ofproto->expirable_mutex);
         list_insert(&ofproto->expirable, &rule->expirable);
+        ovs_mutex_unlock(&ofproto->expirable_mutex);
     }
     cookies_insert(ofproto, rule);
     if (rule->meter_id) {
         struct meter *meter = ofproto->meters[rule->meter_id];
         list_insert(&meter->rules, &rule->meter_list_node);
     }
+    ovs_rwlock_wrlock(&table->cls.rwlock);
     victim = rule_from_cls_rule(classifier_replace(&table->cls, &rule->cr));
+    ovs_rwlock_unlock(&table->cls.rwlock);
     if (victim) {
         if (victim->meter_id) {
             list_remove(&victim->meter_list_node);
         }
         cookies_remove(ofproto, victim);
 
+        ovs_mutex_lock(&ofproto->expirable_mutex);
         if (!list_is_empty(&victim->expirable)) {
             list_remove(&victim->expirable);
         }
+        ovs_mutex_unlock(&ofproto->expirable_mutex);
         eviction_group_remove_rule(victim);
     }
     eviction_group_add_rule(rule);
@@ -5455,6 +5542,7 @@ oftable_substitute_rule(struct rule *old, struct rule *new)
     if (new) {
         oftable_replace_rule(new);
     } else {
+        ovs_rwlock_wrlock(&old->evict);
         oftable_remove_rule(old);
     }
 }