X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fclassifier.h;h=c3c1c3bd2adef8df2ad0e1e340850cb661506a7a;hb=06f81620436881449cb9a2db4f875aa00803f28d;hp=6f8c186d43cf640bbba79d494cf96c8226b4cd43;hpb=8ea3791cc48cdfa477ae42ad0eb5e4fb2bf0ce5d;p=sliver-openvswitch.git diff --git a/lib/classifier.h b/lib/classifier.h index 6f8c186d4..c3c1c3bd2 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -24,15 +24,79 @@ * ===== * * A flow classifier holds any number of "rules", each of which specifies - * values to match for some fields or subfields and a priority. The primary - * design goal for the classifier is that, given a packet, it can as quickly as - * possible find the highest-priority rule that matches the packet. + * values to match for some fields or subfields and a priority. Each OpenFlow + * table is implemented as a flow classifier. * - * Each OpenFlow table is implemented as a flow classifier. + * The classifier has two primary design goals. The first is obvious: given a + * set of packet headers, as quickly as possible find the highest-priority rule + * that matches those headers. The following section describes the second + * goal. * * - * Basic Design - * ============ + * "Un-wildcarding" + * ================ + * + * A primary goal of the flow classifier is to produce, as a side effect of a + * packet lookup, a wildcard mask that indicates which bits of the packet + * headers were essential to the classification result. Ideally, a 1-bit in + * any position of this mask means that, if the corresponding bit in the packet + * header were flipped, then the classification result might change. A 0-bit + * means that changing the packet header bit would have no effect. Thus, the + * wildcarded bits are the ones that played no role in the classification + * decision. + * + * Such a wildcard mask is useful with datapaths that support installing flows + * that wildcard fields or subfields. If an OpenFlow lookup for a TCP flow + * does not actually look at the TCP source or destination ports, for example, + * then the switch may install into the datapath a flow that wildcards the port + * numbers, which in turn allows the datapath to handle packets that arrive for + * other TCP source or destination ports without additional help from + * ovs-vswitchd. This is useful for the Open vSwitch software and, + * potentially, for ASIC-based switches as well. + * + * Some properties of the wildcard mask: + * + * - "False 1-bits" are acceptable, that is, setting a bit in the wildcard + * mask to 1 will never cause a packet to be forwarded the wrong way. + * As a corollary, a wildcard mask composed of all 1-bits will always + * yield correct (but often needlessly inefficient) behavior. + * + * - "False 0-bits" can cause problems, so they must be avoided. In the + * extreme case, a mask of all 0-bits is only correct if the classifier + * contains only a single flow that matches all packets. + * + * - 0-bits are desirable because they allow the datapath to act more + * autonomously, relying less on ovs-vswitchd to process flow setups, + * thereby improving performance. + * + * - We don't know a good way to generate wildcard masks with the maximum + * (correct) number of 0-bits. We use various approximations, described + * in later sections. + * + * - Wildcard masks for lookups in a given classifier yield a + * non-overlapping set of rules. More specifically: + * + * Consider an classifier C1 filled with an arbitrary collection of rules + * and an empty classifier C2. Now take a set of packet headers H and + * look it up in C1, yielding a highest-priority matching rule R1 and + * wildcard mask M. Form a new classifier rule R2 out of packet headers + * H and mask M, and add R2 to C2 with a fixed priority. If one were to + * do this for every possible set of packet headers H, then this + * process would not attempt to add any overlapping rules to C2, that is, + * any packet lookup using the rules generated by this process matches at + * most one rule in C2. + * + * During the lookup process, the classifier starts out with a wildcard mask + * that is all 0-bits, that is, fully wildcarded. As lookup proceeds, each + * step tends to add constraints to the wildcard mask, that is, change + * wildcarded 0-bits into exact-match 1-bits. We call this "un-wildcarding". + * A lookup step that examines a particular field must un-wildcard that field. + * In general, un-wildcarding is necessary for correctness but undesirable for + * performance. + * + * + * Basic Classifier Design + * ======================= * * Suppose that all the rules in a classifier had the same form. For example, * suppose that they all matched on the source and destination Ethernet address @@ -51,8 +115,10 @@ * cls_subtable" in the classifier and tracks the highest-priority match that * it finds. The subtables are kept in a descending priority order according * to the highest priority rule in each subtable, which allows lookup to skip - * over subtables that can't possibly have a higher-priority match than - * already found. + * over subtables that can't possibly have a higher-priority match than already + * found. Eliminating lookups through priority ordering aids both classifier + * primary design goals: skipping lookups saves time and avoids un-wildcarding + * fields that those lookups would have examined. * * One detail: a classifier can contain multiple rules that are identical other * than their priority. When this happens, only the highest priority rule out @@ -61,8 +127,51 @@ * list inside that highest-priority rule. * * - * Partitioning - * ============ + * Staged Lookup (Wildcard Optimization) + * ===================================== + * + * Subtable lookup is performed in ranges defined for struct flow, starting + * from metadata (registers, in_port, etc.), then L2 header, L3, and finally + * L4 ports. Whenever it is found that there are no matches in the current + * subtable, the rest of the subtable can be skipped. + * + * Staged lookup does not reduce lookup time, and it may increase it, because + * it changes a single hash table lookup into multiple hash table lookups. + * It reduces un-wildcarding significantly in important use cases. + * + * + * Prefix Tracking (Wildcard Optimization) + * ======================================= + * + * Classifier uses prefix trees ("tries") for tracking the used + * address space, enabling skipping classifier tables containing + * longer masks than necessary for the given address. This reduces + * un-wildcarding for datapath flows in parts of the address space + * without host routes, but consulting extra data structures (the + * tries) may slightly increase lookup time. + * + * Trie lookup is interwoven with staged lookup, so that a trie is + * searched only when the configured trie field becomes relevant for + * the lookup. The trie lookup results are retained so that each trie + * is checked at most once for each classifier lookup. + * + * This implementation tracks the number of rules at each address + * prefix for the whole classifier. More aggressive table skipping + * would be possible by maintaining lists of tables that have prefixes + * at the lengths encountered on tree traversal, or by maintaining + * separate tries for subsets of rules separated by metadata fields. + * + * Prefix tracking is configured via OVSDB "Flow_Table" table, + * "fieldspec" column. "fieldspec" is a string map where a "prefix" + * key tells which fields should be used for prefix tracking. The + * value of the "prefix" key is a comma separated list of field names. + * + * There is a maximum number of fields that can be enabled for any one + * flow table. Currently this limit is 3. + * + * + * Partitioning (Lookup Time and Wildcard Optimization) + * ==================================================== * * Suppose that a given classifier is being used to handle multiple stages in a * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field @@ -94,6 +203,9 @@ * any cls_subtable that doesn't match on metadata. We handle that by giving * any such cls_subtable TAG_ALL as its 'tags' so that it matches any tag.) * + * Partitioning saves lookup time by reducing the number of subtable lookups. + * Each eliminated subtable lookup also reduces the amount of un-wildcarding. + * * * Thread-safety * ============= @@ -101,10 +213,13 @@ * The classifier may safely be accessed by many reader threads concurrently or * by a single writer. */ +#include "fat-rwlock.h" #include "flow.h" +#include "hindex.h" #include "hmap.h" #include "list.h" #include "match.h" +#include "meta-flow.h" #include "tag.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" @@ -117,15 +232,32 @@ extern "C" { /* Needed only for the lock annotation in struct classifier. */ extern struct ovs_mutex ofproto_mutex; +struct trie_node; + +/* Prefix trie for a 'field' */ +struct cls_trie { + const struct mf_field *field; /* Trie field, or NULL. */ + struct trie_node *root; /* NULL if none. */ +}; + +enum { + CLS_MAX_INDICES = 3, /* Maximum number of lookup indices per subtable. */ + CLS_MAX_TRIES = 3 /* Maximum number of prefix trees per classifier. */ +}; /* A flow classifier. */ struct classifier { int n_rules; /* Total number of rules. */ + uint8_t n_flow_segments; + uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use + * for staged lookup. */ struct hmap subtables; /* Contains "struct cls_subtable"s. */ struct list subtables_priority; /* Subtables in descending priority order. */ struct hmap partitions; /* Contains "struct cls_partition"s. */ - struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex); + struct fat_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex); + struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */ + unsigned int n_tries; }; /* A set of rules that all have the same fields wildcarded. */ @@ -140,6 +272,10 @@ struct cls_subtable { unsigned int max_priority; /* Max priority of any rule in the subtable. */ unsigned int max_count; /* Count of max_priority rules. */ tag_type tag; /* Tag generated from mask for partitioning. */ + uint8_t n_indices; /* How many indices to use. */ + uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */ + struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */ + unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */ }; /* Returns true if 'table' is a "catch-all" subtable that will match every @@ -157,6 +293,8 @@ struct cls_rule { struct minimatch match; /* Matching rule. */ unsigned int priority; /* Larger numbers are higher priorities. */ struct cls_partition *partition; + struct hindex_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's + * 'indices'. */ }; /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata @@ -187,8 +325,13 @@ bool cls_rule_is_catchall(const struct cls_rule *); bool cls_rule_is_loose_match(const struct cls_rule *rule, const struct minimatch *criteria); -void classifier_init(struct classifier *cls); +void classifier_init(struct classifier *cls, const uint8_t *flow_segments); void classifier_destroy(struct classifier *); +void classifier_set_prefix_fields(struct classifier *cls, + const enum mf_field_id *trie_fields, + unsigned int n_trie_fields) + OVS_REQ_WRLOCK(cls->rwlock); + bool classifier_is_empty(const struct classifier *cls) OVS_REQ_RDLOCK(cls->rwlock); int classifier_count(const struct classifier *cls)