X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fflow.h;h=9e8549d45e0bc351dd4e51f232f07e26bb83fae6;hb=80e448834d1f57131c0751e299d1c7cef9761210;hp=6fa3bbdb2480bb6a0c165add7bc65b949548b94f;hpb=080e28d0f0025c705011ce69857fa30d667a5524;p=sliver-openvswitch.git diff --git a/lib/flow.h b/lib/flow.h index 6fa3bbdb2..9e8549d45 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -30,14 +30,13 @@ struct dpif_flow_stats; struct ds; struct flow_wildcards; -struct miniflow; struct minimask; struct ofpbuf; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 22 +#define FLOW_WC_SEQ 23 #define FLOW_N_REGS 8 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS); @@ -88,44 +87,76 @@ union flow_in_port { * 16-bit OpenFlow 1.0 port number. In the software datapath interface (dpif) * layer and its implementations (e.g. dpif-linux, dpif-netdev), it is instead * a 32-bit datapath port number. + * + * The fields are organized in four segments to facilitate staged lookup, where + * lower layer fields are first used to determine if the later fields need to + * be looked at. This enables better wildcarding for datapath flows. */ struct flow { + /* L1 */ struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ ovs_be64 metadata; /* OpenFlow Metadata. */ + uint32_t regs[FLOW_N_REGS]; /* Registers. */ + uint32_t skb_priority; /* Packet priority for QoS. */ + uint32_t pkt_mark; /* Packet mark. */ + union flow_in_port in_port; /* Input port.*/ + + /* L2 */ + uint8_t dl_src[6]; /* Ethernet source address. */ + uint8_t dl_dst[6]; /* Ethernet destination address. */ + ovs_be16 dl_type; /* Ethernet frame type. */ + ovs_be16 vlan_tci; /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */ + + /* L3 */ + ovs_be32 mpls_lse; /* MPLS label stack entry. */ struct in6_addr ipv6_src; /* IPv6 source address. */ struct in6_addr ipv6_dst; /* IPv6 destination address. */ struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ - uint32_t skb_priority; /* Packet priority for QoS. */ - uint32_t regs[FLOW_N_REGS]; /* Registers. */ + ovs_be32 ipv6_label; /* IPv6 flow label. */ ovs_be32 nw_src; /* IPv4 source address. */ ovs_be32 nw_dst; /* IPv4 destination address. */ - ovs_be32 ipv6_label; /* IPv6 flow label. */ - union flow_in_port in_port; /* Input port.*/ - uint32_t pkt_mark; /* Packet mark. */ - ovs_be32 mpls_lse; /* MPLS label stack entry. */ - ovs_be16 vlan_tci; /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */ - ovs_be16 dl_type; /* Ethernet frame type. */ - ovs_be16 tp_src; /* TCP/UDP/SCTP source port. */ - ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port. */ - ovs_be16 tcp_flags; /* TCP flags. */ - uint8_t dl_src[6]; /* Ethernet source address. */ - uint8_t dl_dst[6]; /* Ethernet destination address. */ - uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ + uint8_t nw_frag; /* FLOW_FRAG_* flags. */ uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */ + uint8_t nw_ttl; /* IP TTL/Hop Limit. */ + uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ uint8_t arp_sha[6]; /* ARP/ND source hardware address. */ uint8_t arp_tha[6]; /* ARP/ND target hardware address. */ - uint8_t nw_ttl; /* IP TTL/Hop Limit. */ - uint8_t nw_frag; /* FLOW_FRAG_* flags. Keep last for the - BUILD_ASSERT_DECL below */ + ovs_be16 tcp_flags; /* TCP flags. With L3 to avoid matching L4. */ + ovs_be16 pad; /* Padding. 
*/
+ /* L4 */
+ ovs_be16 tp_src; /* TCP/UDP/SCTP source port. */
+ ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port.
+ * Keep last for the BUILD_ASSERT_DECL below */
};
BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
#define FLOW_U32S (sizeof(struct flow) / 4)
/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
-BUILD_ASSERT_DECL(offsetof(struct flow, nw_frag) + 1
- == sizeof(struct flow_tnl) + 154
- && FLOW_WC_SEQ == 22);
+BUILD_ASSERT_DECL(offsetof(struct flow, tp_dst) + 2
+ == sizeof(struct flow_tnl) + 156
+ && FLOW_WC_SEQ == 23);
+
+/* Incremental points at which flow classification may be performed in
+ * segments.
+ * This is located here since it depends on the structure of the
+ * struct flow defined above:
+ * Each offset must be on a distinct, successive U32 boundary strictly
+ * within the struct flow. */
+enum {
+ FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_src),
+ FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, mpls_lse),
+ FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src),
+};
+BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % 4 == 0);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % 4 == 0);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT % 4 == 0);
+BUILD_ASSERT_DECL( 0 < FLOW_SEGMENT_1_ENDS_AT);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT < FLOW_SEGMENT_2_ENDS_AT);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT < FLOW_SEGMENT_3_ENDS_AT);
+BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT < sizeof(struct flow));
+
+extern const uint8_t flow_segment_u32s[];
/* Represents the metadata fields of struct flow. */
struct flow_metadata {
@@ -143,11 +174,15 @@ void flow_extract(struct ofpbuf *, uint32_t priority, uint32_t mark,
 struct flow *);
void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
+void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *);
void flow_get_metadata(const struct flow *, struct flow_metadata *);
char *flow_to_string(const struct flow *);
void format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
 uint32_t flags, char del);
+void format_flags_masked(struct ds *ds, const char *name,
+ const char *(*bit_to_string)(uint32_t),
+ uint32_t flags, uint32_t mask);
void flow_format(struct ds *, const struct flow *);
void flow_print(FILE *, const struct flow *);
@@ -234,6 +269,10 @@ hash_odp_port(odp_port_t odp_port)
uint32_t flow_hash_in_minimask(const struct flow *, const struct minimask *,
 uint32_t basis);
+uint32_t flow_hash_in_minimask_range(const struct flow *,
+ const struct minimask *,
+ uint8_t start, uint8_t end,
+ uint32_t *basis);
/* Wildcards for a flow.
 *
@@ -246,6 +285,8 @@ struct flow_wildcards {
void flow_wildcards_init_catchall(struct flow_wildcards *);
+void flow_wildcards_clear_non_packet_fields(struct flow_wildcards *);
+
bool flow_wildcards_is_catchall(const struct flow_wildcards *);
void flow_wildcards_set_reg_mask(struct flow_wildcards *,
@@ -262,6 +303,9 @@ bool flow_wildcards_has_extra(const struct flow_wildcards *,
void flow_wildcards_fold_minimask(struct flow_wildcards *,
 const struct minimask *);
+void flow_wildcards_fold_minimask_range(struct flow_wildcards *,
+ const struct minimask *,
+ uint8_t start, uint8_t end);
uint32_t flow_wildcards_hash(const struct flow_wildcards *, uint32_t basis);
bool flow_wildcards_equal(const struct flow_wildcards *,
@@ -318,7 +362,9 @@ BUILD_ASSERT_DECL(FLOW_U32S <= 64);
 *
 * Elements in 'values' are allowed to be zero. This is useful for "struct
 * minimatch", for which ensuring that the miniflow and minimask members have
- same 'map' allows optimization .
+ * same 'map' allows optimization. This allowance applies only to a miniflow + * that is not a mask. That is, a minimask may NOT have zero elements in + * its 'values'. */ struct miniflow { uint64_t map; @@ -349,12 +395,19 @@ bool miniflow_equal_flow_in_minimask(const struct miniflow *a, uint32_t miniflow_hash(const struct miniflow *, uint32_t basis); uint32_t miniflow_hash_in_minimask(const struct miniflow *, const struct minimask *, uint32_t basis); +uint64_t miniflow_get_map_in_range(const struct miniflow *miniflow, + uint8_t start, uint8_t end, + unsigned int *offset); + /* Compressed flow wildcards. */ /* A sparse representation of a "struct flow_wildcards". * - * See the large comment on struct miniflow for details. */ + * See the large comment on struct miniflow for details. + * + * Note: While miniflow can have zero data for a 1-bit in the map, + * a minimask may not! We rely on this in the implementation. */ struct minimask { struct miniflow masks; };
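
The staged-lookup scheme behind the L1-L4 segments and the FLOW_SEGMENT_*_ENDS_AT offsets above can be illustrated with a small standalone sketch. This is not the Open vSwitch classifier code; the names (toy_flow, toy_match_staged, SEG_*_ENDS_AT) are hypothetical, and the example shows only the core idea: treat the flow as an array of 32-bit words, split it at a few offsetof() boundaries, and compare it against a rule one segment at a time, so that a mismatch in an earlier (lower-layer) segment means the later words are never read and, in a real classifier, would never need to be un-wildcarded.

    /* toy_staged_match.c - simplified sketch of staged, segment-by-segment
     * matching under a mask.  C99; not actual OVS code. */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct toy_flow {
        uint32_t in_port;   /* Segment 1: metadata-like fields. */
        uint32_t dl_type;
        uint32_t nw_src;    /* Segment 2: network-layer fields. */
        uint32_t nw_dst;
        uint32_t tp_ports;  /* Segment 3: transport-layer fields. */
    };

    /* Segment end offsets, analogous to FLOW_SEGMENT_*_ENDS_AT: each must lie
     * on a 4-byte boundary strictly inside the struct. */
    enum {
        SEG_1_ENDS_AT = offsetof(struct toy_flow, nw_src),
        SEG_2_ENDS_AT = offsetof(struct toy_flow, tp_ports),
    };

    /* Compare 'target' with 'rule' under 'mask', lowest-layer segment first.
     * Returns false as soon as one segment fails, so higher-layer fields are
     * not inspected at all for early mismatches. */
    static bool
    toy_match_staged(const struct toy_flow *target, const struct toy_flow *rule,
                     const struct toy_flow *mask)
    {
        const uint32_t *t = (const uint32_t *) target;
        const uint32_t *r = (const uint32_t *) rule;
        const uint32_t *m = (const uint32_t *) mask;
        const size_t ends[] = {
            SEG_1_ENDS_AT / 4, SEG_2_ENDS_AT / 4, sizeof(struct toy_flow) / 4
        };
        size_t i = 0;

        for (size_t seg = 0; seg < 3; seg++) {
            for (; i < ends[seg]; i++) {
                if ((t[i] ^ r[i]) & m[i]) {
                    return false;       /* Mismatch: stop at this segment. */
                }
            }
        }
        return true;
    }

    int
    main(void)
    {
        struct toy_flow rule = { .in_port = 1, .dl_type = 0x0800, .nw_dst = 0x0a000001 };
        struct toy_flow mask = { .in_port = ~0u, .dl_type = ~0u, .nw_dst = ~0u };
        struct toy_flow pkt  = { .in_port = 2, .dl_type = 0x0800, .nw_dst = 0x0a000001 };

        /* in_port differs, so matching stops in the first segment. */
        printf("match: %s\n", toy_match_staged(&pkt, &rule, &mask) ? "yes" : "no");
        return 0;
    }

This prints "match: no": the in_port word fails in the first segment and the IP and port words are never compared, which is the property that lets a staged classifier leave the later segments wildcarded.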
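
The sparse representation that the struct miniflow and struct minimask comments describe (a 64-bit 'map' with one bit per 32-bit word of struct flow, plus the non-zero words packed into 'values') can also be sketched in isolation. The names below (toy_miniflow, toy_miniflow_get, toy_map_in_range) are invented for this example and the real code in lib/flow.c is more general; the sketch only shows how popcount over the map recovers a word's position in the packed array, and how the map can be restricted to a word range [start, end) in the spirit of miniflow_get_map_in_range().

    /* toy_miniflow.c - simplified sketch of the "map + packed values" idea.
     * Uses the GCC/Clang __builtin_popcountll(); not actual OVS code. */
    #include <stdint.h>
    #include <stdio.h>

    #define TOY_FLOW_U32S 8             /* Pretend the flow is 8 u32 words. */

    struct toy_miniflow {
        uint64_t map;                   /* Bit i set => word i stored below. */
        uint32_t values[TOY_FLOW_U32S]; /* Non-zero words, packed in order. */
    };

    /* Compress 'flow' by storing only its non-zero 32-bit words. */
    static void
    toy_miniflow_init(struct toy_miniflow *mf, const uint32_t flow[TOY_FLOW_U32S])
    {
        int n = 0;

        mf->map = 0;
        for (int i = 0; i < TOY_FLOW_U32S; i++) {
            if (flow[i]) {
                mf->map |= UINT64_C(1) << i;
                mf->values[n++] = flow[i];
            }
        }
    }

    /* Return word 'idx' of the original flow: zero if its map bit is clear,
     * otherwise the packed value found by counting the earlier 1-bits. */
    static uint32_t
    toy_miniflow_get(const struct toy_miniflow *mf, int idx)
    {
        uint64_t bit = UINT64_C(1) << idx;

        if (!(mf->map & bit)) {
            return 0;
        }
        return mf->values[__builtin_popcountll(mf->map & (bit - 1))];
    }

    /* Map bits for words [start, end), plus the index into 'values' where
     * that range begins, loosely following miniflow_get_map_in_range(). */
    static uint64_t
    toy_map_in_range(const struct toy_miniflow *mf, int start, int end,
                     unsigned int *offset)
    {
        uint64_t range = (mf->map >> start) << start;   /* Drop bits < start. */

        if (end < 64) {
            range &= (UINT64_C(1) << end) - 1;          /* Drop bits >= end. */
        }
        *offset = __builtin_popcountll(mf->map & ((UINT64_C(1) << start) - 1));
        return range;
    }

    int
    main(void)
    {
        uint32_t flow[TOY_FLOW_U32S] = { 0, 0xc0a80001, 0, 0, 0x0050, 0, 0, 0 };
        struct toy_miniflow mf;
        unsigned int ofs;

        toy_miniflow_init(&mf, flow);
        printf("word 1 = 0x%x, word 2 = 0x%x\n",
               (unsigned) toy_miniflow_get(&mf, 1),
               (unsigned) toy_miniflow_get(&mf, 2));
        printf("map for words [2,8) = 0x%llx, values offset %u\n",
               (unsigned long long) toy_map_in_range(&mf, 2, 8, &ofs), ofs);
        return 0;
    }

With only two non-zero words the map is 0x12 and 'values' holds just those two entries; the range query for words [2,8) returns map 0x10 with offset 1, i.e. the one remaining word starts at values[1]. A minimask reuses the same layout but, per the note above, never stores a zero word for a set map bit; range-based helpers such as flow_hash_in_minimask_range() and flow_wildcards_fold_minimask_range() then only need the slice of the map and values that covers one struct flow segment.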