X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.h;h=8af66458e439a7e162b5dde21b8ea7f67f8e6a8b;hb=1a7c0cd710e19db3ff85606dbfd5fdad964a1eea;hp=0c3252ccf9f48c566e64d40eff01998581e89b98;hpb=9583bc14430acc0578c1d00a78143c01d9cf7bee;p=sliver-openvswitch.git diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 0c3252ccf..8af66458e 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. +/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,334 +18,206 @@ #include #include "hmapx.h" -#include "ofproto/ofproto-provider.h" -#include "tag.h" +#include "odp-util.h" +#include "ofp-util.h" +#include "ovs-thread.h" +#include "ofproto-provider.h" #include "timer.h" #include "util.h" +#include "ovs-thread.h" union user_action_cookie; +struct dpif_flow_stats; +struct ofproto; +struct ofproto_dpif; +struct ofproto_packet_in; +struct ofport_dpif; +struct dpif_backer; +struct OVS_LOCKABLE rule_dpif; +struct OVS_LOCKABLE group_dpif; + +enum rule_dpif_lookup_verdict { + RULE_DPIF_LOOKUP_VERDICT_MATCH, /* A match occurred. */ + RULE_DPIF_LOOKUP_VERDICT_CONTROLLER, /* A miss occurred and the packet + * should be passed to + * the controller. */ + RULE_DPIF_LOOKUP_VERDICT_DROP, /* A miss occurred and the packet + * should be dropped. */ + RULE_DPIF_LOOKUP_VERDICT_DEFAULT, /* A miss occurred and the packet + * should handled by the default + * miss behaviour. + * For pre-OF1.3 it should be + * forwarded to the controller. + * For OF1.3+ it should be + * dropped. */ +}; -#define MAX_MIRRORS 32 -typedef uint32_t mirror_mask_t; -#define MIRROR_MASK_C(X) UINT32_C(X) -BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= MAX_MIRRORS); - -/* Number of implemented OpenFlow tables. 
*/ -enum { N_TABLES = 255 }; -enum { TBL_INTERNAL = N_TABLES - 1 }; /* Used for internal hidden rules. */ -BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255); +/* For lock annotation below only. */ +extern struct ovs_rwlock xlate_rwlock; -/* Reasons that we might need to revalidate every facet, and corresponding - * coverage counters. +/* Ofproto-dpif -- DPIF based ofproto implementation. * - * A value of 0 means that there is no need to revalidate. + * Ofproto-dpif provides an ofproto implementation for those platforms which + * implement the netdev and dpif interface defined in netdev.h and dpif.h. The + * most important of which is the Linux Kernel Module (dpif-linux), but + * alternatives are supported such as a userspace only implementation + * (dpif-netdev), and a dummy implementation used for unit testing. * - * It would be nice to have some cleaner way to integrate with coverage - * counters, but with only a few reasons I guess this is good enough for - * now. */ -enum revalidate_reason { - REV_RECONFIGURE = 1, /* Switch configuration changed. */ - REV_STP, /* Spanning tree protocol port status change. */ - REV_PORT_TOGGLED, /* Port enabled or disabled by CFM, LACP, ...*/ - REV_FLOW_TABLE, /* Flow table changed. */ - REV_INCONSISTENCY /* Facet self-check failed. */ -}; - -struct rule_dpif { - struct rule up; - - /* These statistics: - * - * - Do include packets and bytes from facets that have been deleted or - * whose own statistics have been folded into the rule. - * - * - Do include packets and bytes sent "by hand" that were accounted to - * the rule without any facet being involved (this is a rare corner - * case in rule_execute()). - * - * - Do not include packet or bytes that can be obtained from any facet's - * packet_count or byte_count member or that can be obtained from the - * datapath by, e.g., dpif_flow_get() for any subfacet. - */ - uint64_t packet_count; /* Number of packets received. */ - uint64_t byte_count; /* Number of bytes received. 
*/ - - tag_type tag; /* Caches rule_calculate_tag() result. */ - - struct list facets; /* List of "struct facet"s. */ -}; - -struct avg_subfacet_rates { - double add_rate; /* Moving average of new flows created per minute. */ - double del_rate; /* Moving average of flows deleted per minute. */ -}; - -/* All datapaths of a given type share a single dpif backer instance. */ -struct dpif_backer { - char *type; - int refcount; - struct dpif *dpif; - struct timer next_expiration; - struct hmap odp_to_ofport_map; /* ODP port to ofport mapping. */ - - struct simap tnl_backers; /* Set of dpif ports backing tunnels. */ - - /* Facet revalidation flags applying to facets which use this backer. */ - enum revalidate_reason need_revalidate; /* Revalidate every facet. */ - struct tag_set revalidate_set; /* Revalidate only matching facets. */ - - struct hmap drop_keys; /* Set of dropped odp keys. */ - bool recv_set_enable; /* Enables or disables receiving packets. */ - - struct hmap subfacets; - struct governor *governor; - - /* Subfacet statistics. - * - * These keep track of the total number of subfacets added and deleted and - * flow life span. They are useful for computing the flow rates stats - * exposed via "ovs-appctl dpif/show". The goal is to learn about - * traffic patterns in ways that we can use later to improve Open vSwitch - * performance in new situations. */ - long long int created; /* Time when it is created. */ - unsigned max_n_subfacet; /* Maximum number of flows */ - unsigned avg_n_subfacet; /* Average number of flows. */ - long long int avg_subfacet_life; /* Average life span of subfacets. */ - - /* The average number of subfacets... */ - struct avg_subfacet_rates hourly; /* ...over the last hour. */ - struct avg_subfacet_rates daily; /* ...over the last day. */ - struct avg_subfacet_rates lifetime; /* ...over the switch lifetime. */ - long long int last_minute; /* Last time 'hourly' was updated. 
*/ - - /* Number of subfacets added or deleted since 'last_minute'. */ - unsigned subfacet_add_count; - unsigned subfacet_del_count; - - /* Number of subfacets added or deleted from 'created' to 'last_minute.' */ - unsigned long long int total_subfacet_add_count; - unsigned long long int total_subfacet_del_count; -}; - -/* Extra information about a classifier table. - * Currently used just for optimized flow revalidation. */ -struct table_dpif { - /* If either of these is nonnull, then this table has a form that allows - * flows to be tagged to avoid revalidating most flows for the most common - * kinds of flow table changes. */ - struct cls_table *catchall_table; /* Table that wildcards all fields. */ - struct cls_table *other_table; /* Table with any other wildcard set. */ - uint32_t basis; /* Keeps each table's tags separate. */ -}; - -struct ofproto_dpif { - struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */ - struct ofproto up; - struct dpif_backer *backer; - - /* Special OpenFlow rules. */ - struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */ - struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */ - struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */ - - /* Bridging. */ - struct netflow *netflow; - struct dpif_sflow *sflow; - struct dpif_ipfix *ipfix; - struct hmap bundles; /* Contains "struct ofbundle"s. */ - struct mac_learning *ml; - struct ofmirror *mirrors[MAX_MIRRORS]; - bool has_mirrors; - bool has_bonded_bundles; - - /* Facets. */ - struct classifier facets; /* Contains 'struct facet's. */ - long long int consistency_rl; - - /* Revalidation. */ - struct table_dpif tables[N_TABLES]; - - /* Support for debugging async flow mods. */ - struct list completions; - - bool has_bundle_action; /* True when the first bundle action appears. */ - struct netdev_stats stats; /* To account packets generated and consumed in - * userspace. */ - - /* Spanning tree. 
*/ - struct stp *stp; - long long int stp_last_tick; - - /* VLAN splinters. */ - struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */ - struct hmap vlandev_map; /* vlandev -> (realdev,vid). */ - - /* Ports. */ - struct sset ports; /* Set of standard port names. */ - struct sset ghost_ports; /* Ports with no datapath port. */ - struct sset port_poll_set; /* Queued names for port_poll() reply. */ - int port_poll_errno; /* Last errno for port_poll() reply. */ - - /* Per ofproto's dpif stats. */ - uint64_t n_hit; - uint64_t n_missed; -}; - -struct ofport_dpif { - struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */ - struct ofport up; - - uint32_t odp_port; - struct ofbundle *bundle; /* Bundle that contains this port, if any. */ - struct list bundle_node; /* In struct ofbundle's "ports" list. */ - struct cfm *cfm; /* Connectivity Fault Management, if any. */ - struct bfd *bfd; /* BFD, if any. */ - tag_type tag; /* Tag associated with this port. */ - bool may_enable; /* May be enabled in bonds. */ - long long int carrier_seq; /* Carrier status changes. */ - struct tnl_port *tnl_port; /* Tunnel handle, or null. */ - - /* Spanning tree. */ - struct stp_port *stp_port; /* Spanning Tree Protocol, if any. */ - enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */ - long long int stp_state_entered; - - struct hmap priorities; /* Map of attached 'priority_to_dscp's. */ - - /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) - * - * This is deprecated. It is only for compatibility with broken device - * drivers in old versions of Linux that do not properly support VLANs when - * VLAN devices are not used. When broken device drivers are no longer in - * widespread use, we will delete these interfaces. */ - uint16_t realdev_ofp_port; - int vlandev_vid; -}; - -struct ofbundle { - struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */ - struct ofproto_dpif *ofproto; /* Owning ofproto. 
*/ - void *aux; /* Key supplied by ofproto's client. */ - char *name; /* Identifier for log messages. */ - - /* Configuration. */ - struct list ports; /* Contains "struct ofport"s. */ - enum port_vlan_mode vlan_mode; /* VLAN mode */ - int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */ - unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1. - * NULL if all VLANs are trunked. */ - struct lacp *lacp; /* LACP if LACP is enabled, otherwise NULL. */ - struct bond *bond; /* Nonnull iff more than one port. */ - bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */ - - /* Status. */ - bool floodable; /* True if no port has OFPUTIL_PC_NO_FLOOD set. */ - - /* Port mirroring info. */ - mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ - mirror_mask_t dst_mirrors; /* Mirrors triggered when packet sent. */ - mirror_mask_t mirror_out; /* Mirrors that output to this bundle. */ -}; - -struct ofmirror { - struct ofproto_dpif *ofproto; /* Owning ofproto. */ - size_t idx; /* In ofproto's "mirrors" array. */ - void *aux; /* Key supplied by ofproto's client. */ - char *name; /* Identifier for log messages. */ - - /* Selection criteria. */ - struct hmapx srcs; /* Contains "struct ofbundle *"s. */ - struct hmapx dsts; /* Contains "struct ofbundle *"s. */ - unsigned long *vlans; /* Bitmap of chosen VLANs, NULL selects all. */ - - /* Output (exactly one of out == NULL and out_vlan == -1 is true). */ - struct ofbundle *out; /* Output port or NULL. */ - int out_vlan; /* Output VLAN or -1. */ - mirror_mask_t dup_mirrors; /* Bitmap of mirrors with the same output. */ - - /* Counters. */ - int64_t packet_count; /* Number of packets sent. */ - int64_t byte_count; /* Number of bytes sent. */ -}; - -/* Node in 'ofport_dpif''s 'priorities' map. Used to maintain a map from - * 'priority' (the datapath's term for QoS queue) to the dscp bits which all - * traffic egressing the 'ofport' with that priority should be marked with. 
*/ -struct priority_to_dscp { - struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'priorities' map. */ - uint32_t priority; /* Priority of this queue (see struct flow). */ - - uint8_t dscp; /* DSCP bits to mark outgoing traffic with. */ -}; - -static inline struct rule_dpif *rule_dpif_cast(const struct rule *rule) -{ - return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL; -} - -static inline struct ofproto_dpif * -ofproto_dpif_cast(const struct ofproto *ofproto) -{ - ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class); - return CONTAINER_OF(ofproto, struct ofproto_dpif, up); -} - -static inline struct ofport_dpif * -ofbundle_get_a_port(const struct ofbundle *bundle) -{ - return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif, - bundle_node); -} - -static inline int -mirror_mask_ffs(mirror_mask_t mask) -{ - BUILD_ASSERT_DECL(sizeof(unsigned int) >= sizeof(mask)); - return ffs(mask); -} - -struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *, - uint16_t ofp_port); - -struct ofport_dpif *get_odp_port(const struct ofproto_dpif *, - uint32_t odp_port); - -struct ofport_dpif *ofport_get_peer(const struct ofport_dpif *); - -uint32_t ofp_port_to_odp_port(const struct ofproto_dpif *, uint16_t ofp_port); - -struct rule_dpif *rule_dpif_lookup_in_table(struct ofproto_dpif *, - const struct flow *, - struct flow_wildcards *, - uint8_t table_id); - -tag_type rule_calculate_tag(const struct flow *flow, const struct minimask *, - uint32_t secret); - -struct rule_dpif *rule_dpif_miss_rule(struct ofproto_dpif *ofproto, - const struct flow *); - -void rule_credit_stats(struct rule_dpif *, const struct dpif_flow_stats *); - -void ofproto_trace(struct ofproto_dpif *, const struct flow *, - const struct ofpbuf *packet, struct ds *); - -size_t put_userspace_action(const struct ofproto_dpif *, - struct ofpbuf *odp_actions, const struct flow *, - const union user_action_cookie *, - const size_t cookie_size); - -enum slow_path_reason 
process_special(struct ofproto_dpif *, - const struct flow *, - const struct ofport_dpif *, - const struct ofpbuf *packet); - -uint16_t vsp_realdev_to_vlandev(const struct ofproto_dpif *, - uint16_t realdev_ofp_port, - ovs_be16 vlan_tci); - -struct priority_to_dscp *get_priority(const struct ofport_dpif *, - uint32_t priority); - + * Ofproto-dpif is divided into three major chunks. + * + * - ofproto-dpif.c + * The main ofproto-dpif module is responsible for implementing the + * provider interface, installing and removing datapath flows, maintaining + * packet statistics, running protocols (BFD, LACP, STP, etc), and + * configuring relevant submodules. + * + * - ofproto-dpif-upcall.c + * Ofproto-dpif-upcall is responsible for retrieving upcalls from the kernel, + * processing miss upcalls, and handing more complex ones up to the main + * ofproto-dpif module. Miss upcall processing boils down to figuring out + * what each packet's actions are, executing them (i.e. asking the kernel to + * forward it), and handing it up to ofproto-dpif to decide whether or not + * to install a kernel flow. + * + * - ofproto-dpif-xlate.c + * Ofproto-dpif-xlate is responsible for translating OpenFlow + * actions into datapath actions.
*/ + +size_t ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *); +bool ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *); + +uint8_t rule_dpif_lookup(struct ofproto_dpif *, struct flow *, + struct flow_wildcards *, struct rule_dpif **rule, + bool take_ref); + +enum rule_dpif_lookup_verdict rule_dpif_lookup_from_table(struct ofproto_dpif *, + const struct flow *, + struct flow_wildcards *, + bool force_controller_on_miss, + uint8_t *table_id, + struct rule_dpif **rule, + bool take_ref); + +void rule_dpif_ref(struct rule_dpif *); +void rule_dpif_unref(struct rule_dpif *); + +void rule_dpif_credit_stats(struct rule_dpif *rule , + const struct dpif_flow_stats *); + +bool rule_dpif_is_fail_open(const struct rule_dpif *); +bool rule_dpif_is_table_miss(const struct rule_dpif *); +bool rule_dpif_is_internal(const struct rule_dpif *); +uint8_t rule_dpif_get_table(const struct rule_dpif *); + +struct rule_actions *rule_dpif_get_actions(const struct rule_dpif *); + +ovs_be64 rule_dpif_get_flow_cookie(const struct rule_dpif *rule); + +void rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout, + uint16_t hard_timeout); + +void choose_miss_rule(enum ofputil_port_config, + struct rule_dpif *miss_rule, + struct rule_dpif *no_packet_in_rule, + struct rule_dpif **rule, bool take_ref); + +bool group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id, + struct group_dpif **group); + +void group_dpif_release(struct group_dpif *group); + +void group_dpif_get_buckets(const struct group_dpif *group, + const struct list **buckets); +enum ofp11_group_type group_dpif_get_type(const struct group_dpif *group); + +bool ofproto_has_vlan_splinters(const struct ofproto_dpif *); +ofp_port_t vsp_realdev_to_vlandev(const struct ofproto_dpif *, + ofp_port_t realdev_ofp_port, + ovs_be16 vlan_tci); +bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *); + +int ofproto_dpif_execute_actions(struct ofproto_dpif *, const struct flow *, + struct 
rule_dpif *, const struct ofpact *, + size_t ofpacts_len, struct ofpbuf *) + OVS_EXCLUDED(xlate_rwlock); +void ofproto_dpif_send_packet_in(struct ofproto_dpif *, + struct ofproto_packet_in *); +bool ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *); +int ofproto_dpif_send_packet(const struct ofport_dpif *, struct ofpbuf *); +void ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *); +struct rule_dpif *ofproto_dpif_refresh_rule(struct rule_dpif *); + +struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t); + +/* + * Recirculation + * ============= + * + * Recirculation is a technique to allow a frame to re-enter the packet + * processing path one or more times to achieve more flexible packet + * processing in the data path. Example uses are MPLS handling and selecting + * the slave port of a bond. + * + * Data path and user space interface + * ----------------------------------- + * + * Two new fields, recirc_id and dp_hash, are added to the current flow data + * structure. They are both of type uint32_t. In addition, a new action, + * RECIRC, is added. + * + * The value recirc_id is used to distinguish a packet from multiple + * iterations of recirculation. A packet initially received is considered to + * have a recirc_id of 0. Recirc_id is managed by the user space, opaque to + * the data path. + * + * On the other hand, dp_hash can only be computed by the data path, opaque to + * the user space. In fact, user space may not be able to recompute the hash + * value. The dp_hash value should be wildcarded for a newly received + * packet. The RECIRC action specifies whether the hash is computed and, if so, + * how many fields are to be included in the hash computation. The computed hash + * value is stored into the dp_hash field prior to recirculation. + * + * The RECIRC action computes and sets the dp_hash field, sets the recirc_id + * field, and then reprocesses the packet as if it was received on the same input + * port.
RECIRC action works like a function call; actions listed after the + * RECIRC action will be executed after its execution. RECIRC actions can be + * nested; the data path implementation limits the number of recirculations executed + * to prevent unreasonable nesting depth or an infinite loop. + * + * Both flow fields and the RECIRC action are exposed as OpenFlow fields via + * Nicira extensions. + * + * Post recirculation flow + * ------------------------ + * + * At the OpenFlow level, post recirculation rules are always hidden from the + * controller. They are installed in table 254 which is set up as a hidden + * table during boot time. Those rules are managed by the local user space + * program only. + * + * To speed up the classifier look up process, recirc_id is always reflected + * into the metadata field, since recirc_id is required to be exactly matched. + * + * Classifier look up always starts with table 254. A post recirculation flow + * lookup should find its hidden rule within this table. On the other hand, a + * newly received packet should miss all post recirculation rules because its + * recirc_id is zero, then hit a pre-installed lower priority rule to redirect + * classifier to look up starting from table 0: + * + * * , actions=resubmit(,0) + * + * Post recirculation data path flows are managed like other data path flows. + * They are created on demand. Miss handling, stats collection and revalidation + * work the same way as regular flows. + */ + +uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto); +void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id); +int ofproto_dpif_add_internal_flow(struct ofproto_dpif *, + struct match *, int priority, + const struct ofpbuf *ofpacts, + struct rule **rulep); +int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *, + int priority); #endif /* ofproto-dpif.h */