/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"

#include <errno.h>

#include "bfd.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
#include "connmgr.h"
#include "coverage.h"
#include "cfm.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "fail-open.h"
#include "hmapx.h"
#include "lacp.h"
#include "learn.h"
#include "mac-learning.h"
#include "meta-flow.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "netdev.h"
#include "netlink.h"
#include "nx-match.h"
#include "odp-util.h"
#include "odp-execute.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofproto-dpif-governor.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-upcall.h"
#include "ofproto-dpif-xlate.h"
#include "poll-loop.h"
#include "simap.h"
#include "smap.h"
#include "timer.h"
#include "tunnel.h"
#include "unaligned.h"
#include "unixctl.h"
#include "vlan-bitmap.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ofproto_dpif);

COVERAGE_DEFINE(ofproto_dpif_expired);
COVERAGE_DEFINE(facet_changed_rule);
COVERAGE_DEFINE(facet_revalidate);
COVERAGE_DEFINE(facet_unexpected);
COVERAGE_DEFINE(facet_suppress);
COVERAGE_DEFINE(subfacet_install_fail);
COVERAGE_DEFINE(packet_in_overflow);
COVERAGE_DEFINE(flow_mod_overflow);

/* Number of implemented OpenFlow tables. */
enum { N_TABLES = 255 };
enum { TBL_INTERNAL = N_TABLES - 1 };    /* Used for internal hidden rules. */
BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);

struct flow_miss;
struct facet;

static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);

struct ofbundle {
    struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    /* Configuration. */
    struct list ports;          /* Contains "struct ofport"s. */
    enum port_vlan_mode vlan_mode; /* VLAN mode. */
    int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
    unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
                                 * NULL if all VLANs are trunked. */
    struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
    struct bond *bond;          /* Nonnull iff more than one port. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    /* Status. */
    bool floodable;          /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
};
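
/* Illustrative sketch (not part of the original file): how the 'trunks'
 * bitmap above is typically consulted.  A NULL bitmap means every VLAN is
 * trunked; otherwise bitmap_is_set(), from OVS's bitmap library (assumed to
 * be pulled in via "vlan-bitmap.h"), tests membership.  Treat the helper
 * name as hypothetical. */
static inline bool
example_bundle_trunks_vlan(const struct ofbundle *bundle, uint16_t vlan)
{
    /* NULL means all 4096 VLANs are trunked; otherwise test the bit. */
    return !bundle->trunks || bitmap_is_set(bundle->trunks, vlan);
}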

static void bundle_remove(struct ofport *);
static void bundle_update(struct ofbundle *);
static void bundle_destroy(struct ofbundle *);
static void bundle_del_port(struct ofport_dpif *);
static void bundle_run(struct ofbundle *);
static void bundle_wait(struct ofbundle *);

static void stp_run(struct ofproto_dpif *ofproto);
static void stp_wait(struct ofproto_dpif *ofproto);
static int set_stp_port(struct ofport *,
                        const struct ofproto_port_stp_settings *);

static void compose_slow_path(const struct ofproto_dpif *, const struct flow *,
                              enum slow_path_reason,
                              uint64_t *stub, size_t stub_size,
                              const struct nlattr **actionsp,
                              size_t *actions_lenp);

/* A subfacet (see "struct subfacet" below) has three possible installation
 * states:
 *
 *   - SF_NOT_INSTALLED: Not installed in the datapath.  This will only be the
 *     case just after the subfacet is created, just before the subfacet is
 *     destroyed, or if the datapath returns an error when we try to install a
 *     subfacet.
 *
 *   - SF_FAST_PATH: The subfacet's actions are installed in the datapath.
 *
 *   - SF_SLOW_PATH: An action that sends every packet for the subfacet through
 *     ofproto_dpif is installed in the datapath.
 */
enum subfacet_path {
    SF_NOT_INSTALLED,           /* No datapath flow for this subfacet. */
    SF_FAST_PATH,               /* Full actions are installed. */
    SF_SLOW_PATH,               /* Send-to-userspace action is installed. */
};
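
/* Hedged sketch of how an installed state is chosen: a flow that translation
 * marked "slow" (a nonzero slow_path_reason) must go through userspace, and
 * everything else can take the fast path.  The real decision logic lives
 * elsewhere in this file; treat this helper as an example only. */
static inline enum subfacet_path
example_want_path(enum slow_path_reason slow)
{
    return slow ? SF_SLOW_PATH : SF_FAST_PATH;
}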

/* A dpif flow and actions associated with a facet.
 *
 * See also the large comment on struct facet. */
struct subfacet {
    /* Owners. */
    struct hmap_node hmap_node; /* In struct dpif_backer's 'subfacets' hmap. */
    struct list list_node;      /* In struct facet's 'subfacets' list. */
    struct facet *facet;        /* Owning facet. */
    struct dpif_backer *backer; /* Owning backer. */

    enum odp_key_fitness key_fitness;
    struct nlattr *key;
    int key_len;

    long long int used;         /* Time last used; time created if not used. */
    long long int created;      /* Time created. */

    uint64_t dp_packet_count;   /* Last known packet count in the datapath. */
    uint64_t dp_byte_count;     /* Last known byte count in the datapath. */

    enum subfacet_path path;    /* Installed in datapath? */
};

#define SUBFACET_DESTROY_MAX_BATCH 50

static struct subfacet *subfacet_create(struct facet *, struct flow_miss *);
static struct subfacet *subfacet_find(struct dpif_backer *,
                                      const struct nlattr *key, size_t key_len,
                                      uint32_t key_hash);
static void subfacet_destroy(struct subfacet *);
static void subfacet_destroy__(struct subfacet *);
static void subfacet_destroy_batch(struct dpif_backer *,
                                   struct subfacet **, int n);
static void subfacet_reset_dp_stats(struct subfacet *,
                                    struct dpif_flow_stats *);
static void subfacet_update_stats(struct subfacet *,
                                  const struct dpif_flow_stats *);
static int subfacet_install(struct subfacet *,
                            const struct ofpbuf *odp_actions,
                            struct dpif_flow_stats *);
static void subfacet_uninstall(struct subfacet *);

/* A unique, non-overlapping instantiation of an OpenFlow flow.
 *
 * A facet associates a "struct flow", which represents the Open vSwitch
 * userspace idea of an exact-match flow, with one or more subfacets.
 * While the facet is created based on an exact-match flow, it is stored
 * within the ofproto based on the wildcards that could be expressed
 * based on the flow table and other configuration.  (See the 'wc'
 * description in "struct xlate_out" for more details.)
 *
 * Each subfacet tracks the datapath's idea of the flow equivalent to
 * the facet.  When the kernel module (or other dpif implementation) and
 * Open vSwitch userspace agree on the definition of a flow key, there
 * is exactly one subfacet per facet.  If the dpif implementation
 * supports more-specific flow matching than userspace, however, a facet
 * can have more than one subfacet.  Examples include the dpif
 * implementation not supporting the same wildcards as userspace or some
 * distinction in flow that userspace simply doesn't understand.
 *
 * Flow expiration works in terms of subfacets, so a facet must have at
 * least one subfacet or it will never expire, leaking memory. */
struct facet {
    /* Owners. */
    struct hmap_node hmap_node;  /* In owning ofproto's 'facets' hmap. */
    struct ofproto_dpif *ofproto;

    /* Owned data. */
    struct list subfacets;
    long long int used;         /* Time last used; time created if not used. */

    /* Key. */
    struct flow flow;           /* Flow of the creating subfacet. */
    struct cls_rule cr;         /* In 'ofproto_dpif's facets classifier. */

    /* These statistics:
     *
     *   - Do include packets and bytes sent "by hand", e.g. with
     *     dpif_execute().
     *
     *   - Do include packets and bytes that were obtained from the datapath
     *     when a subfacet's statistics were reset (e.g. dpif_flow_put() with
     *     DPIF_FP_ZERO_STATS).
     *
     *   - Do not include packets or bytes that can be obtained from the
     *     datapath for any existing subfacet.
     */
    uint64_t packet_count;       /* Number of packets received. */
    uint64_t byte_count;         /* Number of bytes received. */

    /* Resubmit statistics. */
    uint64_t prev_packet_count;  /* Number of packets from last stats push. */
    uint64_t prev_byte_count;    /* Number of bytes from last stats push. */
    long long int prev_used;     /* Used time from last stats push. */

    /* Accounting. */
    uint64_t accounted_bytes;    /* Bytes processed by facet_account(). */
    struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. */
    uint8_t tcp_flags;           /* TCP flags seen for this 'rule'. */

    struct xlate_out xout;

    /* Storage for a single subfacet, to reduce malloc() time and space
     * overhead.  (A facet always has at least one subfacet and in the common
     * case has exactly one subfacet.  However, 'one_subfacet' may not
     * always be valid, since it could have been removed after newer
     * subfacets were pushed onto the 'subfacets' list.) */
    struct subfacet one_subfacet;

    long long int learn_rl;      /* Rate limiter for facet_learn(). */
};
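
/* Illustrative sketch (an assumption, not code from this file): summing the
 * datapath-side packet counts across a facet's subfacets with LIST_FOR_EACH
 * from OVS's list library.  Note that, per the comment on struct facet's
 * statistics, such datapath counts are deliberately *not* folded into
 * facet->packet_count until the subfacets themselves go away. */
static inline uint64_t
example_facet_dp_packets(struct facet *facet)
{
    struct subfacet *subfacet;
    uint64_t packets = 0;

    LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
        packets += subfacet->dp_packet_count;
    }
    return packets;
}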

static struct facet *facet_create(const struct flow_miss *);
static void facet_remove(struct facet *);
static void facet_free(struct facet *);

static struct facet *facet_find(struct ofproto_dpif *, const struct flow *);
static struct facet *facet_lookup_valid(struct ofproto_dpif *,
                                        const struct flow *);
static bool facet_revalidate(struct facet *);
static bool facet_check_consistency(struct facet *);

static void facet_flush_stats(struct facet *);

static void facet_reset_counters(struct facet *);
static void flow_push_stats(struct ofproto_dpif *, struct flow *,
                            struct dpif_flow_stats *, bool may_learn);
static void facet_push_stats(struct facet *, bool may_learn);
static void facet_learn(struct facet *);
static void facet_account(struct facet *);
static void push_all_stats(void);

static bool facet_is_controller_flow(struct facet *);

struct ofport_dpif {
    struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
    struct ofport up;

    odp_port_t odp_port;
    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    struct bfd *bfd;            /* BFD, if any. */
    bool may_enable;            /* May be enabled in bonds. */
    bool is_tunnel;             /* This port is a tunnel. */
    long long int carrier_seq;  /* Carrier status changes. */
    struct ofport_dpif *peer;   /* Peer if patch port. */

    /* Spanning tree. */
    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;

    /* Queue to DSCP mapping. */
    struct ofproto_port_queue *qdscp;
    size_t n_qdscp;

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    ofp_port_t realdev_ofp_port;
    int vlandev_vid;
};

/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device drivers
 * in old versions of Linux that do not properly support VLANs when VLAN
 * devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */
struct vlan_splinter {
    struct hmap_node realdev_vid_node;
    struct hmap_node vlandev_node;
    ofp_port_t realdev_ofp_port;
    ofp_port_t vlandev_ofp_port;
    int vid;
};

static void vsp_remove(struct ofport_dpif *);
static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);

static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
                                       ofp_port_t);

static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
                                       odp_port_t);

static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
    return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}

static void port_run(struct ofport_dpif *);
static void port_run_fast(struct ofport_dpif *);
static void port_wait(struct ofport_dpif *);
static int set_bfd(struct ofport *, const struct smap *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
static void ofport_update_peer(struct ofport_dpif *);
static void run_fast_rl(void);
static int run_fast(struct ofproto *);

struct dpif_completion {
    struct list list_node;
    struct ofoperation *op;
};

/* Reasons that we might need to revalidate every facet, and corresponding
 * coverage counters.
 *
 * A value of 0 means that there is no need to revalidate.
 *
 * It would be nice to have some cleaner way to integrate with coverage
 * counters, but with only a few reasons I guess this is good enough for
 * now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed. */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_BOND,                  /* Bonding changed. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ... */
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_MAC_LEARNING,          /* MAC learning changed. */
    REV_INCONSISTENCY          /* Facet self-check failed. */
};
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
COVERAGE_DEFINE(rev_bond);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_mac_learning);
COVERAGE_DEFINE(rev_inconsistency);

struct avg_subfacet_rates {
    double add_rate;   /* Moving average of new flows created per minute. */
    double del_rate;   /* Moving average of flows deleted per minute. */
};
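
/* A hedged sketch of one way such per-minute moving averages can be
 * maintained.  The exponential smoothing constant below is an assumption
 * made for illustration, not the coefficient used by the real
 * update_moving_averages() declared later in this file. */
static inline void
example_update_rates(struct avg_subfacet_rates *rates,
                     unsigned int adds, unsigned int dels)
{
    const double alpha = 1.0 / 60.0;    /* Assumed smoothing factor. */

    rates->add_rate += alpha * ((double) adds - rates->add_rate);
    rates->del_rate += alpha * ((double) dels - rates->del_rate);
}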

/* All datapaths of a given type share a single dpif backer instance. */
struct dpif_backer {
    char *type;
    int refcount;
    struct dpif *dpif;
    struct udpif *udpif;
    struct timer next_expiration;

    struct ovs_rwlock odp_to_ofport_lock;
    struct hmap odp_to_ofport_map OVS_GUARDED; /* ODP port to ofport map. */

    struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */

    /* Facet revalidation flags applying to facets which use this backer. */
    enum revalidate_reason need_revalidate; /* Revalidate every facet. */

    struct hmap drop_keys; /* Set of dropped odp keys. */
    bool recv_set_enable; /* Enables or disables receiving packets. */

    struct hmap subfacets;
    struct governor *governor;

    /* Subfacet statistics.
     *
     * These keep track of the total number of subfacets added and deleted and
     * flow life span.  They are useful for computing the flow rates stats
     * exposed via "ovs-appctl dpif/show".  The goal is to learn about
     * traffic patterns in ways that we can use later to improve Open vSwitch
     * performance in new situations.  */
    long long int created;           /* Time this backer was created. */
    unsigned max_n_subfacet;         /* Maximum number of flows. */
    unsigned avg_n_subfacet;         /* Average number of flows. */
    long long int avg_subfacet_life; /* Average life span of subfacets. */

    /* The average number of subfacets... */
    struct avg_subfacet_rates hourly;   /* ...over the last hour. */
    struct avg_subfacet_rates daily;    /* ...over the last day. */
    struct avg_subfacet_rates lifetime; /* ...over the switch lifetime. */
    long long int last_minute;          /* Last time 'hourly' was updated. */

    /* Number of subfacets added or deleted since 'last_minute'. */
    unsigned subfacet_add_count;
    unsigned subfacet_del_count;

    /* Number of subfacets added or deleted from 'created' to 'last_minute'. */
    unsigned long long int total_subfacet_add_count;
    unsigned long long int total_subfacet_del_count;

    /* Number of upcall handling threads. */
    unsigned int n_handler_threads;
};

/* All existing dpif_backer instances, indexed by ofproto->up.type. */
static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);

static void drop_key_clear(struct dpif_backer *);
static void update_moving_averages(struct dpif_backer *backer);

struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct ofproto up;
    struct dpif_backer *backer;

    /* Special OpenFlow rules. */
    struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
    struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
    struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */

    /* Bridging. */
    struct netflow *netflow;
    struct dpif_sflow *sflow;
    struct dpif_ipfix *ipfix;
    struct hmap bundles;        /* Contains "struct ofbundle"s. */
    struct mac_learning *ml;
    bool has_bonded_bundles;
    struct mbridge *mbridge;

    /* Facets. */
    struct classifier facets;     /* Contains 'struct facet's. */
    long long int consistency_rl;

    /* Support for debugging async flow mods. */
    struct list completions;

    struct netdev_stats stats; /* To account packets generated and consumed in
                                * userspace. */

    /* Spanning tree. */
    struct stp *stp;
    long long int stp_last_tick;

    /* VLAN splinters. */
    struct ovs_mutex vsp_mutex;
    struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
    struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */

    /* Ports. */
    struct sset ports;             /* Set of standard port names. */
    struct sset ghost_ports;       /* Ports with no datapath port. */
    struct sset port_poll_set;     /* Queued names for port_poll() reply. */
    int port_poll_errno;           /* Last errno for port_poll() reply. */

    /* Per-ofproto dpif stats. */
    uint64_t n_hit;
    uint64_t n_missed;

    /* Work queues. */
    struct ovs_mutex flow_mod_mutex;
    struct list flow_mods OVS_GUARDED;
    size_t n_flow_mods OVS_GUARDED;

    struct ovs_mutex pin_mutex;
    struct list pins OVS_GUARDED;
    size_t n_pins OVS_GUARDED;
};

/* Defer flow mod completion until "ovs-appctl ofproto/unclog"?  (Useful only
 * for debugging the asynchronous flow_mod implementation.) */
static bool clogged;

/* By default, flows in the datapath are wildcarded (megaflows).  They
 * may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */
static bool enable_megaflows = true;

/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);

static void ofproto_dpif_unixctl_init(void);

static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}

static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                        ofp_port_t ofp_port);
static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
                          const struct ofpbuf *packet, struct ds *);

/* Upcalls. */
static void handle_upcalls(struct dpif_backer *);

/* Flow expiration. */
static int expire(struct dpif_backer *);

/* NetFlow. */
static void send_netflow_active_timeouts(struct ofproto_dpif *);

/* Utilities. */
static int send_packet(const struct ofport_dpif *, struct ofpbuf *packet);

/* Global variables. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Initial mapping from port names to bridges. */
static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);

/* Executes and takes ownership of 'fm'. */
void
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                      struct ofputil_flow_mod *fm)
{
    ovs_mutex_lock(&ofproto->flow_mod_mutex);
    if (ofproto->n_flow_mods > 1024) {
        ovs_mutex_unlock(&ofproto->flow_mod_mutex);
        COVERAGE_INC(flow_mod_overflow);
        free(fm->ofpacts);
        free(fm);
        return;
    }

    list_push_back(&ofproto->flow_mods, &fm->list_node);
    ofproto->n_flow_mods++;
    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
}

/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
 * Takes ownership of 'pin' and pin->packet. */
void
ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
                            struct ofputil_packet_in *pin)
{
    ovs_mutex_lock(&ofproto->pin_mutex);
    if (ofproto->n_pins > 1024) {
        ovs_mutex_unlock(&ofproto->pin_mutex);
        COVERAGE_INC(packet_in_overflow);
        free(CONST_CAST(void *, pin->packet));
        free(pin);
        return;
    }

    list_push_back(&ofproto->pins, &pin->list_node);
    ofproto->n_pins++;
    ovs_mutex_unlock(&ofproto->pin_mutex);
}
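
/* Hedged usage sketch for the queue above (an illustration, not code from
 * this file): the caller heap-allocates 'pin' and its packet and hands both
 * off.  Field choices such as OFPR_ACTION and the UINT32_MAX "unbuffered"
 * buffer_id are assumptions made for the example. */
static inline void
example_send_packet_in(struct ofproto_dpif *ofproto,
                       const void *data, size_t len)
{
    struct ofputil_packet_in *pin = xzalloc(sizeof *pin);

    pin->packet = xmemdup(data, len);   /* Freed by the consumer. */
    pin->packet_len = len;
    pin->total_len = len;
    pin->send_len = len;
    pin->reason = OFPR_ACTION;
    pin->buffer_id = UINT32_MAX;        /* Not buffered. */
    ofproto_dpif_send_packet_in(ofproto, pin);  /* Takes ownership. */
}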
\f
/* Factory functions. */

static void
init(const struct shash *iface_hints)
{
    struct shash_node *node;

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(orig_hint->br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }
}

static void
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}

static int
enumerate_names(const char *type, struct sset *names)
{
    struct ofproto_dpif *ofproto;

    sset_clear(names);
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (strcmp(type, ofproto->up.type)) {
            continue;
        }
        sset_add(names, ofproto->up.name);
    }

    return 0;
}

static int
del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error;

    error = dpif_open(name, type, &dpif);
    if (!error) {
        error = dpif_delete(dpif);
        dpif_close(dpif);
    }
    return error;
}
\f
static const char *
port_open_type(const char *datapath_type, const char *port_type)
{
    return dpif_port_open_type(datapath_type, port_type);
}

/* Type functions. */

static void process_dpif_port_changes(struct dpif_backer *);
static void process_dpif_all_ports_changed(struct dpif_backer *);
static void process_dpif_port_change(struct dpif_backer *,
                                     const char *devname);
static void process_dpif_port_error(struct dpif_backer *, int error);

static struct ofproto_dpif *
lookup_ofproto_dpif_by_port_name(const char *name)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (sset_contains(&ofproto->ports, name)) {
            return ofproto;
        }
    }

    return NULL;
}

static int
type_run(const char *type)
{
    static long long int push_timer = LLONG_MIN;
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* This is not necessarily a problem, since backers are only
         * created on demand. */
        return 0;
    }

    dpif_run(backer->dpif);

    /* The most natural place to push facet statistics is when they're pulled
     * from the datapath.  However, when there are many flows in the datapath,
     * this expensive operation can occur so frequently that it reduces our
     * ability to quickly set up flows.  To reduce the cost, we push statistics
     * here instead. */
    if (time_msec() > push_timer) {
        push_timer = time_msec() + 2000;
        push_all_stats();
    }

    /* If vswitchd started with other_config:flow_restore_wait set as "true",
     * and the configuration has now changed to "false", enable receiving
     * packets from the datapath. */
    if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
        int error;

        backer->recv_set_enable = true;

        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
        if (error) {
            udpif_recv_set(backer->udpif, 0, false);
            VLOG_ERR("Failed to enable receiving packets in dpif.");
            return error;
        }
        udpif_recv_set(backer->udpif, n_handler_threads,
                       backer->recv_set_enable);
        dpif_flow_flush(backer->dpif);
        backer->need_revalidate = REV_RECONFIGURE;
    }

    /* If n_handler_threads has been reconfigured, call udpif_recv_set()
     * to reset the handler threads. */
    if (backer->n_handler_threads != n_handler_threads) {
        udpif_recv_set(backer->udpif, n_handler_threads,
                       backer->recv_set_enable);
        backer->n_handler_threads = n_handler_threads;
    }

    if (backer->need_revalidate) {
        struct ofproto_dpif *ofproto;
        struct simap_node *node;
        struct simap tmp_backers;

        /* Handle tunnel garbage collection. */
        simap_init(&tmp_backers);
        simap_swap(&backer->tnl_backers, &tmp_backers);

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *iter;

            if (backer != ofproto->backer) {
                continue;
            }

            HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
                char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
                const char *dp_port;

                if (!iter->is_tunnel) {
                    continue;
                }

                dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
                                                     namebuf, sizeof namebuf);
                node = simap_find(&tmp_backers, dp_port);
                if (node) {
                    simap_put(&backer->tnl_backers, dp_port, node->data);
                    simap_delete(&tmp_backers, node);
                    node = simap_find(&backer->tnl_backers, dp_port);
                } else {
                    node = simap_find(&backer->tnl_backers, dp_port);
                    if (!node) {
                        odp_port_t odp_port = ODPP_NONE;

                        if (!dpif_port_add(backer->dpif, iter->up.netdev,
                                           &odp_port)) {
                            simap_put(&backer->tnl_backers, dp_port,
                                      odp_to_u32(odp_port));
                            node = simap_find(&backer->tnl_backers, dp_port);
                        }
                    }
                }

                iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
                if (tnl_port_reconfigure(iter, iter->up.netdev,
                                         iter->odp_port)) {
                    backer->need_revalidate = REV_RECONFIGURE;
                }
            }
        }

        SIMAP_FOR_EACH (node, &tmp_backers) {
            dpif_port_del(backer->dpif, u32_to_odp(node->data));
        }
        simap_destroy(&tmp_backers);

        switch (backer->need_revalidate) {
        case REV_RECONFIGURE:   COVERAGE_INC(rev_reconfigure);   break;
        case REV_STP:           COVERAGE_INC(rev_stp);           break;
        case REV_BOND:          COVERAGE_INC(rev_bond);          break;
        case REV_PORT_TOGGLED:  COVERAGE_INC(rev_port_toggled);  break;
        case REV_FLOW_TABLE:    COVERAGE_INC(rev_flow_table);    break;
        case REV_MAC_LEARNING:  COVERAGE_INC(rev_mac_learning);  break;
        case REV_INCONSISTENCY: COVERAGE_INC(rev_inconsistency); break;
        }
        backer->need_revalidate = 0;

        /* Clear the drop_keys in case we should now be accepting some
         * formerly dropped flows. */
        drop_key_clear(backer);

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct facet *facet, *next;
            struct ofport_dpif *ofport;
            struct cls_cursor cursor;
            struct ofbundle *bundle;

            if (ofproto->backer != backer) {
                continue;
            }

            ovs_rwlock_wrlock(&xlate_rwlock);
            xlate_ofproto_set(ofproto, ofproto->up.name,
                              ofproto->backer->dpif, ofproto->miss_rule,
                              ofproto->no_packet_in_rule, ofproto->ml,
                              ofproto->stp, ofproto->mbridge,
                              ofproto->sflow, ofproto->ipfix,
                              ofproto->up.frag_handling,
                              ofproto->up.forward_bpdu,
                              connmgr_has_in_band(ofproto->up.connmgr),
                              ofproto->netflow != NULL);

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                xlate_bundle_set(ofproto, bundle, bundle->name,
                                 bundle->vlan_mode, bundle->vlan,
                                 bundle->trunks, bundle->use_priority_tags,
                                 bundle->bond, bundle->lacp,
                                 bundle->floodable);
            }

            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                int stp_port = ofport->stp_port
                    ? stp_port_no(ofport->stp_port)
                    : 0;
                xlate_ofport_set(ofproto, ofport->bundle, ofport,
                                 ofport->up.ofp_port, ofport->odp_port,
                                 ofport->up.netdev, ofport->cfm,
                                 ofport->bfd, ofport->peer, stp_port,
                                 ofport->qdscp, ofport->n_qdscp,
                                 ofport->up.pp.config, ofport->is_tunnel,
                                 ofport->may_enable);
            }
            ovs_rwlock_unlock(&xlate_rwlock);

            /* Only ofproto-dpif cares about the facet classifier so we just
             * lock cls_cursor_init() to appease the thread safety analysis. */
            ovs_rwlock_rdlock(&ofproto->facets.rwlock);
            cls_cursor_init(&cursor, &ofproto->facets, NULL);
            ovs_rwlock_unlock(&ofproto->facets.rwlock);
            CLS_CURSOR_FOR_EACH_SAFE (facet, next, cr, &cursor) {
                facet_revalidate(facet);
                run_fast_rl();
            }
        }

        udpif_revalidate(backer->udpif);
    }

    if (!backer->recv_set_enable) {
        /* Wake up in at most 1000 ms. */
852         timer_set_duration(&backer->next_expiration, 1000);
853     } else if (timer_expired(&backer->next_expiration)) {
854         int delay = expire(backer);
855         timer_set_duration(&backer->next_expiration, delay);
856     }
857
858     process_dpif_port_changes(backer);
859
860     if (backer->governor) {
861         size_t n_subfacets;
862
863         governor_run(backer->governor);
864
865         /* If the governor has shrunk to its minimum size and the number of
866          * subfacets has dwindled, then drop the governor entirely.
867          *
868          * For hysteresis, the number of subfacets to drop the governor is
869          * smaller than the number needed to trigger its creation. */
870         n_subfacets = hmap_count(&backer->subfacets);
871         if (n_subfacets * 4 < flow_eviction_threshold
872             && governor_is_idle(backer->governor)) {
873             governor_destroy(backer->governor);
874             backer->governor = NULL;
875         }
876     }
877
878     return 0;
879 }
880
881 /* Check for and handle port changes in 'backer''s dpif. */
882 static void
883 process_dpif_port_changes(struct dpif_backer *backer)
884 {
885     for (;;) {
886         char *devname;
887         int error;
888
889         error = dpif_port_poll(backer->dpif, &devname);
890         switch (error) {
891         case EAGAIN:
892             return;
893
894         case ENOBUFS:
895             process_dpif_all_ports_changed(backer);
896             break;
897
898         case 0:
899             process_dpif_port_change(backer, devname);
900             free(devname);
901             break;
902
903         default:
904             process_dpif_port_error(backer, error);
905             break;
906         }
907     }
908 }
909
910 static void
911 process_dpif_all_ports_changed(struct dpif_backer *backer)
912 {
913     struct ofproto_dpif *ofproto;
914     struct dpif_port dpif_port;
915     struct dpif_port_dump dump;
916     struct sset devnames;
917     const char *devname;
918
919     sset_init(&devnames);
920     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
921         if (ofproto->backer == backer) {
922             struct ofport *ofport;
923
924             HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
925                 sset_add(&devnames, netdev_get_name(ofport->netdev));
926             }
927         }
928     }
929     DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) {
930         sset_add(&devnames, dpif_port.name);
931     }
932
933     SSET_FOR_EACH (devname, &devnames) {
934         process_dpif_port_change(backer, devname);
935     }
936     sset_destroy(&devnames);
937 }
938
939 static void
940 process_dpif_port_change(struct dpif_backer *backer, const char *devname)
941 {
942     struct ofproto_dpif *ofproto;
943     struct dpif_port port;
944
945     /* Don't report on the datapath's device. */
946     if (!strcmp(devname, dpif_base_name(backer->dpif))) {
947         return;
948     }
949
950     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node,
951                    &all_ofproto_dpifs) {
952         if (simap_contains(&ofproto->backer->tnl_backers, devname)) {
953             return;
954         }
955     }
956
957     ofproto = lookup_ofproto_dpif_by_port_name(devname);
958     if (dpif_port_query_by_name(backer->dpif, devname, &port)) {
959         /* The port was removed.  If we know the datapath,
960          * report it through poll_set().  If we don't, it may be
961          * notifying us of a removal we initiated, so ignore it.
962          * If there's a pending ENOBUFS, let it stand, since
963          * everything will be reevaluated. */
964         if (ofproto && ofproto->port_poll_errno != ENOBUFS) {
965             sset_add(&ofproto->port_poll_set, devname);
966             ofproto->port_poll_errno = 0;
967         }
968     } else if (!ofproto) {
969         /* The port was added, but we don't know with which
970          * ofproto we should associate it.  Delete it. */
971         dpif_port_del(backer->dpif, port.port_no);
972     } else {
973         struct ofport_dpif *ofport;
974
975         ofport = ofport_dpif_cast(shash_find_data(
976                                       &ofproto->up.port_by_name, devname));
977         if (ofport
978             && ofport->odp_port != port.port_no
979             && !odp_port_to_ofport(backer, port.port_no))
980         {
981             /* 'ofport''s datapath port number has changed from
982              * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
983              * structures to match. */
984             ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
985             hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
986             ofport->odp_port = port.port_no;
987             hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
988                         hash_odp_port(port.port_no));
989             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
990             backer->need_revalidate = REV_RECONFIGURE;
991         }
992     }
993     dpif_port_destroy(&port);
994 }
995
996 /* Propagate 'error' to all ofprotos based on 'backer'. */
997 static void
998 process_dpif_port_error(struct dpif_backer *backer, int error)
999 {
1000     struct ofproto_dpif *ofproto;
1001
1002     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
1003         if (ofproto->backer == backer) {
1004             sset_clear(&ofproto->port_poll_set);
1005             ofproto->port_poll_errno = error;
1006         }
1007     }
1008 }
1009
1010 static int
1011 dpif_backer_run_fast(struct dpif_backer *backer)
1012 {
1013     udpif_run(backer->udpif);
1014     handle_upcalls(backer);
1015
1016     return 0;
1017 }
1018
1019 static int
1020 type_run_fast(const char *type)
1021 {
1022     struct dpif_backer *backer;
1023
1024     backer = shash_find_data(&all_dpif_backers, type);
1025     if (!backer) {
1026         /* This is not necessarily a problem, since backers are only
1027          * created on demand. */
1028         return 0;
1029     }
1030
1031     return dpif_backer_run_fast(backer);
1032 }
1033
1034 static void
1035 run_fast_rl(void)
1036 {
1037     static long long int port_rl = LLONG_MIN;
1038
1039     if (time_msec() >= port_rl) {
1040         struct ofproto_dpif *ofproto;
1041
1042         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
1043             run_fast(&ofproto->up);
1044         }
1045         port_rl = time_msec() + 200;
1046     }
1047 }
1048
1049 static void
1050 type_wait(const char *type)
1051 {
1052     struct dpif_backer *backer;
1053
1054     backer = shash_find_data(&all_dpif_backers, type);
1055     if (!backer) {
1056         /* This is not necessarily a problem, since backers are only
1057          * created on demand. */
1058         return;
1059     }
1060
1061     if (backer->governor) {
1062         governor_wait(backer->governor);
1063     }
1064
1065     timer_wait(&backer->next_expiration);
1066     dpif_wait(backer->dpif);
1067     udpif_wait(backer->udpif);
1068 }
1069 \f
1070 /* Basic life-cycle. */
1071
1072 static int add_internal_flows(struct ofproto_dpif *);
1073
1074 static struct ofproto *
1075 alloc(void)
1076 {
1077     struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
1078     return &ofproto->up;
1079 }
1080
1081 static void
1082 dealloc(struct ofproto *ofproto_)
1083 {
1084     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1085     free(ofproto);
1086 }
1087
1088 static void
1089 close_dpif_backer(struct dpif_backer *backer)
1090 {
1091     struct shash_node *node;
1092
1093     ovs_assert(backer->refcount > 0);
1094
1095     if (--backer->refcount) {
1096         return;
1097     }
1098
1099     drop_key_clear(backer);
1100     hmap_destroy(&backer->drop_keys);
1101
1102     simap_destroy(&backer->tnl_backers);
1103     ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
1104     hmap_destroy(&backer->odp_to_ofport_map);
1105     node = shash_find(&all_dpif_backers, backer->type);
1106     free(backer->type);
1107     shash_delete(&all_dpif_backers, node);
1108     udpif_destroy(backer->udpif);
1109     dpif_close(backer->dpif);
1110
1111     ovs_assert(hmap_is_empty(&backer->subfacets));
1112     hmap_destroy(&backer->subfacets);
1113     governor_destroy(backer->governor);
1114
1115     free(backer);
1116 }
1117
1118 /* Datapath port slated for removal from datapath. */
1119 struct odp_garbage {
1120     struct list list_node;
1121     odp_port_t odp_port;
1122 };
1123
1124 static int
1125 open_dpif_backer(const char *type, struct dpif_backer **backerp)
1126 {
1127     struct dpif_backer *backer;
1128     struct dpif_port_dump port_dump;
1129     struct dpif_port port;
1130     struct shash_node *node;
1131     struct list garbage_list;
1132     struct odp_garbage *garbage, *next;
1133     struct sset names;
1134     char *backer_name;
1135     const char *name;
1136     int error;
1137
1138     backer = shash_find_data(&all_dpif_backers, type);
1139     if (backer) {
1140         backer->refcount++;
1141         *backerp = backer;
1142         return 0;
1143     }
1144
1145     backer_name = xasprintf("ovs-%s", type);
1146
1147     /* Remove any existing datapaths, since we assume we're the only
1148      * userspace controlling the datapath. */
1149     sset_init(&names);
1150     dp_enumerate_names(type, &names);
1151     SSET_FOR_EACH(name, &names) {
1152         struct dpif *old_dpif;
1153
1154         /* Don't remove our backer if it exists. */
1155         if (!strcmp(name, backer_name)) {
1156             continue;
1157         }
1158
1159         if (dpif_open(name, type, &old_dpif)) {
1160             VLOG_WARN("couldn't open old datapath %s to remove it", name);
1161         } else {
1162             dpif_delete(old_dpif);
1163             dpif_close(old_dpif);
1164         }
1165     }
1166     sset_destroy(&names);
1167
1168     backer = xmalloc(sizeof *backer);
1169
1170     error = dpif_create_and_open(backer_name, type, &backer->dpif);
1171     free(backer_name);
1172     if (error) {
1173         VLOG_ERR("failed to open datapath of type %s: %s", type,
1174                  ovs_strerror(error));
1175         free(backer);
1176         return error;
1177     }
1178     backer->udpif = udpif_create(backer, backer->dpif);
1179
1180     backer->type = xstrdup(type);
1181     backer->governor = NULL;
1182     backer->refcount = 1;
1183     hmap_init(&backer->odp_to_ofport_map);
1184     ovs_rwlock_init(&backer->odp_to_ofport_lock);
1185     hmap_init(&backer->drop_keys);
1186     hmap_init(&backer->subfacets);
1187     timer_set_duration(&backer->next_expiration, 1000);
1188     backer->need_revalidate = 0;
1189     simap_init(&backer->tnl_backers);
1190     backer->recv_set_enable = !ofproto_get_flow_restore_wait();
1191     *backerp = backer;
1192
1193     if (backer->recv_set_enable) {
1194         dpif_flow_flush(backer->dpif);
1195     }
1196
1197     /* Loop through the ports already on the datapath and remove any
1198      * that we don't need anymore. */
1199     list_init(&garbage_list);
1200     dpif_port_dump_start(&port_dump, backer->dpif);
1201     while (dpif_port_dump_next(&port_dump, &port)) {
1202         node = shash_find(&init_ofp_ports, port.name);
1203         if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
1204             garbage = xmalloc(sizeof *garbage);
1205             garbage->odp_port = port.port_no;
1206             list_push_front(&garbage_list, &garbage->list_node);
1207         }
1208     }
1209     dpif_port_dump_done(&port_dump);
1210
1211     LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
1212         dpif_port_del(backer->dpif, garbage->odp_port);
1213         list_remove(&garbage->list_node);
1214         free(garbage);
1215     }
1216
1217     shash_add(&all_dpif_backers, type, backer);
1218
1219     error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
1220     if (error) {
1221         VLOG_ERR("failed to listen on datapath of type %s: %s",
1222                  type, ovs_strerror(error));
1223         close_dpif_backer(backer);
1224         return error;
1225     }
1226     udpif_recv_set(backer->udpif, n_handler_threads,
1227                    backer->recv_set_enable);
1228     backer->n_handler_threads = n_handler_threads;
1229
1230     backer->max_n_subfacet = 0;
1231     backer->created = time_msec();
1232     backer->last_minute = backer->created;
1233     memset(&backer->hourly, 0, sizeof backer->hourly);
1234     memset(&backer->daily, 0, sizeof backer->daily);
1235     memset(&backer->lifetime, 0, sizeof backer->lifetime);
1236     backer->subfacet_add_count = 0;
1237     backer->subfacet_del_count = 0;
1238     backer->total_subfacet_add_count = 0;
1239     backer->total_subfacet_del_count = 0;
1240     backer->avg_n_subfacet = 0;
1241     backer->avg_subfacet_life = 0;
1242
1243     return error;
1244 }
1245
1246 static int
1247 construct(struct ofproto *ofproto_)
1248 {
1249     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1250     struct shash_node *node, *next;
1251     odp_port_t max_ports;
1252     int error;
1253
1254     error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
1255     if (error) {
1256         return error;
1257     }
1258
1259     max_ports = dpif_get_max_ports(ofproto->backer->dpif);
1260     ofproto_init_max_ports(ofproto_, u16_to_ofp(MIN(odp_to_u32(max_ports),
1261                                                     ofp_to_u16(OFPP_MAX))));
1262
1263     ofproto->netflow = NULL;
1264     ofproto->sflow = NULL;
1265     ofproto->ipfix = NULL;
1266     ofproto->stp = NULL;
1267     hmap_init(&ofproto->bundles);
1268     ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
1269     ofproto->mbridge = mbridge_create();
1270     ofproto->has_bonded_bundles = false;
1271     ovs_mutex_init(&ofproto->vsp_mutex, PTHREAD_MUTEX_NORMAL);
1272
1273     classifier_init(&ofproto->facets);
1274     ofproto->consistency_rl = LLONG_MIN;
1275
1276     list_init(&ofproto->completions);
1277
1278     ovs_mutex_init(&ofproto->flow_mod_mutex, PTHREAD_MUTEX_NORMAL);
1279     ovs_mutex_lock(&ofproto->flow_mod_mutex);
1280     list_init(&ofproto->flow_mods);
1281     ofproto->n_flow_mods = 0;
1282     ovs_mutex_unlock(&ofproto->flow_mod_mutex);
1283
1284     ovs_mutex_init(&ofproto->pin_mutex, PTHREAD_MUTEX_NORMAL);
1285     ovs_mutex_lock(&ofproto->pin_mutex);
1286     list_init(&ofproto->pins);
1287     ofproto->n_pins = 0;
1288     ovs_mutex_unlock(&ofproto->pin_mutex);
1289
1290     ofproto_dpif_unixctl_init();
1291
1292     hmap_init(&ofproto->vlandev_map);
1293     hmap_init(&ofproto->realdev_vid_map);
1294
1295     sset_init(&ofproto->ports);
1296     sset_init(&ofproto->ghost_ports);
1297     sset_init(&ofproto->port_poll_set);
1298     ofproto->port_poll_errno = 0;
1299
1300     SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
1301         struct iface_hint *iface_hint = node->data;
1302
1303         if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
1304             /* Check if the datapath already has this port. */
1305             if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
1306                 sset_add(&ofproto->ports, node->name);
1307             }
1308
1309             free(iface_hint->br_name);
1310             free(iface_hint->br_type);
1311             free(iface_hint);
1312             shash_delete(&init_ofp_ports, node);
1313         }
1314     }
1315
1316     hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
1317                 hash_string(ofproto->up.name, 0));
1318     memset(&ofproto->stats, 0, sizeof ofproto->stats);
1319
1320     ofproto_init_tables(ofproto_, N_TABLES);
1321     error = add_internal_flows(ofproto);
1322     ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
1323
1324     ofproto->n_hit = 0;
1325     ofproto->n_missed = 0;
1326
1327     return error;
1328 }
1329
1330 static int
1331 add_internal_flow(struct ofproto_dpif *ofproto, int id,
1332                   const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
1333 {
1334     struct ofputil_flow_mod fm;
1335     int error;
1336
1337     match_init_catchall(&fm.match);
1338     fm.priority = 0;
1339     match_set_reg(&fm.match, 0, id);
1340     fm.new_cookie = htonll(0);
1341     fm.cookie = htonll(0);
1342     fm.cookie_mask = htonll(0);
1343     fm.modify_cookie = false;
1344     fm.table_id = TBL_INTERNAL;
1345     fm.command = OFPFC_ADD;
1346     fm.idle_timeout = 0;
1347     fm.hard_timeout = 0;
1348     fm.buffer_id = 0;
1349     fm.out_port = 0;
1350     fm.flags = 0;
1351     fm.ofpacts = ofpacts->data;
1352     fm.ofpacts_len = ofpacts->size;
1353
1354     error = ofproto_flow_mod(&ofproto->up, &fm);
1355     if (error) {
1356         VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)",
1357                     id, ofperr_to_string(error));
1358         return error;
1359     }
1360
1361     if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL,
1362                                   rulep)) {
1363         ovs_rwlock_unlock(&(*rulep)->up.evict);
1364     } else {
1365         NOT_REACHED();
1366     }
1367
1368     return 0;
1369 }
1370
1371 static int
1372 add_internal_flows(struct ofproto_dpif *ofproto)
1373 {
1374     struct ofpact_controller *controller;
1375     uint64_t ofpacts_stub[128 / 8];
1376     struct ofpbuf ofpacts;
1377     int error;
1378     int id;
1379
1380     ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
1381     id = 1;
1382
1383     controller = ofpact_put_CONTROLLER(&ofpacts);
1384     controller->max_len = UINT16_MAX;
1385     controller->controller_id = 0;
1386     controller->reason = OFPR_NO_MATCH;
1387     ofpact_pad(&ofpacts);
1388
1389     error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
1390     if (error) {
1391         return error;
1392     }
1393
1394     ofpbuf_clear(&ofpacts);
1395     error = add_internal_flow(ofproto, id++, &ofpacts,
1396                               &ofproto->no_packet_in_rule);
1397     if (error) {
1398         return error;
1399     }
1400
1401     error = add_internal_flow(ofproto, id++, &ofpacts,
1402                               &ofproto->drop_frags_rule);
1403     return error;
1404 }
1405
1406 static void
1407 complete_operations(struct ofproto_dpif *ofproto)
1408 {
1409     struct dpif_completion *c, *next;
1410
1411     LIST_FOR_EACH_SAFE (c, next, list_node, &ofproto->completions) {
1412         ofoperation_complete(c->op, 0);
1413         list_remove(&c->list_node);
1414         free(c);
1415     }
1416 }
1417
1418 static void
1419 destruct(struct ofproto *ofproto_)
1420 {
1421     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1422     struct rule_dpif *rule, *next_rule;
1423     struct ofputil_packet_in *pin, *next_pin;
1424     struct ofputil_flow_mod *fm, *next_fm;
1425     struct oftable *table;
1426
1427     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1428     ovs_rwlock_wrlock(&xlate_rwlock);
1429     xlate_remove_ofproto(ofproto);
1430     ovs_rwlock_unlock(&xlate_rwlock);
1431
1432     hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
1433     complete_operations(ofproto);
1434
1435     OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
1436         struct cls_cursor cursor;
1437
1438         ovs_rwlock_wrlock(&table->cls.rwlock);
1439         cls_cursor_init(&cursor, &table->cls, NULL);
1440         CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) {
1441             ofproto_rule_destroy(&ofproto->up, &table->cls, &rule->up);
1442         }
1443         ovs_rwlock_unlock(&table->cls.rwlock);
1444     }
1445
1446     ovs_mutex_lock(&ofproto->flow_mod_mutex);
1447     LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &ofproto->flow_mods) {
1448         list_remove(&fm->list_node);
1449         ofproto->n_flow_mods--;
1450         free(fm->ofpacts);
1451         free(fm);
1452     }
1453     ovs_mutex_unlock(&ofproto->flow_mod_mutex);
1454     ovs_mutex_destroy(&ofproto->flow_mod_mutex);
1455
1456     ovs_mutex_lock(&ofproto->pin_mutex);
1457     LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &ofproto->pins) {
1458         list_remove(&pin->list_node);
1459         ofproto->n_pins--;
1460         free(CONST_CAST(void *, pin->packet));
1461         free(pin);
1462     }
1463     ovs_mutex_unlock(&ofproto->pin_mutex);
1464     ovs_mutex_destroy(&ofproto->pin_mutex);
1465
1466     mbridge_unref(ofproto->mbridge);
1467
1468     netflow_destroy(ofproto->netflow);
1469     dpif_sflow_unref(ofproto->sflow);
1470     hmap_destroy(&ofproto->bundles);
1471     mac_learning_unref(ofproto->ml);
1472
1473     classifier_destroy(&ofproto->facets);
1474
1475     hmap_destroy(&ofproto->vlandev_map);
1476     hmap_destroy(&ofproto->realdev_vid_map);
1477
1478     sset_destroy(&ofproto->ports);
1479     sset_destroy(&ofproto->ghost_ports);
1480     sset_destroy(&ofproto->port_poll_set);
1481
1482     ovs_mutex_destroy(&ofproto->vsp_mutex);
1483
1484     close_dpif_backer(ofproto->backer);
1485 }
1486
1487 static int
1488 run_fast(struct ofproto *ofproto_)
1489 {
1490     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1491     struct ofputil_packet_in *pin, *next_pin;
1492     struct ofputil_flow_mod *fm, *next_fm;
1493     struct list flow_mods, pins;
1494     struct ofport_dpif *ofport;
1495
1496     /* Do not perform any periodic activity required by 'ofproto' while
1497      * waiting for flow restore to complete. */
1498     if (ofproto_get_flow_restore_wait()) {
1499         return 0;
1500     }
1501
1502     ovs_mutex_lock(&ofproto->flow_mod_mutex);
1503     if (ofproto->n_flow_mods) {
1504         flow_mods = ofproto->flow_mods;
1505         list_moved(&flow_mods);
1506         list_init(&ofproto->flow_mods);
1507         ofproto->n_flow_mods = 0;
1508     } else {
1509         list_init(&flow_mods);
1510     }
1511     ovs_mutex_unlock(&ofproto->flow_mod_mutex);
1512
1513     LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &flow_mods) {
1514         int error = ofproto_flow_mod(&ofproto->up, fm);
1515         if (error && !VLOG_DROP_WARN(&rl)) {
1516             VLOG_WARN("learning action failed to modify flow table (%s)",
1517                       ofperr_get_name(error));
1518         }
1519
1520         list_remove(&fm->list_node);
1521         free(fm->ofpacts);
1522         free(fm);
1523     }
1524
1525     ovs_mutex_lock(&ofproto->pin_mutex);
1526     if (ofproto->n_pins) {
1527         pins = ofproto->pins;
1528         list_moved(&pins);
1529         list_init(&ofproto->pins);
1530         ofproto->n_pins = 0;
1531     } else {
1532         list_init(&pins);
1533     }
1534     ovs_mutex_unlock(&ofproto->pin_mutex);
1535
1536     LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
1537         connmgr_send_packet_in(ofproto->up.connmgr, pin);
1538         list_remove(&pin->list_node);
1539         free(CONST_CAST(void *, pin->packet));
1540         free(pin);
1541     }
1542
1543     HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1544         port_run_fast(ofport);
1545     }
1546
1547     return 0;
1548 }
1549
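/* Full periodic maintenance for 'ofproto_': completes pending operations,
 * performs the run_fast() work, then runs NetFlow, sFlow, ports, bundles,
 * STP, and MAC learning, and spot-checks one randomly chosen facet for
 * consistency, forcing revalidation if it is inconsistent. */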
1550 static int
1551 run(struct ofproto *ofproto_)
1552 {
1553     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1554     struct ofport_dpif *ofport;
1555     struct ofbundle *bundle;
1556     int error;
1557
1558     if (!clogged) {
1559         complete_operations(ofproto);
1560     }
1561
1562     if (mbridge_need_revalidate(ofproto->mbridge)) {
1563         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1564         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
1565         mac_learning_flush(ofproto->ml);
1566         ovs_rwlock_unlock(&ofproto->ml->rwlock);
1567     }
1568
1569     /* Do not perform any of the periodic activity below that is required by
1570      * 'ofproto' while waiting for flow restore to complete. */
1571     if (ofproto_get_flow_restore_wait()) {
1572         return 0;
1573     }
1574
1575     error = run_fast(ofproto_);
1576     if (error) {
1577         return error;
1578     }
1579
1580     if (ofproto->netflow) {
1581         if (netflow_run(ofproto->netflow)) {
1582             send_netflow_active_timeouts(ofproto);
1583         }
1584     }
1585     if (ofproto->sflow) {
1586         dpif_sflow_run(ofproto->sflow);
1587     }
1588
1589     HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1590         port_run(ofport);
1591     }
1592     HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1593         bundle_run(bundle);
1594     }
1595
1596     stp_run(ofproto);
1597     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
1598     if (mac_learning_run(ofproto->ml)) {
1599         ofproto->backer->need_revalidate = REV_MAC_LEARNING;
1600     }
1601     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1602
1603     /* Check the consistency of a random facet, to aid debugging. */
1604     ovs_rwlock_rdlock(&ofproto->facets.rwlock);
1605     if (time_msec() >= ofproto->consistency_rl
1606         && !classifier_is_empty(&ofproto->facets)
1607         && !ofproto->backer->need_revalidate) {
1608         struct cls_table *table;
1609         struct cls_rule *cr;
1610         struct facet *facet;
1611
1612         ofproto->consistency_rl = time_msec() + 250;
1613
1614         table = CONTAINER_OF(hmap_random_node(&ofproto->facets.tables),
1615                              struct cls_table, hmap_node);
1616         cr = CONTAINER_OF(hmap_random_node(&table->rules), struct cls_rule,
1617                           hmap_node);
1618         facet = CONTAINER_OF(cr, struct facet, cr);
1619
1620         if (!facet_check_consistency(facet)) {
1621             ofproto->backer->need_revalidate = REV_INCONSISTENCY;
1622         }
1623     }
1624     ovs_rwlock_unlock(&ofproto->facets.rwlock);
1625
1626     return 0;
1627 }
1628
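/* Arranges for the poll loop to wake up when run() has work to do for
 * 'ofproto_'. */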
1629 static void
1630 wait(struct ofproto *ofproto_)
1631 {
1632     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1633     struct ofport_dpif *ofport;
1634     struct ofbundle *bundle;
1635
1636     if (!clogged && !list_is_empty(&ofproto->completions)) {
1637         poll_immediate_wake();
1638     }
1639
1640     if (ofproto_get_flow_restore_wait()) {
1641         return;
1642     }
1643
1644     if (ofproto->sflow) {
1645         dpif_sflow_wait(ofproto->sflow);
1646     }
1647     HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1648         port_wait(ofport);
1649     }
1650     HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1651         bundle_wait(bundle);
1652     }
1653     if (ofproto->netflow) {
1654         netflow_wait(ofproto->netflow);
1655     }
1656     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
1657     mac_learning_wait(ofproto->ml);
1658     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1659     stp_wait(ofproto);
1660     if (ofproto->backer->need_revalidate) {
1661         /* Shouldn't happen, but if it does, just go around again. */
1662         VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
1663         poll_immediate_wake();
1664     }
1665 }
1666
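/* Adds 'ofproto_'s facet and subfacet counts to 'usage' for memory usage
 * reporting. */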
1667 static void
1668 get_memory_usage(const struct ofproto *ofproto_, struct simap *usage)
1669 {
1670     const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1671     struct cls_cursor cursor;
1672     size_t n_subfacets = 0;
1673     struct facet *facet;
1674
1675     ovs_rwlock_rdlock(&ofproto->facets.rwlock);
1676     simap_increase(usage, "facets", classifier_count(&ofproto->facets));
1677     ovs_rwlock_unlock(&ofproto->facets.rwlock);
1678
1679     ovs_rwlock_rdlock(&ofproto->facets.rwlock);
1680     cls_cursor_init(&cursor, &ofproto->facets, NULL);
1681     CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
1682         n_subfacets += list_size(&facet->subfacets);
1683     }
1684     ovs_rwlock_unlock(&ofproto->facets.rwlock);
1685     simap_increase(usage, "subfacets", n_subfacets);
1686 }
1687
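/* Deletes every subfacet that belongs to 'ofproto_', destroying installed
 * datapath flows in batches of up to SUBFACET_DESTROY_MAX_BATCH at a time. */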
1688 static void
1689 flush(struct ofproto *ofproto_)
1690 {
1691     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1692     struct subfacet *subfacet, *next_subfacet;
1693     struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH];
1694     int n_batch;
1695
1696     n_batch = 0;
1697     HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
1698                         &ofproto->backer->subfacets) {
1699         if (subfacet->facet->ofproto != ofproto) {
1700             continue;
1701         }
1702
1703         if (subfacet->path != SF_NOT_INSTALLED) {
1704             batch[n_batch++] = subfacet;
1705             if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) {
1706                 subfacet_destroy_batch(ofproto->backer, batch, n_batch);
1707                 n_batch = 0;
1708             }
1709         } else {
1710             subfacet_destroy(subfacet);
1711         }
1712     }
1713
1714     if (n_batch > 0) {
1715         subfacet_destroy_batch(ofproto->backer, batch, n_batch);
1716     }
1717 }
1718
1719 static void
1720 get_features(struct ofproto *ofproto_ OVS_UNUSED,
1721              bool *arp_match_ip, enum ofputil_action_bitmap *actions)
1722 {
1723     *arp_match_ip = true;
1724     *actions = (OFPUTIL_A_OUTPUT |
1725                 OFPUTIL_A_SET_VLAN_VID |
1726                 OFPUTIL_A_SET_VLAN_PCP |
1727                 OFPUTIL_A_STRIP_VLAN |
1728                 OFPUTIL_A_SET_DL_SRC |
1729                 OFPUTIL_A_SET_DL_DST |
1730                 OFPUTIL_A_SET_NW_SRC |
1731                 OFPUTIL_A_SET_NW_DST |
1732                 OFPUTIL_A_SET_NW_TOS |
1733                 OFPUTIL_A_SET_TP_SRC |
1734                 OFPUTIL_A_SET_TP_DST |
1735                 OFPUTIL_A_ENQUEUE);
1736 }
1737
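/* Fills in 'ots' with stats for the single "classifier" table.  The lookup
 * count is the datapath's hit + miss count, excluding fragments dropped by
 * the internal drop-frags rule; the matched count further excludes packets
 * that only hit the internal miss or no-packet-in rules. */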
1738 static void
1739 get_tables(struct ofproto *ofproto_, struct ofp12_table_stats *ots)
1740 {
1741     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1742     struct dpif_dp_stats s;
1743     uint64_t n_miss, n_no_pkt_in, n_bytes, n_dropped_frags;
1744     uint64_t n_lookup;
1745
1746     strcpy(ots->name, "classifier");
1747
1748     dpif_get_dp_stats(ofproto->backer->dpif, &s);
1749     rule_get_stats(&ofproto->miss_rule->up, &n_miss, &n_bytes);
1750     rule_get_stats(&ofproto->no_packet_in_rule->up, &n_no_pkt_in, &n_bytes);
1751     rule_get_stats(&ofproto->drop_frags_rule->up, &n_dropped_frags, &n_bytes);
1752
1753     n_lookup = s.n_hit + s.n_missed - n_dropped_frags;
1754     ots->lookup_count = htonll(n_lookup);
1755     ots->matched_count = htonll(n_lookup - n_miss - n_no_pkt_in);
1756 }
1757
1758 static struct ofport *
1759 port_alloc(void)
1760 {
1761     struct ofport_dpif *port = xmalloc(sizeof *port);
1762     return &port->up;
1763 }
1764
1765 static void
1766 port_dealloc(struct ofport *port_)
1767 {
1768     struct ofport_dpif *port = ofport_dpif_cast(port_);
1769     free(port);
1770 }
1771
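/* Initializes the dpif-specific state of 'port_': resolves its datapath port
 * number (patch ports get none and are skipped by sFlow), registers tunnel
 * ports or the odp-to-ofport mapping, and adds the port to the sFlow module
 * for counter polling. */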
1772 static int
1773 port_construct(struct ofport *port_)
1774 {
1775     struct ofport_dpif *port = ofport_dpif_cast(port_);
1776     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1777     const struct netdev *netdev = port->up.netdev;
1778     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1779     struct dpif_port dpif_port;
1780     int error;
1781
1782     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1783     port->bundle = NULL;
1784     port->cfm = NULL;
1785     port->bfd = NULL;
1786     port->may_enable = true;
1787     port->stp_port = NULL;
1788     port->stp_state = STP_DISABLED;
1789     port->is_tunnel = false;
1790     port->peer = NULL;
1791     port->qdscp = NULL;
1792     port->n_qdscp = 0;
1793     port->realdev_ofp_port = 0;
1794     port->vlandev_vid = 0;
1795     port->carrier_seq = netdev_get_carrier_resets(netdev);
1796
1797     if (netdev_vport_is_patch(netdev)) {
1798         /* By bailing out here, we don't submit the port to the sFlow module
1799          * to be considered for counter polling export.  This is correct
1800          * because the patch port represents an interface that sFlow considers
1801          * to be "internal" to the switch as a whole, and therefore not a
1802          * candidate for counter polling. */
1803         port->odp_port = ODPP_NONE;
1804         ofport_update_peer(port);
1805         return 0;
1806     }
1807
1808     error = dpif_port_query_by_name(ofproto->backer->dpif,
1809                                     netdev_vport_get_dpif_port(netdev, namebuf,
1810                                                                sizeof namebuf),
1811                                     &dpif_port);
1812     if (error) {
1813         return error;
1814     }
1815
1816     port->odp_port = dpif_port.port_no;
1817
1818     if (netdev_get_tunnel_config(netdev)) {
1819         tnl_port_add(port, port->up.netdev, port->odp_port);
1820         port->is_tunnel = true;
1821     } else {
1822         /* Sanity-check that a mapping doesn't already exist.  This
1823          * shouldn't happen for non-tunnel ports. */
1824         if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
1825             VLOG_ERR("port %s already has an OpenFlow port number",
1826                      dpif_port.name);
1827             dpif_port_destroy(&dpif_port);
1828             return EBUSY;
1829         }
1830
1831         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1832         hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
1833                     hash_odp_port(port->odp_port));
1834         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1835     }
1836     dpif_port_destroy(&dpif_port);
1837
1838     if (ofproto->sflow) {
1839         dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
1840     }
1841
1842     return 0;
1843 }
1844
1845 static void
1846 port_destruct(struct ofport *port_)
1847 {
1848     struct ofport_dpif *port = ofport_dpif_cast(port_);
1849     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1850     const char *devname = netdev_get_name(port->up.netdev);
1851     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1852     const char *dp_port_name;
1853
1854     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1855     ovs_rwlock_wrlock(&xlate_rwlock);
1856     xlate_ofport_remove(port);
1857     ovs_rwlock_unlock(&xlate_rwlock);
1858
1859     dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
1860                                               sizeof namebuf);
1861     if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
1862         /* The underlying device is still there, so delete it.  This
1863          * happens when the ofproto is being destroyed, since the caller
1864          * assumes that removal of attached ports will happen as part of
1865          * destruction. */
1866         if (!port->is_tunnel) {
1867             dpif_port_del(ofproto->backer->dpif, port->odp_port);
1868         }
1869     }
1870
1871     if (port->peer) {
1872         port->peer->peer = NULL;
1873         port->peer = NULL;
1874     }
1875
1876     if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
1877         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1878         hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
1879         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1880     }
1881
1882     tnl_port_del(port);
1883     sset_find_and_delete(&ofproto->ports, devname);
1884     sset_find_and_delete(&ofproto->ghost_ports, devname);
1885     bundle_remove(port_);
1886     set_cfm(port_, NULL);
1887     set_bfd(port_, NULL);
1888     if (ofproto->sflow) {
1889         dpif_sflow_del_port(ofproto->sflow, port->odp_port);
1890     }
1891
1892     free(port->qdscp);
1893 }
1894
1895 static void
1896 port_modified(struct ofport *port_)
1897 {
1898     struct ofport_dpif *port = ofport_dpif_cast(port_);
1899
1900     if (port->bundle && port->bundle->bond) {
1901         bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
1902     }
1903
1904     if (port->cfm) {
1905         cfm_set_netdev(port->cfm, port->up.netdev);
1906     }
1907
1908     if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev,
1909                                                 port->odp_port)) {
1910         ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate =
1911             REV_RECONFIGURE;
1912     }
1913
1914     ofport_update_peer(port);
1915 }
1916
1917 static void
1918 port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
1919 {
1920     struct ofport_dpif *port = ofport_dpif_cast(port_);
1921     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1922     enum ofputil_port_config changed = old_config ^ port->up.pp.config;
1923
1924     if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
1925                    OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
1926                    OFPUTIL_PC_NO_PACKET_IN)) {
1927         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1928
1929         if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
1930             bundle_update(port->bundle);
1931         }
1932     }
1933 }
1934
1935 static int
1936 set_sflow(struct ofproto *ofproto_,
1937           const struct ofproto_sflow_options *sflow_options)
1938 {
1939     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1940     struct dpif_sflow *ds = ofproto->sflow;
1941
1942     if (sflow_options) {
1943         if (!ds) {
1944             struct ofport_dpif *ofport;
1945
1946             ds = ofproto->sflow = dpif_sflow_create();
1947             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1948                 dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
1949             }
1950             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1951         }
1952         dpif_sflow_set_options(ds, sflow_options);
1953     } else {
1954         if (ds) {
1955             dpif_sflow_unref(ds);
1956             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1957             ofproto->sflow = NULL;
1958         }
1959     }
1960     return 0;
1961 }
1962
1963 static int
1964 set_ipfix(
1965     struct ofproto *ofproto_,
1966     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
1967     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
1968     size_t n_flow_exporters_options)
1969 {
1970     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1971     struct dpif_ipfix *di = ofproto->ipfix;
1972
1973     if (bridge_exporter_options || flow_exporters_options) {
1974         if (!di) {
1975             di = ofproto->ipfix = dpif_ipfix_create();
1976         }
1977         dpif_ipfix_set_options(
1978             di, bridge_exporter_options, flow_exporters_options,
1979             n_flow_exporters_options);
1980     } else {
1981         if (di) {
1982             dpif_ipfix_unref(di);
1983             ofproto->ipfix = NULL;
1984         }
1985     }
1986     return 0;
1987 }
1988
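/* Configures 802.1ag CFM on 'ofport_' according to 's', or disables CFM if
 * 's' is NULL.  Returns 0 on success, otherwise EINVAL (in which case CFM is
 * left disabled on the port). */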
1989 static int
1990 set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
1991 {
1992     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1993     int error;
1994
1995     if (!s) {
1996         error = 0;
1997     } else {
1998         if (!ofport->cfm) {
1999             struct ofproto_dpif *ofproto;
2000
2001             ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2002             ofproto->backer->need_revalidate = REV_RECONFIGURE;
2003             ofport->cfm = cfm_create(ofport->up.netdev);
2004         }
2005
2006         if (cfm_configure(ofport->cfm, s)) {
2007             return 0;
2008         }
2009
2010         error = EINVAL;
2011     }
2012     cfm_unref(ofport->cfm);
2013     ofport->cfm = NULL;
2014     return error;
2015 }
2016
2017 static bool
2018 get_cfm_status(const struct ofport *ofport_,
2019                struct ofproto_cfm_status *status)
2020 {
2021     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2022
2023     if (ofport->cfm) {
2024         status->faults = cfm_get_fault(ofport->cfm);
2025         status->remote_opstate = cfm_get_opup(ofport->cfm);
2026         status->health = cfm_get_health(ofport->cfm);
2027         cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps);
2028         return true;
2029     } else {
2030         return false;
2031     }
2032 }
2033
2034 static int
2035 set_bfd(struct ofport *ofport_, const struct smap *cfg)
2036 {
2037     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
2038     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2039     struct bfd *old;
2040
2041     old = ofport->bfd;
2042     ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev), cfg);
2043     if (ofport->bfd != old) {
2044         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2045     }
2046
2047     return 0;
2048 }
2049
2050 static int
2051 get_bfd_status(struct ofport *ofport_, struct smap *smap)
2052 {
2053     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2054
2055     if (ofport->bfd) {
2056         bfd_get_status(ofport->bfd, smap);
2057         return 0;
2058     } else {
2059         return ENOENT;
2060     }
2061 }
2062 \f
2063 /* Spanning Tree. */
2064
2065 static void
2066 send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
2067 {
2068     struct ofproto_dpif *ofproto = ofproto_;
2069     struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
2070     struct ofport_dpif *ofport;
2071
2072     ofport = stp_port_get_aux(sp);
2073     if (!ofport) {
2074         VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
2075                      ofproto->up.name, port_num);
2076     } else {
2077         struct eth_header *eth = pkt->l2;
2078
2079         netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
2080         if (eth_addr_is_zero(eth->eth_src)) {
2081             VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
2082                          "with unknown MAC", ofproto->up.name, port_num);
2083         } else {
2084             send_packet(ofport, pkt);
2085         }
2086     }
2087     ofpbuf_delete(pkt);
2088 }
2089
2090 /* Configures STP on 'ofproto_' using the settings defined in 's'. */
2091 static int
2092 set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
2093 {
2094     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2095
2096     /* Only revalidate flows if the configuration changed. */
2097     if (!s != !ofproto->stp) {
2098         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2099     }
2100
2101     if (s) {
2102         if (!ofproto->stp) {
2103             ofproto->stp = stp_create(ofproto_->name, s->system_id,
2104                                       send_bpdu_cb, ofproto);
2105             ofproto->stp_last_tick = time_msec();
2106         }
2107
2108         stp_set_bridge_id(ofproto->stp, s->system_id);
2109         stp_set_bridge_priority(ofproto->stp, s->priority);
2110         stp_set_hello_time(ofproto->stp, s->hello_time);
2111         stp_set_max_age(ofproto->stp, s->max_age);
2112         stp_set_forward_delay(ofproto->stp, s->fwd_delay);
2113     } else {
2114         struct ofport *ofport;
2115
2116         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2117             set_stp_port(ofport, NULL);
2118         }
2119
2120         stp_unref(ofproto->stp);
2121         ofproto->stp = NULL;
2122     }
2123
2124     return 0;
2125 }
2126
2127 static int
2128 get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
2129 {
2130     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2131
2132     if (ofproto->stp) {
2133         s->enabled = true;
2134         s->bridge_id = stp_get_bridge_id(ofproto->stp);
2135         s->designated_root = stp_get_designated_root(ofproto->stp);
2136         s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
2137     } else {
2138         s->enabled = false;
2139     }
2140
2141     return 0;
2142 }
2143
2144 static void
2145 update_stp_port_state(struct ofport_dpif *ofport)
2146 {
2147     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2148     enum stp_state state;
2149
2150     /* Figure out new state. */
2151     state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)
2152                              : STP_DISABLED;
2153
2154     /* Update state. */
2155     if (ofport->stp_state != state) {
2156         enum ofputil_port_state of_state;
2157         bool fwd_change;
2158
2159         VLOG_DBG_RL(&rl, "port %s: STP state changed from %s to %s",
2160                     netdev_get_name(ofport->up.netdev),
2161                     stp_state_name(ofport->stp_state),
2162                     stp_state_name(state));
2163         if (stp_learn_in_state(ofport->stp_state)
2164                 != stp_learn_in_state(state)) {
2165             /* XXX Learning action flows should also be flushed. */
2166             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2167             mac_learning_flush(ofproto->ml);
2168             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2169         }
2170         fwd_change = stp_forward_in_state(ofport->stp_state)
2171                         != stp_forward_in_state(state);
2172
2173         ofproto->backer->need_revalidate = REV_STP;
2174         ofport->stp_state = state;
2175         ofport->stp_state_entered = time_msec();
2176
2177         if (fwd_change && ofport->bundle) {
2178             bundle_update(ofport->bundle);
2179         }
2180
2181         /* Update the STP state bits in the OpenFlow port description. */
2182         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2183         of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
2184                      : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
2185                      : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2186                      : state == STP_BLOCKING ?  OFPUTIL_PS_STP_BLOCK
2187                      : 0);
2188         ofproto_port_set_state(&ofport->up, of_state);
2189     }
2190 }
2191
2192 /* Configures STP on 'ofport_' using the settings defined in 's'.  The
2193  * caller is responsible for assigning STP port numbers and ensuring
2194  * there are no duplicates. */
2195 static int
2196 set_stp_port(struct ofport *ofport_,
2197              const struct ofproto_port_stp_settings *s)
2198 {
2199     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2200     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2201     struct stp_port *sp = ofport->stp_port;
2202
2203     if (!s || !s->enable) {
2204         if (sp) {
2205             ofport->stp_port = NULL;
2206             stp_port_disable(sp);
2207             update_stp_port_state(ofport);
2208         }
2209         return 0;
2210     } else if (sp && stp_port_no(sp) != s->port_num
2211             && ofport == stp_port_get_aux(sp)) {
2212         /* The port-id changed, so disable the old one if it's not
2213          * already in use by another port. */
2214         stp_port_disable(sp);
2215     }
2216
2217     sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);
2218     stp_port_enable(sp);
2219
2220     stp_port_set_aux(sp, ofport);
2221     stp_port_set_priority(sp, s->priority);
2222     stp_port_set_path_cost(sp, s->path_cost);
2223
2224     update_stp_port_state(ofport);
2225
2226     return 0;
2227 }
2228
2229 static int
2230 get_stp_port_status(struct ofport *ofport_,
2231                     struct ofproto_port_stp_status *s)
2232 {
2233     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2234     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2235     struct stp_port *sp = ofport->stp_port;
2236
2237     if (!ofproto->stp || !sp) {
2238         s->enabled = false;
2239         return 0;
2240     }
2241
2242     s->enabled = true;
2243     s->port_id = stp_port_get_id(sp);
2244     s->state = stp_port_get_state(sp);
2245     s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000;
2246     s->role = stp_port_get_role(sp);
2247     stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count);
2248
2249     return 0;
2250 }
2251
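/* Ticks 'ofproto''s STP state machine with the elapsed wall-clock time,
 * updates the state of any ports that STP reports as changed, and flushes
 * MAC learning when STP requests an FDB flush. */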
2252 static void
2253 stp_run(struct ofproto_dpif *ofproto)
2254 {
2255     if (ofproto->stp) {
2256         long long int now = time_msec();
2257         long long int elapsed = now - ofproto->stp_last_tick;
2258         struct stp_port *sp;
2259
2260         if (elapsed > 0) {
2261             stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
2262             ofproto->stp_last_tick = now;
2263         }
2264         while (stp_get_changed_port(ofproto->stp, &sp)) {
2265             struct ofport_dpif *ofport = stp_port_get_aux(sp);
2266
2267             if (ofport) {
2268                 update_stp_port_state(ofport);
2269             }
2270         }
2271
2272         if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
2273             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2274             mac_learning_flush(ofproto->ml);
2275             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2276         }
2277     }
2278 }
2279
2280 static void
2281 stp_wait(struct ofproto_dpif *ofproto)
2282 {
2283     if (ofproto->stp) {
2284         poll_timer_wait(1000);
2285     }
2286 }
2287 \f
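/* Configures the queue-to-DSCP mappings for 'ofport_', forcing revalidation
 * if they changed. */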
2288 static int
2289 set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
2290            size_t n_qdscp)
2291 {
2292     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2293     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2294
2295     if (ofport->n_qdscp != n_qdscp
2296         || (n_qdscp && memcmp(ofport->qdscp, qdscp,
2297                               n_qdscp * sizeof *qdscp))) {
2298         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2299         free(ofport->qdscp);
2300         ofport->qdscp = n_qdscp
2301             ? xmemdup(qdscp, n_qdscp * sizeof *qdscp)
2302             : NULL;
2303         ofport->n_qdscp = n_qdscp;
2304     }
2305
2306     return 0;
2307 }
2308 \f
2309 /* Bundles. */
2310
2311 /* Expires all MAC learning entries associated with 'bundle' and forces its
2312  * ofproto to revalidate every flow.
2313  *
2314  * Normally MAC learning entries are removed only from the ofproto associated
2315  * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
2316  * are removed from every ofproto.  When patch ports and SLB bonds are in
2317  * use, a VM migration happens, and the gratuitous ARPs are somehow lost, this
2318  * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate
2319  * with the host from which it migrated. */
2320 static void
2321 bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
2322 {
2323     struct ofproto_dpif *ofproto = bundle->ofproto;
2324     struct mac_learning *ml = ofproto->ml;
2325     struct mac_entry *mac, *next_mac;
2326
2327     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2328     ovs_rwlock_wrlock(&ml->rwlock);
2329     LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
2330         if (mac->port.p == bundle) {
2331             if (all_ofprotos) {
2332                 struct ofproto_dpif *o;
2333
2334                 HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
2335                     if (o != ofproto) {
2336                         struct mac_entry *e;
2337
2338                         ovs_rwlock_wrlock(&o->ml->rwlock);
2339                         e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
2340                         if (e) {
2341                             mac_learning_expire(o->ml, e);
2342                         }
2343                         ovs_rwlock_unlock(&o->ml->rwlock);
2344                     }
2345                 }
2346             }
2347
2348             mac_learning_expire(ml, mac);
2349         }
2350     }
2351     ovs_rwlock_unlock(&ml->rwlock);
2352 }
2353
2354 static struct ofbundle *
2355 bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
2356 {
2357     struct ofbundle *bundle;
2358
2359     HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
2360                              &ofproto->bundles) {
2361         if (bundle->aux == aux) {
2362             return bundle;
2363         }
2364     }
2365     return NULL;
2366 }
2367
2368 static void
2369 bundle_update(struct ofbundle *bundle)
2370 {
2371     struct ofport_dpif *port;
2372
2373     bundle->floodable = true;
2374     LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2375         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2376             || !stp_forward_in_state(port->stp_state)) {
2377             bundle->floodable = false;
2378             break;
2379         }
2380     }
2381 }
2382
2383 static void
2384 bundle_del_port(struct ofport_dpif *port)
2385 {
2386     struct ofbundle *bundle = port->bundle;
2387
2388     bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2389
2390     list_remove(&port->bundle_node);
2391     port->bundle = NULL;
2392
2393     if (bundle->lacp) {
2394         lacp_slave_unregister(bundle->lacp, port);
2395     }
2396     if (bundle->bond) {
2397         bond_slave_unregister(bundle->bond, port);
2398     }
2399
2400     bundle_update(bundle);
2401 }
2402
2403 static bool
2404 bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
2405                 struct lacp_slave_settings *lacp)
2406 {
2407     struct ofport_dpif *port;
2408
2409     port = get_ofp_port(bundle->ofproto, ofp_port);
2410     if (!port) {
2411         return false;
2412     }
2413
2414     if (port->bundle != bundle) {
2415         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2416         if (port->bundle) {
2417             bundle_remove(&port->up);
2418         }
2419
2420         port->bundle = bundle;
2421         list_push_back(&bundle->ports, &port->bundle_node);
2422         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2423             || !stp_forward_in_state(port->stp_state)) {
2424             bundle->floodable = false;
2425         }
2426     }
2427     if (lacp) {
2428         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2429         lacp_slave_register(bundle->lacp, port, lacp);
2430     }
2431
2432     return true;
2433 }
2434
2435 static void
2436 bundle_destroy(struct ofbundle *bundle)
2437 {
2438     struct ofproto_dpif *ofproto;
2439     struct ofport_dpif *port, *next_port;
2440
2441     if (!bundle) {
2442         return;
2443     }
2444
2445     ofproto = bundle->ofproto;
2446     mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
2447
2448     ovs_rwlock_wrlock(&xlate_rwlock);
2449     xlate_bundle_remove(bundle);
2450     ovs_rwlock_unlock(&xlate_rwlock);
2451
2452     LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2453         bundle_del_port(port);
2454     }
2455
2456     bundle_flush_macs(bundle, true);
2457     hmap_remove(&ofproto->bundles, &bundle->hmap_node);
2458     free(bundle->name);
2459     free(bundle->trunks);
2460     lacp_unref(bundle->lacp);
2461     bond_unref(bundle->bond);
2462     free(bundle);
2463 }
2464
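/* Creates, updates, or (when 's' is NULL) destroys the bundle with client
 * handle 'aux' on 'ofproto_': configures its name, LACP, member ports, VLAN
 * mode, VLAN tag and trunks, and bonding, flushing MAC learning entries when
 * a change affects learning. */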
2465 static int
2466 bundle_set(struct ofproto *ofproto_, void *aux,
2467            const struct ofproto_bundle_settings *s)
2468 {
2469     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2470     bool need_flush = false;
2471     struct ofport_dpif *port;
2472     struct ofbundle *bundle;
2473     unsigned long *trunks;
2474     int vlan;
2475     size_t i;
2476     bool ok;
2477
2478     if (!s) {
2479         bundle_destroy(bundle_lookup(ofproto, aux));
2480         return 0;
2481     }
2482
2483     ovs_assert(s->n_slaves == 1 || s->bond != NULL);
2484     ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL));
2485
2486     bundle = bundle_lookup(ofproto, aux);
2487     if (!bundle) {
2488         bundle = xmalloc(sizeof *bundle);
2489
2490         bundle->ofproto = ofproto;
2491         hmap_insert(&ofproto->bundles, &bundle->hmap_node,
2492                     hash_pointer(aux, 0));
2493         bundle->aux = aux;
2494         bundle->name = NULL;
2495
2496         list_init(&bundle->ports);
2497         bundle->vlan_mode = PORT_VLAN_TRUNK;
2498         bundle->vlan = -1;
2499         bundle->trunks = NULL;
2500         bundle->use_priority_tags = s->use_priority_tags;
2501         bundle->lacp = NULL;
2502         bundle->bond = NULL;
2503
2504         bundle->floodable = true;
2505         mbridge_register_bundle(ofproto->mbridge, bundle);
2506     }
2507
2508     if (!bundle->name || strcmp(s->name, bundle->name)) {
2509         free(bundle->name);
2510         bundle->name = xstrdup(s->name);
2511     }
2512
2513     /* LACP. */
2514     if (s->lacp) {
2515         if (!bundle->lacp) {
2516             ofproto->backer->need_revalidate = REV_RECONFIGURE;
2517             bundle->lacp = lacp_create();
2518         }
2519         lacp_configure(bundle->lacp, s->lacp);
2520     } else {
2521         lacp_unref(bundle->lacp);
2522         bundle->lacp = NULL;
2523     }
2524
2525     /* Update set of ports. */
2526     ok = true;
2527     for (i = 0; i < s->n_slaves; i++) {
2528         if (!bundle_add_port(bundle, s->slaves[i],
2529                              s->lacp ? &s->lacp_slaves[i] : NULL)) {
2530             ok = false;
2531         }
2532     }
2533     if (!ok || list_size(&bundle->ports) != s->n_slaves) {
2534         struct ofport_dpif *next_port;
2535
2536         LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2537             for (i = 0; i < s->n_slaves; i++) {
2538                 if (s->slaves[i] == port->up.ofp_port) {
2539                     goto found;
2540                 }
2541             }
2542
2543             bundle_del_port(port);
2544         found: ;
2545         }
2546     }
2547     ovs_assert(list_size(&bundle->ports) <= s->n_slaves);
2548
2549     if (list_is_empty(&bundle->ports)) {
2550         bundle_destroy(bundle);
2551         return EINVAL;
2552     }
2553
2554     /* Set VLAN tagging mode. */
2555     if (s->vlan_mode != bundle->vlan_mode
2556         || s->use_priority_tags != bundle->use_priority_tags) {
2557         bundle->vlan_mode = s->vlan_mode;
2558         bundle->use_priority_tags = s->use_priority_tags;
2559         need_flush = true;
2560     }
2561
2562     /* Set VLAN tag. */
2563     vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1
2564             : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan
2565             : 0);
2566     if (vlan != bundle->vlan) {
2567         bundle->vlan = vlan;
2568         need_flush = true;
2569     }
2570
2571     /* Get trunked VLANs. */
2572     switch (s->vlan_mode) {
2573     case PORT_VLAN_ACCESS:
2574         trunks = NULL;
2575         break;
2576
2577     case PORT_VLAN_TRUNK:
2578         trunks = CONST_CAST(unsigned long *, s->trunks);
2579         break;
2580
2581     case PORT_VLAN_NATIVE_UNTAGGED:
2582     case PORT_VLAN_NATIVE_TAGGED:
2583         if (vlan != 0 && (!s->trunks
2584                           || !bitmap_is_set(s->trunks, vlan)
2585                           || bitmap_is_set(s->trunks, 0))) {
2586             /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
2587             if (s->trunks) {
2588                 trunks = bitmap_clone(s->trunks, 4096);
2589             } else {
2590                 trunks = bitmap_allocate1(4096);
2591             }
2592             bitmap_set1(trunks, vlan);
2593             bitmap_set0(trunks, 0);
2594         } else {
2595             trunks = CONST_CAST(unsigned long *, s->trunks);
2596         }
2597         break;
2598
2599     default:
2600         NOT_REACHED();
2601     }
2602     if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
2603         free(bundle->trunks);
2604         if (trunks == s->trunks) {
2605             bundle->trunks = vlan_bitmap_clone(trunks);
2606         } else {
2607             bundle->trunks = trunks;
2608             trunks = NULL;
2609         }
2610         need_flush = true;
2611     }
2612     if (trunks != s->trunks) {
2613         free(trunks);
2614     }
2615
2616     /* Bonding. */
2617     if (!list_is_short(&bundle->ports)) {
2618         bundle->ofproto->has_bonded_bundles = true;
2619         if (bundle->bond) {
2620             if (bond_reconfigure(bundle->bond, s->bond)) {
2621                 ofproto->backer->need_revalidate = REV_RECONFIGURE;
2622             }
2623         } else {
2624             bundle->bond = bond_create(s->bond);
2625             ofproto->backer->need_revalidate = REV_RECONFIGURE;
2626         }
2627
2628         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2629             bond_slave_register(bundle->bond, port, port->up.netdev);
2630         }
2631     } else {
2632         bond_unref(bundle->bond);
2633         bundle->bond = NULL;
2634     }
2635
2636     /* If we changed something that would affect MAC learning, un-learn
2637      * everything on this port and force flow revalidation. */
2638     if (need_flush) {
2639         bundle_flush_macs(bundle, false);
2640     }
2641
2642     return 0;
2643 }
2644
2645 static void
2646 bundle_remove(struct ofport *port_)
2647 {
2648     struct ofport_dpif *port = ofport_dpif_cast(port_);
2649     struct ofbundle *bundle = port->bundle;
2650
2651     if (bundle) {
2652         bundle_del_port(port);
2653         if (list_is_empty(&bundle->ports)) {
2654             bundle_destroy(bundle);
2655         } else if (list_is_short(&bundle->ports)) {
2656             bond_unref(bundle->bond);
2657             bundle->bond = NULL;
2658         }
2659     }
2660 }
2661
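/* Callback for lacp_run(): transmits the 'pdu_size'-byte LACP PDU in 'pdu'
 * on the port represented by 'port_'. */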
2662 static void
2663 send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
2664 {
2665     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
2666     struct ofport_dpif *port = port_;
2667     uint8_t ea[ETH_ADDR_LEN];
2668     int error;
2669
2670     error = netdev_get_etheraddr(port->up.netdev, ea);
2671     if (!error) {
2672         struct ofpbuf packet;
2673         void *packet_pdu;
2674
2675         ofpbuf_init(&packet, 0);
2676         packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
2677                                  pdu_size);
2678         memcpy(packet_pdu, pdu, pdu_size);
2679
2680         send_packet(port, &packet);
2681         ofpbuf_uninit(&packet);
2682     } else {
2683         VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
2684                     "%s (%s)", port->bundle->name,
2685                     netdev_get_name(port->up.netdev), ovs_strerror(error));
2686     }
2687 }
2688
2689 static void
2690 bundle_send_learning_packets(struct ofbundle *bundle)
2691 {
2692     struct ofproto_dpif *ofproto = bundle->ofproto;
2693     int error, n_packets, n_errors;
2694     struct mac_entry *e;
2695
2696     error = n_packets = n_errors = 0;
2697     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
2698     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
2699         if (e->port.p != bundle) {
2700             struct ofpbuf *learning_packet;
2701             struct ofport_dpif *port;
2702             void *port_void;
2703             int ret;
2704
2705             /* The assignment to "port" is unnecessary but makes "grep"ing for
2706              * struct ofport_dpif more effective. */
2707             learning_packet = bond_compose_learning_packet(bundle->bond,
2708                                                            e->mac, e->vlan,
2709                                                            &port_void);
2710             port = port_void;
2711             ret = send_packet(port, learning_packet);
2712             ofpbuf_delete(learning_packet);
2713             if (ret) {
2714                 error = ret;
2715                 n_errors++;
2716             }
2717             n_packets++;
2718         }
2719     }
2720     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2721
2722     if (n_errors) {
2723         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2724         VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
2725                      "packets, last error was: %s",
2726                      bundle->name, n_errors, n_packets, ovs_strerror(error));
2727     } else {
2728         VLOG_DBG("bond %s: sent %d gratuitous learning packets",
2729                  bundle->name, n_packets);
2730     }
2731 }
2732
2733 static void
2734 bundle_run(struct ofbundle *bundle)
2735 {
2736     if (bundle->lacp) {
2737         lacp_run(bundle->lacp, send_pdu_cb);
2738     }
2739     if (bundle->bond) {
2740         struct ofport_dpif *port;
2741
2742         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2743             bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
2744         }
2745
2746         if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
2747             bundle->ofproto->backer->need_revalidate = REV_BOND;
2748         }
2749
2750         if (bond_should_send_learning_packets(bundle->bond)) {
2751             bundle_send_learning_packets(bundle);
2752         }
2753     }
2754 }
2755
2756 static void
2757 bundle_wait(struct ofbundle *bundle)
2758 {
2759     if (bundle->lacp) {
2760         lacp_wait(bundle->lacp);
2761     }
2762     if (bundle->bond) {
2763         bond_wait(bundle->bond);
2764     }
2765 }
2766 \f
2767 /* Mirrors. */
2768
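/* Creates or updates the mirror with client handle 'aux' according to 's',
 * or destroys it if 's' is NULL.  The bundle handles in 's' are translated
 * to ofbundle pointers before the configuration is handed to the mbridge
 * layer. */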
2769 static int
2770 mirror_set__(struct ofproto *ofproto_, void *aux,
2771              const struct ofproto_mirror_settings *s)
2772 {
2773     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2774     struct ofbundle **srcs, **dsts;
2775     int error;
2776     size_t i;
2777
2778     if (!s) {
2779         mirror_destroy(ofproto->mbridge, aux);
2780         return 0;
2781     }
2782
2783     srcs = xmalloc(s->n_srcs * sizeof *srcs);
2784     dsts = xmalloc(s->n_dsts * sizeof *dsts);
2785
2786     for (i = 0; i < s->n_srcs; i++) {
2787         srcs[i] = bundle_lookup(ofproto, s->srcs[i]);
2788     }
2789
2790     for (i = 0; i < s->n_dsts; i++) {
2791         dsts[i] = bundle_lookup(ofproto, s->dsts[i]);
2792     }
2793
2794     error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts,
2795                        s->n_dsts, s->src_vlans,
2796                        bundle_lookup(ofproto, s->out_bundle), s->out_vlan);
2797     free(srcs);
2798     free(dsts);
2799     return error;
2800 }
2801
2802 static int
2803 mirror_get_stats__(struct ofproto *ofproto, void *aux,
2804                    uint64_t *packets, uint64_t *bytes)
2805 {
2806     push_all_stats();
2807     return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets,
2808                             bytes);
2809 }
2810
2811 static int
2812 set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
2813 {
2814     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2815     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2816     if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
2817         mac_learning_flush(ofproto->ml);
2818     }
2819     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2820     return 0;
2821 }
2822
2823 static bool
2824 is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
2825 {
2826     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2827     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
2828     return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0;
2829 }
2830
2831 static void
2832 forward_bpdu_changed(struct ofproto *ofproto_)
2833 {
2834     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2835     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2836 }
2837
2838 static void
2839 set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
2840                      size_t max_entries)
2841 {
2842     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2843     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2844     mac_learning_set_idle_time(ofproto->ml, idle_time);
2845     mac_learning_set_max_entries(ofproto->ml, max_entries);
2846     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2847 }
2848 \f
2849 /* Ports. */
2850
2851 static struct ofport_dpif *
2852 get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
2853 {
2854     struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
2855     return ofport ? ofport_dpif_cast(ofport) : NULL;
2856 }
2857
2858 static struct ofport_dpif *
2859 get_odp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
2860 {
2861     struct ofport_dpif *port = odp_port_to_ofport(ofproto->backer, odp_port);
2862     return port && &ofproto->up == port->up.ofproto ? port : NULL;
2863 }
2864
2865 static void
2866 ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
2867                             struct ofproto_port *ofproto_port,
2868                             struct dpif_port *dpif_port)
2869 {
2870     ofproto_port->name = dpif_port->name;
2871     ofproto_port->type = dpif_port->type;
2872     ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
2873 }
2874
2875 static void
2876 ofport_update_peer(struct ofport_dpif *ofport)
2877 {
2878     const struct ofproto_dpif *ofproto;
2879     struct dpif_backer *backer;
2880     char *peer_name;
2881
2882     if (!netdev_vport_is_patch(ofport->up.netdev)) {
2883         return;
2884     }
2885
2886     backer = ofproto_dpif_cast(ofport->up.ofproto)->backer;
2887     backer->need_revalidate = REV_RECONFIGURE;
2888
2889     if (ofport->peer) {
2890         ofport->peer->peer = NULL;
2891         ofport->peer = NULL;
2892     }
2893
2894     peer_name = netdev_vport_patch_peer(ofport->up.netdev);
2895     if (!peer_name) {
2896         return;
2897     }
2898
2899     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
2900         struct ofport *peer_ofport;
2901         struct ofport_dpif *peer;
2902         char *peer_peer;
2903
2904         if (ofproto->backer != backer) {
2905             continue;
2906         }
2907
2908         peer_ofport = shash_find_data(&ofproto->up.port_by_name, peer_name);
2909         if (!peer_ofport) {
2910             continue;
2911         }
2912
2913         peer = ofport_dpif_cast(peer_ofport);
2914         peer_peer = netdev_vport_patch_peer(peer->up.netdev);
2915         if (peer_peer && !strcmp(netdev_get_name(ofport->up.netdev),
2916                                  peer_peer)) {
2917             ofport->peer = peer;
2918             ofport->peer->peer = ofport;
2919         }
2920         free(peer_peer);
2921
2922         break;
2923     }
2924     free(peer_name);
2925 }
2926
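/* Sends any CCM or BFD packets that are currently due on 'ofport'.  Called
 * from run_fast() for every port and again from port_run(). */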
2927 static void
2928 port_run_fast(struct ofport_dpif *ofport)
2929 {
2930     if (ofport->cfm && cfm_should_send_ccm(ofport->cfm)) {
2931         struct ofpbuf packet;
2932
2933         ofpbuf_init(&packet, 0);
2934         cfm_compose_ccm(ofport->cfm, &packet, ofport->up.pp.hw_addr);
2935         send_packet(ofport, &packet);
2936         ofpbuf_uninit(&packet);
2937     }
2938
2939     if (ofport->bfd && bfd_should_send_packet(ofport->bfd)) {
2940         struct ofpbuf packet;
2941
2942         ofpbuf_init(&packet, 0);
2943         bfd_put_packet(ofport->bfd, &packet, ofport->up.pp.hw_addr);
2944         send_packet(ofport, &packet);
2945         ofpbuf_uninit(&packet);
2946     }
2947 }
2948
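/* Slow-path periodic maintenance for 'ofport': runs the CFM, BFD, and LACP
 * state machines and recomputes whether the port may be enabled, forcing
 * revalidation when that changes. */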
2949 static void
2950 port_run(struct ofport_dpif *ofport)
2951 {
2952     long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
2953     bool carrier_changed = carrier_seq != ofport->carrier_seq;
2954     bool enable = netdev_get_carrier(ofport->up.netdev);
2955
2956     ofport->carrier_seq = carrier_seq;
2957
2958     port_run_fast(ofport);
2959
2960     if (ofport->cfm) {
2961         int cfm_opup = cfm_get_opup(ofport->cfm);
2962
2963         cfm_run(ofport->cfm);
2964         enable = enable && !cfm_get_fault(ofport->cfm);
2965
2966         if (cfm_opup >= 0) {
2967             enable = enable && cfm_opup;
2968         }
2969     }
2970
2971     if (ofport->bfd) {
2972         bfd_run(ofport->bfd);
2973         enable = enable && bfd_forwarding(ofport->bfd);
2974     }
2975
2976     if (ofport->bundle) {
2977         enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
2978         if (carrier_changed) {
2979             lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
2980         }
2981     }
2982
2983     if (ofport->may_enable != enable) {
2984         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2985         ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
2986     }
2987
2988     ofport->may_enable = enable;
2989 }
2990
2991 static void
2992 port_wait(struct ofport_dpif *ofport)
2993 {
2994     if (ofport->cfm) {
2995         cfm_wait(ofport->cfm);
2996     }
2997
2998     if (ofport->bfd) {
2999         bfd_wait(ofport->bfd);
3000     }
3001 }
3002
3003 static int
3004 port_query_by_name(const struct ofproto *ofproto_, const char *devname,
3005                    struct ofproto_port *ofproto_port)
3006 {
3007     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3008     struct dpif_port dpif_port;
3009     int error;
3010
3011     if (sset_contains(&ofproto->ghost_ports, devname)) {
3012         const char *type = netdev_get_type_from_name(devname);
3013
3014         /* We may be called before ofproto->up.port_by_name is populated with
3015          * the appropriate ofport.  For this reason, we must get the name and
3016          * type from the netdev layer directly. */
3017         if (type) {
3018             const struct ofport *ofport;
3019
3020             ofport = shash_find_data(&ofproto->up.port_by_name, devname);
3021             ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
3022             ofproto_port->name = xstrdup(devname);
3023             ofproto_port->type = xstrdup(type);
3024             return 0;
3025         }
3026         return ENODEV;
3027     }
3028
3029     if (!sset_contains(&ofproto->ports, devname)) {
3030         return ENODEV;
3031     }
3032     error = dpif_port_query_by_name(ofproto->backer->dpif,
3033                                     devname, &dpif_port);
3034     if (!error) {
3035         ofproto_port_from_dpif_port(ofproto, ofproto_port, &dpif_port);
3036     }
3037     return error;
3038 }
3039
3040 static int
3041 port_add(struct ofproto *ofproto_, struct netdev *netdev)
3042 {
3043     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3044     const char *devname = netdev_get_name(netdev);
3045     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
3046     const char *dp_port_name;
3047
3048     if (netdev_vport_is_patch(netdev)) {
3049         sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
3050         return 0;
3051     }
3052
3053     dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
3054     if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
3055         odp_port_t port_no = ODPP_NONE;
3056         int error;
3057
3058         error = dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
3059         if (error) {
3060             return error;
3061         }
3062         if (netdev_get_tunnel_config(netdev)) {
3063             simap_put(&ofproto->backer->tnl_backers,
3064                       dp_port_name, odp_to_u32(port_no));
3065         }
3066     }
3067
3068     if (netdev_get_tunnel_config(netdev)) {
3069         sset_add(&ofproto->ghost_ports, devname);
3070     } else {
3071         sset_add(&ofproto->ports, devname);
3072     }
3073     return 0;
3074 }
3075
3076 static int
3077 port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
3078 {
3079     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3080     struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
3081     int error = 0;
3082
3083     if (!ofport) {
3084         return 0;
3085     }
3086
3087     sset_find_and_delete(&ofproto->ghost_ports,
3088                          netdev_get_name(ofport->up.netdev));
3089     ofproto->backer->need_revalidate = REV_RECONFIGURE;
3090     if (!ofport->is_tunnel) {
3091         error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port);
3092         if (!error) {
3093             /* The caller is going to close ofport->up.netdev.  If this is a
3094              * bonded port, then the bond is using that netdev, so remove it
3095              * from the bond.  The client will need to reconfigure everything
3096              * after deleting ports, at which point the slave will be re-added. */
3097             bundle_remove(&ofport->up);
3098         }
3099     }
3100     return error;
3101 }
3102
3103 static int
3104 port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats)
3105 {
3106     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3107     int error;
3108
3109     push_all_stats();
3110
3111     error = netdev_get_stats(ofport->up.netdev, stats);
3112
3113     if (!error && ofport_->ofp_port == OFPP_LOCAL) {
3114         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3115
3116         /* ofproto->stats.tx_packets represents packets that we created
3117          * internally and sent to some port (e.g. packets sent with
3118          * send_packet()).  Account for them as if they had come from
3119          * OFPP_LOCAL and got forwarded. */
3120
3121         if (stats->rx_packets != UINT64_MAX) {
3122             stats->rx_packets += ofproto->stats.tx_packets;
3123         }
3124
3125         if (stats->rx_bytes != UINT64_MAX) {
3126             stats->rx_bytes += ofproto->stats.tx_bytes;
3127         }
3128
3129         /* ofproto->stats.rx_packets represents packets that were received on
3130          * some port and we processed internally and dropped (e.g. STP).
3131          * Account for them as if they had been forwarded to OFPP_LOCAL. */
3132
3133         if (stats->tx_packets != UINT64_MAX) {
3134             stats->tx_packets += ofproto->stats.rx_packets;
3135         }
3136
3137         if (stats->tx_bytes != UINT64_MAX) {
3138             stats->tx_bytes += ofproto->stats.rx_bytes;
3139         }
3140     }
3141
3142     return error;
3143 }
3144
3145 struct port_dump_state {
3146     uint32_t bucket;
3147     uint32_t offset;
3148     bool ghost;
3149
3150     struct ofproto_port port;
3151     bool has_port;
3152 };
3153
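/* Port iteration is a three-call protocol: port_dump_start() allocates
 * cursor state, port_dump_next() yields each port (datapath ports first,
 * then ghost ports), and port_dump_done() frees the state.  A minimal
 * sketch of a caller (illustrative only; real callers go through the
 * ofproto provider interface):
 *
 *     void *state;
 *     struct ofproto_port port;
 *
 *     port_dump_start(ofproto, &state);
 *     while (!port_dump_next(ofproto, state, &port)) {
 *         printf("%s (%s)\n", port.name, port.type);
 *     }
 *     port_dump_done(ofproto, state);
 */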
3154 static int
3155 port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep)
3156 {
3157     *statep = xzalloc(sizeof(struct port_dump_state));
3158     return 0;
3159 }
3160
3161 static int
3162 port_dump_next(const struct ofproto *ofproto_, void *state_,
3163                struct ofproto_port *port)
3164 {
3165     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3166     struct port_dump_state *state = state_;
3167     const struct sset *sset;
3168     struct sset_node *node;
3169
3170     if (state->has_port) {
3171         ofproto_port_destroy(&state->port);
3172         state->has_port = false;
3173     }
3174     sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports;
3175     while ((node = sset_at_position(sset, &state->bucket, &state->offset))) {
3176         int error;
3177
3178         error = port_query_by_name(ofproto_, node->name, &state->port);
3179         if (!error) {
3180             *port = state->port;
3181             state->has_port = true;
3182             return 0;
3183         } else if (error != ENODEV) {
3184             return error;
3185         }
3186     }
3187
3188     if (!state->ghost) {
3189         state->ghost = true;
3190         state->bucket = 0;
3191         state->offset = 0;
3192         return port_dump_next(ofproto_, state_, port);
3193     }
3194
3195     return EOF;
3196 }
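
/* The dump makes two passes: first over ofproto->ports, then (by setting
 * 'ghost' and resetting the cursor) over ofproto->ghost_ports, the ports
 * such as tunnels that have no ordinary datapath port of their own.  Callers
 * see both sets through a single iteration. */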
3197
3198 static int
3199 port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
3200 {
3201     struct port_dump_state *state = state_;
3202
3203     if (state->has_port) {
3204         ofproto_port_destroy(&state->port);
3205     }
3206     free(state);
3207     return 0;
3208 }
3209
3210 static int
3211 port_poll(const struct ofproto *ofproto_, char **devnamep)
3212 {
3213     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3214
3215     if (ofproto->port_poll_errno) {
3216         int error = ofproto->port_poll_errno;
3217         ofproto->port_poll_errno = 0;
3218         return error;
3219     }
3220
3221     if (sset_is_empty(&ofproto->port_poll_set)) {
3222         return EAGAIN;
3223     }
3224
3225     *devnamep = sset_pop(&ofproto->port_poll_set);
3226     return 0;
3227 }
3228
3229 static void
3230 port_poll_wait(const struct ofproto *ofproto_)
3231 {
3232     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3233     dpif_port_poll_wait(ofproto->backer->dpif);
3234 }
3235
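/* Returns -1 if LACP is not configured on 'ofport_''s bundle; otherwise
 * returns whether the LACP information received from the port's partner is
 * up to date (nonzero) or not (zero). */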
3236 static int
3237 port_is_lacp_current(const struct ofport *ofport_)
3238 {
3239     const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3240     return (ofport->bundle && ofport->bundle->lacp
3241             ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
3242             : -1);
3243 }
3244 \f
3245 /* Upcall handling. */
3246
3247 struct flow_miss_op {
3248     struct dpif_op dpif_op;
3249
3250     uint64_t slow_stub[128 / 8]; /* Buffer for compose_slow_path() */
3251     struct xlate_out xout;
3252     bool xout_garbage;           /* Does 'xout' need xlate_out_uninit()? */
3253
3254     struct ofpbuf mask;          /* Flow mask for "put" ops. */
3255     struct odputil_keybuf maskbuf;
3256
3257     /* If this is a "put" op, then a pointer to the subfacet that should
3258      * be marked as uninstalled if the operation fails. */
3259     struct subfacet *subfacet;
3260 };
3261
3262 /* Figures out whether a flow that missed in 'miss->ofproto', whose details
3263  * are in 'miss', is likely to be worth tracking in detail in userspace
3264  * and (usually) installing a datapath flow.  The answer is usually "yes" (a
3265  * return value of true).  However, for short flows the cost of bookkeeping is
3266  * much higher than the benefits, so when the datapath holds a large number of
3267  * flows we impose some heuristics to decide which flows are likely to be worth
3268  * tracking. */
3269 static bool
3270 flow_miss_should_make_facet(struct flow_miss *miss)
3271 {
3272     struct dpif_backer *backer = miss->ofproto->backer;
3273     uint32_t hash;
3274
3275     switch (flow_miss_model) {
3276     case OFPROTO_HANDLE_MISS_AUTO:
3277         break;
3278     case OFPROTO_HANDLE_MISS_WITH_FACETS:
3279         return true;
3280     case OFPROTO_HANDLE_MISS_WITHOUT_FACETS:
3281         return false;
3282     }
3283
3284     if (!backer->governor) {
3285         size_t n_subfacets;
3286
3287         n_subfacets = hmap_count(&backer->subfacets);
3288         if (n_subfacets * 2 <= flow_eviction_threshold) {
3289             return true;
3290         }
3291
3292         backer->governor = governor_create();
3293     }
3294
3295     hash = flow_hash_in_wildcards(&miss->flow, &miss->xout.wc, 0);
3296     return governor_should_install_flow(backer->governor, hash,
3297                                         list_size(&miss->packets));
3298 }
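
/* For illustration, assuming a flow_eviction_threshold of 2500 (the knob is
 * configurable, so treat the number as an example only): facets are created
 * unconditionally while the backer holds at most 1250 subfacets.  Past that
 * point a governor is created and decides, per flow hash and packet count,
 * whether each flow is busy enough to deserve a facet. */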
3299
3300 /* Handles 'miss', which matches 'facet'.  May add any required datapath
3301  * operations to 'ops', incrementing '*n_ops' for each new op.
3302  *
3303  * All of the packets in 'miss' are considered to have arrived at time
3304  * 'miss->stats.used'.  This is really important only for new facets: if we
3305  * just called time_msec() here, then the new subfacet or its packets could
3306  * look (occasionally) as though they were used some time after the facet was
3307  * used.  That can make a one-packet flow look like it has a nonzero duration,
3308  * which looks odd in e.g. NetFlow statistics. */
3309 static void
3310 handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
3311                             struct flow_miss_op *ops, size_t *n_ops)
3312 {
3313     enum subfacet_path want_path;
3314     struct subfacet *subfacet;
3315
3316     facet->packet_count += miss->stats.n_packets;
3317     facet->prev_packet_count += miss->stats.n_packets;
3318     facet->byte_count += miss->stats.n_bytes;
3319     facet->prev_byte_count += miss->stats.n_bytes;
3320
3322     want_path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
3323
3324     /* Don't install the flow if it's the result of the "userspace"
3325      * action for an already installed facet.  This can occur when a
3326      * datapath flow with wildcards has a "userspace" action and flows
3327      * sent to userspace result in a different subfacet, which will then
3328      * be rejected as overlapping by the datapath. */
3329     if (miss->upcall_type == DPIF_UC_ACTION
3330         && !list_is_empty(&facet->subfacets)) {
3331         return;
3332     }
3333
3334     subfacet = subfacet_create(facet, miss);
3335     if (subfacet->path != want_path) {
3336         struct flow_miss_op *op = &ops[(*n_ops)++];
3337         struct dpif_flow_put *put = &op->dpif_op.u.flow_put;
3338
3339         subfacet->path = want_path;
3340
3341         ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf);
3342         if (enable_megaflows) {
3343             odp_flow_key_from_mask(&op->mask, &facet->xout.wc.masks,
3344                                    &miss->flow, UINT32_MAX);
3345         }
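        /* With megaflows disabled the mask stays empty; the dpif layer is
         * expected to treat a zero-length mask as an exact-match flow. */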
3346
3347         op->xout_garbage = false;
3348         op->dpif_op.type = DPIF_OP_FLOW_PUT;
3349         op->subfacet = subfacet;
3350         put->flags = DPIF_FP_CREATE;
3351         put->key = miss->key;
3352         put->key_len = miss->key_len;
3353         put->mask = op->mask.data;
3354         put->mask_len = op->mask.size;
3355
3356         if (want_path == SF_FAST_PATH) {
3357             put->actions = facet->xout.odp_actions.data;
3358             put->actions_len = facet->xout.odp_actions.size;
3359         } else {
3360             compose_slow_path(facet->ofproto, &miss->flow, facet->xout.slow,
3361                               op->slow_stub, sizeof op->slow_stub,
3362                               &put->actions, &put->actions_len);
3363         }
3364         put->stats = NULL;
3365     }
3366 }
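
/* Note on paths: SF_FAST_PATH installs the facet's translated actions so the
 * datapath can forward subsequent packets by itself, while SF_SLOW_PATH
 * installs actions built by compose_slow_path() that bounce every packet back
 * to userspace for special handling (CFM, BFD, LACP, STP, and similar). */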
3367
3368 /* Handles flow miss 'miss'.  May add any required datapath operations
3369  * to 'ops', incrementing '*n_ops' for each new op. */
3370 static void
3371 handle_flow_miss(struct flow_miss *miss, struct flow_miss_op *ops,
3372                  size_t *n_ops)
3373 {
3374     struct facet *facet;
3375
3376     miss->ofproto->n_missed += list_size(&miss->packets);
3377
3378     facet = facet_lookup_valid(miss->ofproto, &miss->flow);
3379     if (!facet) {
3380         /* There does not exist a bijection between 'struct flow' and datapath
3381          * flow keys with fitness ODP_FIT_TOO_LITTLE.  This breaks a fundamental
3382          * assumption used throughout the facet and subfacet handling code.
3383          * Since we have to handle these misses in userspace anyway, we simply
3384          * skip facet creation, avoiding the problem altogether. */
3385         if (miss->key_fitness == ODP_FIT_TOO_LITTLE
3386             || !flow_miss_should_make_facet(miss)) {
3387             return;
3388         }
3389
3390         facet = facet_create(miss);
3391     }
3392     handle_flow_miss_with_facet(miss, facet, ops, n_ops);
3393 }
3394
3395 static struct drop_key *
3396 drop_key_lookup(const struct dpif_backer *backer, const struct nlattr *key,
3397                 size_t key_len)
3398 {
3399     struct drop_key *drop_key;
3400
3401     HMAP_FOR_EACH_WITH_HASH (drop_key, hmap_node, hash_bytes(key, key_len, 0),
3402                              &backer->drop_keys) {
3403         if (drop_key->key_len == key_len
3404             && !memcmp(drop_key->key, key, key_len)) {
3405             return drop_key;
3406         }
3407     }
3408     return NULL;
3409 }
3410
3411 static void
3412 drop_key_clear(struct dpif_backer *backer)
3413 {
3414     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
3415     struct drop_key *drop_key, *next;
3416
3417     HMAP_FOR_EACH_SAFE (drop_key, next, hmap_node, &backer->drop_keys) {
3418         int error;
3419
3420         error = dpif_flow_del(backer->dpif, drop_key->key, drop_key->key_len,
3421                               NULL);
3422         if (error && !VLOG_DROP_WARN(&rl)) {
3423             struct ds ds = DS_EMPTY_INITIALIZER;
3424             odp_flow_key_format(drop_key->key, drop_key->key_len, &ds);
3425             VLOG_WARN("Failed to delete drop key (%s) (%s)",
3426                       ovs_strerror(error), ds_cstr(&ds));
3427             ds_destroy(&ds);
3428         }
3429
3430         hmap_remove(&backer->drop_keys, &drop_key->hmap_node);
3431         drop_key_destroy(drop_key);
3432     }
3433
3434     udpif_drop_key_clear(backer->udpif);
3435 }
3436
3437 static void
3438 handle_flow_misses(struct dpif_backer *backer, struct flow_miss_batch *fmb)
3439 {
3440     struct flow_miss_op flow_miss_ops[FLOW_MISS_MAX_BATCH];
3441     struct dpif_op *dpif_ops[FLOW_MISS_MAX_BATCH];
3442     struct flow_miss *miss;
3443     size_t n_ops, i;
3444
3445     /* Process each element in the to-do list, constructing the set of
3446      * operations to batch. */
3447     n_ops = 0;
3448     HMAP_FOR_EACH (miss, hmap_node, &fmb->misses) {
3449         handle_flow_miss(miss, flow_miss_ops, &n_ops);
3450     }
3451     ovs_assert(n_ops <= ARRAY_SIZE(flow_miss_ops));
3452
3453     /* Execute batch. */
3454     for (i = 0; i < n_ops; i++) {
3455         dpif_ops[i] = &flow_miss_ops[i].dpif_op;
3456     }
3457     dpif_operate(backer->dpif, dpif_ops, n_ops);
3458
3459     for (i = 0; i < n_ops; i++) {
3460         if (dpif_ops[i]->error != 0
3461             && flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT
3462             && flow_miss_ops[i].subfacet) {
3463             struct subfacet *subfacet = flow_miss_ops[i].subfacet;
3464
3465             COVERAGE_INC(subfacet_install_fail);
3466
3467             /* Zero out subfacet counters when installation failed but the
3468              * datapath reported hits.  This should not happen and
3469              * indicates a bug, since if the datapath flow exists, we
3470              * should not be attempting to create a new subfacet.  A
3471              * buggy datapath could trigger this, so just zero out the
3472              * counters and log an error. */
3473             if (subfacet->dp_packet_count || subfacet->dp_byte_count) {
3474                 VLOG_ERR_RL(&rl, "failed to install subfacet for which "
3475                             "datapath reported hits");
3476                 subfacet->dp_packet_count = subfacet->dp_byte_count = 0;
3477             }
3478
3479             subfacet->path = SF_NOT_INSTALLED;
3480         }
3481     }
3482 }
3483
3484 static void
3485 handle_sflow_upcall(struct dpif_backer *backer,
3486                     const struct dpif_upcall *upcall)
3487 {
3488     struct ofproto_dpif *ofproto;
3489     union user_action_cookie cookie;
3490     struct flow flow;
3491     odp_port_t odp_in_port;
3492
3493     if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
3494                       &flow, NULL, &ofproto, &odp_in_port)
3495         || !ofproto->sflow) {
3496         return;
3497     }
3498
3499     memset(&cookie, 0, sizeof cookie);
3500     memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.sflow);
3501     dpif_sflow_received(ofproto->sflow, upcall->packet, &flow,
3502                         odp_in_port, &cookie);
3503 }
3504
3505 static void
3506 handle_flow_sample_upcall(struct dpif_backer *backer,
3507                           const struct dpif_upcall *upcall)
3508 {
3509     struct ofproto_dpif *ofproto;
3510     union user_action_cookie cookie;
3511     struct flow flow;
3512
3513     if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
3514                       &flow, NULL, &ofproto, NULL)
3515         || !ofproto->ipfix) {
3516         return;
3517     }
3518
3519     memset(&cookie, 0, sizeof cookie);
3520     memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.flow_sample);
3521
3522     /* The flow reflects exactly the contents of the packet.  Sample
3523      * the packet using it. */
3524     dpif_ipfix_flow_sample(ofproto->ipfix, upcall->packet, &flow,
3525                            cookie.flow_sample.collector_set_id,
3526                            cookie.flow_sample.probability,
3527                            cookie.flow_sample.obs_domain_id,
3528                            cookie.flow_sample.obs_point_id);
3529 }
3530
3531 static void
3532 handle_ipfix_upcall(struct dpif_backer *backer,
3533                     const struct dpif_upcall *upcall)
3534 {
3535     struct ofproto_dpif *ofproto;
3536     struct flow flow;
3537
3538     if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
3539                       &flow, NULL, &ofproto, NULL)
3540         || !ofproto->ipfix) {
3541         return;
3542     }
3543
3544     /* The flow reflects exactly the contents of the packet.  Sample
3545      * the packet using it. */
3546     dpif_ipfix_bridge_sample(ofproto->ipfix, upcall->packet, &flow);
3547 }
3548
3549 static void
3550 handle_upcalls(struct dpif_backer *backer)
3551 {
3552     struct flow_miss_batch *fmb;
3553     int n_processed;
3554
3555     for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) {
3556         struct upcall *upcall = upcall_next(backer->udpif);
3557
3558         if (!upcall) {
3559             break;
3560         }
3561
3562         switch (upcall->type) {
3563         case SFLOW_UPCALL:
3564             handle_sflow_upcall(backer, &upcall->dpif_upcall);
3565             break;
3566
3567         case FLOW_SAMPLE_UPCALL:
3568             handle_flow_sample_upcall(backer, &upcall->dpif_upcall);
3569             break;
3570
3571         case IPFIX_UPCALL:
3572             handle_ipfix_upcall(backer, &upcall->dpif_upcall);
3573             break;
3574
3575         case BAD_UPCALL:
3576             break;
3577
3578         case MISS_UPCALL:
3579             NOT_REACHED();
3580         }
3581
3582         upcall_destroy(upcall);
3583     }
3584
3585     for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) {
3586         struct drop_key *drop_key = drop_key_next(backer->udpif);
3587         if (!drop_key) {
3588             break;
3589         }
3590
3591         if (!drop_key_lookup(backer, drop_key->key, drop_key->key_len)) {
3592             hmap_insert(&backer->drop_keys, &drop_key->hmap_node,
3593                         hash_bytes(drop_key->key, drop_key->key_len, 0));
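            /* Putting the key with no actions installs a drop flow, so the
             * datapath stops sending an upcall for every packet of a flow
             * that userspace cannot handle anyway. */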
3594             dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
3595                           drop_key->key, drop_key->key_len,
3596                           NULL, 0, NULL, 0, NULL);
3597         } else {
3598             drop_key_destroy(drop_key);
3599         }
3600     }
3601
3602     fmb = flow_miss_batch_next(backer->udpif);
3603     if (fmb) {
3604         handle_flow_misses(backer, fmb);
3605         flow_miss_batch_destroy(fmb);
3606     }
3607 }
3608 \f
3609 /* Flow expiration. */
3610
3611 static int subfacet_max_idle(const struct dpif_backer *);
3612 static void update_stats(struct dpif_backer *);
3613 static void rule_expire(struct rule_dpif *);
3614 static void expire_subfacets(struct dpif_backer *, int dp_max_idle);
3615
3616 /* This function is called periodically by run().  Its job is to collect
3617  * updates for the flows that have been installed into the datapath, most
3618  * importantly when they last were used, and then use that information to
3619  * expire flows that have not been used recently.
3620  *
3621  * Returns the number of milliseconds after which it should be called again. */
3622 static int
3623 expire(struct dpif_backer *backer)
3624 {
3625     struct ofproto_dpif *ofproto;
3626     size_t n_subfacets;
3627     int max_idle;
3628
3629     /* Periodically clear out the drop keys in an effort to keep their
3630      * number relatively small. */
3631     drop_key_clear(backer);
3632
3633     /* Update stats for each flow in the backer. */
3634     update_stats(backer);
3635
3636     n_subfacets = hmap_count(&backer->subfacets);
3637     if (n_subfacets) {
3638         struct subfacet *subfacet;
3639         long long int total, now;
3640
3641         total = 0;
3642         now = time_msec();
3643         HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) {
3644             total += now - subfacet->created;
3645         }
3646         backer->avg_subfacet_life += total / n_subfacets;
3647     }
3648     backer->avg_subfacet_life /= 2;
3649
3650     backer->avg_n_subfacet += n_subfacets;
3651     backer->avg_n_subfacet /= 2;
3652
3653     backer->max_n_subfacet = MAX(backer->max_n_subfacet, n_subfacets);
3654
3655     max_idle = subfacet_max_idle(backer);
3656     expire_subfacets(backer, max_idle);
3657
3658     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
3659         struct rule *rule, *next_rule;
3660
3661         if (ofproto->backer != backer) {
3662             continue;
3663         }
3664
3665         /* Expire OpenFlow flows whose idle_timeout or hard_timeout
3666          * has passed. */
3667         ovs_mutex_lock(&ofproto->up.expirable_mutex);
3668         LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
3669                             &ofproto->up.expirable) {
3670             rule_expire(rule_dpif_cast(rule));
3671         }
3672         ovs_mutex_unlock(&ofproto->up.expirable_mutex);
3673
3674         /* All outstanding data in existing flows has been accounted, so it's a
3675          * good time to do bond rebalancing. */
3676         if (ofproto->has_bonded_bundles) {
3677             struct ofbundle *bundle;
3678
3679             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
3680                 if (bundle->bond) {
3681                     bond_rebalance(bundle->bond);
3682                 }
3683             }
3684         }
3685     }
3686
3687     return MIN(max_idle, 1000);
3688 }
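
/* Since the return value is clamped to 1000 ms, run() ends up calling
 * expire() at least once per second even when subfacet_max_idle() would
 * allow a longer interval. */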
3689
3690 /* Updates flow table statistics given that the datapath just reported 'stats'
3691  * as 'subfacet''s statistics. */
3692 static void
3693 update_subfacet_stats(struct subfacet *subfacet,
3694                       const struct dpif_flow_stats *stats)
3695 {
3696     struct facet *facet = subfacet->facet;
3697     struct dpif_flow_stats diff;
3698
3699     diff.tcp_flags = stats->tcp_flags;
3700     diff.used = stats->used;
3701
3702     if (stats->n_packets >= subfacet->dp_packet_count) {
3703         diff.n_packets = stats->n_packets - subfacet->dp_packet_count;
3704     } else {
3705         VLOG_WARN_RL(&rl, "unexpected packet count from the datapath");
3706         diff.n_packets = 0;
3707     }
3708
3709     if (stats->n_bytes >= subfacet->dp_byte_count) {
3710         diff.n_bytes = stats->n_bytes - subfacet->dp_byte_count;
3711     } else {
3712         VLOG_WARN_RL(&rl, "unexpected byte count from datapath");
3713         diff.n_bytes = 0;
3714     }
3715
3716     facet->ofproto->n_hit += diff.n_packets;
3717     subfacet->dp_packet_count = stats->n_packets;
3718     subfacet->dp_byte_count = stats->n_bytes;
3719     subfacet_update_stats(subfacet, &diff);
3720
3721     if (facet->accounted_bytes < facet->byte_count) {
3722         facet_learn(facet);
3723         facet_account(facet);
3724         facet->accounted_bytes = facet->byte_count;
3725     }
3726 }
3727
3728 /* 'key' with length 'key_len' bytes is a flow in 'backer''s datapath that we
3729  * know nothing about, or that shouldn't be installed but was anyway.  Delete it. */
3730 static void
3731 delete_unexpected_flow(struct dpif_backer *backer,
3732                        const struct nlattr *key, size_t key_len)
3733 {
3734     if (!VLOG_DROP_WARN(&rl)) {
3735         struct ds s;
3736
3737         ds_init(&s);
3738         odp_flow_key_format(key, key_len, &s);
3739         VLOG_WARN("unexpected flow: %s", ds_cstr(&s));
3740         ds_destroy(&s);
3741     }
3742
3743     COVERAGE_INC(facet_unexpected);
3744     dpif_flow_del(backer->dpif, key, key_len, NULL);
3745 }
3746
3747 /* Update 'packet_count', 'byte_count', and 'used' members of installed facets.
3748  *
3749  * This function also pushes statistics updates to rules which each facet
3750  * resubmits into.  Generally these statistics will be accurate.  However, if a
3751  * facet changes the rule it resubmits into at some time in between
3752  * update_stats() runs, it is possible that statistics accrued to the
3753  * old rule will be incorrectly attributed to the new rule.  This could be
3754  * avoided by calling update_stats() whenever rules are created or
3755  * deleted.  However, the performance impact of making so many calls to the
3756  * datapath does not justify the benefit of perfectly accurate statistics.
3757  *
3758  * In addition, this function maintains per-ofproto flow hit counts.  The
3759  * patch port is not treated specially: a packet that ingresses on br0 and is
3760  * patched into br1 increases the hit count of br0 by one but does not affect
3761  * the hit or miss counts of br1.
3762  */
3763 static void
3764 update_stats(struct dpif_backer *backer)
3765 {
3766     const struct dpif_flow_stats *stats;
3767     struct dpif_flow_dump dump;
3768     const struct nlattr *key, *mask;
3769     size_t key_len, mask_len;
3770
3771     dpif_flow_dump_start(&dump, backer->dpif);
3772     while (dpif_flow_dump_next(&dump, &key, &key_len,
3773                                &mask, &mask_len, NULL, NULL, &stats)) {
3774         struct subfacet *subfacet;
3775         uint32_t key_hash;
3776
3777         key_hash = odp_flow_key_hash(key, key_len);
3778         subfacet = subfacet_find(backer, key, key_len, key_hash);
3779         switch (subfacet ? subfacet->path : SF_NOT_INSTALLED) {
3780         case SF_FAST_PATH:
3781             update_subfacet_stats(subfacet, stats);
3782             break;
3783
3784         case SF_SLOW_PATH:
3785             /* Stats are updated per-packet. */
3786             break;
3787
3788         case SF_NOT_INSTALLED:
3789         default:
3790             delete_unexpected_flow(backer, key, key_len);
3791             break;
3792         }
3793         run_fast_rl();
3794     }
3795     dpif_flow_dump_done(&dump);
3796
3797     update_moving_averages(backer);
3798 }
3799
3800 /* Calculates and returns the number of milliseconds of idle time after which
3801  * subfacets should expire from the datapath.  When a subfacet expires, we fold
3802  * its statistics into its facet, and when a facet's last subfacet expires, we
3803  * fold its statistic into its rule. */
3804 static int
3805 subfacet_max_idle(const struct dpif_backer *backer)
3806 {
3807     /*
3808      * Idle time histogram.
3809      *
3810      * Most of the time a switch has a relatively small number of subfacets.
3811      * When this is the case we might as well keep statistics for all of them
3812      * in userspace and cache them in the kernel datapath for performance as
3813      * well.
3814      *
3815      * As the number of subfacets increases, the memory required to maintain
3816      * statistics about them in userspace and in the kernel becomes
3817      * significant.  However, with a large number of subfacets it is likely
3818      * that only a few of them are "heavy hitters" that consume a large amount
3819      * of bandwidth.  At this point, only heavy hitters are worth caching in
3820      * the kernel and maintaining in userspace; other subfacets we can
3821      * discard.
3822      *
3823      * The technique used to compute the idle time is to build a histogram with
3824      * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each.  Each subfacet
3825      * that is installed in the kernel gets dropped in the appropriate bucket.
3826      * After the histogram has been built, we compute the cutoff so that only
3827      * the most-recently-used 1% of subfacets (but at least
3828      * flow_eviction_threshold flows) are kept cached.  At least
3829      * the most-recently-used bucket of subfacets is kept, so actually an
3830      * arbitrary number of subfacets can be kept in any given expiration run
3831      * (though the next run will delete most of those unless they receive
3832      * additional data).
3833      *
3834      * This requires a second pass through the subfacets, in addition to the
3835      * pass made by update_stats(), because the former function never looks at
3836      * uninstallable subfacets.
3837      */
3838     enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) };
3839     enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
3840     int buckets[N_BUCKETS] = { 0 };
3841     int total, subtotal, bucket;
3842     struct subfacet *subfacet;
3843     long long int now;
3844     int i;
3845
3846     total = hmap_count(&backer->subfacets);
3847     if (total <= flow_eviction_threshold) {
3848         return N_BUCKETS * BUCKET_WIDTH;
3849     }
3850
3851     /* Build histogram. */
3852     now = time_msec();
3853     HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) {
3854         long long int idle = now - subfacet->used;
3855         int bucket = (idle <= 0 ? 0
3856                       : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1
3857                       : (unsigned int) idle / BUCKET_WIDTH);
3858         buckets[bucket]++;
3859     }
3860
3861     /* Find the first bucket whose flows should be expired. */
3862     subtotal = bucket = 0;
3863     do {
3864         subtotal += buckets[bucket++];
3865     } while (bucket < N_BUCKETS &&
3866              subtotal < MAX(flow_eviction_threshold, total / 100));
3867
3868     if (VLOG_IS_DBG_ENABLED()) {
3869         struct ds s;
3870
3871         ds_init(&s);
3872         ds_put_cstr(&s, "keep");
3873         for (i = 0; i < N_BUCKETS; i++) {
3874             if (i == bucket) {
3875                 ds_put_cstr(&s, ", drop");
3876             }
3877             if (buckets[i]) {
3878                 ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]);
3879             }
3880         }
3881         VLOG_INFO("%s (msec:count)", ds_cstr(&s));
3882         ds_destroy(&s);
3883     }
3884
3885     return bucket * BUCKET_WIDTH;
3886 }
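
/* A worked example of the histogram, assuming TIME_UPDATE_INTERVAL makes
 * BUCKET_WIDTH come out to 100 ms (so N_BUCKETS is 50): a subfacet idle for
 * 430 ms lands in bucket 4.  If the cumulative count first reaches
 * MAX(flow_eviction_threshold, total / 100) while adding bucket 6, the
 * function returns 700 ms, and expire_subfacets() will then expire every
 * subfacet idle longer than that. */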
3887
3888 static void
3889 expire_subfacets(struct dpif_backer *backer, int dp_max_idle)
3890 {
3891     /* Cutoff time for most flows. */
3892     long long int normal_cutoff = time_msec() - dp_max_idle;
3893
3894     /* We really want to keep flows for special protocols around, so use a more
3895      * conservative cutoff. */
3896     long long int special_cutoff = time_msec() - 10000;
3897
3898     struct subfacet *subfacet, *next_subfacet;
3899     struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH];
3900     int n_batch;
3901
3902     n_batch = 0;
3903     HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
3904                         &backer->subfacets) {
3905         long long int cutoff;
3906
3907         cutoff = (subfacet->facet->xout.slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP
3908                                                 | SLOW_STP)
3909                   ? special_cutoff
3910                   : normal_cutoff);
3911         if (subfacet->used < cutoff) {
3912             if (subfacet->path != SF_NOT_INSTALLED) {
3913                 batch[n_batch++] = subfacet;
3914                 if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) {
3915                     subfacet_destroy_batch(backer, batch, n_batch);
3916                     n_batch = 0;
3917                 }
3918             } else {
3919                 subfacet_destroy(subfacet);
3920             }
3921         }
3922     }
3923
3924     if (n_batch > 0) {
3925         subfacet_destroy_batch(backer, batch, n_batch);
3926     }
3927 }
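
/* Collecting installed subfacets into batches means a single dpif_operate()
 * call in subfacet_destroy_batch() can delete up to
 * SUBFACET_DESTROY_MAX_BATCH datapath flows, amortizing the per-call cost of
 * talking to the datapath. */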
3928
3929 /* If 'rule' has expired according to its OpenFlow idle or hard timeout,
3930  * deletes it entirely. */
3931 static void
3932 rule_expire(struct rule_dpif *rule)
3933 {
3934     uint16_t idle_timeout, hard_timeout;
3935     long long int now;
3936     uint8_t reason;
3937
3938     if (rule->up.pending) {
3939         /* We'll have to expire it later. */
3940         return;
3941     }
3942
3943     ovs_mutex_lock(&rule->up.timeout_mutex);
3944     hard_timeout = rule->up.hard_timeout;
3945     idle_timeout = rule->up.idle_timeout;
3946     ovs_mutex_unlock(&rule->up.timeout_mutex);
3947
3948     /* Has 'rule' expired? */
3949     now = time_msec();
3950     if (hard_timeout && now > rule->up.modified + hard_timeout * 1000) {
3951         reason = OFPRR_HARD_TIMEOUT;
3952     } else if (idle_timeout && now > rule->up.used + idle_timeout * 1000) {
3953         reason = OFPRR_IDLE_TIMEOUT;
3954     } else {
3955         return;
3956     }
3957
3958     if (!ovs_rwlock_trywrlock(&rule->up.evict)) {
3959         COVERAGE_INC(ofproto_dpif_expired);
3960
3961         /* Get rid of the rule. */
3962         ofproto_rule_expire(&rule->up, reason);
3963     }
3964 }
3965 \f
3966 /* Facets. */
3967
3968 /* Creates and returns a new facet based on 'miss'.
3969  *
3970  * The caller must already have determined that no facet with an identical
3971  * 'miss->flow' exists in 'miss->ofproto'.
3972  *
3973  * The facet's actions ('xout') are copied from 'miss->xout'.
3974  *
3975  * The facet's statistics are initialized based on 'miss->stats'.
3976  *
3977  * The facet will initially have no subfacets.  The caller should create (at
3978  * least) one subfacet with subfacet_create(). */
3979 static struct facet *
3980 facet_create(const struct flow_miss *miss)
3981 {
3982     struct ofproto_dpif *ofproto = miss->ofproto;
3983     struct facet *facet;
3984     struct match match;
3985
3986     facet = xzalloc(sizeof *facet);
3987     facet->ofproto = miss->ofproto;
3988     facet->used = miss->stats.used;
3989     facet->flow = miss->flow;
3990     facet->learn_rl = time_msec() + 500;
3991
3992     list_init(&facet->subfacets);
3993     netflow_flow_init(&facet->nf_flow);
3994     netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);
3995
3996     xlate_out_copy(&facet->xout, &miss->xout);
3997
3998     match_init(&match, &facet->flow, &facet->xout.wc);
3999     cls_rule_init(&facet->cr, &match, OFP_DEFAULT_PRIORITY);
4000     ovs_rwlock_wrlock(&ofproto->facets.rwlock);
4001     classifier_insert(&ofproto->facets, &facet->cr);
4002     ovs_rwlock_unlock(&ofproto->facets.rwlock);
4003
4004     facet->nf_flow.output_iface = facet->xout.nf_output_iface;
4005     return facet;
4006 }
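
/* The insertion above takes ofproto->facets' write lock, while lookups such
 * as facet_find() take the read lock, so facet creation is safe against
 * concurrent readers. */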
4007
4008 static void
4009 facet_free(struct facet *facet)
4010 {
4011     if (facet) {
4012         xlate_out_uninit(&facet->xout);
4013         free(facet);
4014     }
4015 }
4016
4017 /* Executes, within 'ofproto', the 'actions_len' bytes of datapath actions in
4018  * 'odp_actions' on 'packet', whose flow is given by 'flow'. */
4019 static bool
4020 execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow,
4021                     const struct nlattr *odp_actions, size_t actions_len,
4022                     struct ofpbuf *packet)
4023 {
4024     struct odputil_keybuf keybuf;
4025     struct ofpbuf key;
4026     int error;
4027
4028     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
4029     odp_flow_key_from_flow(&key, flow,
4030                            ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port));
4031
4032     error = dpif_execute(ofproto->backer->dpif, key.data, key.size,
4033                          odp_actions, actions_len, packet);
4034     return !error;
4035 }
4036
4037 /* Remove 'facet' from its ofproto and free up the associated memory:
4038  *
4039  *   - If 'facet' was installed in the datapath, uninstalls it and updates its
4040  *     rule's statistics, via subfacet_uninstall().
4041  *
4042  *   - Removes 'facet' from its rule and from ofproto->facets.
4043  */
4044 static void
4045 facet_remove(struct facet *facet)
4046 {
4047     struct subfacet *subfacet, *next_subfacet;
4048
4049     ovs_assert(!list_is_empty(&facet->subfacets));
4050
4051     /* First uninstall all of the subfacets to get final statistics. */
4052     LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
4053         subfacet_uninstall(subfacet);
4054     }
4055
4056     /* Flush the final stats to the rule.
4057      *
4058      * This might require us to have at least one subfacet around so that we
4059      * can use its actions for accounting in facet_account(), which is why we
4060      * have uninstalled but not yet destroyed the subfacets. */
4061     facet_flush_stats(facet);
4062
4063     /* Now we're really all done so destroy everything. */
4064     LIST_FOR_EACH_SAFE (subfacet, next_subfacet, list_node,
4065                         &facet->subfacets) {
4066         subfacet_destroy__(subfacet);
4067     }
4068     ovs_rwlock_wrlock(&facet->ofproto->facets.rwlock);
4069     classifier_remove(&facet->ofproto->facets, &facet->cr);
4070     ovs_rwlock_unlock(&facet->ofproto->facets.rwlock);
4071     cls_rule_destroy(&facet->cr);
4072     facet_free(facet);
4073 }
4074
4075 /* Feed information from 'facet' back into the learning table to keep it in
4076  * sync with what is actually flowing through the datapath. */
4077 static void
4078 facet_learn(struct facet *facet)
4079 {
4080     long long int now = time_msec();
4081
4082     if (!facet->xout.has_fin_timeout && now < facet->learn_rl) {
4083         return;
4084     }
4085
4086     facet->learn_rl = now + 500;
4087
4088     if (!facet->xout.has_learn
4089         && !facet->xout.has_normal
4090         && (!facet->xout.has_fin_timeout
4091             || !(facet->tcp_flags & (TCP_FIN | TCP_RST)))) {
4092         return;
4093     }
4094
4095     facet_push_stats(facet, true);
4096 }
4097
4098 static void
4099 facet_account(struct facet *facet)
4100 {
4101     const struct nlattr *a;
4102     unsigned int left;
4103     ovs_be16 vlan_tci;
4104     uint64_t n_bytes;
4105
4106     if (!facet->xout.has_normal || !facet->ofproto->has_bonded_bundles) {
4107         return;
4108     }
4109     n_bytes = facet->byte_count - facet->accounted_bytes;
4110
4111     /* This loop feeds byte counters to bond_account() for rebalancing to use
4112      * as a basis.  We also need to track the actual VLAN on which the packet
4113      * is going to be sent to ensure that it matches the one passed to
4114      * bond_choose_output_slave().  (Otherwise, we will account to the wrong
4115      * hash bucket.)
4116      *
4117      * We examine the facet's translated datapath actions, which apply equally
4118      * to all of its subfacets. */
4119     vlan_tci = facet->flow.vlan_tci;
4120     NL_ATTR_FOR_EACH_UNSAFE (a, left, facet->xout.odp_actions.data,
4121                              facet->xout.odp_actions.size) {
4122         const struct ovs_action_push_vlan *vlan;
4123         struct ofport_dpif *port;
4124
4125         switch (nl_attr_type(a)) {
4126         case OVS_ACTION_ATTR_OUTPUT:
4127             port = get_odp_port(facet->ofproto, nl_attr_get_odp_port(a));
4128             if (port && port->bundle && port->bundle->bond) {
4129                 bond_account(port->bundle->bond, &facet->flow,
4130                              vlan_tci_to_vid(vlan_tci), n_bytes);
4131             }
4132             break;
4133
4134         case OVS_ACTION_ATTR_POP_VLAN:
4135             vlan_tci = htons(0);
4136             break;
4137
4138         case OVS_ACTION_ATTR_PUSH_VLAN:
4139             vlan = nl_attr_get(a);
4140             vlan_tci = vlan->vlan_tci;
4141             break;
4142         }
4143     }
4144 }
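
/* Example of the VLAN tracking above, assuming both output ports belong to
 * bonded bundles: for the action list "push_vlan(vid 10), output:1,
 * pop_vlan, output:2", the bytes sent to port 1 are accounted against
 * VLAN 10's bond hash and the bytes sent to port 2 against VLAN 0. */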
4145
4146 /* Returns true if the only action for 'facet' is to send to the controller.
4147  * (We don't report NetFlow expiration messages for such facets because they
4148  * are just part of the control logic for the network, not real traffic). */
4149 static bool
4150 facet_is_controller_flow(struct facet *facet)
4151 {
4152     if (facet) {
4153         struct ofproto_dpif *ofproto = facet->ofproto;
4154         const struct ofpact *ofpacts;
4155         struct rule_dpif *rule;
4156         size_t ofpacts_len;
4157         bool is_controller;
4158
4159         rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule);
4160         ofpacts_len = rule->up.ofpacts_len;
4161         ofpacts = rule->up.ofpacts;
4162         is_controller = ofpacts_len > 0
4163             && ofpacts->type == OFPACT_CONTROLLER
4164             && ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len);
4165         rule_release(rule);
4166         return is_controller;
4167     }
4168     return false;
4169 }
4170
4171 /* Folds all of 'facet''s statistics into its rule.  Also updates the
4172  * accounting ofhook and emits a NetFlow expiration if appropriate.  All of
4173  * 'facet''s statistics in the datapath should have been zeroed and folded into
4174  * its packet and byte counts before this function is called. */
4175 static void
4176 facet_flush_stats(struct facet *facet)
4177 {
4178     struct ofproto_dpif *ofproto = facet->ofproto;
4179     struct subfacet *subfacet;
4180
4181     LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
4182         ovs_assert(!subfacet->dp_byte_count);
4183         ovs_assert(!subfacet->dp_packet_count);
4184     }
4185
4186     facet_push_stats(facet, false);
4187     if (facet->accounted_bytes < facet->byte_count) {
4188         facet_account(facet);
4189         facet->accounted_bytes = facet->byte_count;
4190     }
4191
4192     if (ofproto->netflow && !facet_is_controller_flow(facet)) {
4193         struct ofexpired expired;
4194         expired.flow = facet->flow;
4195         expired.packet_count = facet->packet_count;
4196         expired.byte_count = facet->byte_count;
4197         expired.used = facet->used;
4198         netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
4199     }
4200
4201     /* Reset counters to prevent double counting if 'facet' ever gets
4202      * reinstalled. */
4203     facet_reset_counters(facet);
4204
4205     netflow_flow_clear(&facet->nf_flow);
4206     facet->tcp_flags = 0;
4207 }
4208
4209 /* Searches 'ofproto''s table of facets for one which would be responsible for
4210  * 'flow'.  Returns it if found, otherwise a null pointer.
4211  *
4212  * The returned facet might need revalidation; use facet_lookup_valid()
4213  * instead if that is important. */
4214 static struct facet *
4215 facet_find(struct ofproto_dpif *ofproto, const struct flow *flow)
4216 {
4217     struct cls_rule *cr;
4218
4219     ovs_rwlock_rdlock(&ofproto->facets.rwlock);
4220     cr = classifier_lookup(&ofproto->facets, flow, NULL);
4221     ovs_rwlock_unlock(&ofproto->facets.rwlock);
4222     return cr ? CONTAINER_OF(cr, struct facet, cr) : NULL;
4223 }
4224
4225 /* Searches 'ofproto''s table of facets for one that covers 'flow'.
4226  * Returns it if found, otherwise a null pointer.
4227  *
4228  * The returned facet is guaranteed to be valid. */
4229 static struct facet *
4230 facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
4231 {
4232     struct facet *facet;
4233
4234     facet = facet_find(ofproto, flow);
4235     if (facet
4236         && ofproto->backer->need_revalidate
4237         && !facet_revalidate(facet)) {
4238         return NULL;
4239     }
4240
4241     return facet;
4242 }
4243
4244 static bool
4245 facet_check_consistency(struct facet *facet)
4246 {
4247     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
4248
4249     struct xlate_out xout;
4250     struct xlate_in xin;
4251
4252     struct rule_dpif *rule;
4253     bool ok;
4254
4255     /* Check the datapath actions for consistency. */
4256     rule_dpif_lookup(facet->ofproto, &facet->flow, NULL, &rule);
4257     xlate_in_init(&xin, facet->ofproto, &facet->flow, rule, 0, NULL);
4258     xlate_actions(&xin, &xout);
4259     rule_release(rule);
4260
4261     ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)
4262         && facet->xout.slow == xout.slow;
4263     if (!ok && !VLOG_DROP_WARN(&rl)) {
4264         struct ds s = DS_EMPTY_INITIALIZER;
4265
4266         flow_format(&s, &facet->flow);
4267         ds_put_cstr(&s, ": inconsistency in facet");
4268
4269         if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) {
4270             ds_put_cstr(&s, " (actions were: ");
4271             format_odp_actions(&s, facet->xout.odp_actions.data,
4272                                facet->xout.odp_actions.size);
4273             ds_put_cstr(&s, ") (correct actions: ");
4274             format_odp_actions(&s, xout.odp_actions.data,
4275                                xout.odp_actions.size);
4276             ds_put_char(&s, ')');
4277         }
4278
4279         if (facet->xout.slow != xout.slow) {
4280             ds_put_format(&s, " slow path incorrect. should be %d", xout.slow);
4281         }
4282
4283         ds_destroy(&s);
4284     }
4285     xlate_out_uninit(&xout);
4286
4287     return ok;
4288 }
4289
4290 /* Re-searches the classifier for 'facet':
4291  *
4292  *   - If the rule found is different from 'facet''s current rule, moves
4293  *     'facet' to the new rule and recompiles its actions.
4294  *
4295  *   - If the rule found is the same as 'facet''s current rule, leaves 'facet'
4296  *     where it is and recompiles its actions anyway.
4297  *
4298  *   - If any of 'facet''s subfacets correspond to a new flow according to
4299  *     xlate_receive(), 'facet' is removed.
4300  *
4301  *   Returns true if 'facet' is still valid.  False if 'facet' was removed. */
4302 static bool
4303 facet_revalidate(struct facet *facet)
4304 {
4305     struct ofproto_dpif *ofproto = facet->ofproto;
4306     struct rule_dpif *new_rule;
4307     struct subfacet *subfacet;
4308     struct flow_wildcards wc;
4309     struct xlate_out xout;
4310     struct xlate_in xin;
4311
4312     COVERAGE_INC(facet_revalidate);
4313
4314     /* Check that child subfacets still correspond to this facet.  Tunnel
4315      * configuration changes could cause a subfacet's OpenFlow in_port to
4316      * change. */
4317     LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
4318         struct ofproto_dpif *recv_ofproto;
4319         struct flow recv_flow;
4320         int error;
4321
4322         error = xlate_receive(ofproto->backer, NULL, subfacet->key,
4323                               subfacet->key_len, &recv_flow, NULL,
4324                               &recv_ofproto, NULL);
4325         if (error
4326             || recv_ofproto != ofproto
4327             || facet != facet_find(ofproto, &recv_flow)) {
4328             facet_remove(facet);
4329             return false;
4330         }
4331     }
4332
4333     flow_wildcards_init_catchall(&wc);
4334     rule_dpif_lookup(ofproto, &facet->flow, &wc, &new_rule);
4335
4336     /* Calculate new datapath actions.
4337      *
4338      * We do not modify any 'facet' state yet, because we might need to, e.g.,
4339      * emit a NetFlow expiration and, if so, we need to have the old state
4340      * around to properly compose it. */
4341     xlate_in_init(&xin, ofproto, &facet->flow, new_rule, 0, NULL);
4342     xlate_actions(&xin, &xout);
4343     flow_wildcards_or(&xout.wc, &xout.wc, &wc);
4344
4345     /* A facet's slow path reason should only change under dramatic
4346      * circumstances.  Rather than try to update everything, it's simpler to
4347      * remove the facet and start over.
4348      *
4349      * More importantly, if a facet's wildcards change, it will be relatively
4350      * difficult to figure out if its subfacets still belong to it, and if not
4351      * which facet they may belong to.  Again, to avoid the complexity, we
4352      * simply give up instead. */
4353     if (facet->xout.slow != xout.slow
4354         || memcmp(&facet->xout.wc, &xout.wc, sizeof xout.wc)) {
4355         facet_remove(facet);
4356         xlate_out_uninit(&xout);
4357         rule_release(new_rule);
4358         return false;
4359     }
4360
4361     if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) {
4362         LIST_FOR_EACH(subfacet, list_node, &facet->subfacets) {
4363             if (subfacet->path == SF_FAST_PATH) {
4364                 struct dpif_flow_stats stats;
4365
4366                 subfacet_install(subfacet, &xout.odp_actions, &stats);
4367                 subfacet_update_stats(subfacet, &stats);
4368             }
4369         }
4370
4371         facet_flush_stats(facet);
4372
4373         ofpbuf_clear(&facet->xout.odp_actions);
4374         ofpbuf_put(&facet->xout.odp_actions, xout.odp_actions.data,
4375                    xout.odp_actions.size);
4376     }
4377
4378     /* Update 'facet' now that we've taken care of all the old state. */
4379     facet->xout.slow = xout.slow;
4380     facet->xout.has_learn = xout.has_learn;
4381     facet->xout.has_normal = xout.has_normal;
4382     facet->xout.has_fin_timeout = xout.has_fin_timeout;
4383     facet->xout.nf_output_iface = xout.nf_output_iface;
4384     facet->xout.mirrors = xout.mirrors;
4385     facet->nf_flow.output_iface = facet->xout.nf_output_iface;
4386     facet->used = MAX(facet->used, new_rule->up.created);
4387
4388     xlate_out_uninit(&xout);
4389     rule_release(new_rule);
4390     return true;
4391 }
4392
4393 static void
4394 facet_reset_counters(struct facet *facet)
4395 {
4396     facet->packet_count = 0;
4397     facet->byte_count = 0;
4398     facet->prev_packet_count = 0;
4399     facet->prev_byte_count = 0;
4400     facet->accounted_bytes = 0;
4401 }
4402
4403 static void
4404 flow_push_stats(struct ofproto_dpif *ofproto, struct flow *flow,
4405                 struct dpif_flow_stats *stats, bool may_learn)
4406 {
4407     struct ofport_dpif *in_port;
4408     struct rule_dpif *rule;
4409     struct xlate_in xin;
4410
4411     in_port = get_ofp_port(ofproto, flow->in_port.ofp_port);
4412     if (in_port && in_port->is_tunnel) {
4413         netdev_vport_inc_rx(in_port->up.netdev, stats);
4414     }
4415
4416     rule_dpif_lookup(ofproto, flow, NULL, &rule);
4417     rule_credit_stats(rule, stats);
4418     xlate_in_init(&xin, ofproto, flow, rule, stats->tcp_flags, NULL);
4419     xin.resubmit_stats = stats;
4420     xin.may_learn = may_learn;
4421     xlate_actions_for_side_effects(&xin);
4422     rule_release(rule);
4423 }
4424
4425 static void
4426 facet_push_stats(struct facet *facet, bool may_learn)
4427 {
4428     struct dpif_flow_stats stats;
4429
4430     ovs_assert(facet->packet_count >= facet->prev_packet_count);
4431     ovs_assert(facet->byte_count >= facet->prev_byte_count);
4432     ovs_assert(facet->used >= facet->prev_used);
4433
4434     stats.n_packets = facet->packet_count - facet->prev_packet_count;
4435     stats.n_bytes = facet->byte_count - facet->prev_byte_count;
4436     stats.used = facet->used;
4437     stats.tcp_flags = facet->tcp_flags;
4438
4439     if (may_learn || stats.n_packets || facet->used > facet->prev_used) {
4440         facet->prev_packet_count = facet->packet_count;
4441         facet->prev_byte_count = facet->byte_count;
4442         facet->prev_used = facet->used;
4443
4444         netflow_flow_update_time(facet->ofproto->netflow, &facet->nf_flow,
4445                                  facet->used);
4446         netflow_flow_update_flags(&facet->nf_flow, facet->tcp_flags);
4447         mirror_update_stats(facet->ofproto->mbridge, facet->xout.mirrors,
4448                             stats.n_packets, stats.n_bytes);
4449         flow_push_stats(facet->ofproto, &facet->flow, &stats, may_learn);
4450     }
4451 }
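
/* The prev_* counters make pushing idempotent: only the delta accumulated
 * since the previous push is credited to the rule, mirrors, and NetFlow, so
 * repeated calls never double-count a packet. */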
4452
4453 static void
4454 push_all_stats__(bool run_fast)
4455 {
4456     static long long int rl = LLONG_MIN;
4457     struct ofproto_dpif *ofproto;
4458
4459     if (time_msec() < rl) {
4460         return;
4461     }
4462
4463     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4464         struct cls_cursor cursor;
4465         struct facet *facet;
4466
4467         ovs_rwlock_rdlock(&ofproto->facets.rwlock);
4468         cls_cursor_init(&cursor, &ofproto->facets, NULL);
4469         CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
4470             facet_push_stats(facet, false);
4471             if (run_fast) {
4472                 run_fast_rl();
4473             }
4474         }
4475         ovs_rwlock_unlock(&ofproto->facets.rwlock);
4476     }
4477
4478     rl = time_msec() + 100;
4479 }
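
/* 'rl' rate-limits the walk to once per 100 ms: iterating every facet of
 * every bridge is relatively expensive, so back-to-back callers share one
 * recent pass. */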
4480
4481 static void
4482 push_all_stats(void)
4483 {
4484     push_all_stats__(true);
4485 }
4486
4487 void
4488 rule_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats)
4489 {
4490     ovs_mutex_lock(&rule->stats_mutex);
4491     rule->packet_count += stats->n_packets;
4492     rule->byte_count += stats->n_bytes;
4493     ofproto_rule_update_used(&rule->up, stats->used);
4494     ovs_mutex_unlock(&rule->stats_mutex);
4495 }
4496 \f
4497 /* Subfacets. */
4498
4499 static struct subfacet *
4500 subfacet_find(struct dpif_backer *backer, const struct nlattr *key,
4501               size_t key_len, uint32_t key_hash)
4502 {
4503     struct subfacet *subfacet;
4504
4505     HMAP_FOR_EACH_WITH_HASH (subfacet, hmap_node, key_hash,
4506                              &backer->subfacets) {
4507         if (subfacet->key_len == key_len
4508             && !memcmp(key, subfacet->key, key_len)) {
4509             return subfacet;
4510         }
4511     }
4512
4513     return NULL;
4514 }
4515
4516 /* Searches 'facet' (within 'ofproto') for a subfacet with the specified
4517  * 'key_fitness', 'key', and 'key_len' members in 'miss'.  Returns the
4518  * existing subfacet if there is one, otherwise creates and returns a
4519  * new subfacet. */
4520 static struct subfacet *
4521 subfacet_create(struct facet *facet, struct flow_miss *miss)
4522 {
4523     struct dpif_backer *backer = miss->ofproto->backer;
4524     enum odp_key_fitness key_fitness = miss->key_fitness;
4525     const struct nlattr *key = miss->key;
4526     size_t key_len = miss->key_len;
4527     uint32_t key_hash;
4528     struct subfacet *subfacet;
4529
4530     key_hash = odp_flow_key_hash(key, key_len);
4531
4532     if (list_is_empty(&facet->subfacets)) {
4533         subfacet = &facet->one_subfacet;
4534     } else {
4535         subfacet = subfacet_find(backer, key, key_len, key_hash);
4536         if (subfacet) {
4537             if (subfacet->facet == facet) {
4538                 return subfacet;
4539             }
4540
4541             /* This shouldn't happen. */
4542             VLOG_ERR_RL(&rl, "subfacet with wrong facet");
4543             subfacet_destroy(subfacet);
4544         }
4545
4546         subfacet = xmalloc(sizeof *subfacet);
4547     }
4548
4549     hmap_insert(&backer->subfacets, &subfacet->hmap_node, key_hash);
4550     list_push_back(&facet->subfacets, &subfacet->list_node);
4551     subfacet->facet = facet;
4552     subfacet->key_fitness = key_fitness;
4553     subfacet->key = xmemdup(key, key_len);
4554     subfacet->key_len = key_len;
4555     subfacet->used = miss->stats.used;
4556     subfacet->created = subfacet->used;
4557     subfacet->dp_packet_count = 0;
4558     subfacet->dp_byte_count = 0;
4559     subfacet->path = SF_NOT_INSTALLED;
4560     subfacet->backer = backer;
4561
4562     backer->subfacet_add_count++;
4563     return subfacet;
4564 }
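
/* Most facets have exactly one subfacet, so the first subfacet is embedded
 * in the facet itself as 'one_subfacet' and only additional subfacets are
 * heap-allocated; subfacet_destroy__() frees them accordingly. */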
4565
4566 /* Uninstalls 'subfacet' from the datapath, if it is installed, removes it from
4567  * its facet within 'ofproto', and frees it. */
4568 static void
4569 subfacet_destroy__(struct subfacet *subfacet)
4570 {
4571     struct facet *facet = subfacet->facet;
4572     struct ofproto_dpif *ofproto = facet->ofproto;
4573
4574     /* Update ofproto stats before uninstalling the subfacet. */
4575     ofproto->backer->subfacet_del_count++;
4576
4577     subfacet_uninstall(subfacet);
4578     hmap_remove(&subfacet->backer->subfacets, &subfacet->hmap_node);
4579     list_remove(&subfacet->list_node);
4580     free(subfacet->key);
4581     if (subfacet != &facet->one_subfacet) {
4582         free(subfacet);
4583     }
4584 }
4585
4586 /* Destroys 'subfacet', as with subfacet_destroy__(), and then if this was the
4587  * last remaining subfacet in its facet destroys the facet too. */
4588 static void
4589 subfacet_destroy(struct subfacet *subfacet)
4590 {
4591     struct facet *facet = subfacet->facet;
4592
4593     if (list_is_singleton(&facet->subfacets)) {
4594         /* facet_remove() needs at least one subfacet (it will remove it). */
4595         facet_remove(facet);
4596     } else {
4597         subfacet_destroy__(subfacet);
4598     }
4599 }
4600
4601 static void
4602 subfacet_destroy_batch(struct dpif_backer *backer,
4603                        struct subfacet **subfacets, int n)
4604 {
4605     struct dpif_op ops[SUBFACET_DESTROY_MAX_BATCH];
4606     struct dpif_op *opsp[SUBFACET_DESTROY_MAX_BATCH];
4607     struct dpif_flow_stats stats[SUBFACET_DESTROY_MAX_BATCH];
4608     int i;
4609
4610     for (i = 0; i < n; i++) {
4611         ops[i].type = DPIF_OP_FLOW_DEL;
4612         ops[i].u.flow_del.key = subfacets[i]->key;
4613         ops[i].u.flow_del.key_len = subfacets[i]->key_len;
4614         ops[i].u.flow_del.stats = &stats[i];
4615         opsp[i] = &ops[i];
4616     }
4617
4618     dpif_operate(backer->dpif, opsp, n);
4619     for (i = 0; i < n; i++) {
4620         subfacet_reset_dp_stats(subfacets[i], &stats[i]);
4621         subfacets[i]->path = SF_NOT_INSTALLED;
4622         subfacet_destroy(subfacets[i]);
4623         run_fast_rl();
4624     }
4625 }
4626
4627 /* Updates 'subfacet''s datapath flow, setting its actions to 'actions_len'
4628  * bytes of actions in 'actions'.  If 'stats' is non-null, statistics counters
4629  * in the datapath will be zeroed and 'stats' will be updated with the traffic
4630  * that arrived since 'subfacet' was last updated.
4631  *
4632  * Returns 0 if successful, otherwise a positive errno value. */
4633 static int
4634 subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions,
4635                  struct dpif_flow_stats *stats)
4636 {
4637     struct facet *facet = subfacet->facet;
4638     enum subfacet_path path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
4639     const struct nlattr *actions = odp_actions->data;
4640     size_t actions_len = odp_actions->size;
4641     struct odputil_keybuf maskbuf;
4642     struct ofpbuf mask;
4643
4644     uint64_t slow_path_stub[128 / 8];
4645     enum dpif_flow_put_flags flags;
4646     int ret;
4647
4648     flags = subfacet->path == SF_NOT_INSTALLED ? DPIF_FP_CREATE
4649                                                : DPIF_FP_MODIFY;
4650     if (stats) {
4651         flags |= DPIF_FP_ZERO_STATS;
4652     }
4653
4654     if (path == SF_SLOW_PATH) {
4655         compose_slow_path(facet->ofproto, &facet->flow, facet->xout.slow,
4656                           slow_path_stub, sizeof slow_path_stub,
4657                           &actions, &actions_len);
4658     }
4659
4660     ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
4661     if (enable_megaflows) {
4662         odp_flow_key_from_mask(&mask, &facet->xout.wc.masks,
4663                                &facet->flow, UINT32_MAX);
4664     }
4665
4666     ret = dpif_flow_put(subfacet->backer->dpif, flags, subfacet->key,
4667                         subfacet->key_len, mask.data, mask.size,
4668                         actions, actions_len, stats);
4669
4670     if (stats) {
4671         subfacet_reset_dp_stats(subfacet, stats);
4672     }
4673
4674     if (ret) {
4675         COVERAGE_INC(subfacet_install_fail);
4676     } else {
4677         subfacet->path = path;
4678     }
4679     return ret;
4680 }
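
/* Illustrative sketch (not part of the original file): a caller that has
 * just recomputed a facet's actions might reinstall each subfacet and fold
 * the returned deltas back in, along these lines:
 *
 *     struct dpif_flow_stats stats;
 *
 *     if (!subfacet_install(subfacet, &facet->xout.odp_actions, &stats)) {
 *         subfacet_update_stats(subfacet, &stats);
 *     }
 *
 * Passing non-null 'stats' zeroes the datapath counters, so each delta is
 * credited exactly once. */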
4681
4682 /* If 'subfacet' is installed in the datapath, uninstalls it. */
4683 static void
4684 subfacet_uninstall(struct subfacet *subfacet)
4685 {
4686     if (subfacet->path != SF_NOT_INSTALLED) {
4687         struct ofproto_dpif *ofproto = subfacet->facet->ofproto;
4688         struct dpif_flow_stats stats;
4689         int error;
4690
4691         error = dpif_flow_del(ofproto->backer->dpif, subfacet->key,
4692                               subfacet->key_len, &stats);
4693         subfacet_reset_dp_stats(subfacet, &stats);
4694         if (!error) {
4695             subfacet_update_stats(subfacet, &stats);
4696         }
4697         subfacet->path = SF_NOT_INSTALLED;
4698     } else {
4699         ovs_assert(subfacet->dp_packet_count == 0);
4700         ovs_assert(subfacet->dp_byte_count == 0);
4701     }
4702 }
4703
4704 /* Resets 'subfacet''s datapath statistics counters.  This should be called
4705  * when 'subfacet''s statistics are cleared in the datapath.  If 'stats' is
4706  * non-null, it should contain the statistics returned by dpif when 'subfacet'
4707  * was reset in the datapath.  'stats' will be modified to include only
4708  * statistics new since 'subfacet' was last updated. */
4709 static void
4710 subfacet_reset_dp_stats(struct subfacet *subfacet,
4711                         struct dpif_flow_stats *stats)
4712 {
4713     if (stats
4714         && subfacet->dp_packet_count <= stats->n_packets
4715         && subfacet->dp_byte_count <= stats->n_bytes) {
4716         stats->n_packets -= subfacet->dp_packet_count;
4717         stats->n_bytes -= subfacet->dp_byte_count;
4718     }
4719
4720     subfacet->dp_packet_count = 0;
4721     subfacet->dp_byte_count = 0;
4722 }
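
/* Worked example (illustrative): if the datapath reports n_packets=10 and
 * n_bytes=1500 for a subfacet whose dp_packet_count is 7 and dp_byte_count
 * is 900, then 'stats' is reduced to n_packets=3 and n_bytes=600, leaving
 * only the traffic seen since the last update, and both dp_* counters are
 * zeroed to match the freshly reset datapath counters. */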
4723
4724 /* Folds the statistics from 'stats' into the counters in 'subfacet'.
4725  *
4726  * Because of the meaning of a subfacet's counters, it only makes sense to do
4727  * this if 'stats' are not tracked in the datapath, that is, if 'stats'
4728  * represents a packet that was sent by hand or if it represents statistics
4729  * that have been cleared out of the datapath. */
4730 static void
4731 subfacet_update_stats(struct subfacet *subfacet,
4732                       const struct dpif_flow_stats *stats)
4733 {
4734     if (stats->n_packets || stats->used > subfacet->used) {
4735         struct facet *facet = subfacet->facet;
4736
4737         subfacet->used = MAX(subfacet->used, stats->used);
4738         facet->used = MAX(facet->used, stats->used);
4739         facet->packet_count += stats->n_packets;
4740         facet->byte_count += stats->n_bytes;
4741         facet->tcp_flags |= stats->tcp_flags;
4742     }
4743 }
4744 \f
4745 /* Rules. */
4746
4747 /* Looks up 'flow' in 'ofproto''s classifier.  If 'wc' is non-null, sets in
4748  * 'wc' the fields that were relevant to the lookup. */
4749 void
4750 rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
4751                  struct flow_wildcards *wc, struct rule_dpif **rule)
4752 {
4753     struct ofport_dpif *port;
4754
4755     if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) {
4756         return;
4757     }
4758     port = get_ofp_port(ofproto, flow->in_port.ofp_port);
4759     if (!port) {
4760         VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
4761                      flow->in_port.ofp_port);
4762     }
4763
4764     *rule = choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
4765                              ofproto->no_packet_in_rule);
4766     ovs_rwlock_rdlock(&(*rule)->up.evict);
4767 }
4768
4769 bool
4770 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
4771                           const struct flow *flow, struct flow_wildcards *wc,
4772                           uint8_t table_id, struct rule_dpif **rule)
4773     OVS_ACQ_RDLOCK((*rule)->up.evict)
4774 {
4775     struct cls_rule *cls_rule;
4776     struct classifier *cls;
4777     bool frag;
4778
4779     *rule = NULL;
4780     if (table_id >= N_TABLES) {
4781         return false;
4782     }
4783
4784     if (wc) {
4785         memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
4786         wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
4787     }
4788
4789     cls = &ofproto->up.tables[table_id].cls;
4790     ovs_rwlock_rdlock(&cls->rwlock);
4791     frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0;
4792     if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
4793         /* We must pretend that transport ports are unavailable. */
4794         struct flow ofpc_normal_flow = *flow;
4795         ofpc_normal_flow.tp_src = htons(0);
4796         ofpc_normal_flow.tp_dst = htons(0);
4797         cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc);
4798     } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) {
4799         cls_rule = &ofproto->drop_frags_rule->up.cr;
4800         if (wc) {
4801             flow_wildcards_init_exact(wc);
4802         }
4803     } else {
4804         cls_rule = classifier_lookup(cls, flow, wc);
4805     }
4806
4807     *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
4808     if (*rule && ovs_rwlock_tryrdlock(&(*rule)->up.evict)) {
4809         /* The rule is in the process of being removed.  Best we can do is
4810          * pretend it isn't there. */
4811         *rule = NULL;
4812     }
4813     ovs_rwlock_unlock(&cls->rwlock);
4814
4815     return *rule != NULL;
4816 }
4817
4818 /* Given a port configuration (specified as zero if there's no port), chooses
4819  * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
4820  * flow table miss. */
4821 struct rule_dpif *
4822 choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
4823                  struct rule_dpif *no_packet_in_rule)
4824 {
4825     return config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
4826 }
4827
4828 void
4829 rule_release(struct rule_dpif *rule)
4830 {
4831     if (rule) {
4832         ovs_rwlock_unlock(&rule->up.evict);
4833     }
4834 }
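
/* Illustrative sketch (not part of the original file): rule_dpif_lookup()
 * returns with the rule's eviction lock read-held, so every lookup must be
 * paired with rule_release():
 *
 *     struct rule_dpif *rule;
 *
 *     rule_dpif_lookup(ofproto, flow, &wc, &rule);
 *     ... read-only use of 'rule' ...
 *     rule_release(rule);
 *
 * rule_release() tolerates a null rule, which rule_dpif_lookup_in_table()
 * may produce. */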
4835
4836 static void
4837 complete_operation(struct rule_dpif *rule)
4838 {
4839     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
4840
4841     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
4842     if (clogged) {
4843         struct dpif_completion *c = xmalloc(sizeof *c);
4844         c->op = rule->up.pending;
4845         list_push_back(&ofproto->completions, &c->list_node);
4846     } else {
4847         ofoperation_complete(rule->up.pending, 0);
4848     }
4849 }
4850
4851 static struct rule *
4852 rule_alloc(void)
4853 {
4854     struct rule_dpif *rule = xmalloc(sizeof *rule);
4855     return &rule->up;
4856 }
4857
4858 static void
4859 rule_dealloc(struct rule *rule_)
4860 {
4861     struct rule_dpif *rule = rule_dpif_cast(rule_);
4862     free(rule);
4863 }
4864
4865 static enum ofperr
4866 rule_construct(struct rule *rule_)
4867 {
4868     struct rule_dpif *rule = rule_dpif_cast(rule_);
4869     ovs_mutex_init(&rule->stats_mutex, PTHREAD_MUTEX_NORMAL);
4870     ovs_mutex_lock(&rule->stats_mutex);
4871     rule->packet_count = 0;
4872     rule->byte_count = 0;
4873     ovs_mutex_unlock(&rule->stats_mutex);
4874     complete_operation(rule);
4875     return 0;
4876 }
4877
4878 static void
4879 rule_destruct(struct rule *rule_)
4880 {
4881     struct rule_dpif *rule = rule_dpif_cast(rule_);
4882     complete_operation(rule);
4883     ovs_mutex_destroy(&rule->stats_mutex);
4884 }
4885
4886 static void
4887 rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
4888 {
4889     struct rule_dpif *rule = rule_dpif_cast(rule_);
4890
4891     /* push_all_stats() can handle flow misses, which, when the learn action
4892      * is in use, can cause rules to be added and deleted.  This can corrupt
4893      * our caller's data structures, which assume that rule_get_stats() has no
4894      * effect on the flow table.  To be safe, we disable miss handling. */
4895     push_all_stats__(false);
4896
4897     /* Start from historical data for 'rule' itself that are no longer tracked
4898      * in facets.  This counts, for example, facets that have expired. */
4899     ovs_mutex_lock(&rule->stats_mutex);
4900     *packets = rule->packet_count;
4901     *bytes = rule->byte_count;
4902     ovs_mutex_unlock(&rule->stats_mutex);
4903 }
4904
4905 static void
4906 rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
4907                   struct ofpbuf *packet)
4908 {
4909     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
4910     struct dpif_flow_stats stats;
4911     struct xlate_out xout;
4912     struct xlate_in xin;
4913
4914     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
4915     rule_credit_stats(rule, &stats);
4916
4917     xlate_in_init(&xin, ofproto, flow, rule, stats.tcp_flags, packet);
4918     xin.resubmit_stats = &stats;
4919     xlate_actions(&xin, &xout);
4920
4921     execute_odp_actions(ofproto, flow, xout.odp_actions.data,
4922                         xout.odp_actions.size, packet);
4923
4924     xlate_out_uninit(&xout);
4925 }
4926
4927 static enum ofperr
4928 rule_execute(struct rule *rule, const struct flow *flow,
4929              struct ofpbuf *packet)
4930 {
4931     rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
4932     ofpbuf_delete(packet);
4933     return 0;
4934 }
4935
4936 static void
4937 rule_modify_actions(struct rule *rule_)
4938 {
4939     struct rule_dpif *rule = rule_dpif_cast(rule_);
4940
4941     complete_operation(rule);
4942 }
4943 \f
4944 /* Sends 'packet' out 'ofport'.
4945  * May modify 'packet'.
4946  * Returns 0 if successful, otherwise a positive errno value. */
4947 static int
4948 send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
4949 {
4950     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
4951     uint64_t odp_actions_stub[1024 / 8];
4952     struct ofpbuf key, odp_actions;
4953     struct dpif_flow_stats stats;
4954     struct odputil_keybuf keybuf;
4955     struct ofpact_output output;
4956     struct xlate_out xout;
4957     struct xlate_in xin;
4958     struct flow flow;
4959     union flow_in_port in_port_;
4960     int error;
4961
4962     ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
4963     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
4964
4965     /* Use OFPP_NONE as the in_port to avoid special packet processing. */
4966     in_port_.ofp_port = OFPP_NONE;
4967     flow_extract(packet, 0, 0, NULL, &in_port_, &flow);
4968     odp_flow_key_from_flow(&key, &flow, ofp_port_to_odp_port(ofproto,
4969                                                              OFPP_LOCAL));
4970     dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
4971
4972     ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
4973     output.port = ofport->up.ofp_port;
4974     output.max_len = 0;
4975
4976     xlate_in_init(&xin, ofproto, &flow, NULL, 0, packet);
4977     xin.ofpacts_len = sizeof output;
4978     xin.ofpacts = &output.ofpact;
4979     xin.resubmit_stats = &stats;
4980     xlate_actions(&xin, &xout);
4981
4982     error = dpif_execute(ofproto->backer->dpif,
4983                          key.data, key.size,
4984                          xout.odp_actions.data, xout.odp_actions.size,
4985                          packet);
4986     xlate_out_uninit(&xout);
4987
4988     if (error) {
4989         VLOG_WARN_RL(&rl, "%s: failed to send packet on port %s (%s)",
4990                      ofproto->up.name, netdev_get_name(ofport->up.netdev),
4991                      ovs_strerror(error));
4992     }
4993
4994     ofproto->stats.tx_packets++;
4995     ofproto->stats.tx_bytes += packet->size;
4996     return error;
4997 }
4998
4999 /* Composes an ODP action for a "slow path" action for 'flow' within 'ofproto'.
5000  * The action will state 'slow' as the reason that the action is in the slow
5001  * path.  (This is purely informational: it allows a human viewing "ovs-dpctl
5002  * dump-flows" output to see why a flow is in the slow path.)
5003  *
5004  * The 'stub_size' bytes in 'stub' will be used to store the action.
5005  * 'stub_size' must be large enough for the action.
5006  *
5007  * The action and its size will be stored in '*actionsp' and '*actions_lenp',
5008  * respectively. */
5009 static void
5010 compose_slow_path(const struct ofproto_dpif *ofproto, const struct flow *flow,
5011                   enum slow_path_reason slow,
5012                   uint64_t *stub, size_t stub_size,
5013                   const struct nlattr **actionsp, size_t *actions_lenp)
5014 {
5015     union user_action_cookie cookie;
5016     struct ofpbuf buf;
5017
5018     cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
5019     cookie.slow_path.unused = 0;
5020     cookie.slow_path.reason = slow;
5021
5022     ofpbuf_use_stack(&buf, stub, stub_size);
5023     if (slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)) {
5024         uint32_t pid = dpif_port_get_pid(ofproto->backer->dpif,
5025                                          ODPP_NONE);
5026         odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
5027     } else {
5028         odp_port_t odp_port;
5029         uint32_t pid;
5030
5031         odp_port = ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port);
5032         pid = dpif_port_get_pid(ofproto->backer->dpif, odp_port);
5033         odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
5034     }
5035     *actionsp = buf.data;
5036     *actions_lenp = buf.size;
5037 }
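
/* Illustrative sketch (not part of the original file): slowing a flow down
 * for CFM processing composes a single userspace action whose cookie
 * records the reason:
 *
 *     uint64_t stub[128 / 8];
 *     const struct nlattr *slow_actions;
 *     size_t slow_actions_len;
 *
 *     compose_slow_path(ofproto, &facet->flow, SLOW_CFM, stub, sizeof stub,
 *                       &slow_actions, &slow_actions_len);
 *
 * "ovs-dpctl dump-flows" can then report why the flow takes the slow path. */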
5038 \f
5039 static bool
5040 set_frag_handling(struct ofproto *ofproto_,
5041                   enum ofp_config_flags frag_handling)
5042 {
5043     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
5044     if (frag_handling != OFPC_FRAG_REASM) {
5045         ofproto->backer->need_revalidate = REV_RECONFIGURE;
5046         return true;
5047     } else {
5048         return false;
5049     }
5050 }
5051
5052 static enum ofperr
5053 packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
5054            const struct flow *flow,
5055            const struct ofpact *ofpacts, size_t ofpacts_len)
5056 {
5057     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
5058     struct odputil_keybuf keybuf;
5059     struct dpif_flow_stats stats;
5060     struct xlate_out xout;
5061     struct xlate_in xin;
5062     struct ofpbuf key;
5063
5065     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
5066     odp_flow_key_from_flow(&key, flow,
5067                            ofp_port_to_odp_port(ofproto,
5068                                       flow->in_port.ofp_port));
5069
5070     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
5071
5072     xlate_in_init(&xin, ofproto, flow, NULL, stats.tcp_flags, packet);
5073     xin.resubmit_stats = &stats;
5074     xin.ofpacts_len = ofpacts_len;
5075     xin.ofpacts = ofpacts;
5076
5077     xlate_actions(&xin, &xout);
5078     dpif_execute(ofproto->backer->dpif, key.data, key.size,
5079                  xout.odp_actions.data, xout.odp_actions.size, packet);
5080     xlate_out_uninit(&xout);
5081
5082     return 0;
5083 }
5084 \f
5085 /* NetFlow. */
5086
5087 static int
5088 set_netflow(struct ofproto *ofproto_,
5089             const struct netflow_options *netflow_options)
5090 {
5091     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
5092
5093     if (netflow_options) {
5094         if (!ofproto->netflow) {
5095             ofproto->netflow = netflow_create();
5096             ofproto->backer->need_revalidate = REV_RECONFIGURE;
5097         }
5098         return netflow_set_options(ofproto->netflow, netflow_options);
5099     } else if (ofproto->netflow) {
5100         ofproto->backer->need_revalidate = REV_RECONFIGURE;
5101         netflow_destroy(ofproto->netflow);
5102         ofproto->netflow = NULL;
5103     }
5104
5105     return 0;
5106 }
5107
5108 static void
5109 get_netflow_ids(const struct ofproto *ofproto_,
5110                 uint8_t *engine_type, uint8_t *engine_id)
5111 {
5112     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
5113
5114     dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id);
5115 }
5116
5117 static void
5118 send_active_timeout(struct ofproto_dpif *ofproto, struct facet *facet)
5119 {
5120     if (!facet_is_controller_flow(facet) &&
5121         netflow_active_timeout_expired(ofproto->netflow, &facet->nf_flow)) {
5122         struct subfacet *subfacet;
5123         struct ofexpired expired;
5124
5125         LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
5126             if (subfacet->path == SF_FAST_PATH) {
5127                 struct dpif_flow_stats stats;
5128
5129                 subfacet_install(subfacet, &facet->xout.odp_actions,
5130                                  &stats);
5131                 subfacet_update_stats(subfacet, &stats);
5132             }
5133         }
5134
5135         expired.flow = facet->flow;
5136         expired.packet_count = facet->packet_count;
5137         expired.byte_count = facet->byte_count;
5138         expired.used = facet->used;
5139         netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
5140     }
5141 }
5142
5143 static void
5144 send_netflow_active_timeouts(struct ofproto_dpif *ofproto)
5145 {
5146     struct cls_cursor cursor;
5147     struct facet *facet;
5148
5149     ovs_rwlock_rdlock(&ofproto->facets.rwlock);
5150     cls_cursor_init(&cursor, &ofproto->facets, NULL);
5151     CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
5152         send_active_timeout(ofproto, facet);
5153     }
5154     ovs_rwlock_unlock(&ofproto->facets.rwlock);
5155 }
5156 \f
5157 static struct ofproto_dpif *
5158 ofproto_dpif_lookup(const char *name)
5159 {
5160     struct ofproto_dpif *ofproto;
5161
5162     HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
5163                              hash_string(name, 0), &all_ofproto_dpifs) {
5164         if (!strcmp(ofproto->up.name, name)) {
5165             return ofproto;
5166         }
5167     }
5168     return NULL;
5169 }
5170
5171 static void
5172 ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
5173                           const char *argv[], void *aux OVS_UNUSED)
5174 {
5175     struct ofproto_dpif *ofproto;
5176
5177     if (argc > 1) {
5178         ofproto = ofproto_dpif_lookup(argv[1]);
5179         if (!ofproto) {
5180             unixctl_command_reply_error(conn, "no such bridge");
5181             return;
5182         }
5183         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
5184         mac_learning_flush(ofproto->ml);
5185         ovs_rwlock_unlock(&ofproto->ml->rwlock);
5186     } else {
5187         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
5188             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
5189             mac_learning_flush(ofproto->ml);
5190             ovs_rwlock_unlock(&ofproto->ml->rwlock);
5191         }
5192     }
5193
5194     unixctl_command_reply(conn, "table successfully flushed");
5195 }
5196
5197 static struct ofport_dpif *
5198 ofbundle_get_a_port(const struct ofbundle *bundle)
5199 {
5200     return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif,
5201                         bundle_node);
5202 }
5203
5204 static void
5205 ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
5206                          const char *argv[], void *aux OVS_UNUSED)
5207 {
5208     struct ds ds = DS_EMPTY_INITIALIZER;
5209     const struct ofproto_dpif *ofproto;
5210     const struct mac_entry *e;
5211
5212     ofproto = ofproto_dpif_lookup(argv[1]);
5213     if (!ofproto) {
5214         unixctl_command_reply_error(conn, "no such bridge");
5215         return;
5216     }
5217
5218     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
5219     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
5220     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
5221         struct ofbundle *bundle = e->port.p;
5222         char name[OFP_MAX_PORT_NAME_LEN];
5223
5224         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
5225                                name, sizeof name);
5226         ds_put_format(&ds, "%5s  %4d  "ETH_ADDR_FMT"  %3d\n",
5227                       name, e->vlan, ETH_ADDR_ARGS(e->mac),
5228                       mac_entry_age(ofproto->ml, e));
5229     }
5230     ovs_rwlock_unlock(&ofproto->ml->rwlock);
5231     unixctl_command_reply(conn, ds_cstr(&ds));
5232     ds_destroy(&ds);
5233 }
5234
5235 struct trace_ctx {
5236     struct xlate_out xout;
5237     struct xlate_in xin;
5238     struct flow flow;
5239     struct ds *result;
5240 };
5241
5242 static void
5243 trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
5244 {
5245     ds_put_char_multiple(result, '\t', level);
5246     if (!rule) {
5247         ds_put_cstr(result, "No match\n");
5248         return;
5249     }
5250
5251     ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
5252                   rule->up.table_id, ntohll(rule->up.flow_cookie));
5253     cls_rule_format(&rule->up.cr, result);
5254     ds_put_char(result, '\n');
5255
5256     ds_put_char_multiple(result, '\t', level);
5257     ds_put_cstr(result, "OpenFlow ");
5258     ofpacts_format(rule->up.ofpacts, rule->up.ofpacts_len, result);
5259     ds_put_char(result, '\n');
5260 }
5261
5262 static void
5263 trace_format_flow(struct ds *result, int level, const char *title,
5264                   struct trace_ctx *trace)
5265 {
5266     ds_put_char_multiple(result, '\t', level);
5267     ds_put_format(result, "%s: ", title);
5268     if (flow_equal(&trace->xin.flow, &trace->flow)) {
5269         ds_put_cstr(result, "unchanged");
5270     } else {
5271         flow_format(result, &trace->xin.flow);
5272         trace->flow = trace->xin.flow;
5273     }
5274     ds_put_char(result, '\n');
5275 }
5276
5277 static void
5278 trace_format_regs(struct ds *result, int level, const char *title,
5279                   struct trace_ctx *trace)
5280 {
5281     size_t i;
5282
5283     ds_put_char_multiple(result, '\t', level);
5284     ds_put_format(result, "%s:", title);
5285     for (i = 0; i < FLOW_N_REGS; i++) {
5286         ds_put_format(result, " reg%zu=0x%"PRIx32, i, trace->flow.regs[i]);
5287     }
5288     ds_put_char(result, '\n');
5289 }
5290
5291 static void
5292 trace_format_odp(struct ds *result, int level, const char *title,
5293                  struct trace_ctx *trace)
5294 {
5295     struct ofpbuf *odp_actions = &trace->xout.odp_actions;
5296
5297     ds_put_char_multiple(result, '\t', level);
5298     ds_put_format(result, "%s: ", title);
5299     format_odp_actions(result, odp_actions->data, odp_actions->size);
5300     ds_put_char(result, '\n');
5301 }
5302
5303 static void
5304 trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
5305 {
5306     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
5307     struct ds *result = trace->result;
5308
5309     ds_put_char(result, '\n');
5310     trace_format_flow(result, recurse + 1, "Resubmitted flow", trace);
5311     trace_format_regs(result, recurse + 1, "Resubmitted regs", trace);
5312     trace_format_odp(result, recurse + 1, "Resubmitted  odp", trace);
5313     trace_format_rule(result, recurse + 1, rule);
5314 }
5315
5316 static void
5317 trace_report(struct xlate_in *xin, const char *s, int recurse)
5318 {
5319     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
5320     struct ds *result = trace->result;
5321
5322     ds_put_char_multiple(result, '\t', recurse);
5323     ds_put_cstr(result, s);
5324     ds_put_char(result, '\n');
5325 }
5326
5327 static void
5328 ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
5329                       void *aux OVS_UNUSED)
5330 {
5331     const struct dpif_backer *backer;
5332     struct ofproto_dpif *ofproto;
5333     struct ofpbuf odp_key, odp_mask;
5334     struct ofpbuf *packet;
5335     struct ds result;
5336     struct flow flow;
5337     char *s;
5338
5339     packet = NULL;
5340     backer = NULL;
5341     ds_init(&result);
5342     ofpbuf_init(&odp_key, 0);
5343     ofpbuf_init(&odp_mask, 0);
5344
5345     /* Handle "-generate" or a hex string as the last argument. */
5346     if (!strcmp(argv[argc - 1], "-generate")) {
5347         packet = ofpbuf_new(0);
5348         argc--;
5349     } else {
5350         const char *error = eth_from_hex(argv[argc - 1], &packet);
5351         if (!error) {
5352             argc--;
5353         } else if (argc == 4) {
5354             /* The 3-argument form must end in "-generate" or a hex string. */
5355             unixctl_command_reply_error(conn, error);
5356             goto exit;
5357         }
5358     }
5359
5360     /* Parse the flow and determine whether a datapath or
5361      * bridge is specified.  If odp_flow_from_string() returns 0,
5362      * the flow is an odp_flow.  If parse_ofp_exact_flow()
5363      * returns 0, the flow is a br_flow. */
5364     if (!odp_flow_from_string(argv[argc - 1], NULL, &odp_key, &odp_mask)) {
5365         /* If the odp_flow is the second argument,
5366          * the datapath name is the first argument. */
5367         if (argc == 3) {
5368             const char *dp_type;
5369             if (!strncmp(argv[1], "ovs-", 4)) {
5370                 dp_type = argv[1] + 4;
5371             } else {
5372                 dp_type = argv[1];
5373             }
5374             backer = shash_find_data(&all_dpif_backers, dp_type);
5375             if (!backer) {
5376                 unixctl_command_reply_error(conn, "Cannot find datapath "
5377                                "of this name");
5378                 goto exit;
5379             }
5380         } else {
5381             /* No datapath name specified, so there should be only one
5382              * datapath. */
5383             struct shash_node *node;
5384             if (shash_count(&all_dpif_backers) != 1) {
5385                 unixctl_command_reply_error(conn, "Must specify datapath "
5386                          "name; there is more than one type of datapath");
5387                 goto exit;
5388             }
5389             node = shash_first(&all_dpif_backers);
5390             backer = node->data;
5391         }
5392
5393         if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, &flow,
5394                           NULL, &ofproto, NULL)) {
5395             unixctl_command_reply_error(conn, "Invalid datapath flow");
5396             goto exit;
5397         }
5398         ds_put_format(&result, "Bridge: %s\n", ofproto->up.name);
5399     } else if (!parse_ofp_exact_flow(&flow, argv[argc - 1])) {
5400         if (argc != 3) {
5401             unixctl_command_reply_error(conn, "Must specify bridge name");
5402             goto exit;
5403         }
5404
5405         ofproto = ofproto_dpif_lookup(argv[1]);
5406         if (!ofproto) {
5407             unixctl_command_reply_error(conn, "Unknown bridge name");
5408             goto exit;
5409         }
5410     } else {
5411         unixctl_command_reply_error(conn, "Bad flow syntax");
5412         goto exit;
5413     }
5414
5415     /* Generate a packet, if requested. */
5416     if (packet) {
5417         if (!packet->size) {
5418             flow_compose(packet, &flow);
5419         } else {
5420             union flow_in_port in_port_;
5421
5422             in_port_ = flow.in_port;
5423             ds_put_cstr(&result, "Packet: ");
5424             s = ofp_packet_to_string(packet->data, packet->size);
5425             ds_put_cstr(&result, s);
5426             free(s);
5427
5428             /* Use the metadata from the flow and the packet argument
5429              * to reconstruct the flow. */
5430             flow_extract(packet, flow.skb_priority, flow.pkt_mark, NULL,
5431                          &in_port_, &flow);
5432         }
5433     }
5434
5435     ofproto_trace(ofproto, &flow, packet, &result);
5436     unixctl_command_reply(conn, ds_cstr(&result));
5437
5438 exit:
5439     ds_destroy(&result);
5440     ofpbuf_delete(packet);
5441     ofpbuf_uninit(&odp_key);
5442     ofpbuf_uninit(&odp_mask);
5443 }
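
/* Example invocations (illustrative; argument syntax as registered in
 * ofproto_dpif_unixctl_init() below):
 *
 *     ovs-appctl ofproto/trace br0 in_port=1,dl_type=0x0806
 *     ovs-appctl ofproto/trace br0 in_port=1 -generate
 *     ovs-appctl ofproto/trace ovs-system "in_port(1)"
 */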
5444
5445 static void
5446 ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
5447               const struct ofpbuf *packet, struct ds *ds)
5448 {
5449     struct rule_dpif *rule;
5450     struct flow_wildcards wc;
5451
5452     ds_put_cstr(ds, "Flow: ");
5453     flow_format(ds, flow);
5454     ds_put_char(ds, '\n');
5455
5456     flow_wildcards_init_catchall(&wc);
5457     rule_dpif_lookup(ofproto, flow, &wc, &rule);
5458
5459     trace_format_rule(ds, 0, rule);
5460     if (rule == ofproto->miss_rule) {
5461         ds_put_cstr(ds, "\nNo match, flow generates \"packet in\"s.\n");
5462     } else if (rule == ofproto->no_packet_in_rule) {
5463         ds_put_cstr(ds, "\nNo match, packets dropped because "
5464                     "OFPPC_NO_PACKET_IN is set on in_port.\n");
5465     } else if (rule == ofproto->drop_frags_rule) {
5466         ds_put_cstr(ds, "\nPackets dropped because they are IP fragments "
5467                     "and the fragment handling mode is \"drop\".\n");
5468     }
5469
5470     if (rule) {
5471         uint64_t odp_actions_stub[1024 / 8];
5472         struct ofpbuf odp_actions;
5473         struct trace_ctx trace;
5474         struct match match;
5475         uint8_t tcp_flags;
5476
5477         tcp_flags = packet ? packet_get_tcp_flags(packet, flow) : 0;
5478         trace.result = ds;
5479         trace.flow = *flow;
5480         ofpbuf_use_stub(&odp_actions,
5481                         odp_actions_stub, sizeof odp_actions_stub);
5482         xlate_in_init(&trace.xin, ofproto, flow, rule, tcp_flags, packet);
5483         trace.xin.resubmit_hook = trace_resubmit;
5484         trace.xin.report_hook = trace_report;
5485
5486         xlate_actions(&trace.xin, &trace.xout);
5487         flow_wildcards_or(&trace.xout.wc, &trace.xout.wc, &wc);
5488
5489         ds_put_char(ds, '\n');
5490         trace_format_flow(ds, 0, "Final flow", &trace);
5491
5492         match_init(&match, flow, &trace.xout.wc);
5493         ds_put_cstr(ds, "Relevant fields: ");
5494         match_format(&match, ds, OFP_DEFAULT_PRIORITY);
5495         ds_put_char(ds, '\n');
5496
5497         ds_put_cstr(ds, "Datapath actions: ");
5498         format_odp_actions(ds, trace.xout.odp_actions.data,
5499                            trace.xout.odp_actions.size);
5500
5501         if (trace.xout.slow) {
5502             ds_put_cstr(ds, "\nThis flow is handled by the userspace "
5503                         "slow path because it:");
5504             switch (trace.xout.slow) {
5505             case SLOW_CFM:
5506                 ds_put_cstr(ds, "\n\t- Consists of CFM packets.");
5507                 break;
5508             case SLOW_LACP:
5509                 ds_put_cstr(ds, "\n\t- Consists of LACP packets.");
5510                 break;
5511             case SLOW_STP:
5512                 ds_put_cstr(ds, "\n\t- Consists of STP packets.");
5513                 break;
5514             case SLOW_BFD:
5515                 ds_put_cstr(ds, "\n\t- Consists of BFD packets.");
5516                 break;
5517             case SLOW_CONTROLLER:
5518                 ds_put_cstr(ds, "\n\t- Sends \"packet-in\" messages "
5519                             "to the OpenFlow controller.");
5520                 break;
5521             case __SLOW_MAX:
5522                 NOT_REACHED();
5523             }
5524         }
5525
5526         xlate_out_uninit(&trace.xout);
5527     }
5528
5529     rule_release(rule);
5530 }
5531
5532 static void
5533 ofproto_dpif_clog(struct unixctl_conn *conn, int argc OVS_UNUSED,
5534                   const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
5535 {
5536     clogged = true;
5537     unixctl_command_reply(conn, NULL);
5538 }
5539
5540 static void
5541 ofproto_dpif_unclog(struct unixctl_conn *conn, int argc OVS_UNUSED,
5542                     const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
5543 {
5544     clogged = false;
5545     unixctl_command_reply(conn, NULL);
5546 }
5547
5548 /* Runs a self-check of flow translations in 'ofproto'.  Appends a message to
5549  * 'reply' describing the results. */
5550 static void
5551 ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
5552 {
5553     struct cls_cursor cursor;
5554     struct facet *facet;
5555     int errors;
5556
5557     errors = 0;
5558     ovs_rwlock_rdlock(&ofproto->facets.rwlock);
5559     cls_cursor_init(&cursor, &ofproto->facets, NULL);
5560     CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
5561         if (!facet_check_consistency(facet)) {
5562             errors++;
5563         }
5564     }
5565     ovs_rwlock_unlock(&ofproto->facets.rwlock);
5566     if (errors) {
5567         ofproto->backer->need_revalidate = REV_INCONSISTENCY;
5568         ds_put_format(reply, "%s: self-check failed (%d errors)\n",
5569                       ofproto->up.name, errors);
5570     } else {
5571         ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
5572     }
5576 }
5577
5578 static void
5579 ofproto_dpif_self_check(struct unixctl_conn *conn,
5580                         int argc, const char *argv[], void *aux OVS_UNUSED)
5581 {
5582     struct ds reply = DS_EMPTY_INITIALIZER;
5583     struct ofproto_dpif *ofproto;
5584
5585     if (argc > 1) {
5586         ofproto = ofproto_dpif_lookup(argv[1]);
5587         if (!ofproto) {
5588             unixctl_command_reply_error(conn, "Unknown ofproto (use "
5589                                         "ofproto/list for help)");
5590             return;
5591         }
5592         ofproto_dpif_self_check__(ofproto, &reply);
5593     } else {
5594         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
5595             ofproto_dpif_self_check__(ofproto, &reply);
5596         }
5597     }
5598
5599     unixctl_command_reply(conn, ds_cstr(&reply));
5600     ds_destroy(&reply);
5601 }
5602
5603 /* Stores the current ofprotos in 'ofproto_shash'.  Returns a sorted list
5604  * of the 'ofproto_shash' nodes.  The caller is responsible for destroying
5605  * 'ofproto_shash' and freeing the returned value. */
5606 static const struct shash_node **
5607 get_ofprotos(struct shash *ofproto_shash)
5608 {
5609     const struct ofproto_dpif *ofproto;
5610
5611     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
5612         char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name);
5613         shash_add_nocopy(ofproto_shash, name, ofproto);
5614     }
5615
5616     return shash_sort(ofproto_shash);
5617 }
5618
5619 static void
5620 ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
5621                               const char *argv[] OVS_UNUSED,
5622                               void *aux OVS_UNUSED)
5623 {
5624     struct ds ds = DS_EMPTY_INITIALIZER;
5625     struct shash ofproto_shash;
5626     const struct shash_node **sorted_ofprotos;
5627     int i;
5628
5629     shash_init(&ofproto_shash);
5630     sorted_ofprotos = get_ofprotos(&ofproto_shash);
5631     for (i = 0; i < shash_count(&ofproto_shash); i++) {
5632         const struct shash_node *node = sorted_ofprotos[i];
5633         ds_put_format(&ds, "%s\n", node->name);
5634     }
5635
5636     shash_destroy(&ofproto_shash);
5637     free(sorted_ofprotos);
5638
5639     unixctl_command_reply(conn, ds_cstr(&ds));
5640     ds_destroy(&ds);
5641 }
5642
5643 static void
5644 show_dp_rates(struct ds *ds, const char *heading,
5645               const struct avg_subfacet_rates *rates)
5646 {
5647     ds_put_format(ds, "%s add rate: %5.3f/min, del rate: %5.3f/min\n",
5648                   heading, rates->add_rate, rates->del_rate);
5649 }
5650
5651 static void
5652 dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
5653 {
5654     const struct shash_node **ofprotos;
5655     struct ofproto_dpif *ofproto;
5656     struct shash ofproto_shash;
5657     uint64_t n_hit, n_missed;
5658     long long int minutes;
5659     size_t i;
5660
5661     n_hit = n_missed = 0;
5662     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
5663         if (ofproto->backer == backer) {
5664             n_missed += ofproto->n_missed;
5665             n_hit += ofproto->n_hit;
5666         }
5667     }
5668
5669     ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
5670                   dpif_name(backer->dpif), n_hit, n_missed);
5671     ds_put_format(ds, "\tflows: cur: %zu, avg: %u, max: %u,"
5672                   " life span: %lldms\n", hmap_count(&backer->subfacets),
5673                   backer->avg_n_subfacet, backer->max_n_subfacet,
5674                   backer->avg_subfacet_life);
5675
5676     minutes = (time_msec() - backer->created) / (1000 * 60);
5677     if (minutes >= 60) {
5678         show_dp_rates(ds, "\thourly avg:", &backer->hourly);
5679     }
5680     if (minutes >= 60 * 24) {
5681         show_dp_rates(ds, "\tdaily avg:",  &backer->daily);
5682     }
5683     show_dp_rates(ds, "\toverall avg:",  &backer->lifetime);
5684
5685     shash_init(&ofproto_shash);
5686     ofprotos = get_ofprotos(&ofproto_shash);
5687     for (i = 0; i < shash_count(&ofproto_shash); i++) {
5688         struct ofproto_dpif *ofproto = ofprotos[i]->data;
5689         const struct shash_node **ports;
5690         size_t j;
5691
5692         if (ofproto->backer != backer) {
5693             continue;
5694         }
5695
5696         ds_put_format(ds, "\t%s: hit:%"PRIu64" missed:%"PRIu64"\n",
5697                       ofproto->up.name, ofproto->n_hit, ofproto->n_missed);
5698
5699         ports = shash_sort(&ofproto->up.port_by_name);
5700         for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
5701             const struct shash_node *node = ports[j];
5702             struct ofport *ofport = node->data;
5703             struct smap config;
5704             odp_port_t odp_port;
5705
5706             ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev),
5707                           ofport->ofp_port);
5708
5709             odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
5710             if (odp_port != ODPP_NONE) {
5711                 ds_put_format(ds, "%"PRIu32":", odp_port);
5712             } else {
5713                 ds_put_cstr(ds, "none:");
5714             }
5715
5716             ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev));
5717
5718             smap_init(&config);
5719             if (!netdev_get_config(ofport->netdev, &config)) {
5720                 const struct smap_node **nodes;
5721                 size_t i;
5722
5723                 nodes = smap_sort(&config);
5724                 for (i = 0; i < smap_count(&config); i++) {
5725                     const struct smap_node *node = nodes[i];
5726                     ds_put_format(ds, "%c %s=%s", i ? ',' : ':',
5727                                   node->key, node->value);
5728                 }
5729                 free(nodes);
5730             }
5731             smap_destroy(&config);
5732
5733             ds_put_char(ds, ')');
5734             ds_put_char(ds, '\n');
5735         }
5736         free(ports);
5737     }
5738     shash_destroy(&ofproto_shash);
5739     free(ofprotos);
5740 }
5741
5742 static void
5743 ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
5744                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
5745 {
5746     struct ds ds = DS_EMPTY_INITIALIZER;
5747     const struct shash_node **backers;
5748     int i;
5749
5750     backers = shash_sort(&all_dpif_backers);
5751     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
5752         dpif_show_backer(backers[i]->data, &ds);
5753     }
5754     free(backers);
5755
5756     unixctl_command_reply(conn, ds_cstr(&ds));
5757     ds_destroy(&ds);
5758 }
5759
5760 /* Dump the megaflow (facet) cache.  This is useful to check the
5761  * correctness of flow wildcarding, since the same mechanism is used for
5762  * both xlate caching and kernel wildcarding.
5763  *
5764  * It's important to note that in the output the flow description uses
5765  * OpenFlow (OFP) ports, but the actions use datapath (ODP) ports.
5766  *
5767  * This command is only needed for advanced debugging, so it's not
5768  * documented in the man page. */
5769 static void
5770 ofproto_unixctl_dpif_dump_megaflows(struct unixctl_conn *conn,
5771                                     int argc OVS_UNUSED, const char *argv[],
5772                                     void *aux OVS_UNUSED)
5773 {
5774     struct ds ds = DS_EMPTY_INITIALIZER;
5775     const struct ofproto_dpif *ofproto;
5776     long long int now = time_msec();
5777     struct cls_cursor cursor;
5778     struct facet *facet;
5779
5780     ofproto = ofproto_dpif_lookup(argv[1]);
5781     if (!ofproto) {
5782         unixctl_command_reply_error(conn, "no such bridge");
5783         return;
5784     }
5785
5786     ovs_rwlock_rdlock(&ofproto->facets.rwlock);
5787     cls_cursor_init(&cursor, &ofproto->facets, NULL);
5788     CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
5789         cls_rule_format(&facet->cr, &ds);
5790         ds_put_cstr(&ds, ", ");
5791         ds_put_format(&ds, "n_subfacets:%zu, ", list_size(&facet->subfacets));
5792         ds_put_format(&ds, "used:%.3fs, ", (now - facet->used) / 1000.0);
5793         ds_put_cstr(&ds, "Datapath actions: ");
5794         if (facet->xout.slow) {
5795             uint64_t slow_path_stub[128 / 8];
5796             const struct nlattr *actions;
5797             size_t actions_len;
5798
5799             compose_slow_path(ofproto, &facet->flow, facet->xout.slow,
5800                               slow_path_stub, sizeof slow_path_stub,
5801                               &actions, &actions_len);
5802             format_odp_actions(&ds, actions, actions_len);
5803         } else {
5804             format_odp_actions(&ds, facet->xout.odp_actions.data,
5805                                facet->xout.odp_actions.size);
5806         }
5807         ds_put_cstr(&ds, "\n");
5808     }
5809     ovs_rwlock_unlock(&ofproto->facets.rwlock);
5810
5811     ds_chomp(&ds, '\n');
5812     unixctl_command_reply(conn, ds_cstr(&ds));
5813     ds_destroy(&ds);
5814 }
5815
5816 /* Disable the use of megaflows.
5817  *
5818  * This command is only needed for advanced debugging, so it's not
5819  * documented in the man page. */
5820 static void
5821 ofproto_unixctl_dpif_disable_megaflows(struct unixctl_conn *conn,
5822                                        int argc OVS_UNUSED,
5823                                        const char *argv[] OVS_UNUSED,
5824                                        void *aux OVS_UNUSED)
5825 {
5826     struct ofproto_dpif *ofproto;
5827
5828     enable_megaflows = false;
5829
5830     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
5831         flush(&ofproto->up);
5832     }
5833
5834     unixctl_command_reply(conn, "megaflows disabled");
5835 }
5836
5837 /* Re-enable the use of megaflows.
5838  *
5839  * This command is only needed for advanced debugging, so it's not
5840  * documented in the man page. */
5841 static void
5842 ofproto_unixctl_dpif_enable_megaflows(struct unixctl_conn *conn,
5843                                       int argc OVS_UNUSED,
5844                                       const char *argv[] OVS_UNUSED,
5845                                       void *aux OVS_UNUSED)
5846 {
5847     struct ofproto_dpif *ofproto;
5848
5849     enable_megaflows = true;
5850
5851     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
5852         flush(&ofproto->up);
5853     }
5854
5855     unixctl_command_reply(conn, "megaflows enabled");
5856 }
5857
5858 static void
5859 ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
5860                                 int argc OVS_UNUSED, const char *argv[],
5861                                 void *aux OVS_UNUSED)
5862 {
5863     struct ds ds = DS_EMPTY_INITIALIZER;
5864     const struct ofproto_dpif *ofproto;
5865     struct subfacet *subfacet;
5866
5867     ofproto = ofproto_dpif_lookup(argv[1]);
5868     if (!ofproto) {
5869         unixctl_command_reply_error(conn, "no such bridge");
5870         return;
5871     }
5872
5873     update_stats(ofproto->backer);
5874
5875     HMAP_FOR_EACH (subfacet, hmap_node, &ofproto->backer->subfacets) {
5876         struct facet *facet = subfacet->facet;
5877         struct odputil_keybuf maskbuf;
5878         struct ofpbuf mask;
5879
5880         if (facet->ofproto != ofproto) {
5881             continue;
5882         }
5883
5884         ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
5885         if (enable_megaflows) {
5886             odp_flow_key_from_mask(&mask, &facet->xout.wc.masks,
5887                                    &facet->flow, UINT32_MAX);
5888         }
5889
5890         odp_flow_format(subfacet->key, subfacet->key_len,
5891                         mask.data, mask.size, &ds, false);
5892
5893         ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:",
5894                       subfacet->dp_packet_count, subfacet->dp_byte_count);
5895         if (subfacet->used) {
5896             ds_put_format(&ds, "%.3fs",
5897                           (time_msec() - subfacet->used) / 1000.0);
5898         } else {
5899             ds_put_format(&ds, "never");
5900         }
5901         if (subfacet->facet->tcp_flags) {
5902             ds_put_cstr(&ds, ", flags:");
5903             packet_format_tcp_flags(&ds, subfacet->facet->tcp_flags);
5904         }
5905
5906         ds_put_cstr(&ds, ", actions:");
5907         if (facet->xout.slow) {
5908             uint64_t slow_path_stub[128 / 8];
5909             const struct nlattr *actions;
5910             size_t actions_len;
5911
5912             compose_slow_path(ofproto, &facet->flow, facet->xout.slow,
5913                               slow_path_stub, sizeof slow_path_stub,
5914                               &actions, &actions_len);
5915             format_odp_actions(&ds, actions, actions_len);
5916         } else {
5917             format_odp_actions(&ds, facet->xout.odp_actions.data,
5918                                facet->xout.odp_actions.size);
5919         }
5920         ds_put_char(&ds, '\n');
5921     }
5922
5923     unixctl_command_reply(conn, ds_cstr(&ds));
5924     ds_destroy(&ds);
5925 }
5926
5927 static void
5928 ofproto_unixctl_dpif_del_flows(struct unixctl_conn *conn,
5929                                int argc OVS_UNUSED, const char *argv[],
5930                                void *aux OVS_UNUSED)
5931 {
5932     struct ds ds = DS_EMPTY_INITIALIZER;
5933     struct ofproto_dpif *ofproto;
5934
5935     ofproto = ofproto_dpif_lookup(argv[1]);
5936     if (!ofproto) {
5937         unixctl_command_reply_error(conn, "no such bridge");
5938         return;
5939     }
5940
5941     flush(&ofproto->up);
5942
5943     unixctl_command_reply(conn, ds_cstr(&ds));
5944     ds_destroy(&ds);
5945 }
5946
5947 static void
5948 ofproto_dpif_unixctl_init(void)
5949 {
5950     static bool registered;
5951     if (registered) {
5952         return;
5953     }
5954     registered = true;
5955
5956     unixctl_command_register(
5957         "ofproto/trace",
5958         "[dp_name]|bridge odp_flow|br_flow [-generate|packet]",
5959         1, 3, ofproto_unixctl_trace, NULL);
5960     unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
5961                              ofproto_unixctl_fdb_flush, NULL);
5962     unixctl_command_register("fdb/show", "bridge", 1, 1,
5963                              ofproto_unixctl_fdb_show, NULL);
5964     unixctl_command_register("ofproto/clog", "", 0, 0,
5965                              ofproto_dpif_clog, NULL);
5966     unixctl_command_register("ofproto/unclog", "", 0, 0,
5967                              ofproto_dpif_unclog, NULL);
5968     unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
5969                              ofproto_dpif_self_check, NULL);
5970     unixctl_command_register("dpif/dump-dps", "", 0, 0,
5971                              ofproto_unixctl_dpif_dump_dps, NULL);
5972     unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
5973                              NULL);
5974     unixctl_command_register("dpif/dump-flows", "bridge", 1, 1,
5975                              ofproto_unixctl_dpif_dump_flows, NULL);
5976     unixctl_command_register("dpif/del-flows", "bridge", 1, 1,
5977                              ofproto_unixctl_dpif_del_flows, NULL);
5978     unixctl_command_register("dpif/dump-megaflows", "bridge", 1, 1,
5979                              ofproto_unixctl_dpif_dump_megaflows, NULL);
5980     unixctl_command_register("dpif/disable-megaflows", "", 0, 0,
5981                              ofproto_unixctl_dpif_disable_megaflows, NULL);
5982     unixctl_command_register("dpif/enable-megaflows", "", 0, 0,
5983                              ofproto_unixctl_dpif_enable_megaflows, NULL);
5984 }
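
/* Once registered, these commands are reachable through ovs-appctl, e.g.
 * (illustrative):
 *
 *     ovs-appctl dpif/show
 *     ovs-appctl dpif/dump-flows br0
 *     ovs-appctl fdb/show br0
 */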
5985 \f
5986 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10).
5987  *
5988  * This is deprecated.  It is only for compatibility with broken device drivers
5989  * in old versions of Linux that do not properly support VLANs when VLAN
5990  * devices are not used.  When broken device drivers are no longer in
5991  * widespread use, we will delete these interfaces. */
5992
5993 static int
5994 set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
5995 {
5996     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
5997     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
5998
5999     if (realdev_ofp_port == ofport->realdev_ofp_port
6000         && vid == ofport->vlandev_vid) {
6001         return 0;
6002     }
6003
6004     ofproto->backer->need_revalidate = REV_RECONFIGURE;
6005
6006     if (ofport->realdev_ofp_port) {
6007         vsp_remove(ofport);
6008     }
6009     if (realdev_ofp_port && ofport->bundle) {
6010         /* vlandevs are enslaved to their realdevs, so they are not allowed
6011          * to be part of a bundle themselves. */
6012         bundle_set(ofport->up.ofproto, ofport->bundle, NULL);
6013     }
6014
6015     ofport->realdev_ofp_port = realdev_ofp_port;
6016     ofport->vlandev_vid = vid;
6017
6018     if (realdev_ofp_port) {
6019         vsp_add(ofport, realdev_ofp_port, vid);
6020     }
6021
6022     return 0;
6023 }
6024
6025 static uint32_t
6026 hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
6027 {
6028     return hash_2words(ofp_to_u16(realdev_ofp_port), vid);
6029 }
6030
6031 bool
6032 ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
6033     OVS_EXCLUDED(ofproto->vsp_mutex)
6034 {
6035     bool ret;
6036
6037     ovs_mutex_lock(&ofproto->vsp_mutex);
6038     ret = !hmap_is_empty(&ofproto->realdev_vid_map);
6039     ovs_mutex_unlock(&ofproto->vsp_mutex);
6040     return ret;
6041 }
6042
6043 static ofp_port_t
6044 vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
6045                          ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
6046     OVS_REQUIRES(ofproto->vsp_mutex)
6047 {
6048     if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
6049         int vid = vlan_tci_to_vid(vlan_tci);
6050         const struct vlan_splinter *vsp;
6051
6052         HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
6053                                  hash_realdev_vid(realdev_ofp_port, vid),
6054                                  &ofproto->realdev_vid_map) {
6055             if (vsp->realdev_ofp_port == realdev_ofp_port
6056                 && vsp->vid == vid) {
6057                 return vsp->vlandev_ofp_port;
6058             }
6059         }
6060     }
6061     return realdev_ofp_port;
6062 }
6063
6064 /* Returns the OFP port number of the Linux VLAN device that corresponds to
6065  * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
6066  * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
6067  * 'vlan_tci' 9, it would return the port number of eth0.9.
6068  *
6069  * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
6070  * function just returns its 'realdev_ofp_port' argument. */
6071 ofp_port_t
6072 vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
6073                        ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
6074     OVS_EXCLUDED(ofproto->vsp_mutex)
6075 {
6076     ofp_port_t ret;
6077
6078     ovs_mutex_lock(&ofproto->vsp_mutex);
6079     ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
6080     ovs_mutex_unlock(&ofproto->vsp_mutex);
6081     return ret;
6082 }
6083
6084 static struct vlan_splinter *
6085 vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
6086 {
6087     struct vlan_splinter *vsp;
6088
6089     HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node,
6090                              hash_ofp_port(vlandev_ofp_port),
6091                              &ofproto->vlandev_map) {
6092         if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
6093             return vsp;
6094         }
6095     }
6096
6097     return NULL;
6098 }
6099
6100 /* Returns the OpenFlow port number of the "real" device underlying the Linux
6101  * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
6102  * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
6103  * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
6104  * eth0 and store 9 in '*vid'.
6105  *
6106  * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
6107  * VLAN device.  Unless VLAN splinters are enabled, this is what this function
6108  * always does. */
6109 static ofp_port_t
6110 vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
6111                        ofp_port_t vlandev_ofp_port, int *vid)
6112     OVS_REQUIRES(ofproto->vsp_mutex)
6113 {
6114     if (!hmap_is_empty(&ofproto->vlandev_map)) {
6115         const struct vlan_splinter *vsp;
6116
6117         vsp = vlandev_find(ofproto, vlandev_ofp_port);
6118         if (vsp) {
6119             if (vid) {
6120                 *vid = vsp->vid;
6121             }
6122             return vsp->realdev_ofp_port;
6123         }
6124     }
6125     return 0;
6126 }
6127
6128 /* Given 'flow', a flow representing a packet received on 'ofproto', checks
6129  * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
6130  * 'flow->in_port' to the "real" device backing the VLAN device, sets
6131  * 'flow->vlan_tci' to the VLAN VID, and returns true.  Otherwise (which is
6132  * always the case unless VLAN splinters are enabled), returns false without
6133  * making any changes. */
6134 bool
6135 vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow)
6136     OVS_EXCLUDED(ofproto->vsp_mutex)
6137 {
6138     ofp_port_t realdev;
6139     int vid;
6140
6141     ovs_mutex_lock(&ofproto->vsp_mutex);
6142     realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
6143     ovs_mutex_unlock(&ofproto->vsp_mutex);
6144     if (!realdev) {
6145         return false;
6146     }
6147
6148     /* Cause the flow to be processed as if it came in on the real device with
6149      * the VLAN device's VLAN ID. */
6150     flow->in_port.ofp_port = realdev;
6151     flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
6152     return true;
6153 }
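
/* Worked example (illustrative port numbers): suppose eth0 is OpenFlow
 * port 2 and the VLAN device eth0.9 is OpenFlow port 5 with vid 9.  For a
 * flow received with in_port.ofp_port == 5, vsp_adjust_flow() rewrites
 * in_port.ofp_port to 2 and sets vlan_tci to htons(9 | VLAN_CFI), so the
 * rest of the pipeline sees a VLAN-tagged packet on the real device. */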
6154
static void
vsp_remove(struct ofport_dpif *port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    struct vlan_splinter *vsp;

    ovs_mutex_lock(&ofproto->vsp_mutex);
    vsp = vlandev_find(ofproto, port->up.ofp_port);
    if (vsp) {
        hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
        hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
        free(vsp);

        port->realdev_ofp_port = 0;
    } else {
        VLOG_ERR("missing vlan device record");
    }
    ovs_mutex_unlock(&ofproto->vsp_mutex);
}

static void
vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);

    ovs_mutex_lock(&ofproto->vsp_mutex);
    if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
        && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
            == realdev_ofp_port)) {
        struct vlan_splinter *vsp;

        vsp = xmalloc(sizeof *vsp);
        vsp->realdev_ofp_port = realdev_ofp_port;
        vsp->vlandev_ofp_port = port->up.ofp_port;
        vsp->vid = vid;

        port->realdev_ofp_port = realdev_ofp_port;

        hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
                    hash_ofp_port(port->up.ofp_port));
        hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
                    hash_realdev_vid(realdev_ofp_port, vid));
    } else {
        VLOG_ERR("duplicate vlan device record");
    }
    ovs_mutex_unlock(&ofproto->vsp_mutex);
}

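/* Note on the splinter bookkeeping above: each 'struct vlan_splinter' is
 * indexed twice, so both translation directions are O(1) hash lookups:
 *
 *   - 'vlandev_map': vlandev_ofp_port -> vsp, used by vlandev_find() and
 *     hence vsp_vlandev_to_realdev() (e.g. eth0.9 -> eth0, VID 9).
 *
 *   - 'realdev_vid_map': (realdev_ofp_port, vid) -> vsp, used by
 *     vsp_realdev_to_vlandev__() for the reverse mapping (eth0, VID 9 ->
 *     eth0.9).
 *
 * The guard in vsp_add() relies on vsp_realdev_to_vlandev__() returning
 * 'realdev_ofp_port' itself when no VLAN device is registered for that
 * (realdev, vid) pair, so a new record is added only if neither map already
 * contains it.  vsp_add() and vsp_remove() must keep the two maps in sync,
 * which is why both run under 'vsp_mutex'. */
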
static odp_port_t
ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
{
    const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
    return ofport ? ofport->odp_port : ODPP_NONE;
}

struct ofport_dpif *
odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
{
    struct ofport_dpif *port;

    ovs_rwlock_rdlock(&backer->odp_to_ofport_lock);
    HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
                             &backer->odp_to_ofport_map) {
        if (port->odp_port == odp_port) {
            ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
            return port;
        }
    }

    ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
    return NULL;
}

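/* A minimal usage sketch for odp_port_to_ofport(), with a hypothetical
 * caller translating a datapath port number (here 'odp_in_port') back to the
 * ofport that owns it.  The function takes 'odp_to_ofport_lock' internally,
 * so the caller holds no lock:
 *
 *     struct ofport_dpif *port = odp_port_to_ofport(backer, odp_in_port);
 *     if (port) {
 *         ... e.g. find the owning bridge via port->up.ofproto ...
 *     }
 *
 * HMAP_FOR_EACH_IN_BUCKET visits every entry whose hash lands in the bucket,
 * so the explicit 'port->odp_port == odp_port' comparison above is needed to
 * skip hash collisions. */
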
static ofp_port_t
odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
{
    struct ofport_dpif *port;

    port = odp_port_to_ofport(ofproto->backer, odp_port);
    if (port && &ofproto->up == port->up.ofproto) {
        return port->up.ofp_port;
    } else {
        return OFPP_NONE;
    }
}

/* Compute exponentially weighted moving average, adding 'new' as the newest,
 * most heavily weighted element.  'base' designates the rate of decay: after
 * 'base' further updates, 'new''s weight in the EWMA decays to about 1/e
 * (about .37). */
static void
exp_mavg(double *avg, int base, double new)
{
    *avg = (*avg * (base - 1) + new) / base;
}

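/* Worked example of the decay claim above: each call scales the previous
 * average by (base - 1) / base, so after 'base' further updates a sample's
 * weight has been multiplied by ((base - 1) / base) ** base.  For base = 60
 * that is (59/60)^60, roughly 0.365, and as 'base' grows the limit is
 * 1/e, roughly 0.368, which is where the "about .37" figure comes from.
 * Concretely, with *avg == 10, base == 60, and new == 70, one update yields
 * (10 * 59 + 70) / 60 == 11. */
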
static void
update_moving_averages(struct dpif_backer *backer)
{
    const int min_ms = 60 * 1000; /* milliseconds in one minute. */
    long long int minutes = (time_msec() - backer->created) / min_ms;

    if (minutes > 0) {
        backer->lifetime.add_rate = (double) backer->total_subfacet_add_count
            / minutes;
        backer->lifetime.del_rate = (double) backer->total_subfacet_del_count
            / minutes;
    } else {
        backer->lifetime.add_rate = 0.0;
        backer->lifetime.del_rate = 0.0;
    }

    /* Update hourly averages on the minute boundaries. */
    if (time_msec() - backer->last_minute >= min_ms) {
        exp_mavg(&backer->hourly.add_rate, 60, backer->subfacet_add_count);
        exp_mavg(&backer->hourly.del_rate, 60, backer->subfacet_del_count);

        /* Update daily averages on the hour boundaries. */
        if ((backer->last_minute - backer->created) / min_ms % 60 == 59) {
            exp_mavg(&backer->daily.add_rate, 24, backer->hourly.add_rate);
            exp_mavg(&backer->daily.del_rate, 24, backer->hourly.del_rate);
        }

        backer->total_subfacet_add_count += backer->subfacet_add_count;
        backer->total_subfacet_del_count += backer->subfacet_del_count;
        backer->subfacet_add_count = 0;
        backer->subfacet_del_count = 0;
        backer->last_minute += min_ms;
    }
}

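/* Sketch of the averaging cascade above, reading the constants from the
 * code: once per minute the raw per-minute subfacet counters feed exp_mavg()
 * with base 60, so 'hourly' effectively smooths over the last hour; on each
 * hour boundary the hourly figure feeds exp_mavg() with base 24, so 'daily'
 * smooths over roughly a day.  Note that 'last_minute' advances by exactly
 * 'min_ms' per boundary rather than being reset to time_msec(), keeping the
 * minute boundaries aligned with 'created' instead of drifting. */
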
const struct ofproto_class ofproto_dpif_class = {
    init,
    enumerate_types,
    enumerate_names,
    del,
    port_open_type,
    type_run,
    type_run_fast,
    type_wait,
    alloc,
    construct,
    destruct,
    dealloc,
    run,
    run_fast,
    wait,
    get_memory_usage,
    flush,
    get_features,
    get_tables,
    port_alloc,
    port_construct,
    port_destruct,
    port_dealloc,
    port_modified,
    port_reconfigured,
    port_query_by_name,
    port_add,
    port_del,
    port_get_stats,
    port_dump_start,
    port_dump_next,
    port_dump_done,
    port_poll,
    port_poll_wait,
    port_is_lacp_current,
    NULL,                       /* rule_choose_table */
    rule_alloc,
    rule_construct,
    rule_destruct,
    rule_dealloc,
    rule_get_stats,
    rule_execute,
    rule_modify_actions,
    set_frag_handling,
    packet_out,
    set_netflow,
    get_netflow_ids,
    set_sflow,
    set_ipfix,
    set_cfm,
    get_cfm_status,
    set_bfd,
    get_bfd_status,
    set_stp,
    get_stp_status,
    set_stp_port,
    get_stp_port_status,
    set_queues,
    bundle_set,
    bundle_remove,
    mirror_set__,
    mirror_get_stats__,
    set_flood_vlans,
    is_mirror_output_bundle,
    forward_bpdu_changed,
    set_mac_table_config,
    set_realdev,
    NULL,                       /* meter_get_features */
    NULL,                       /* meter_set */
    NULL,                       /* meter_get */
    NULL,                       /* meter_del */
};