#include "dpif.h"
#include "dynamic-string.h"
#include "fail-open.h"
+#include "guarded-list.h"
#include "hmapx.h"
#include "lacp.h"
#include "learn.h"
struct flow_miss;
struct facet;
+struct rule_dpif {
+ struct rule up; /* Embedded generic rule (see rule_dpif_cast()). */
+
+ /* These statistics:
+ *
+ * - Do include packets and bytes from facets that have been deleted or
+ * whose own statistics have been folded into the rule.
+ *
+ * - Do include packets and bytes sent "by hand" that were accounted to
+ * the rule without any facet being involved (this is a rare corner
+ * case in rule_execute()).
+ *
+ * - Do not include packets or bytes that can be obtained from any facet's
+ * packet_count or byte_count member or that can be obtained from the
+ * datapath by, e.g., dpif_flow_get() for any subfacet.
+ */
+ struct ovs_mutex stats_mutex; /* Held while updating the counters below. */
+ uint64_t packet_count OVS_GUARDED; /* Number of packets received. */
+ uint64_t byte_count OVS_GUARDED; /* Number of bytes received. */
+};
+
static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);
+static struct rule_dpif *rule_dpif_cast(const struct rule *);
struct ofbundle {
struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
* Flow expiration works in terms of subfacets, so a facet must have at
* least one subfacet or it will never expire, leaking memory. */
struct facet {
- /* Owners. */
- struct hmap_node hmap_node; /* In owning ofproto's 'facets' hmap. */
+ /* Owner. */
struct ofproto_dpif *ofproto;
/* Owned data. */
struct classifier facets; /* Contains 'struct facet's. */
long long int consistency_rl;
- /* Support for debugging async flow mods. */
- struct list completions;
-
struct netdev_stats stats; /* To account packets generated and consumed in
* userspace. */
uint64_t n_missed;
/* Work queues. */
- struct ovs_mutex flow_mod_mutex;
- struct list flow_mods OVS_GUARDED;
- size_t n_flow_mods OVS_GUARDED;
-
- struct ovs_mutex pin_mutex;
- struct list pins OVS_GUARDED;
- size_t n_pins OVS_GUARDED;
+ struct guarded_list flow_mods; /* Contains "struct flow_mod"s. */
+ struct guarded_list pins; /* Contains "struct ofputil_packet_in"s. */
};
-/* Defer flow mod completion until "ovs-appctl ofproto/unclog"? (Useful only
- * for debugging the asynchronous flow_mod implementation.) */
-static bool clogged;
-
/* By default, flows in the datapath are wildcarded (megaflows). They
* may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */
static bool enable_megaflows = true;
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
struct ofputil_flow_mod *fm)
{
- ovs_mutex_lock(&ofproto->flow_mod_mutex);
- if (ofproto->n_flow_mods > 1024) {
- ovs_mutex_unlock(&ofproto->flow_mod_mutex);
+ if (!guarded_list_push_back(&ofproto->flow_mods, &fm->list_node, 1024)) {
COVERAGE_INC(flow_mod_overflow);
free(fm->ofpacts);
free(fm);
- return;
}
-
- list_push_back(&ofproto->flow_mods, &fm->list_node);
- ofproto->n_flow_mods++;
- ovs_mutex_unlock(&ofproto->flow_mod_mutex);
}
/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
struct ofputil_packet_in *pin)
{
- ovs_mutex_lock(&ofproto->pin_mutex);
- if (ofproto->n_pins > 1024) {
- ovs_mutex_unlock(&ofproto->pin_mutex);
+ if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
COVERAGE_INC(packet_in_overflow);
free(CONST_CAST(void *, pin->packet));
free(pin);
- return;
}
-
- list_push_back(&ofproto->pins, &pin->list_node);
- ofproto->n_pins++;
- ovs_mutex_unlock(&ofproto->pin_mutex);
}
\f
/* Factory functions. */
HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
int stp_port = ofport->stp_port
? stp_port_no(ofport->stp_port)
- : 0;
+ : -1;
xlate_ofport_set(ofproto, ofport->bundle, ofport,
ofport->up.ofp_port, ofport->odp_port,
ofport->up.netdev, ofport->cfm,
static int
dpif_backer_run_fast(struct dpif_backer *backer)
{
- udpif_run(backer->udpif);
handle_upcalls(backer);
return 0;
classifier_init(&ofproto->facets);
ofproto->consistency_rl = LLONG_MIN;
- list_init(&ofproto->completions);
-
- ovs_mutex_init(&ofproto->flow_mod_mutex);
- ovs_mutex_lock(&ofproto->flow_mod_mutex);
- list_init(&ofproto->flow_mods);
- ofproto->n_flow_mods = 0;
- ovs_mutex_unlock(&ofproto->flow_mod_mutex);
-
- ovs_mutex_init(&ofproto->pin_mutex);
- ovs_mutex_lock(&ofproto->pin_mutex);
- list_init(&ofproto->pins);
- ofproto->n_pins = 0;
- ovs_mutex_unlock(&ofproto->pin_mutex);
+ guarded_list_init(&ofproto->flow_mods);
+ guarded_list_init(&ofproto->pins);
ofproto_dpif_unixctl_init();
if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL,
rulep)) {
- ovs_rwlock_unlock(&(*rulep)->up.evict);
+ rule_dpif_unref(*rulep);
} else {
NOT_REACHED();
}
return error;
}
-static void
-complete_operations(struct ofproto_dpif *ofproto)
-{
- struct dpif_completion *c, *next;
-
- LIST_FOR_EACH_SAFE (c, next, list_node, &ofproto->completions) {
- ofoperation_complete(c->op, 0);
- list_remove(&c->list_node);
- free(c);
- }
-}
-
static void
destruct(struct ofproto *ofproto_)
{
struct rule_dpif *rule, *next_rule;
struct ofputil_packet_in *pin, *next_pin;
struct ofputil_flow_mod *fm, *next_fm;
+ struct facet *facet, *next_facet;
+ struct list flow_mods, pins;
+ struct cls_cursor cursor;
struct oftable *table;
+ ovs_rwlock_rdlock(&ofproto->facets.rwlock);
+ cls_cursor_init(&cursor, &ofproto->facets, NULL);
+ ovs_rwlock_unlock(&ofproto->facets.rwlock);
+ CLS_CURSOR_FOR_EACH_SAFE (facet, next_facet, cr, &cursor) {
+ facet_remove(facet);
+ }
+
ofproto->backer->need_revalidate = REV_RECONFIGURE;
ovs_rwlock_wrlock(&xlate_rwlock);
xlate_remove_ofproto(ofproto);
ovs_rwlock_unlock(&xlate_rwlock);
+ /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a
+ * use-after-free error. */
+ udpif_revalidate(ofproto->backer->udpif);
+
hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
- complete_operations(ofproto);
OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
struct cls_cursor cursor;
}
ovs_rwlock_unlock(&table->cls.rwlock);
}
- complete_operations(ofproto);
- ovs_mutex_lock(&ofproto->flow_mod_mutex);
- LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &ofproto->flow_mods) {
+ guarded_list_pop_all(&ofproto->flow_mods, &flow_mods);
+ LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &flow_mods) {
list_remove(&fm->list_node);
- ofproto->n_flow_mods--;
free(fm->ofpacts);
free(fm);
}
- ovs_mutex_unlock(&ofproto->flow_mod_mutex);
- ovs_mutex_destroy(&ofproto->flow_mod_mutex);
+ guarded_list_destroy(&ofproto->flow_mods);
- ovs_mutex_lock(&ofproto->pin_mutex);
- LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &ofproto->pins) {
+ guarded_list_pop_all(&ofproto->pins, &pins);
+ LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
list_remove(&pin->list_node);
- ofproto->n_pins--;
free(CONST_CAST(void *, pin->packet));
free(pin);
}
- ovs_mutex_unlock(&ofproto->pin_mutex);
- ovs_mutex_destroy(&ofproto->pin_mutex);
+ guarded_list_destroy(&ofproto->pins);
mbridge_unref(ofproto->mbridge);
return 0;
}
- ovs_mutex_lock(&ofproto->flow_mod_mutex);
- if (ofproto->n_flow_mods) {
- flow_mods = ofproto->flow_mods;
- list_moved(&flow_mods);
- list_init(&ofproto->flow_mods);
- ofproto->n_flow_mods = 0;
- } else {
- list_init(&flow_mods);
- }
- ovs_mutex_unlock(&ofproto->flow_mod_mutex);
-
+ guarded_list_pop_all(&ofproto->flow_mods, &flow_mods);
LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &flow_mods) {
int error = ofproto_flow_mod(&ofproto->up, fm);
if (error && !VLOG_DROP_WARN(&rl)) {
free(fm);
}
- ovs_mutex_lock(&ofproto->pin_mutex);
- if (ofproto->n_pins) {
- pins = ofproto->pins;
- list_moved(&pins);
- list_init(&ofproto->pins);
- ofproto->n_pins = 0;
- } else {
- list_init(&pins);
- }
- ovs_mutex_unlock(&ofproto->pin_mutex);
-
+ guarded_list_pop_all(&ofproto->pins, &pins);
LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
connmgr_send_packet_in(ofproto->up.connmgr, pin);
list_remove(&pin->list_node);
struct ofbundle *bundle;
int error;
- if (!clogged) {
- complete_operations(ofproto);
- }
-
if (mbridge_need_revalidate(ofproto->mbridge)) {
ofproto->backer->need_revalidate = REV_RECONFIGURE;
ovs_rwlock_wrlock(&ofproto->ml->rwlock);
struct ofport_dpif *ofport;
struct ofbundle *bundle;
- if (!clogged && !list_is_empty(&ofproto->completions)) {
- poll_immediate_wake();
- }
-
if (ofproto_get_flow_restore_wait()) {
return;
}
bundle_send_learning_packets(struct ofbundle *bundle)
{
struct ofproto_dpif *ofproto = bundle->ofproto;
+ struct ofpbuf *learning_packet;
int error, n_packets, n_errors;
struct mac_entry *e;
+ struct list packets;
- error = n_packets = n_errors = 0;
+ list_init(&packets);
ovs_rwlock_rdlock(&ofproto->ml->rwlock);
LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
if (e->port.p != bundle) {
- struct ofpbuf *learning_packet;
- struct ofport_dpif *port;
void *port_void;
- int ret;
- /* The assignment to "port" is unnecessary but makes "grep"ing for
- * struct ofport_dpif more effective. */
learning_packet = bond_compose_learning_packet(bundle->bond,
e->mac, e->vlan,
&port_void);
- port = port_void;
- ret = send_packet(port, learning_packet);
- ofpbuf_delete(learning_packet);
- if (ret) {
- error = ret;
- n_errors++;
- }
- n_packets++;
+ learning_packet->private_p = port_void;
+ list_push_back(&packets, &learning_packet->list_node);
}
}
ovs_rwlock_unlock(&ofproto->ml->rwlock);
+ error = n_packets = n_errors = 0;
+ LIST_FOR_EACH (learning_packet, list_node, &packets) {
+ int ret;
+
+ ret = send_packet(learning_packet->private_p, learning_packet);
+ if (ret) {
+ error = ret;
+ n_errors++;
+ }
+ n_packets++;
+ }
+ ofpbuf_list_delete(&packets);
+
if (n_errors) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
sset_find_and_delete(&ofproto->ghost_ports,
netdev_get_name(ofport->up.netdev));
ofproto->backer->need_revalidate = REV_RECONFIGURE;
- if (!ofport->is_tunnel) {
+ if (!ofport->is_tunnel && !netdev_vport_is_patch(ofport->up.netdev)) {
error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port);
if (!error) {
/* The caller is going to close ofport->up.netdev. If this is a
* pass made by update_stats(), because the former function never looks at
* uninstallable subfacets.
*/
- enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) };
+ enum { BUCKET_WIDTH = 100 };
enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
int buckets[N_BUCKETS] = { 0 };
int total, subtotal, bucket;
long long int now;
uint8_t reason;
- if (rule->up.pending) {
- /* We'll have to expire it later. */
- return;
- }
+ ovs_assert(!rule->up.pending);
ovs_mutex_lock(&rule->up.timeout_mutex);
hard_timeout = rule->up.hard_timeout;
bool is_controller;
rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule);
- ofpacts_len = rule->up.ofpacts_len;
- ofpacts = rule->up.ofpacts;
+ ofpacts_len = rule->up.actions->ofpacts_len;
+ ofpacts = rule->up.actions->ofpacts;
is_controller = ofpacts_len > 0
&& ofpacts->type == OFPACT_CONTROLLER
&& ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len);
- rule_release(rule);
+ rule_dpif_unref(rule);
+
return is_controller;
}
return false;
rule_dpif_lookup(facet->ofproto, &facet->flow, NULL, &rule);
xlate_in_init(&xin, facet->ofproto, &facet->flow, rule, 0, NULL);
xlate_actions(&xin, &xout);
- rule_release(rule);
+ rule_dpif_unref(rule);
ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)
&& facet->xout.slow == xout.slow;
|| memcmp(&facet->xout.wc, &xout.wc, sizeof xout.wc)) {
facet_remove(facet);
xlate_out_uninit(&xout);
- rule_release(new_rule);
+ rule_dpif_unref(new_rule);
return false;
}
facet->used = MAX(facet->used, new_rule->up.created);
xlate_out_uninit(&xout);
- rule_release(new_rule);
+ rule_dpif_unref(new_rule);
return true;
}
}
rule_dpif_lookup(ofproto, flow, NULL, &rule);
- rule_credit_stats(rule, stats);
+ rule_dpif_credit_stats(rule, stats);
xlate_in_init(&xin, ofproto, flow, rule, stats->tcp_flags, NULL);
xin.resubmit_stats = stats;
xin.may_learn = may_learn;
xlate_actions_for_side_effects(&xin);
- rule_release(rule);
+ rule_dpif_unref(rule);
}
static void
}
void
-rule_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats)
+rule_dpif_credit_stats(struct rule_dpif *rule,
+ const struct dpif_flow_stats *stats) /* Folds 'stats' into 'rule'. */
{
ovs_mutex_lock(&rule->stats_mutex);
rule->packet_count += stats->n_packets;
rule->byte_count += stats->n_bytes;
- ofproto_rule_update_used(&rule->up, stats->used);
+ rule->up.used = MAX(rule->up.used, stats->used); /* Never moves 'used' back. */
ovs_mutex_unlock(&rule->stats_mutex);
}
+/* Returns true if the priority of 'rule''s classifier rule equals
+ * FAIL_OPEN_PRIORITY, false otherwise. */
+bool
+rule_dpif_fail_open(const struct rule_dpif *rule)
+{
+ return rule->up.cr.priority == FAIL_OPEN_PRIORITY;
+}
+/* Returns the 'flow_cookie' member of the struct rule embedded in 'rule'. */
+ovs_be64
+rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
+{
+ return rule->up.flow_cookie;
+}
+/* Passes 'idle_timeout' and 'hard_timeout' to ofproto_rule_reduce_timeouts()
+ * for the struct rule embedded in 'rule'.
+ *
+ * NOTE(review): presumably each timeout can only be lowered, never raised, by
+ * that call -- confirm against ofproto_rule_reduce_timeouts(). */
+void
+rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
+ uint16_t hard_timeout)
+{
+ ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
+}
+
+/* Returns 'rule''s actions. The caller owns a reference on the returned
+ * actions and must eventually release it (with rule_actions_unref()) to avoid
+ * a memory leak.
+ *
+ * The 'actions' pointer is read and referenced while holding a read lock on
+ * 'rule->up.rwlock'. NOTE(review): presumably writers replace 'actions' under
+ * the corresponding write lock -- confirm against the code that modifies
+ * rules. */
+struct rule_actions *
+rule_dpif_get_actions(const struct rule_dpif *rule)
+{
+ struct rule_actions *actions;
+
+ ovs_rwlock_rdlock(&rule->up.rwlock);
+ actions = rule->up.actions;
+ rule_actions_ref(actions);
+ ovs_rwlock_unlock(&rule->up.rwlock);
+
+ return actions;
+}
\f
/* Subfacets. */
flow->in_port.ofp_port);
}
- *rule = choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
- ofproto->no_packet_in_rule);
- ovs_rwlock_rdlock(&(*rule)->up.evict);
+ choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
+ ofproto->no_packet_in_rule, rule);
}
bool
rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
const struct flow *flow, struct flow_wildcards *wc,
uint8_t table_id, struct rule_dpif **rule)
- OVS_TRY_RDLOCK(true, (*rule)->up.evict)
{
struct cls_rule *cls_rule;
struct classifier *cls;
}
*rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
- if (*rule && ovs_rwlock_tryrdlock(&(*rule)->up.evict)) {
- /* The rule is in the process of being removed. Best we can do is
- * pretend it isn't there. */
- *rule = NULL;
- }
+ rule_dpif_ref(*rule);
ovs_rwlock_unlock(&cls->rwlock);
return *rule != NULL;
/* Given a port configuration (specified as zero if there's no port), chooses
* which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
* flow table miss. */
-struct rule_dpif *
+void
choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
- struct rule_dpif *no_packet_in_rule)
+ struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule)
+{
+ *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
+ rule_dpif_ref(*rule); /* The chosen rule is stored in '*rule' with a
+ * reference taken on it; the caller drops that
+ * reference with rule_dpif_unref(). */
+}
+/* Takes a reference on the struct rule embedded in 'rule' by calling
+ * ofproto_rule_ref(). Does nothing if 'rule' is null. */
+void
+rule_dpif_ref(struct rule_dpif *rule)
{
- return config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
+ if (rule) {
+ ofproto_rule_ref(&rule->up);
+ }
}
void
-rule_release(struct rule_dpif *rule)
- OVS_NO_THREAD_SAFETY_ANALYSIS
+rule_dpif_unref(struct rule_dpif *rule) /* Does nothing if 'rule' is null. */
{
if (rule) {
- ovs_rwlock_unlock(&rule->up.evict);
+ ofproto_rule_unref(&rule->up); /* Pairs with rule_dpif_ref(). */
}
}
struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
ofproto->backer->need_revalidate = REV_FLOW_TABLE;
- if (clogged) {
- struct dpif_completion *c = xmalloc(sizeof *c);
- c->op = rule->up.pending;
- list_push_back(&ofproto->completions, &c->list_node);
- } else {
- ofoperation_complete(rule->up.pending, 0);
- }
+ ofoperation_complete(rule->up.pending, 0);
+}
+/* Converts generic 'rule' into the struct rule_dpif that embeds it as its
+ * 'up' member. A null 'rule' yields NULL. */
+static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
+{
+ return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
}
static struct rule *
struct xlate_in xin;
dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
- rule_credit_stats(rule, &stats);
+ rule_dpif_credit_stats(rule, &stats);
xlate_in_init(&xin, ofproto, flow, rule, stats.tcp_flags, packet);
xin.resubmit_stats = &stats;
ds_put_char_multiple(result, '\t', level);
ds_put_cstr(result, "OpenFlow ");
- ofpacts_format(rule->up.ofpacts, rule->up.ofpacts_len, result);
+ ofpacts_format(rule->up.actions->ofpacts, rule->up.actions->ofpacts_len,
+ result);
ds_put_char(result, '\n');
}
xlate_out_uninit(&trace.xout);
}
- rule_release(rule);
-}
-
-static void
-ofproto_dpif_clog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
- const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
-{
- clogged = true;
- unixctl_command_reply(conn, NULL);
-}
-
-static void
-ofproto_dpif_unclog(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
- const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
-{
- clogged = false;
- unixctl_command_reply(conn, NULL);
+ rule_dpif_unref(rule);
}
/* Runs a self-check of flow translations in 'ofproto'. Appends a message to
ofproto_unixctl_fdb_flush, NULL);
unixctl_command_register("fdb/show", "bridge", 1, 1,
ofproto_unixctl_fdb_show, NULL);
- unixctl_command_register("ofproto/clog", "", 0, 0,
- ofproto_dpif_clog, NULL);
- unixctl_command_register("ofproto/unclog", "", 0, 0,
- ofproto_dpif_unclog, NULL);
unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
ofproto_dpif_self_check, NULL);
unixctl_command_register("dpif/dump-dps", "", 0, 0,
NULL, /* meter_set */
NULL, /* meter_get */
NULL, /* meter_del */
+ NULL, /* group_alloc */
+ NULL, /* group_construct */
+ NULL, /* group_destruct */
+ NULL, /* group_dealloc */
+ NULL, /* group_modify */
+ NULL, /* group_get_stats */
};