/*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
#include <config.h>
#include "mac-learning.h"
-#include <assert.h>
#include <inttypes.h>
#include <stdlib.h>
+#include "bitmap.h"
#include "coverage.h"
#include "hash.h"
#include "list.h"
#include "poll-loop.h"
#include "tag.h"
#include "timeval.h"
+#include "unaligned.h"
#include "util.h"
-
-#define THIS_MODULE VLM_mac_learning
+#include "vlan-bitmap.h"
#include "vlog.h"
/* Sizing constants of the fixed-size table that this change removes:
 * MAC_HASH_SIZE (1 << MAC_HASH_BITS) is the number of hash buckets and
 * MAC_MAX is the size of the preallocated entry array. */
-#define MAC_HASH_BITS 10
-#define MAC_HASH_MASK (MAC_HASH_SIZE - 1)
-#define MAC_HASH_SIZE (1u << MAC_HASH_BITS)
-
-#define MAC_MAX 1024
-
/* Former entry layout, removed by this change.  The replacement code in this
 * file accesses 'hmap_node' and 'grat_arp_lock' members that this definition
 * lacks, so the current definition presumably comes from "mac-learning.h" --
 * TODO confirm. */
-/* A MAC learning table entry. */
-struct mac_entry {
- struct list hash_node; /* Element in a mac_learning 'table' list. */
- struct list lru_node; /* Element in 'lrus' or 'free' list. */
- time_t expires; /* Expiration time. */
- uint8_t mac[ETH_ADDR_LEN]; /* Known MAC address. */
- uint16_t vlan; /* VLAN tag. */
- int port; /* Port on which MAC was most recently seen. */
- tag_type tag; /* Tag for this learning entry. */
-};
-
/* Former table layout, removed by this change: a fixed array of hash buckets
 * plus a preallocated pool of entries threaded onto 'free' and 'lrus'. */
-/* MAC learning table. */
-struct mac_learning {
- struct list free; /* Not-in-use entries. */
- struct list lrus; /* In-use entries, least recently used at the
- front, most recently used at the back. */
- struct list table[MAC_HASH_SIZE]; /* Hash table. */
- struct mac_entry entries[MAC_MAX]; /* All entries. */
- uint32_t secret; /* Secret for unknown-MAC tag hashing. */
-};
/* Logging module declaration for this file; it takes the place of the removed
 * '#define THIS_MODULE VLM_mac_learning'. */
+VLOG_DEFINE_THIS_MODULE(mac_learning);
+
/* Coverage counters: 'mac_learning_learned' is incremented in
 * mac_learning_changed() and 'mac_learning_expired' in mac_learning_run(). */
+COVERAGE_DEFINE(mac_learning_learned);
+COVERAGE_DEFINE(mac_learning_expired);
+
+/* Returns the number of seconds since 'e' (within 'ml') was last learned. */
+int
+mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
+{
/* mac_learning_insert() sets 'expires' to time_now() + ml->idle_time, so the
 * timeout minus the time still remaining is the time already elapsed. */
+ time_t remaining = e->expires - time_now();
+ return ml->idle_time - remaining;
+}
/* Returns a hash of 'mac' and 'vlan' that is seeded with 'ml''s secret.
 *
 * The first four bytes of 'mac' form one hash word.  The last two bytes of
 * 'mac' share a second word with 'vlan', which occupies the upper 16 bits. */
static uint32_t
-mac_table_hash(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
+mac_table_hash(const struct mac_learning *ml, const uint8_t mac[ETH_ADDR_LEN],
+ uint16_t vlan)
{
- return hash_bytes(mac, ETH_ADDR_LEN, vlan);
+ unsigned int mac1 = get_unaligned_u32((uint32_t *) mac);
+ unsigned int mac2 = get_unaligned_u16((uint16_t *) (mac + 4));
/* 'vlan' is widened to uint32_t before shifting: a bare 'vlan << 16' promotes
 * to signed int and overflows (undefined behavior) for values >= 0x8000. */
+ return hash_3words(mac1, mac2 | ((uint32_t) vlan << 16), ml->secret);
}
/* Returns the deterministic tag that stands for "no entry learned" for 'mac'
 * on 'vlan' in 'ml', derived from mac_table_hash().
 *
 * NOTE(review): the return type on the next line looks like leftover context
 * from a definition elided from this excerpt; callers in this file store the
 * result in a tag_type -- confirm against the full file. */
static struct mac_entry *
make_unknown_mac_tag(const struct mac_learning *ml,
const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
{
- uint32_t h = hash_int(ml->secret, mac_table_hash(mac, vlan));
- return tag_create_deterministic(h);
-}
-
/* Removed helper mac_table_bucket().  Note that it masked the hash with
 * MAC_HASH_BITS (the bit count, 10) rather than MAC_HASH_MASK, so only bucket
 * indexes 0, 2, 8 and 10 could ever be selected. */
-static struct list *
-mac_table_bucket(const struct mac_learning *ml,
- const uint8_t mac[ETH_ADDR_LEN],
- uint16_t vlan)
-{
- uint32_t hash = mac_table_hash(mac, vlan);
- const struct list *list = &ml->table[hash & MAC_HASH_BITS];
- return (struct list *) list;
+ return tag_create_deterministic(mac_table_hash(ml, mac, vlan));
}
/* Returns the entry in 'ml' whose VLAN equals 'vlan' and whose MAC equals
 * 'mac', searching only the entries that share their hash.
 *
 * NOTE(review): the no-match return path is not visible in this excerpt, and
 * the trailing closing braces appear to belong to elided context. */
static struct mac_entry *
-search_bucket(struct list *bucket, const uint8_t mac[ETH_ADDR_LEN],
- uint16_t vlan)
+mac_entry_lookup(const struct mac_learning *ml,
+ const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
{
struct mac_entry *e;
- LIST_FOR_EACH (e, struct mac_entry, hash_node, bucket) {
- if (eth_addr_equals(e->mac, mac) && e->vlan == vlan) {
+
+ HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan),
+ &ml->table) {
/* The VLAN is compared first, before the MAC address comparison. */
+ if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) {
return e;
}
}
}
}
-/* Removes 'e' from the 'ml' hash table. 'e' must not already be on the free
- * list. */
-static void
-free_mac_entry(struct mac_learning *ml, struct mac_entry *e)
/* Returns 'idle_time' clamped into the range 15...3600: smaller values yield
 * 15, larger values yield 3600, anything in between is returned unchanged. */
+static unsigned int
+normalize_idle_time(unsigned int idle_time)
{
- list_remove(&e->hash_node);
- list_remove(&e->lru_node);
- list_push_front(&ml->free, &e->lru_node);
+ return (idle_time < 15 ? 15
+ : idle_time > 3600 ? 3600
+ : idle_time);
}
-/* Creates and returns a new MAC learning table. */
+/* Creates and returns a new MAC learning table with an initial MAC aging
+ * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
+ * entries.
+ *
+ * 'idle_time' is passed through normalize_idle_time() before it is stored.
+ * The table starts with a reference count of 1, no flood VLANs, an empty
+ * hash table and LRU list, and a random hash secret. */
struct mac_learning *
-mac_learning_create(void)
+mac_learning_create(unsigned int idle_time)
{
struct mac_learning *ml;
- int i;
ml = xmalloc(sizeof *ml);
list_init(&ml->lrus);
- list_init(&ml->free);
- for (i = 0; i < MAC_HASH_SIZE; i++) {
- list_init(&ml->table[i]);
- }
- for (i = 0; i < MAC_MAX; i++) {
- struct mac_entry *s = &ml->entries[i];
- list_push_front(&ml->free, &s->lru_node);
- }
+ hmap_init(&ml->table);
ml->secret = random_uint32();
/* A null 'flood_vlans' is treated by is_learning_vlan() as "learn on every
 * VLAN". */
+ ml->flood_vlans = NULL;
+ ml->idle_time = normalize_idle_time(idle_time);
+ ml->max_entries = MAC_DEFAULT_MAX;
+ tag_set_init(&ml->tags);
/* The initial reference; mac_learning_unref() frees the table once the count
 * reaches zero. */
+ ml->ref_cnt = 1;
+ return ml;
+}
+
/* Takes an additional reference to 'ml_' and returns it as a non-const
 * pointer.  A null 'ml_' is accepted and returned unchanged.  Asserts that a
 * nonnull table still has a positive reference count. */
+struct mac_learning *
+mac_learning_ref(const struct mac_learning *ml_)
+{
/* The const qualifier is dropped so that the reference count can be
 * updated through a pointer the caller holds as const. */
+ struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_);
+ if (ml) {
+ ovs_assert(ml->ref_cnt > 0);
+ ml->ref_cnt++;
+ }
return ml;
}
-/* Destroys MAC learning table 'ml'. */
+/* Unreferences (and possibly destroys) MAC learning table 'ml'.
+ *
+ * A null 'ml' is ignored.  When the last reference is dropped, every entry
+ * in the hash table, the flood-VLAN bitmap, and 'ml' itself are freed. */
void
-mac_learning_destroy(struct mac_learning *ml)
+mac_learning_unref(struct mac_learning *ml)
{
- free(ml);
+ if (!ml) {
+ return;
+ }
+
+ ovs_assert(ml->ref_cnt > 0);
+ if (!--ml->ref_cnt) {
+ struct mac_entry *e, *next;
+
/* The _SAFE iteration variant is used because each entry is removed and freed
 * inside the loop body.  Entries are not unlinked from 'lrus'; the list head
 * lives in 'ml', which is freed below. */
+ HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) {
+ hmap_remove(&ml->table, &e->hmap_node);
+ free(e);
+ }
+ hmap_destroy(&ml->table);
+
+ bitmap_free(ml->flood_vlans);
+ free(ml);
+ }
}
-/* Attempts to make 'ml' learn from the fact that a frame from 'src_mac' was
- * just observed arriving from 'src_port' on the given 'vlan'.
- *
- * Returns nonzero if we actually learned something from this, zero if it just
- * confirms what we already knew. The nonzero return value is the tag of flows
- * that now need revalidation.
- *
- * The 'vlan' parameter is used to maintain separate per-VLAN learning tables.
- * Specify 0 if this behavior is undesirable. */
-tag_type
-mac_learning_learn(struct mac_learning *ml,
- const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan,
- uint16_t src_port)
+/* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
+ * which all packets are flooded. Returns true if the set has changed from the
+ * previous value.
+ *
+ * 'ml' stores the result of vlan_bitmap_clone(), not 'bitmap' itself, and
+ * frees the bitmap it held before. */
+bool
+mac_learning_set_flood_vlans(struct mac_learning *ml,
+ const unsigned long *bitmap)
{
- struct mac_entry *e;
- struct list *bucket;
+ if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) {
+ return false;
+ } else {
/* Existing learned entries are left in place here; only the bitmap is
 * replaced. */
+ bitmap_free(ml->flood_vlans);
+ ml->flood_vlans = vlan_bitmap_clone(bitmap);
+ return true;
+ }
+}
+
+/* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds.
+ *
+ * 'idle_time' is first clamped by normalize_idle_time().  If the clamped
+ * value differs from the current timeout, the expiration time of every
+ * entry on the LRU list is shifted by the difference. */
+void
+mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time)
+{
+ idle_time = normalize_idle_time(idle_time);
+ if (idle_time != ml->idle_time) {
+ struct mac_entry *e;
+ int delta;
- if (eth_addr_is_multicast(src_mac)) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 30);
- VLOG_DBG_RL(&rl, "multicast packet source "ETH_ADDR_FMT,
- ETH_ADDR_ARGS(src_mac));
- return 0;
/* Shifting 'expires' by the same amount as 'idle_time' keeps the value that
 * mac_entry_age() computes for each entry unchanged.  'delta' may be
 * negative, hence the signed arithmetic. */
+ delta = (int) idle_time - (int) ml->idle_time;
+ LIST_FOR_EACH (e, lru_node, &ml->lrus) {
+ e->expires += delta;
+ }
+ ml->idle_time = idle_time;
}
+}
+
+/* Sets the maximum number of entries in 'ml' to 'max_entries', clamping it
+ * to the range 10...1,000,000.
+ *
+ * No entries are removed here; mac_learning_run() expires entries while the
+ * table holds more than the maximum. */
+void
+mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries)
+{
+ ml->max_entries = (max_entries < 10 ? 10
+ : max_entries > 1000 * 1000 ? 1000 * 1000
+ : max_entries);
+}
+
/* Returns true if MAC learning applies to 'vlan' in 'ml', that is, unless
 * 'vlan' is set in 'ml''s flood-VLAN bitmap.  A null bitmap means that no
 * VLAN is flooded.
 *
 * NOTE(review): 'vlan' is used directly as the bit index; this assumes the
 * bitmap covers every value callers pass -- confirm against callers. */
+static bool
+is_learning_vlan(const struct mac_learning *ml, uint16_t vlan)
+{
+ return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan);
+}
+
+/* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
+ * Returns false if 'ml' is NULL, if 'src_mac' is a multicast address (and
+ * therefore not valid for learning), or if 'vlan' is configured on 'ml' to
+ * flood all packets. */
+bool
+mac_learning_may_learn(const struct mac_learning *ml,
+ const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan)
+{
/* The null check comes first so that is_learning_vlan() never dereferences a
 * null 'ml'. */
+ return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac);
+}
- bucket = mac_table_bucket(ml, src_mac, vlan);
- e = search_bucket(bucket, src_mac, vlan);
+/* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
+ * inserting a new entry if necessary. The caller must have already verified,
+ * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
+ * learnable.
+ *
+ * If the returned MAC entry is new (as may be determined by calling
+ * mac_entry_is_new()), then the caller must pass the new entry to
+ * mac_learning_changed(). The caller must also initialize the new entry's
+ * 'port' member. Otherwise calling those functions is at the caller's
+ * discretion.
+ *
+ * In either case the returned entry is moved to the back of the LRU list
+ * and its expiration is reset to 'ml''s idle time from now. */
+struct mac_entry *
+mac_learning_insert(struct mac_learning *ml,
+ const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan)
+{
+ struct mac_entry *e;
+
+ e = mac_entry_lookup(ml, src_mac, vlan);
if (!e) {
- if (!list_is_empty(&ml->free)) {
- e = mac_entry_from_lru_node(ml->free.next);
- } else {
- e = mac_entry_from_lru_node(ml->lrus.next);
- list_remove(&e->hash_node);
+ uint32_t hash = mac_table_hash(ml, src_mac, vlan);
+
/* The table is full: drop one existing entry to make room.  get_lru() is not
 * visible in this excerpt; presumably it yields the entry at the front of
 * 'lrus'.
 *
 * NOTE(review): unlike mac_learning_run(), the evicted entry's tag is not
 * recorded for revalidation here -- confirm that this is intended. */
+ if (hmap_count(&ml->table) >= ml->max_entries) {
+ get_lru(ml, &e);
+ mac_learning_expire(ml, e);
}
+
+ e = xmalloc(sizeof *e);
+ hmap_insert(&ml->table, &e->hmap_node, hash);
memcpy(e->mac, src_mac, ETH_ADDR_LEN);
- list_push_front(bucket, &e->hash_node);
- e->port = -1;
e->vlan = vlan;
- e->tag = make_unknown_mac_tag(ml, src_mac, vlan);
/* A zero tag marks an entry that has not yet been through
 * mac_learning_changed(), which substitutes the unknown-MAC tag for it.
 * 'port' is deliberately left for the caller to initialize. */
+ e->tag = 0;
+ e->grat_arp_lock = TIME_MIN;
+ } else {
/* Existing entry: unlink it from its current LRU position so that it can be
 * appended at the back below. */
+ list_remove(&e->lru_node);
}
- /* Make the entry most-recently-used. */
- list_remove(&e->lru_node);
+ /* Mark 'e' as recently used. */
list_push_back(&ml->lrus, &e->lru_node);
- e->expires = time_now() + 60;
-
- /* Did we learn something? */
- if (e->port != src_port) {
- tag_type old_tag = e->tag;
- e->port = src_port;
- e->tag = tag_create_random();
- COVERAGE_INC(mac_learning_learned);
- return old_tag;
- }
- return 0;
+ e->expires = time_now() + ml->idle_time;
+
+ return e;
}
-/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a
- * frame destined for 'dst' should be sent, -1 if unknown. */
-int
-mac_learning_lookup(const struct mac_learning *ml,
- const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan)
+/* Changes 'e''s tag to a new, randomly selected one. Causes
+ * mac_learning_run() to flag for revalidation the tag that would have been
+ * previously used for this entry's MAC and VLAN (either before 'e' was
+ * inserted, if it is new, or otherwise before its port was updated.)
+ *
+ * The client should call this function after obtaining a MAC learning entry
+ * from mac_learning_insert(), if the entry is either new or if its learned
+ * port has changed. */
+void
+mac_learning_changed(struct mac_learning *ml, struct mac_entry *e)
{
- tag_type tag = 0;
- return mac_learning_lookup_tag(ml, dst, vlan, &tag);
/* The tag to invalidate: the entry's previous tag, or, when that is still the
 * zero assigned by mac_learning_insert(), the deterministic unknown-MAC tag
 * that mac_learning_lookup() hands out when no entry exists. */
+ tag_type tag = e->tag ? e->tag : make_unknown_mac_tag(ml, e->mac, e->vlan);
+
+ COVERAGE_INC(mac_learning_learned);
+
+ e->tag = tag_create_random();
/* Queued in 'ml->tags'; mac_learning_run() later merges that set into the
 * caller-supplied tag set. */
+ tag_set_add(&ml->tags, tag);
}
-/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a
- * frame destined for 'dst' should be sent, -1 if unknown.
- *
- * Adds to '*tag' (which the caller must have initialized) the tag that should
- * be attached to any flow created based on the return value, if any, to allow
- * those flows to be revalidated when the MAC learning entry changes. */
-int
-mac_learning_lookup_tag(const struct mac_learning *ml,
- const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan,
- tag_type *tag)
+/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
+ * learning entry, if any. If 'tag' is nonnull, then the tag that associates
+ * 'dst' and 'vlan' with its currently learned port will be OR'd into
+ * '*tag'.
+ *
+ * Returns NULL, leaving '*tag' untouched, if 'dst' is a multicast address or
+ * if 'vlan' is a flood VLAN.  If no entry is found, the deterministic
+ * "unknown MAC" tag is OR'd into '*tag' instead. */
+struct mac_entry *
+mac_learning_lookup(const struct mac_learning *ml,
+ const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan,
+ tag_type *tag)
{
if (eth_addr_is_multicast(dst)) {
- return -1;
+ /* No tag because the treatment of multicast destinations never
+ * changes. */
+ return NULL;
+ } else if (!is_learning_vlan(ml, vlan)) {
+ /* We don't tag this property. The set of learning VLANs changes so
+ * rarely that we revalidate every flow when it changes. */
+ return NULL;
} else {
- struct mac_entry *e = search_bucket(mac_table_bucket(ml, dst, vlan),
- dst, vlan);
- if (e) {
- *tag |= e->tag;
- return e->port;
- } else {
- *tag |= make_unknown_mac_tag(ml, dst, vlan);
- return -1;
+ struct mac_entry *e = mac_entry_lookup(ml, dst, vlan);
+
/* mac_learning_insert() gives new entries a zero tag, so this assertion
 * checks that any entry found has since been through
 * mac_learning_changed(). */
+ ovs_assert(e == NULL || e->tag != 0);
+ if (tag) {
+ /* Tag either the learned port or the lack thereof. */
+ *tag |= e ? e->tag : make_unknown_mac_tag(ml, dst, vlan);
}
+ return e;
}
}
-/* Expires all the mac-learning entries in 'ml'. The tags in 'ml' are
- * discarded, so the client is responsible for revalidating any flows that
- * depend on 'ml', if necessary. */
+/* Expires 'e' from the 'ml' hash table: unlinks it from the hash table and
+ * from the LRU list, then frees it.  'e' must not be used afterward.  The
+ * entry's tag is not recorded anywhere by this function. */
void
-mac_learning_flush(struct mac_learning *ml)
+mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
+{
+ hmap_remove(&ml->table, &e->hmap_node);
+ list_remove(&e->lru_node);
+ free(e);
+}
+
+/* Expires all the mac-learning entries in 'ml'. If not NULL, the tags in 'ml'
+ * are added to 'tags'. Otherwise the tags in 'ml' are discarded. The client
+ * is responsible for revalidating any flows that depend on 'ml', if
+ * necessary. */
+void
+mac_learning_flush(struct mac_learning *ml, struct tag_set *tags)
{
struct mac_entry *e;
/* get_lru() is defined outside this excerpt; presumably it stores an entry
 * from 'lrus' in 'e' and returns false once none remain -- TODO confirm. */
while (get_lru(ml, &e)){
- free_mac_entry(ml, e);
+ if (tags) {
+ tag_set_add(tags, e->tag);
+ }
+ mac_learning_expire(ml, e);
}
/* The table has just been emptied, so it is shrunk afterward. */
+ hmap_shrink(&ml->table);
}
/* Performs periodic work on 'ml'.
 *
 * First, the tags queued in 'ml' by mac_learning_changed() are merged into
 * 'set' (if 'set' is nonnull) and 'ml''s own tag set is reinitialized.  Then
 * entries are expired for as long as get_lru() yields one and either the
 * table holds more than 'max_entries' entries or that entry's expiration
 * time has been reached.  The tag of each expired entry is also added to
 * 'set' if it is nonnull. */
void
mac_learning_run(struct mac_learning *ml, struct tag_set *set)
{
struct mac_entry *e;
- while (get_lru(ml, &e) && time_now() >= e->expires) {
+
+ if (set) {
+ tag_set_union(set, &ml->tags);
+ }
+ tag_set_init(&ml->tags);
+
+ while (get_lru(ml, &e)
+ && (hmap_count(&ml->table) > ml->max_entries
+ || time_now() >= e->expires)) {
COVERAGE_INC(mac_learning_expired);
if (set) {
tag_set_add(set, e->tag);
}
- free_mac_entry(ml, e);
+ mac_learning_expire(ml, e);
}
}
/* Arranges for the poll loop to wake up when mac_learning_run() has work to
 * do on 'ml': immediately if the table exceeds its entry limit or if tags are
 * queued in 'ml', otherwise at the expiration time of the entry at the front
 * of the LRU list, if there is one. */
void
mac_learning_wait(struct mac_learning *ml)
{
- if (!list_is_empty(&ml->lrus)) {
+ if (hmap_count(&ml->table) > ml->max_entries
+ || !tag_set_is_empty(&ml->tags)) {
+ poll_immediate_wake();
+ } else if (!list_is_empty(&ml->lrus)) {
struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next);
- poll_timer_wait((e->expires - time_now()) * 1000);
/* The 1000LL factor makes the multiplication happen in (at least) long long;
 * presumably it converts 'expires' from seconds to milliseconds. */
+ poll_timer_wait_until(e->expires * 1000LL);
}
}