X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fmac-learning.c;h=7dcce4154fe06306b71580260d782d17c6b0c67c;hb=HEAD;hp=f03668082c8508d2bdf321b4485546a95e80ecd7;hpb=064af42167bf4fc9aaea2702d80ce08074b889c0;p=sliver-openvswitch.git diff --git a/lib/mac-learning.c b/lib/mac-learning.c index f03668082..7dcce4154 100644 --- a/lib/mac-learning.c +++ b/lib/mac-learning.c @@ -1,68 +1,51 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ #include #include "mac-learning.h" -#include #include #include +#include "bitmap.h" #include "coverage.h" #include "hash.h" #include "list.h" #include "poll-loop.h" -#include "tag.h" #include "timeval.h" +#include "unaligned.h" #include "util.h" +#include "vlan-bitmap.h" + +COVERAGE_DEFINE(mac_learning_learned); +COVERAGE_DEFINE(mac_learning_expired); -#define THIS_MODULE VLM_mac_learning -#include "vlog.h" - -#define MAC_HASH_BITS 10 -#define MAC_HASH_MASK (MAC_HASH_SIZE - 1) -#define MAC_HASH_SIZE (1u << MAC_HASH_BITS) - -#define MAC_MAX 1024 - -/* A MAC learning table entry. */ -struct mac_entry { - struct list hash_node; /* Element in a mac_learning 'table' list. */ - struct list lru_node; /* Element in 'lrus' or 'free' list. */ - time_t expires; /* Expiration time. */ - uint8_t mac[ETH_ADDR_LEN]; /* Known MAC address. */ - uint16_t vlan; /* VLAN tag. */ - int port; /* Port on which MAC was most recently seen. */ - tag_type tag; /* Tag for this learning entry. */ -}; - -/* MAC learning table. */ -struct mac_learning { - struct list free; /* Not-in-use entries. */ - struct list lrus; /* In-use entries, least recently used at the - front, most recently used at the back. */ - struct list table[MAC_HASH_SIZE]; /* Hash table. */ - struct mac_entry entries[MAC_MAX]; /* All entries. */ - uint32_t secret; /* Secret for */ -}; +/* Returns the number of seconds since 'e' (within 'ml') was last learned. */ +int +mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e) +{ + time_t remaining = e->expires - time_now(); + return ml->idle_time - remaining; +} static uint32_t -mac_table_hash(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan) +mac_table_hash(const struct mac_learning *ml, const uint8_t mac[ETH_ADDR_LEN], + uint16_t vlan) { - return hash_bytes(mac, ETH_ADDR_LEN, vlan); + return hash_mac(mac, vlan, ml->secret); } static struct mac_entry * @@ -71,34 +54,15 @@ mac_entry_from_lru_node(struct list *list) return CONTAINER_OF(list, struct mac_entry, lru_node); } -/* Returns a tag that represents that 'mac' is on an unknown port in 'vlan'. - * (When we learn where 'mac' is in 'vlan', this allows flows that were - * flooded to be revalidated.) */ -static tag_type -make_unknown_mac_tag(const struct mac_learning *ml, - const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan) -{ - uint32_t h = hash_int(ml->secret, mac_table_hash(mac, vlan)); - return tag_create_deterministic(h); -} - -static struct list * -mac_table_bucket(const struct mac_learning *ml, - const uint8_t mac[ETH_ADDR_LEN], - uint16_t vlan) -{ - uint32_t hash = mac_table_hash(mac, vlan); - const struct list *list = &ml->table[hash & MAC_HASH_BITS]; - return (struct list *) list; -} - static struct mac_entry * -search_bucket(struct list *bucket, const uint8_t mac[ETH_ADDR_LEN], - uint16_t vlan) +mac_entry_lookup(const struct mac_learning *ml, + const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan) { struct mac_entry *e; - LIST_FOR_EACH (e, struct mac_entry, hash_node, bucket) { - if (eth_addr_equals(e->mac, mac) && e->vlan == vlan) { + + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan), + &ml->table) { + if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) { return e; } } @@ -110,6 +74,7 @@ search_bucket(struct list *bucket, const uint8_t mac[ETH_ADDR_LEN], * and return false. */ static bool get_lru(struct mac_learning *ml, struct mac_entry **e) + OVS_REQ_RDLOCK(ml->rwlock) { if (!list_is_empty(&ml->lrus)) { *e = mac_entry_from_lru_node(ml->lrus.next); @@ -120,166 +85,257 @@ get_lru(struct mac_learning *ml, struct mac_entry **e) } } -/* Removes 'e' from the 'ml' hash table. 'e' must not already be on the free - * list. */ -static void -free_mac_entry(struct mac_learning *ml, struct mac_entry *e) +static unsigned int +normalize_idle_time(unsigned int idle_time) { - list_remove(&e->hash_node); - list_remove(&e->lru_node); - list_push_front(&ml->free, &e->lru_node); + return (idle_time < 15 ? 15 + : idle_time > 3600 ? 3600 + : idle_time); } -/* Creates and returns a new MAC learning table. */ +/* Creates and returns a new MAC learning table with an initial MAC aging + * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX + * entries. */ struct mac_learning * -mac_learning_create(void) +mac_learning_create(unsigned int idle_time) { struct mac_learning *ml; - int i; ml = xmalloc(sizeof *ml); list_init(&ml->lrus); - list_init(&ml->free); - for (i = 0; i < MAC_HASH_SIZE; i++) { - list_init(&ml->table[i]); - } - for (i = 0; i < MAC_MAX; i++) { - struct mac_entry *s = &ml->entries[i]; - list_push_front(&ml->free, &s->lru_node); - } + hmap_init(&ml->table); ml->secret = random_uint32(); + ml->flood_vlans = NULL; + ml->idle_time = normalize_idle_time(idle_time); + ml->max_entries = MAC_DEFAULT_MAX; + ml->need_revalidate = false; + ovs_refcount_init(&ml->ref_cnt); + ovs_rwlock_init(&ml->rwlock); + return ml; +} + +struct mac_learning * +mac_learning_ref(const struct mac_learning *ml_) +{ + struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_); + if (ml) { + ovs_refcount_ref(&ml->ref_cnt); + } return ml; } -/* Destroys MAC learning table 'ml'. */ +/* Unreferences (and possibly destroys) MAC learning table 'ml'. */ void -mac_learning_destroy(struct mac_learning *ml) +mac_learning_unref(struct mac_learning *ml) { - free(ml); + if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) { + struct mac_entry *e, *next; + + HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) { + hmap_remove(&ml->table, &e->hmap_node); + free(e); + } + hmap_destroy(&ml->table); + + bitmap_free(ml->flood_vlans); + ovs_rwlock_destroy(&ml->rwlock); + free(ml); + } } -/* Attempts to make 'ml' learn from the fact that a frame from 'src_mac' was - * just observed arriving from 'src_port' on the given 'vlan'. - * - * Returns nonzero if we actually learned something from this, zero if it just - * confirms what we already knew. The nonzero return value is the tag of flows - * that now need revalidation. - * - * The 'vlan' parameter is used to maintain separate per-VLAN learning tables. - * Specify 0 if this behavior is undesirable. */ -tag_type -mac_learning_learn(struct mac_learning *ml, - const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan, - uint16_t src_port) +/* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on + * which all packets are flooded. Returns true if the set has changed from the + * previous value. */ +bool +mac_learning_set_flood_vlans(struct mac_learning *ml, + const unsigned long *bitmap) { - struct mac_entry *e; - struct list *bucket; + if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) { + return false; + } else { + bitmap_free(ml->flood_vlans); + ml->flood_vlans = vlan_bitmap_clone(bitmap); + return true; + } +} + +/* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */ +void +mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time) +{ + idle_time = normalize_idle_time(idle_time); + if (idle_time != ml->idle_time) { + struct mac_entry *e; + int delta; - if (eth_addr_is_multicast(src_mac)) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 30); - VLOG_DBG_RL(&rl, "multicast packet source "ETH_ADDR_FMT, - ETH_ADDR_ARGS(src_mac)); - return 0; + delta = (int) idle_time - (int) ml->idle_time; + LIST_FOR_EACH (e, lru_node, &ml->lrus) { + e->expires += delta; + } + ml->idle_time = idle_time; } +} + +/* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it + * to be within a reasonable range. */ +void +mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries) +{ + ml->max_entries = (max_entries < 10 ? 10 + : max_entries > 1000 * 1000 ? 1000 * 1000 + : max_entries); +} + +static bool +is_learning_vlan(const struct mac_learning *ml, uint16_t vlan) +{ + return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan); +} + +/* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'. + * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if + * 'vlan' is configured on 'ml' to flood all packets. */ +bool +mac_learning_may_learn(const struct mac_learning *ml, + const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan) +{ + return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac); +} + +/* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan', + * inserting a new entry if necessary. The caller must have already verified, + * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are + * learnable. + * + * If the returned MAC entry is new (as may be determined by calling + * mac_entry_is_new()), then the caller must pass the new entry to + * mac_learning_changed(). The caller must also initialize the new entry's + * 'port' member. Otherwise calling those functions is at the caller's + * discretion. */ +struct mac_entry * +mac_learning_insert(struct mac_learning *ml, + const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan) +{ + struct mac_entry *e; - bucket = mac_table_bucket(ml, src_mac, vlan); - e = search_bucket(bucket, src_mac, vlan); + e = mac_entry_lookup(ml, src_mac, vlan); if (!e) { - if (!list_is_empty(&ml->free)) { - e = mac_entry_from_lru_node(ml->free.next); - } else { - e = mac_entry_from_lru_node(ml->lrus.next); - list_remove(&e->hash_node); + uint32_t hash = mac_table_hash(ml, src_mac, vlan); + + if (hmap_count(&ml->table) >= ml->max_entries) { + get_lru(ml, &e); + mac_learning_expire(ml, e); } + + e = xmalloc(sizeof *e); + hmap_insert(&ml->table, &e->hmap_node, hash); memcpy(e->mac, src_mac, ETH_ADDR_LEN); - list_push_front(bucket, &e->hash_node); - e->port = -1; e->vlan = vlan; - e->tag = make_unknown_mac_tag(ml, src_mac, vlan); + e->grat_arp_lock = TIME_MIN; + e->port.p = NULL; + } else { + list_remove(&e->lru_node); } - /* Make the entry most-recently-used. */ - list_remove(&e->lru_node); + /* Mark 'e' as recently used. */ list_push_back(&ml->lrus, &e->lru_node); - e->expires = time_now() + 60; - - /* Did we learn something? */ - if (e->port != src_port) { - tag_type old_tag = e->tag; - e->port = src_port; - e->tag = tag_create_random(); - COVERAGE_INC(mac_learning_learned); - return old_tag; - } - return 0; + e->expires = time_now() + ml->idle_time; + + return e; } -/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a - * frame destined for 'dst' should be sent, -1 if unknown. */ -int -mac_learning_lookup(const struct mac_learning *ml, - const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan) +/* Changes 'e''s tag to a new, randomly selected one. Causes + * mac_learning_run() to flag for revalidation the tag that would have been + * previously used for this entry's MAC and VLAN (either before 'e' was + * inserted, if it is new, or otherwise before its port was updated.) + * + * The client should call this function after obtaining a MAC learning entry + * from mac_learning_insert(), if the entry is either new or if its learned + * port has changed. */ +void +mac_learning_changed(struct mac_learning *ml) { - tag_type tag = 0; - return mac_learning_lookup_tag(ml, dst, vlan, &tag); + COVERAGE_INC(mac_learning_learned); + ml->need_revalidate = true; } -/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml'. Returns the port on which a - * frame destined for 'dst' should be sent, -1 if unknown. - * - * Adds to '*tag' (which the caller must have initialized) the tag that should - * be attached to any flow created based on the return value, if any, to allow - * those flows to be revalidated when the MAC learning entry changes. */ -int -mac_learning_lookup_tag(const struct mac_learning *ml, - const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan, - tag_type *tag) +/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC + * learning entry, if any. If 'tag' is nonnull, then the tag that associates + * 'dst' and 'vlan' with its currently learned port will be OR'd into + * '*tag'. */ +struct mac_entry * +mac_learning_lookup(const struct mac_learning *ml, + const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan) { if (eth_addr_is_multicast(dst)) { - return -1; + /* No tag because the treatment of multicast destinations never + * changes. */ + return NULL; + } else if (!is_learning_vlan(ml, vlan)) { + /* We don't tag this property. The set of learning VLANs changes so + * rarely that we revalidate every flow when it changes. */ + return NULL; } else { - struct mac_entry *e = search_bucket(mac_table_bucket(ml, dst, vlan), - dst, vlan); - if (e) { - *tag |= e->tag; - return e->port; - } else { - *tag |= make_unknown_mac_tag(ml, dst, vlan); - return -1; - } + struct mac_entry *e = mac_entry_lookup(ml, dst, vlan); + + ovs_assert(e == NULL || e->port.p != NULL); + return e; } } -/* Expires all the mac-learning entries in 'ml'. The tags in 'ml' are - * discarded, so the client is responsible for revalidating any flows that - * depend on 'ml', if necessary. */ +/* Expires 'e' from the 'ml' hash table. */ +void +mac_learning_expire(struct mac_learning *ml, struct mac_entry *e) +{ + hmap_remove(&ml->table, &e->hmap_node); + list_remove(&e->lru_node); + free(e); +} + +/* Expires all the mac-learning entries in 'ml'. If not NULL, the tags in 'ml' + * are added to 'tags'. Otherwise the tags in 'ml' are discarded. The client + * is responsible for revalidating any flows that depend on 'ml', if + * necessary. */ void mac_learning_flush(struct mac_learning *ml) { struct mac_entry *e; while (get_lru(ml, &e)){ - free_mac_entry(ml, e); + ml->need_revalidate = true; + mac_learning_expire(ml, e); } + hmap_shrink(&ml->table); } -void -mac_learning_run(struct mac_learning *ml, struct tag_set *set) +/* Does periodic work required by 'ml'. Returns true if something changed that + * may require flow revalidation. */ +bool +mac_learning_run(struct mac_learning *ml) { + bool need_revalidate; struct mac_entry *e; - while (get_lru(ml, &e) && time_now() >= e->expires) { + + while (get_lru(ml, &e) + && (hmap_count(&ml->table) > ml->max_entries + || time_now() >= e->expires)) { COVERAGE_INC(mac_learning_expired); - if (set) { - tag_set_add(set, e->tag); - } - free_mac_entry(ml, e); + ml->need_revalidate = true; + mac_learning_expire(ml, e); } + + need_revalidate = ml->need_revalidate; + ml->need_revalidate = false; + return need_revalidate; } void mac_learning_wait(struct mac_learning *ml) { - if (!list_is_empty(&ml->lrus)) { + if (hmap_count(&ml->table) > ml->max_entries + || ml->need_revalidate) { + poll_immediate_wake(); + } else if (!list_is_empty(&ml->lrus)) { struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next); - poll_timer_wait((e->expires - time_now()) * 1000); + poll_timer_wait_until(e->expires * 1000LL); } }