From 135faefe4af624489ac310810a5ffb5223606252 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Fri, 15 Jul 2011 09:59:06 -0700
Subject: [PATCH] datapath: Backport flex_arrays.

flex_arrays didn't exist at all until 2.6.30, weren't exported to modules
until 2.6.38, and performed poorly until 3.0, so this backports the
functionality to older kernels.

Signed-off-by: Jesse Gross
Acked-by: Ben Pfaff
---
 datapath/linux/.gitignore                     |   1 +
 datapath/linux/Modules.mk                     |   3 +
 datapath/linux/compat/flex_array.c            | 390 ++++++++++++++++++
 .../linux/compat/include/linux/flex_array.h   |  86 ++++
 datapath/linux/compat/include/linux/poison.h  |  11 +
 5 files changed, 491 insertions(+)
 create mode 100644 datapath/linux/compat/flex_array.c
 create mode 100644 datapath/linux/compat/include/linux/flex_array.h
 create mode 100644 datapath/linux/compat/include/linux/poison.h

diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore
index d4341759a..0aee74651 100644
--- a/datapath/linux/.gitignore
+++ b/datapath/linux/.gitignore
@@ -14,6 +14,7 @@
 /datapath.c
 /dp_dev.c
 /dp_notify.c
+/flex_array.c
 /flow.c
 /genetlink-brcompat.c
 /genetlink-openvswitch.c
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index a3fdc7829..cb6801007 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -1,6 +1,7 @@
 openvswitch_sources += \
 	linux/compat/addrconf_core-openvswitch.c \
 	linux/compat/dev-openvswitch.c \
+	linux/compat/flex_array.c \
 	linux/compat/genetlink-openvswitch.c \
 	linux/compat/ip_output-openvswitch.c \
 	linux/compat/kmemdup.c \
@@ -16,6 +17,7 @@ openvswitch_headers += \
 	linux/compat/include/linux/cpumask.h \
 	linux/compat/include/linux/dmi.h \
 	linux/compat/include/linux/err.h \
+	linux/compat/include/linux/flex_array.h \
 	linux/compat/include/linux/genetlink.h \
 	linux/compat/include/linux/icmp.h \
 	linux/compat/include/linux/icmpv6.h \
@@ -37,6 +39,7 @@ openvswitch_headers += \
 	linux/compat/include/linux/netfilter_bridge.h \
linux/compat/include/linux/netfilter_ipv4.h \ linux/compat/include/linux/netlink.h \ + linux/compat/include/linux/poison.h \ linux/compat/include/linux/rculist.h \ linux/compat/include/linux/rcupdate.h \ linux/compat/include/linux/reciprocal_div.h \ diff --git a/datapath/linux/compat/flex_array.c b/datapath/linux/compat/flex_array.c new file mode 100644 index 000000000..3b96b8e46 --- /dev/null +++ b/datapath/linux/compat/flex_array.c @@ -0,0 +1,390 @@ +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) + +/* + * Flexible array managed in PAGE_SIZE parts + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2009 + * + * Author: Dave Hansen + */ + +#include +#include +#include +#include +#include + +struct flex_array_part { + char elements[FLEX_ARRAY_PART_SIZE]; +}; + +/* + * If a user requests an allocation which is small + * enough, we may simply use the space in the + * flex_array->parts[] array to store the user + * data. 
+ */
+static inline int elements_fit_in_base(struct flex_array *fa)
+{
+	int data_size = fa->element_size * fa->total_nr_elements;
+
+	/* Nonzero if every element fits in the bytes after the metadata. */
+	return data_size <= FLEX_ARRAY_BASE_BYTES_LEFT;
+}
+
+/**
+ * flex_array_alloc - allocate a new flexible array
+ * @element_size:	the size of individual elements in the array
+ * @total:		total number of elements that this should hold
+ * @flags:		page allocation flags to use for base array
+ *
+ * Returns the new array, or NULL if @total exceeds the maximum capacity
+ * or the base structure cannot be allocated.  Locking is up to the caller.
+ *
+ * @total is used to size internal structures.  Any access to an index
+ * >= @total is refused by the accessors (-ENOSPC or NULL).
+ *
+ * The maximum number of elements is the number of elements that fit in
+ * one page times the number of page pointers that fit in the base
+ * structure after its 16 bytes of metadata or (using integer math):
+ *
+ * 	(PAGE_SIZE/element_size) * ((PAGE_SIZE-16)/sizeof(void *))
+ *
+ * Here's a table showing example capacities.  Note that the maximum
+ * index that the get/put() functions accept is just nr_objects-1.  This
+ * basically means that you get 4MB of storage on 32-bit and 2MB on
+ * 64-bit.
+ *
+ *
+ * Element size | Objects | Objects |
+ * PAGE_SIZE=4k |  32-bit |  64-bit |
+ * ---------------------------------|
+ *      1 bytes | 4177920 | 2088960 |
+ *      2 bytes | 2088960 | 1044480 |
+ *      3 bytes | 1392300 |  696150 |
+ *      4 bytes | 1044480 |  522240 |
+ *     32 bytes |  130560 |   65280 |
+ *     33 bytes |  126480 |   63240 |
+ *   2048 bytes |    2040 |    1020 |
+ *   2049 bytes |    1020 |     510 |
+ *       void * | 1044480 |  261120 |
+ *
+ * Since 64-bit pointers are twice the size, we lose half the
+ * capacity in the base structure.  Also note that no effort is made
+ * to efficiently pack objects across page boundaries.
+ */
+struct flex_array *flex_array_alloc(int element_size, unsigned int total,
+					gfp_t flags)
+{
+	struct flex_array *ret;
+	int elems_per_part = 0;
+	u32 reciprocal_elems = 0;
+	int max_size = 0;
+
+	if (element_size)
+		elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
+	/* Still 0 if element_size > PAGE_SIZE; never take its reciprocal. */
+	if (elems_per_part) {
+		reciprocal_elems = reciprocal_value(elems_per_part);
+		max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part;
+	}
+	if (total > max_size)
+		return NULL;
+	ret = kzalloc(sizeof(*ret), flags);
+	if (!ret)
+		return NULL;
+	ret->element_size = element_size;
+	ret->total_nr_elements = total;
+	ret->elems_per_part = elems_per_part;
+	ret->reciprocal_elems = reciprocal_elems;
+	if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
+		memset(ret->parts, FLEX_ARRAY_FREE, FLEX_ARRAY_BASE_BYTES_LEFT);
+	return ret;
+}
+
+static int fa_element_to_part_nr(struct flex_array *fa, unsigned int nr)
+{
+	/* Dividing by 1 needs no reciprocal (2^32 would not fit in a u32). */
+	if (fa->elems_per_part == 1)
+		return nr;
+	return reciprocal_divide(nr, fa->reciprocal_elems);
+}
+
+/**
+ * flex_array_free_parts - just free the second-level pages
+ * @fa:		the flex array from which to free parts
+ *
+ * Use when the base 'struct flex_array' is static and must not be freed.
+ */
+void flex_array_free_parts(struct flex_array *fa)
+{
+	int i;
+
+	/* Base-resident data has no second-level parts to release. */
+	if (!elements_fit_in_base(fa))
+		for (i = 0; i < FLEX_ARRAY_NR_BASE_PTRS; i++)
+			kfree(fa->parts[i]);
+}
+
+void flex_array_free(struct flex_array *fa)
+{
+	flex_array_free_parts(fa);
+	kfree(fa);
+}
+
+/* Byte offset of element @element_nr inside part @part_nr. */
+static unsigned int index_inside_part(struct flex_array *fa,
+					unsigned int element_nr,
+					unsigned int part_nr)
+{
+	unsigned int first_in_part = part_nr * fa->elems_per_part;
+
+	return (element_nr - first_in_part) * fa->element_size;
+}
+
+static struct flex_array_part *
+__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
+{
+	struct flex_array_part *part = fa->parts[part_nr];
+	if (part)
+		return part;
+	/* First use of this part: allocate it and poison the new page. */
+	part = kmalloc(sizeof(*part), flags);
+	if (!part)
+		return NULL;
+	if (!(flags & __GFP_ZERO))
+		memset(part, FLEX_ARRAY_FREE, sizeof(*part));
+	fa->parts[part_nr] = part;
+	return part;
+}
+
+/**
+ * flex_array_put - copy data into the array at @element_nr
+ * @fa:		the flex array to copy data into
+ * @element_nr:	index of the position in which to insert the new element
+ * @src:	address of data to copy into the array
+ * @flags:	page allocation flags to use for array expansion
+ *
+ * Returns 0 on success, -ENOSPC if @element_nr is out of range, or -ENOMEM
+ * if the part that holds the element could not be allocated.
+ *
+ * Note that this *copies* the contents of @src into the array.  If you
+ * are trying to store an array of pointers, make sure to pass in &ptr
+ * instead of ptr.  You may instead wish to use the flex_array_put_ptr()
+ * helper function.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
+			gfp_t flags)
+{
+	struct flex_array_part *part;
+	unsigned int offset;
+	int part_nr = 0;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (!fa->element_size)
+		return 0;
+	if (!elements_fit_in_base(fa)) {
+		part_nr = fa_element_to_part_nr(fa, element_nr);
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	} else {
+		part = (struct flex_array_part *)&fa->parts[0];
+	}
+	offset = index_inside_part(fa, element_nr, part_nr);
+	memcpy(&part->elements[offset], src, fa->element_size);
+	return 0;
+}
+
+/**
+ * flex_array_clear - poison the element at @element_nr
+ * @fa:		the flex array of the element.
+ * @element_nr:	index of the position to clear.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
+{
+	struct flex_array_part *part;
+	unsigned int offset;
+	int part_nr = 0;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (!fa->element_size)
+		return 0;
+	if (!elements_fit_in_base(fa)) {
+		part_nr = fa_element_to_part_nr(fa, element_nr);
+		part = fa->parts[part_nr];
+		if (!part)
+			return -EINVAL;
+	} else {
+		part = (struct flex_array_part *)&fa->parts[0];
+	}
+	offset = index_inside_part(fa, element_nr, part_nr);
+	memset(&part->elements[offset], FLEX_ARRAY_FREE, fa->element_size);
+	return 0;
+}
+
+/**
+ * flex_array_prealloc - guarantee that array space exists
+ * @fa:			the flex array for which to preallocate parts
+ * @start:		index of first array element for which space is allocated
+ * @nr_elements:	number of elements for which space is allocated
+ * @flags:		page allocation flags
+ *
+ * After a successful call, flex_array_put() on the covered range
+ * no longer needs to allocate memory.  Use it when you expect to be
+ * holding a lock or running in atomic context while writing data
+ * into the array.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_prealloc(struct flex_array *fa, unsigned int start,
+			unsigned int nr_elements, gfp_t flags)
+{
+	int start_part;
+	int end_part;
+	int part_nr;
+	unsigned int end;
+	struct flex_array_part *part;
+
+	if (!start && !nr_elements)
+		return 0;
+	if (start >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (!nr_elements)
+		return 0;
+	/* Compare with the room left so start + nr_elements cannot wrap. */
+	if (nr_elements > fa->total_nr_elements - start)
+		return -ENOSPC;
+	end = start + nr_elements - 1;
+
+	if (!fa->element_size)
+		return 0;
+	if (elements_fit_in_base(fa))
+		return 0;
+	start_part = fa_element_to_part_nr(fa, start);
+	end_part = fa_element_to_part_nr(fa, end);
+	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * flex_array_get - pull data back out of the array
+ * @fa:		the flex array from which to extract data
+ * @element_nr:	index of the element to fetch from the array
+ *
+ * Returns a pointer to the data at index @element_nr, or NULL when
+ * the index is out of range or its part was never allocated.  The
+ * data is the copy made by flex_array_put(): if you stored pointers
+ * you get back &ptr, so consider the flex_array_get_ptr() helper.
+ *
+ * Locking must be provided by the caller.
+ */
+void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
+{
+	struct flex_array_part *part;
+	int part_nr = 0;
+
+	/* Zero-sized elements and out-of-range slots hold no data. */
+	if (!fa->element_size || element_nr >= fa->total_nr_elements)
+		return NULL;
+	if (!elements_fit_in_base(fa)) {
+		part_nr = fa_element_to_part_nr(fa, element_nr);
+		part = fa->parts[part_nr];
+		if (!part)
+			return NULL;
+	} else {
+		/* Small arrays keep their elements in the base structure. */
+		part = (struct flex_array_part *)&fa->parts[0];
+	}
+	return &part->elements[index_inside_part(fa, element_nr, part_nr)];
+}
+
+/**
+ * flex_array_get_ptr - pull a ptr back out of the array
+ * @fa:		the flex array from which to extract data
+ * @element_nr:	index of the element to fetch from the array
+ *
+ * Returns the pointer placed in the flex array at element_nr using
+ * flex_array_put_ptr(), or NULL if flex_array_get() finds no element
+ * there.  This function should not be called if the element in
+ * question was not set using the _put_ptr() helper.
+ */
+void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
+{
+	void **slot = flex_array_get(fa, element_nr);
+
+	if (slot)
+		return *slot;
+	return NULL;
+}
+
+static int part_is_free(struct flex_array_part *part)
+{
+	const char *byte = part->elements;
+
+	/* A part is free only if every byte still carries the poison. */
+	while (byte < part->elements + sizeof(part->elements))
+		if (*byte++ != FLEX_ARRAY_FREE)
+			return 0;
+	return 1;
+}
+
+/**
+ * flex_array_shrink - free unused second-level pages
+ * @fa:		the flex array to shrink
+ *
+ * Frees every second-level page whose bytes all still hold the
+ * FLEX_ARRAY_FREE poison.  Returns the number of pages freed.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_shrink(struct flex_array *fa)
+{
+	struct flex_array_part *part;
+	int part_nr;
+	int ret = 0;
+
+	if (!fa->total_nr_elements || !fa->element_size)
+		return 0;
+	/* Base-resident data has no second-level pages to give back. */
+	if (elements_fit_in_base(fa))
+		return 0;
+	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
+		part = fa->parts[part_nr];
+		if (!part || !part_is_free(part))
+			continue;
+		/* Unhook the page first so no stale pointer is left. */
+		fa->parts[part_nr] = NULL;
+		kfree(part);
+		ret++;
+	}
+	return ret;
+}
+
+#endif /* Linux version < 3.0.0 */
diff --git a/datapath/linux/compat/include/linux/flex_array.h b/datapath/linux/compat/include/linux/flex_array.h
new file mode 100644
index 000000000..1cc6648a5
--- /dev/null
+++ b/datapath/linux/compat/include/linux/flex_array.h
@@ -0,0 +1,86 @@
+#ifndef __LINUX_FLEX_ARRAY_WRAPPER_H
+#define __LINUX_FLEX_ARRAY_WRAPPER_H
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0)
+#include_next <linux/flex_array.h>
+#else
+
+#include <linux/types.h>
+#include <asm/page.h>
+
+#define FLEX_ARRAY_PART_SIZE PAGE_SIZE
+#define FLEX_ARRAY_BASE_SIZE PAGE_SIZE
+
+struct flex_array_part;
+
+/*
+ * This is meant to replace cases where an array-like
+ * structure has gotten too big to fit into kmalloc()
+ * and the developer is getting tempted to use
+ * vmalloc().
+ */
+
+struct flex_array {
+	union {
+		struct {
+			int element_size;	/* bytes per element */
+			int total_nr_elements;	/* number of elements */
+			int elems_per_part;	/* elements stored per part */
+			u32 reciprocal_elems;	/* for reciprocal_divide() */
+			struct flex_array_part *parts[];
+		};
+		/* Pad so that sizeof(flex_array) == FLEX_ARRAY_BASE_SIZE */
+		char padding[FLEX_ARRAY_BASE_SIZE];
+	};
+};
+
+/* Number of bytes left in base struct flex_array, excluding metadata */
+#define FLEX_ARRAY_BASE_BYTES_LEFT					\
+	(FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts))
+
+/* Number of pointers in base to struct flex_array_part pages */
+#define FLEX_ARRAY_NR_BASE_PTRS						\
+	(FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *))
+
+/* Number of elements of size @size that fit in struct flex_array_part */
+#define FLEX_ARRAY_ELEMENTS_PER_PART(size)				\
+	(FLEX_ARRAY_PART_SIZE / (size))
+
+/*
+ * Defines a statically allocated flex array and ensures its parameters are
+ * valid.
+ *
+ * NOTE(review): elems_per_part and reciprocal_elems are left zero here,
+ * but the part lookup in flex_array.c reads them -- confirm before use.
+ */
+#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total)		\
+	struct flex_array __arrayname = { { {				\
+		.element_size = (__element_size),			\
+		.total_nr_elements = (__total),				\
+	} } };								\
+	static inline void __arrayname##_invalid_parameter(void)	\
+	{								\
+		BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS *	\
+			FLEX_ARRAY_ELEMENTS_PER_PART(__element_size));	\
+	}
+
+struct flex_array *flex_array_alloc(int element_size, unsigned int total,
+		gfp_t flags);
+int flex_array_prealloc(struct flex_array *fa, unsigned int start,
+		unsigned int nr_elements, gfp_t flags);
+void flex_array_free(struct flex_array *fa);
+void flex_array_free_parts(struct flex_array *fa);
+int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
+		gfp_t flags);
+int flex_array_clear(struct flex_array *fa, unsigned int element_nr);
+void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
+int flex_array_shrink(struct flex_array *fa);
+
+#define flex_array_put_ptr(fa, nr, src, gfp) \
+	flex_array_put((fa), (nr), (void *)&(src), (gfp))
+
+void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr);
+
+#endif /* Linux version < 3.0.0 */
+#endif /* __LINUX_FLEX_ARRAY_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/poison.h b/datapath/linux/compat/include/linux/poison.h
new file mode 100644
index 000000000..96e8620d5
--- /dev/null
+++ b/datapath/linux/compat/include/linux/poison.h
@@ -0,0 +1,11 @@
+#ifndef __LINUX_POISON_WRAPPER_H
+#define __LINUX_POISON_WRAPPER_H 1
+
+#include_next <linux/poison.h>
+
+#ifndef FLEX_ARRAY_FREE
+/********** lib/flex_array.c **********/
+#define FLEX_ARRAY_FREE	0x6c	/* for use-after-free poisoning */
+#endif
+
+#endif
-- 
2.43.0