From: Giuseppe Lettieri Date: Tue, 1 Apr 2014 15:46:48 +0000 (+0200) Subject: Merge branch 'master' of ssh://git.onelab.eu/git/sliver-openvswitch X-Git-Tag: sliver-openvswitch-2.2.90-1~7 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=7685b7a9e5b3f6db6832e52e111000ff36d3acb4;hp=4034fcdc7bd5f93a92ae7fe7db2f4bf17537ae27;p=sliver-openvswitch.git Merge branch 'master' of ssh://git.onelab.eu/git/sliver-openvswitch --- diff --git a/AUTHORS b/AUTHORS index d8f13ba43..34e53c321 100644 --- a/AUTHORS +++ b/AUTHORS @@ -144,6 +144,7 @@ André Ruß andre.russ@hybris.com Andreas Beckmann debian@abeckmann.de Andrei Andone andrei.andone@softvision.ro Anton Matsiuk anton.matsiuk@gmail.com +Anuprem Chalvadi achalvadi@vmware.com Atzm Watanabe atzm@stratosphere.co.jp Bastian Blank waldi@debian.org Ben Basler bbasler@nicira.com diff --git a/INSTALL b/INSTALL index 9f9491ff0..cc89cc380 100644 --- a/INSTALL +++ b/INSTALL @@ -36,11 +36,15 @@ you will need the following software: - Python 2.x, for x >= 4. -To compile the kernel module on Linux, you must also install the -following. If you cannot build or install the kernel module, you may -use the userspace-only implementation, at a cost in performance. The -userspace implementation may also lack some features. Refer to -INSTALL.userspace for more information. +On Linux, you may choose to compile the kernel module that comes with +the Open vSwitch distribution or to use the kernel module built into +the Linux kernel (version 3.3 or later). See the FAQ question "What +features are not available in the Open vSwitch kernel datapath that +ships as part of the upstream Linux kernel?" for more information on +this trade-off. You may also use the userspace-only implementation, +at some cost in features and performance (see INSTALL.userspace for +details). To compile the kernel module on Linux, you must also +install the following: - A supported Linux kernel version. Please refer to README for a list of supported versions. 
diff --git a/acinclude.m4 b/acinclude.m4 index 8ff58280e..830fd3f0d 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -1,6 +1,6 @@ # -*- autoconf -*- -# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. +# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -579,3 +579,18 @@ AC_DEFUN([OVS_CHECK_PTHREAD_SET_NAME], ;; esac fi]) + +dnl OVS_CHECK_LINUX_HOST. +dnl +dnl Checks whether we're building for a Linux host, based on the presence of +dnl the __linux__ preprocessor symbol, and sets up an Automake conditional +dnl LINUX based on the result. +AC_DEFUN([OVS_CHECK_LINUX_HOST], + [AC_CACHE_CHECK( + [whether __linux__ is defined], + [ovs_cv_linux], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([enum { LINUX = __linux__};], [])], + [ovs_cv_linux=true], + [ovs_cv_linux=false])]) + AM_CONDITIONAL([LINUX], [$ovs_cv_linux])]) diff --git a/build-aux/cccl b/build-aux/cccl index 0c7f3d967..855f1831e 100644 --- a/build-aux/cccl +++ b/build-aux/cccl @@ -97,11 +97,7 @@ EOF -l*) lib=`echo "$1" | sed 's/-l//'` - if [ $lib != "dnsapi" -a $lib != "ws2_32" -a $lib != "wsock32" ]; then - lib="lib$lib.lib" - else - lib="$lib.lib" - fi + lib="$lib.lib" clopt="$clopt $lib" linkopt="$linkopt $lib" diff --git a/configure.ac b/configure.ac index 2fccc648f..09e01fbc2 100644 --- a/configure.ac +++ b/configure.ac @@ -90,6 +90,7 @@ OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(4) OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(8) OVS_CHECK_POSIX_AIO OVS_CHECK_PTHREAD_SET_NAME +OVS_CHECK_LINUX_HOST OVS_CHECK_INCLUDE_NEXT([stdio.h string.h]) AC_CONFIG_FILES([lib/stdio.h lib/string.h]) @@ -132,11 +133,6 @@ AC_CONFIG_COMMANDS([include/openflow/openflow.h.stamp]) AC_CONFIG_COMMANDS([utilities/bugtool/dummy], [:]) -AM_CONDITIONAL([LINUX_DATAPATH], [test "$HAVE_NETLINK" = yes && test "$ESX" = no]) -if test "$HAVE_NETLINK" = yes && test "$ESX" = no; then - 
AC_DEFINE([LINUX_DATAPATH], [1], [System uses the linux datapath module.]) -fi - m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES]) AC_OUTPUT diff --git a/datapath/flow.c b/datapath/flow.c index c3e3fcb64..e9a2a2725 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -148,7 +148,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, * block bottom-halves here. */ spin_lock_bh(&stats->lock); - if (time_after(stats->used, *used)) + if (!*used || time_after(stats->used, *used)) *used = stats->used; *tcp_flags |= stats->tcp_flags; ovs_stats->n_packets += stats->packet_count; diff --git a/include/openflow/openflow-common.h b/include/openflow/openflow-common.h index bf16d59c3..53aa67eb7 100644 --- a/include/openflow/openflow-common.h +++ b/include/openflow/openflow-common.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008, 2011, 2012, 2013 The Board of Trustees of The Leland Stanford +/* Copyright (c) 2008, 2011, 2012, 2013, 2014 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation @@ -32,7 +32,7 @@ */ /* - * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. + * Copyright (c) 2008-2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,6 +76,12 @@ enum ofp_version { OFP11_VERSION = 0x02, OFP12_VERSION = 0x03, OFP13_VERSION = 0x04 + + /* When we add real support for these versions, add them to the enum so + * that we get compiler warnings everywhere we might forget to provide + * support. Until then, keep them as macros to avoid those warnings. */ +#define OFP14_VERSION 0x05 +#define OFP15_VERSION 0x06 }; /* Vendor (aka experimenter) IDs. 
diff --git a/include/sparse/netinet/in.h b/include/sparse/netinet/in.h index a2204282d..d51722e95 100644 --- a/include/sparse/netinet/in.h +++ b/include/sparse/netinet/in.h @@ -58,6 +58,7 @@ struct sockaddr_in6 { }; #define IPPROTO_IP 0 +#define IPPROTO_IPV6 41 #define IPPROTO_HOPOPTS 0 #define IPPROTO_ICMP 1 #define IPPROTO_TCP 6 @@ -98,6 +99,8 @@ struct sockaddr_in6 { #define INET6_ADDRSTRLEN 46 +#define IPV6_TCLASS 67 + static inline ovs_be32 htonl(uint32_t x) { return (OVS_FORCE ovs_be32) x; diff --git a/include/windows/automake.mk b/include/windows/automake.mk index 2771270ad..b8f144e84 100644 --- a/include/windows/automake.mk +++ b/include/windows/automake.mk @@ -8,4 +8,5 @@ noinst_HEADERS += \ include/windows/getopt.h \ include/windows/syslog.h \ + include/windows/sys/resource.h \ include/windows/windefs.h diff --git a/include/windows/sys/resource.h b/include/windows/sys/resource.h new file mode 100644 index 000000000..d4628f259 --- /dev/null +++ b/include/windows/sys/resource.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SYS_RESOURCE_H +#define SYS_RESOURCE_H 1 + +struct rusage { + struct timeval ru_utime; /* user CPU time used */ + struct timeval ru_stime; /* system CPU time used */ + long ru_maxrss; /* maximum resident set size */ + long ru_ixrss; /* integral shared memory size */ + long ru_idrss; /* integral unshared data size */ + long ru_isrss; /* integral unshared stack size */ + long ru_minflt; /* page reclaims (soft page faults) */ + long ru_majflt; /* page faults (hard page faults) */ + long ru_nswap; /* swaps */ + long ru_inblock; /* block input operations */ + long ru_oublock; /* block output operations */ + long ru_msgsnd; /* IPC messages sent */ + long ru_msgrcv; /* IPC messages received */ + long ru_nsignals; /* signals received */ + long ru_nvcsw; /* voluntary context switches */ + long ru_nivcsw; /* involuntary context switches */ +}; + +#ifndef RUSAGE_SELF +#define RUSAGE_SELF 1 +#endif + +#ifndef RUSAGE_CHILDREN +#define RUSAGE_CHILDREN 2 +#endif + +#ifndef RUSAGE_THREAD +#define RUSAGE_THREAD 3 +#endif + +#endif /* sys/resource.h */ diff --git a/lib/automake.mk b/lib/automake.mk index 0ec18a441..9a55b1535 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -178,8 +178,6 @@ lib_libopenvswitch_la_SOURCES = \ lib/shash.h \ lib/simap.c \ lib/simap.h \ - lib/signals.c \ - lib/signals.h \ lib/smap.c \ lib/smap.h \ lib/socket-util.c \ @@ -190,7 +188,6 @@ lib_libopenvswitch_la_SOURCES = \ lib/sset.h \ lib/stp.c \ lib/stp.h \ - lib/stream-fd.c \ lib/stream-fd.h \ lib/stream-provider.h \ lib/stream-ssl.h \ @@ -243,11 +240,16 @@ if WIN32 lib_libopenvswitch_la_SOURCES += \ lib/daemon-windows.c \ lib/getopt_long.c \ - lib/latch-windows.c + lib/getrusage-windows.c \ + lib/latch-windows.c \ + lib/stream-fd-windows.c else lib_libopenvswitch_la_SOURCES += \ lib/daemon.c \ - lib/latch.c \ + lib/latch-unix.c \ + lib/signals.c \ + lib/signals.h \ + lib/stream-fd-unix.c \ lib/stream-unix.c endif @@ -277,7 +279,7 @@ if HAVE_WNO_UNUSED_PARAMETER lib_libsflow_la_CFLAGS 
+= -Wno-unused-parameter endif -if LINUX_DATAPATH +if LINUX lib_libopenvswitch_la_SOURCES += \ lib/dpif-linux.c \ lib/dpif-linux.h \ diff --git a/lib/command-line.c b/lib/command-line.c index 805e51b35..cb73a25c9 100644 --- a/lib/command-line.c +++ b/lib/command-line.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,7 +93,7 @@ run_command(int argc, char *argv[], const struct command commands[]) /* Process title. */ -#ifdef LINUX_DATAPATH +#ifdef __linux__ static struct ovs_mutex proctitle_mutex = OVS_MUTEX_INITIALIZER; /* Start of command-line arguments in memory. */ @@ -199,7 +199,7 @@ proctitle_restore(void) } ovs_mutex_unlock(&proctitle_mutex); } -#else /* !LINUX_DATAPATH*/ +#else /* !__linux__ */ /* Stubs that don't do anything on non-Linux systems. 
*/ void @@ -219,4 +219,4 @@ void proctitle_restore(void) { } -#endif /* !LINUX_DATAPATH */ +#endif /* !__linux__ */ diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index f7f529266..c2579f6bf 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -190,7 +190,8 @@ static int dpif_linux_enumerate(struct sset *all_dps) { struct nl_dump dump; - struct ofpbuf msg; + uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; + struct ofpbuf msg, buf; int error; error = dpif_linux_init(); @@ -198,14 +199,16 @@ dpif_linux_enumerate(struct sset *all_dps) return error; } + ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); dpif_linux_dp_dump_start(&dump); - while (nl_dump_next(&dump, &msg)) { + while (nl_dump_next(&dump, &msg, &buf)) { struct dpif_linux_dp dp; if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) { sset_add(all_dps, dp.name); } } + ofpbuf_uninit(&buf); return nl_dump_done(&dump); } @@ -439,8 +442,11 @@ get_vport_type(const struct dpif_linux_vport *vport) static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); switch (vport->type) { - case OVS_VPORT_TYPE_NETDEV: - return "system"; + case OVS_VPORT_TYPE_NETDEV: { + const char *type = netdev_get_type_from_name(vport->name); + + return type ? 
type : "system"; + } case OVS_VPORT_TYPE_INTERNAL: return "internal"; @@ -705,6 +711,7 @@ dpif_linux_flow_flush(struct dpif *dpif_) struct dpif_linux_port_state { struct nl_dump dump; + struct ofpbuf buf; }; static void @@ -732,18 +739,20 @@ dpif_linux_port_dump_start(const struct dpif *dpif, void **statep) *statep = state = xmalloc(sizeof *state); dpif_linux_port_dump_start__(dpif, &state->dump); + ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); return 0; } static int dpif_linux_port_dump_next__(const struct dpif *dpif_, struct nl_dump *dump, - struct dpif_linux_vport *vport) + struct dpif_linux_vport *vport, + struct ofpbuf *buffer) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct ofpbuf buf; int error; - if (!nl_dump_next(dump, &buf)) { + if (!nl_dump_next(dump, &buf, buffer)) { return EOF; } @@ -763,7 +772,8 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_, struct dpif_linux_vport vport; int error; - error = dpif_linux_port_dump_next__(dpif, &state->dump, &vport); + error = dpif_linux_port_dump_next__(dpif, &state->dump, &vport, + &state->buf); if (error) { return error; } @@ -779,6 +789,7 @@ dpif_linux_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_) struct dpif_linux_port_state *state = state_; int error = nl_dump_done(&state->dump); + ofpbuf_uninit(&state->buf); free(state); return error; } @@ -981,21 +992,46 @@ dpif_linux_flow_del(struct dpif *dpif_, const struct dpif_flow_del *del) } struct dpif_linux_flow_state { - struct nl_dump dump; struct dpif_linux_flow flow; struct dpif_flow_stats stats; - struct ofpbuf *buf; + struct ofpbuf buffer; /* Always used to store flows. */ + struct ofpbuf *tmp; /* Used if kernel does not supply actions. 
*/ }; +struct dpif_linux_flow_iter { + struct nl_dump dump; + atomic_int status; +}; + +static void +dpif_linux_flow_dump_state_init(void **statep) +{ + struct dpif_linux_flow_state *state; + + *statep = state = xmalloc(sizeof *state); + ofpbuf_init(&state->buffer, NL_DUMP_BUFSIZE); + state->tmp = NULL; +} + +static void +dpif_linux_flow_dump_state_uninit(void *state_) +{ + struct dpif_linux_flow_state *state = state_; + + ofpbuf_uninit(&state->buffer); + ofpbuf_delete(state->tmp); + free(state); +} + static int -dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep) +dpif_linux_flow_dump_start(const struct dpif *dpif_, void **iterp) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); - struct dpif_linux_flow_state *state; + struct dpif_linux_flow_iter *iter; struct dpif_linux_flow request; struct ofpbuf *buf; - *statep = state = xmalloc(sizeof *state); + *iterp = iter = xmalloc(sizeof *iter); dpif_linux_flow_init(&request); request.cmd = OVS_FLOW_CMD_GET; @@ -1003,42 +1039,43 @@ dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep) buf = ofpbuf_new(1024); dpif_linux_flow_to_ofpbuf(&request, buf); - nl_dump_start(&state->dump, NETLINK_GENERIC, buf); + nl_dump_start(&iter->dump, NETLINK_GENERIC, buf); ofpbuf_delete(buf); - - state->buf = NULL; + atomic_init(&iter->status, 0); return 0; } static int -dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_, +dpif_linux_flow_dump_next(const struct dpif *dpif_, void *iter_, void *state_, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats) { + struct dpif_linux_flow_iter *iter = iter_; struct dpif_linux_flow_state *state = state_; struct ofpbuf buf; int error; do { - ofpbuf_delete(state->buf); - state->buf = NULL; + ofpbuf_delete(state->tmp); + state->tmp = NULL; - if (!nl_dump_next(&state->dump, &buf)) { + if (!nl_dump_next(&iter->dump, &buf, 
&state->buffer)) { return EOF; } error = dpif_linux_flow_from_ofpbuf(&state->flow, &buf); if (error) { + atomic_store(&iter->status, error); return error; } if (actions && !state->flow.actions) { error = dpif_linux_flow_get__(dpif_, state->flow.key, state->flow.key_len, - &state->flow, &state->buf); + &state->flow, &state->tmp); if (error == ENOENT) { VLOG_DBG("dumped flow disappeared on get"); } else if (error) { @@ -1067,14 +1104,25 @@ dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_, return error; } -static int -dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) +static bool +dpif_linux_flow_dump_next_may_destroy_keys(void *state_) { struct dpif_linux_flow_state *state = state_; - int error = nl_dump_done(&state->dump); - ofpbuf_delete(state->buf); - free(state); - return error; + + return state->buffer.size ? false : true; +} + +static int +dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *iter_) +{ + struct dpif_linux_flow_iter *iter = iter_; + int dump_status; + unsigned int nl_status = nl_dump_done(&iter->dump); + + atomic_read(&iter->status, &dump_status); + atomic_destroy(&iter->status); + free(iter); + return dump_status ? dump_status : nl_status; } static void @@ -1284,6 +1332,8 @@ dpif_linux_refresh_channels(struct dpif *dpif_) struct dpif_linux_vport vport; size_t keep_channels_nbits; struct nl_dump dump; + uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; + struct ofpbuf buf; int retval = 0; size_t i; @@ -1300,8 +1350,9 @@ dpif_linux_refresh_channels(struct dpif *dpif_) dpif->n_events = dpif->event_offset = 0; + ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); dpif_linux_port_dump_start__(dpif_, &dump); - while (!dpif_linux_port_dump_next__(dpif_, &dump, &vport)) { + while (!dpif_linux_port_dump_next__(dpif_, &dump, &vport, &buf)) { uint32_t port_no = odp_to_u32(vport.port_no); struct nl_sock *sock = (port_no < dpif->uc_array_size ? 
dpif->channels[port_no].sock @@ -1367,6 +1418,7 @@ dpif_linux_refresh_channels(struct dpif *dpif_) nl_sock_destroy(sock); } nl_dump_done(&dump); + ofpbuf_uninit(&buf); /* Discard any saved channels that we didn't reuse. */ for (i = 0; i < keep_channels_nbits; i++) { @@ -1622,9 +1674,12 @@ const struct dpif_class dpif_linux_class = { dpif_linux_flow_put, dpif_linux_flow_del, dpif_linux_flow_flush, + dpif_linux_flow_dump_state_init, dpif_linux_flow_dump_start, dpif_linux_flow_dump_next, + dpif_linux_flow_dump_next_may_destroy_keys, dpif_linux_flow_dump_done, + dpif_linux_flow_dump_state_uninit, dpif_linux_execute, dpif_linux_operate, dpif_linux_recv_set, diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index b1c25c82b..5aff18a2c 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1314,47 +1314,82 @@ dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del) } struct dp_netdev_flow_state { - uint32_t bucket; - uint32_t offset; struct dp_netdev_actions *actions; struct odputil_keybuf keybuf; struct odputil_keybuf maskbuf; struct dpif_flow_stats stats; }; -static int -dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) +struct dp_netdev_flow_iter { + uint32_t bucket; + uint32_t offset; + int status; + struct ovs_mutex mutex; +}; + +static void +dpif_netdev_flow_dump_state_init(void **statep) { struct dp_netdev_flow_state *state; *statep = state = xmalloc(sizeof *state); - state->bucket = 0; - state->offset = 0; state->actions = NULL; +} + +static void +dpif_netdev_flow_dump_state_uninit(void *state_) +{ + struct dp_netdev_flow_state *state = state_; + + dp_netdev_actions_unref(state->actions); + free(state); +} + +static int +dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **iterp) +{ + struct dp_netdev_flow_iter *iter; + + *iterp = iter = xmalloc(sizeof *iter); + iter->bucket = 0; + iter->offset = 0; + iter->status = 0; + ovs_mutex_init(&iter->mutex); return 0; } static int 
-dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_, +dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats) { + struct dp_netdev_flow_iter *iter = iter_; struct dp_netdev_flow_state *state = state_; struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; - struct hmap_node *node; + int error; - fat_rwlock_rdlock(&dp->cls.rwlock); - node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset); - if (node) { - netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node); - dp_netdev_flow_ref(netdev_flow); + ovs_mutex_lock(&iter->mutex); + error = iter->status; + if (!error) { + struct hmap_node *node; + + fat_rwlock_rdlock(&dp->cls.rwlock); + node = hmap_at_position(&dp->flow_table, &iter->bucket, &iter->offset); + if (node) { + netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node); + dp_netdev_flow_ref(netdev_flow); + } + fat_rwlock_unlock(&dp->cls.rwlock); + if (!node) { + iter->status = error = EOF; + } } - fat_rwlock_unlock(&dp->cls.rwlock); - if (!node) { - return EOF; + ovs_mutex_unlock(&iter->mutex); + if (error) { + return error; } if (key) { @@ -1405,12 +1440,12 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_, } static int -dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) +dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *iter_) { - struct dp_netdev_flow_state *state = state_; + struct dp_netdev_flow_iter *iter = iter_; - dp_netdev_actions_unref(state->actions); - free(state); + ovs_mutex_destroy(&iter->mutex); + free(iter); return 0; } @@ -1427,8 +1462,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) } /* Extract flow key. 
*/ - flow_extract(execute->packet, md->skb_priority, md->pkt_mark, &md->tunnel, - (union flow_in_port *)&md->in_port, &key); + flow_extract(execute->packet, md, &key); ovs_rwlock_rdlock(&dp->port_rwlock); dp_netdev_execute_actions(dp, &key, execute->packet, md, execute->actions, @@ -1600,8 +1634,8 @@ dp_forwarder_main(void *f_) if (!error) { struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no); - dp_netdev_port_input(dp, &packet, &md); + dp_netdev_port_input(dp, &packet, &md); received_anything = true; } else if (error != EAGAIN && error != EOPNOTSUPP) { static struct vlog_rate_limit rl @@ -1701,8 +1735,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, if (packet->size < ETH_HEADER_LEN) { return; } - flow_extract(packet, md->skb_priority, md->pkt_mark, &md->tunnel, - (union flow_in_port *)&md->in_port, &key); + flow_extract(packet, md, &key); netdev_flow = dp_netdev_lookup_flow(dp, &key); if (netdev_flow) { struct dp_netdev_actions *actions; @@ -1861,9 +1894,12 @@ dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key, dpif_netdev_flow_put, \ dpif_netdev_flow_del, \ dpif_netdev_flow_flush, \ + dpif_netdev_flow_dump_state_init, \ dpif_netdev_flow_dump_start, \ dpif_netdev_flow_dump_next, \ + NULL, \ dpif_netdev_flow_dump_done, \ + dpif_netdev_flow_dump_state_uninit, \ dpif_netdev_execute, \ NULL, /* operate */ \ dpif_netdev_recv_set, \ diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index f41ab6b9b..704a64278 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -262,18 +262,28 @@ struct dpif_class { * packets. */ int (*flow_flush)(struct dpif *dpif); - /* Attempts to begin dumping the flows in a dpif. On success, returns 0 - * and initializes '*statep' with any data needed for iteration. On - * failure, returns a positive errno value. */ - int (*flow_dump_start)(const struct dpif *dpif, void **statep); + /* Allocates thread-local state for use with the function 'flow_dump_next'. 
+ * On return, initializes '*statep' with any private data needed for + * iteration. */ + void (*flow_dump_state_init)(void **statep); - /* Attempts to retrieve another flow from 'dpif' for 'state', which was - * initialized by a successful call to the 'flow_dump_start' function for - * 'dpif'. On success, updates the output parameters as described below - * and returns 0. Returns EOF if the end of the flow table has been - * reached, or a positive errno value on error. This function will not be - * called again once it returns nonzero within a given iteration (but the - * 'flow_dump_done' function will be called afterward). + /* Attempts to begin dumping the flows in a dpif. On success, returns 0 + * and initializes '*iterp' with any shared data needed for iteration. + * On failure, returns a positive errno value. */ + int (*flow_dump_start)(const struct dpif *dpif, void **iterp); + + /* Attempts to retrieve another flow from 'dpif' for 'iter', using + * 'state' for storage. 'iter' must have been initialized by a successful + * call to the 'flow_dump_start' function for 'dpif'. 'state' must have + * been initialized with a call to the 'flow_dump_state_init' function for + * 'dpif'. + * + * On success, updates the output parameters as described below and returns + * 0. Returns EOF if the end of the flow table has been reached, or a + * positive errno value on error. Multiple threads may use the same 'dpif' + * and 'iter' with this function, but all other parameters must be + * different for each thread. If this function returns non-zero, + * subsequent calls with the same arguments will also return non-zero. * * On success: * @@ -295,16 +305,35 @@ struct dpif_class { * All of the returned data is owned by 'dpif', not by the caller, and the * caller must not modify or free it. 'dpif' must guarantee that it * remains accessible and unchanging until at least the next call to - * 'flow_dump_next' or 'flow_dump_done' for 'state'. 
*/ - int (*flow_dump_next)(const struct dpif *dpif, void *state, + * 'flow_dump_next' or 'flow_dump_done' for 'iter' and 'state'. */ + int (*flow_dump_next)(const struct dpif *dpif, void *iter, void *state, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats); - /* Releases resources from 'dpif' for 'state', which was initialized by a - * successful call to the 'flow_dump_start' function for 'dpif'. */ - int (*flow_dump_done)(const struct dpif *dpif, void *state); + /* Determines whether the next call to 'flow_dump_next' with 'state' will + * modify or free the keys that it previously returned. 'state' must have + * been initialized by a call to 'flow_dump_state_init' for 'dpif'. + * + * 'dpif' guarantees that data returned by flow_dump_next() will remain + * accessible and unchanging until the next call. This function provides a + * way for callers to determine whether that guarantee extends beyond the + * next call. + * + * Returns true if the next call to flow_dump_next() is expected to be + * destructive to previously returned keys for 'state', false otherwise. */ + bool (*flow_dump_next_may_destroy_keys)(void *state); + + /* Releases resources from 'dpif' for 'iter', which was initialized by a + * successful call to the 'flow_dump_start' function for 'dpif'. Callers + * must ensure that this function is called once within a given iteration, + * as the final flow dump operation. */ + int (*flow_dump_done)(const struct dpif *dpif, void *iter); + + /* Releases 'state' which was initialized by a call to the + * 'flow_dump_state_init' function for this 'dpif'. 
+ void (*flow_dump_state_uninit)(void *statep); /* Performs the 'execute->actions_len' bytes of actions in * 'execute->actions' on the Ethernet frame in 'execute->packet' diff --git a/lib/dpif.c b/lib/dpif.c index aa27d62c5..db1a1302a 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ COVERAGE_DEFINE(dpif_purge); COVERAGE_DEFINE(dpif_execute_with_help); static const struct dpif_class *base_dpif_classes[] = { -#ifdef LINUX_DATAPATH +#ifdef __linux__ &dpif_linux_class, #endif &dpif_netdev_class, @@ -962,26 +962,47 @@ dpif_flow_del(struct dpif *dpif, return dpif_flow_del__(dpif, &del); } -/* Initializes 'dump' to begin dumping the flows in a dpif. - * - * This function provides no status indication. An error status for the entire - * dump operation is provided when it is completed by calling - * dpif_flow_dump_done(). - */ +/* Allocates thread-local state for use with the 'flow_dump_next' function for + * 'dpif'. On return, initializes '*statep' with any private data needed for + * iteration. */ void +dpif_flow_dump_state_init(const struct dpif *dpif, void **statep) +{ + dpif->dpif_class->flow_dump_state_init(statep); +} + +/* Releases 'state' which was initialized by a call to the + * 'flow_dump_state_init' function for 'dpif'. */ +void +dpif_flow_dump_state_uninit(const struct dpif *dpif, void *state) +{ + dpif->dpif_class->flow_dump_state_uninit(state); +} + +/* Initializes 'dump' to begin dumping the flows in a dpif. On success, + * initializes 'dump' with any data needed for iteration and returns 0. + * Otherwise, returns a positive errno value describing the problem. 
*/ +int dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif) { + int error; dump->dpif = dpif; - dump->error = dpif->dpif_class->flow_dump_start(dpif, &dump->state); - log_operation(dpif, "flow_dump_start", dump->error); + error = dpif->dpif_class->flow_dump_start(dpif, &dump->iter); + log_operation(dpif, "flow_dump_start", error); + return error; } -/* Attempts to retrieve another flow from 'dump', which must have been - * initialized with dpif_flow_dump_start(). On success, updates the output - * parameters as described below and returns true. Otherwise, returns false. - * Failure might indicate an actual error or merely the end of the flow table. - * An error status for the entire dump operation is provided when it is - * completed by calling dpif_flow_dump_done(). +/* Attempts to retrieve another flow from 'dump', using 'state' for + * thread-local storage. 'dump' must have been initialized with a successful + * call to dpif_flow_dump_start(), and 'state' must have been initialized with + * dpif_flow_dump_state_init(). + * + * On success, updates the output parameters as described below and returns + * true. Otherwise, returns false. Failure might indicate an actual error or + * merely the end of the flow table. An error status for the entire dump + * operation is provided when it is completed by calling dpif_flow_dump_done(). + * Multiple threads may use the same 'dump' with this function, but all other + * parameters must not be shared. * * On success, if 'key' and 'key_len' are nonnull then '*key' and '*key_len' * will be set to Netlink attributes with types OVS_KEY_ATTR_* representing the @@ -993,27 +1014,20 @@ dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif) * All of the returned data is owned by 'dpif', not by the caller, and the * caller must not modify or free it. 'dpif' guarantees that it remains * accessible and unchanging until at least the next call to 'flow_dump_next' - * or 'flow_dump_done' for 'dump'. 
*/ + * or 'flow_dump_done' for 'dump' and 'state'. */ bool -dpif_flow_dump_next(struct dpif_flow_dump *dump, +dpif_flow_dump_next(struct dpif_flow_dump *dump, void *state, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats) { const struct dpif *dpif = dump->dpif; - int error = dump->error; + int error; - if (!error) { - error = dpif->dpif_class->flow_dump_next(dpif, dump->state, - key, key_len, - mask, mask_len, - actions, actions_len, - stats); - if (error) { - dpif->dpif_class->flow_dump_done(dpif, dump->state); - } - } + error = dpif->dpif_class->flow_dump_next(dpif, dump->iter, state, + key, key_len, mask, mask_len, + actions, actions_len, stats); if (error) { if (key) { *key = NULL; @@ -1031,33 +1045,50 @@ dpif_flow_dump_next(struct dpif_flow_dump *dump, *stats = NULL; } } - if (!dump->error) { - if (error == EOF) { - VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif)); - } else if (should_log_flow_message(error)) { - log_flow_message(dpif, error, "flow_dump", - key ? *key : NULL, key ? *key_len : 0, - mask ? *mask : NULL, mask ? *mask_len : 0, - stats ? *stats : NULL, actions ? *actions : NULL, - actions ? *actions_len : 0); - } + if (error == EOF) { + VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif)); + } else if (should_log_flow_message(error)) { + log_flow_message(dpif, error, "flow_dump", + key ? *key : NULL, key ? *key_len : 0, + mask ? *mask : NULL, mask ? *mask_len : 0, + stats ? *stats : NULL, actions ? *actions : NULL, + actions ? *actions_len : 0); } - dump->error = error; return !error; } +/* Determines whether the next call to 'dpif_flow_dump_next' for 'dump' and + * 'state' will modify or free the keys that it previously returned. 'state' + * must have been initialized by a call to 'dpif_flow_dump_state_init' for + * 'dump'. 
+ * + * 'dpif' guarantees that data returned by flow_dump_next() will remain + * accessible and unchanging until the next call. This function provides a way + * for callers to determine whether that guarantee extends beyond the next + * call. + * + * Returns true if the next call to flow_dump_next() is expected to be + * destructive to previously returned keys for 'state', false otherwise. */ +bool +dpif_flow_dump_next_may_destroy_keys(struct dpif_flow_dump *dump, void *state) +{ + const struct dpif *dpif = dump->dpif; + return (dpif->dpif_class->flow_dump_next_may_destroy_keys + ? dpif->dpif_class->flow_dump_next_may_destroy_keys(state) + : true); +} + /* Completes flow table dump operation 'dump', which must have been initialized - * with dpif_flow_dump_start(). Returns 0 if the dump operation was - * error-free, otherwise a positive errno value describing the problem. */ + * with a successful call to dpif_flow_dump_start(). Returns 0 if the dump + * operation was error-free, otherwise a positive errno value describing the + * problem. */ int dpif_flow_dump_done(struct dpif_flow_dump *dump) { const struct dpif *dpif = dump->dpif; - if (!dump->error) { - dump->error = dpif->dpif_class->flow_dump_done(dpif, dump->state); - log_operation(dpif, "flow_dump_done", dump->error); - } - return dump->error == EOF ? 0 : dump->error; + int error = dpif->dpif_class->flow_dump_done(dpif, dump->iter); + log_operation(dpif, "flow_dump_done", error); + return error == EOF ? 0 : error; } struct dpif_execute_helper_aux { diff --git a/lib/dpif.h b/lib/dpif.h index 7f986f957..9cd8f6afa 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -356,11 +356,19 @@ * thread-safe: they may be called from different threads only on * different dpif objects. * - * - Functions that operate on struct dpif_port_dump or struct - * dpif_flow_dump are conditionally thread-safe with respect to those - * objects. 
That is, one may dump ports or flows from any number of - * threads at once, but each thread must use its own struct dpif_port_dump - * or dpif_flow_dump. + * - dpif_flow_dump_next() is conditionally thread-safe: It may be called + * from different threads with the same 'struct dpif_flow_dump', but all + * other parameters must be different for each thread. + * + * - dpif_flow_dump_done() is conditionally thread-safe: All threads that + * share the same 'struct dpif_flow_dump' must have finished using it. + * This function must then be called exactly once for a particular + * dpif_flow_dump to finish the corresponding flow dump operation. + * + * - Functions that operate on 'struct dpif_port_dump' are conditionally + * thread-safe with respect to those objects. That is, one may dump ports + * from any number of threads at once, but each thread must use its own + * struct dpif_port_dump. */ #ifndef DPIF_H #define DPIF_H 1 @@ -506,16 +514,19 @@ int dpif_flow_get(const struct dpif *, struct dpif_flow_dump { const struct dpif *dpif; - int error; - void *state; + void *iter; }; -void dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *); -bool dpif_flow_dump_next(struct dpif_flow_dump *, +void dpif_flow_dump_state_init(const struct dpif *, void **statep); +int dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *); +bool dpif_flow_dump_next(struct dpif_flow_dump *, void *state, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **); +bool dpif_flow_dump_next_may_destroy_keys(struct dpif_flow_dump *dump, + void *state); int dpif_flow_dump_done(struct dpif_flow_dump *); +void dpif_flow_dump_state_uninit(const struct dpif *, void *state); /* Operation batching interface. 
* diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c index b1a0341f5..ef3fbc047 100644 --- a/lib/fatal-signal.c +++ b/lib/fatal-signal.c @@ -41,7 +41,11 @@ VLOG_DEFINE_THIS_MODULE(fatal_signal); /* Signals to catch. */ +#ifndef _WIN32 static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP, SIGALRM }; +#else +static const int fatal_signals[] = { SIGTERM }; +#endif /* Hooks to call upon catching a signal */ struct hook { @@ -55,12 +59,16 @@ static struct hook hooks[MAX_HOOKS]; static size_t n_hooks; static int signal_fds[2]; +static HANDLE wevent; static volatile sig_atomic_t stored_sig_nr = SIG_ATOMIC_MAX; static struct ovs_mutex mutex; static void atexit_handler(void); static void call_hooks(int sig_nr); +#ifdef _WIN32 +static BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType); +#endif /* Initializes the fatal signal handling module. Calling this function is * optional, because calling any other function in the module will also @@ -78,10 +86,22 @@ fatal_signal_init(void) inited = true; ovs_mutex_init_recursive(&mutex); +#ifndef _WIN32 xpipe_nonblocking(signal_fds); +#else + wevent = CreateEvent(NULL, TRUE, FALSE, NULL); + if (!wevent) { + char *msg_buf = ovs_lasterror_to_string(); + VLOG_FATAL("Failed to create a event (%s).", msg_buf); + } + + /* Register a function to handle Ctrl+C. 
*/ + SetConsoleCtrlHandler(ConsoleHandlerRoutine, true); +#endif for (i = 0; i < ARRAY_SIZE(fatal_signals); i++) { int sig_nr = fatal_signals[i]; +#ifndef _WIN32 struct sigaction old_sa; xsigaction(sig_nr, NULL, &old_sa); @@ -89,6 +109,11 @@ fatal_signal_init(void) && signal(sig_nr, fatal_signal_handler) == SIG_ERR) { VLOG_FATAL("signal failed (%s)", ovs_strerror(errno)); } +#else + if (signal(sig_nr, fatal_signal_handler) == SIG_ERR) { + VLOG_FATAL("signal failed (%s)", ovs_strerror(errno)); + } +#endif } atexit(atexit_handler); } @@ -136,7 +161,11 @@ fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux), void fatal_signal_handler(int sig_nr) { +#ifndef _WIN32 ignore(write(signal_fds[1], "", 1)); +#else + SetEvent(wevent); +#endif stored_sig_nr = sig_nr; } @@ -164,8 +193,12 @@ fatal_signal_run(void) ovs_mutex_lock(&mutex); +#ifndef _WIN32 VLOG_WARN("terminating with signal %d (%s)", (int)sig_nr, signal_name(sig_nr, namebuf, sizeof namebuf)); +#else + VLOG_WARN("terminating with signal %d", (int)sig_nr); +#endif call_hooks(sig_nr); /* Re-raise the signal with the default handling so that the program @@ -182,7 +215,15 @@ void fatal_signal_wait(void) { fatal_signal_init(); - poll_fd_wait(signal_fds[0], POLLIN); + poll_fd_wait_event(signal_fds[0], wevent, POLLIN); +} + +void +fatal_ignore_sigpipe(void) +{ +#ifndef _WIN32 + signal(SIGPIPE, SIG_IGN); +#endif } static void @@ -208,6 +249,15 @@ call_hooks(int sig_nr) } } } + +#ifdef _WIN32 +BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType) +{ + stored_sig_nr = SIGINT; + SetEvent(wevent); + return true; +} +#endif /* Files to delete on exit. 
*/ static struct sset files = SSET_INITIALIZER(&files); diff --git a/lib/fatal-signal.h b/lib/fatal-signal.h index b458d3d61..caf24ec47 100644 --- a/lib/fatal-signal.h +++ b/lib/fatal-signal.h @@ -27,6 +27,7 @@ void fatal_signal_add_hook(void (*hook_cb)(void *aux), void fatal_signal_fork(void); void fatal_signal_run(void); void fatal_signal_wait(void); +void fatal_ignore_sigpipe(void); /* Convenience functions for unlinking files upon termination. * diff --git a/lib/flow.c b/lib/flow.c index e7fe4d349..82d672931 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -35,6 +35,7 @@ #include "ofpbuf.h" #include "openflow/openflow.h" #include "packets.h" +#include "odp-util.h" #include "random.h" #include "unaligned.h" @@ -361,8 +362,7 @@ invalid: } -/* Initializes 'flow' members from 'packet', 'skb_priority', 'tnl', and - * 'in_port'. +/* Initializes 'flow' members from 'packet' and 'md' * * Initializes 'packet' header pointers as follows: * @@ -381,8 +381,7 @@ invalid: * present and has a correct length, and otherwise NULL. 
*/ void -flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, - const struct flow_tnl *tnl, const union flow_in_port *in_port, +flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, struct flow *flow) { struct ofpbuf b = *packet; @@ -392,15 +391,14 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, memset(flow, 0, sizeof *flow); - if (tnl) { - ovs_assert(tnl != &flow->tunnel); - flow->tunnel = *tnl; - } - if (in_port) { - flow->in_port = *in_port; + if (md) { + flow->tunnel = md->tunnel; + if (md->in_port.odp_port != ODPP_NONE) { + flow->in_port = md->in_port; + }; + flow->skb_priority = md->skb_priority; + flow->pkt_mark = md->pkt_mark; } - flow->skb_priority = skb_priority; - flow->pkt_mark = pkt_mark; packet->l2 = b.data; packet->l2_5 = NULL; diff --git a/lib/flow.h b/lib/flow.h index 3109a84a2..8165bcf79 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -32,6 +32,7 @@ struct ds; struct flow_wildcards; struct minimask; struct ofpbuf; +struct pkt_metadata; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion @@ -72,8 +73,8 @@ struct flow_tnl { * numbers and other times datapath (dpif) port numbers. This union allows * access to both. */ union flow_in_port { - ofp_port_t ofp_port; odp_port_t odp_port; + ofp_port_t ofp_port; }; /* Maximum number of supported MPLS labels. */ @@ -173,8 +174,7 @@ struct flow_metadata { ofp_port_t in_port; /* OpenFlow port or zero. 
*/ }; -void flow_extract(struct ofpbuf *, uint32_t priority, uint32_t mark, - const struct flow_tnl *, const union flow_in_port *in_port, +void flow_extract(struct ofpbuf *, const struct pkt_metadata *md, struct flow *); void flow_zero_wildcards(struct flow *, const struct flow_wildcards *); diff --git a/lib/getrusage-windows.c b/lib/getrusage-windows.c new file mode 100644 index 000000000..0282a17c6 --- /dev/null +++ b/lib/getrusage-windows.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include "util.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(getrusage_windows); + +static void +usage_to_timeval(FILETIME *ft, struct timeval *tv) +{ + ULARGE_INTEGER time; + time.LowPart = ft->dwLowDateTime; + time.HighPart = ft->dwHighDateTime; + + tv->tv_sec = time.QuadPart / 10000000; + tv->tv_usec = (time.QuadPart % 10000000) / 10; +} + +int +getrusage(int who, struct rusage *usage) +{ + FILETIME creation_time, exit_time, kernel_time, user_time; + PROCESS_MEMORY_COUNTERS pmc; + + memset(usage, 0, sizeof(struct rusage)); + + if (who == RUSAGE_SELF) { + if (!GetProcessTimes(GetCurrentProcess(), &creation_time, &exit_time, + &kernel_time, &user_time)) { + VLOG_ERR("failed at GetProcessTimes: %s", + ovs_lasterror_to_string()); + return -1; + } + + if (!GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc))) { + VLOG_ERR("failed at GetProcessMemoryInfo: %s", + ovs_lasterror_to_string()); + return -1; + } + + usage_to_timeval(&kernel_time, &usage->ru_stime); + usage_to_timeval(&user_time, &usage->ru_utime); + usage->ru_majflt = pmc.PageFaultCount; + usage->ru_maxrss = pmc.PeakWorkingSetSize / 1024; + return 0; + } else if (who == RUSAGE_THREAD) { + if (!GetThreadTimes(GetCurrentThread(), &creation_time, &exit_time, + &kernel_time, &user_time)) { + VLOG_ERR("failed at GetThreadTimes: %s", + ovs_lasterror_to_string()); + return -1; + } + usage_to_timeval(&kernel_time, &usage->ru_stime); + usage_to_timeval(&user_time, &usage->ru_utime); + return 0; + } else { + return -1; + } +} diff --git a/lib/latch.c b/lib/latch-unix.c similarity index 100% rename from lib/latch.c rename to lib/latch-unix.c diff --git a/lib/learning-switch.c b/lib/learning-switch.c index 8efbce1a5..56209909f 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -556,7 +556,6 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) struct ofpbuf pkt; struct flow flow; - union flow_in_port in_port_; error = 
ofputil_decode_packet_in(&pi, oh); if (error) { @@ -574,8 +573,8 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) /* Extract flow data from 'opi' into 'flow'. */ ofpbuf_use_const(&pkt, pi.packet, pi.packet_len); - in_port_.ofp_port = pi.fmd.in_port; - flow_extract(&pkt, 0, 0, NULL, &in_port_, &flow); + flow_extract(&pkt, NULL, &flow); + flow.in_port.ofp_port = pi.fmd.in_port; flow.tunnel.tun_id = pi.fmd.tun_id; /* Choose output port. */ diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 3afcd4cde..d90477a18 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -1298,7 +1298,6 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow, case MFF_MPLS_BOS: value->u8 = mpls_lse_to_bos(flow->mpls_lse[0]); break; - break; case MFF_IPV4_SRC: value->be32 = flow->nw_src; @@ -1495,7 +1494,6 @@ mf_set_value(const struct mf_field *mf, case MFF_MPLS_BOS: match_set_mpls_bos(match, 0, value->u8); break; - break; case MFF_IPV4_SRC: match_set_nw_src(match, value->be32); @@ -1709,7 +1707,6 @@ mf_set_flow_value(const struct mf_field *mf, case MFF_MPLS_BOS: flow_set_mpls_bos(flow, 0, value->u8); break; - break; case MFF_IPV4_SRC: flow->nw_src = value->be32; @@ -1920,7 +1917,6 @@ mf_set_wild(const struct mf_field *mf, struct match *match) case MFF_MPLS_BOS: match_set_any_mpls_bos(match, 0); break; - break; case MFF_IPV4_SRC: case MFF_ARP_SPA: diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index e756d88a9..828540ddf 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -2184,8 +2184,13 @@ netdev_linux_get_queue_stats(const struct netdev *netdev_, return error; } +struct queue_dump_state { + struct nl_dump dump; + struct ofpbuf buf; +}; + static bool -start_queue_dump(const struct netdev *netdev, struct nl_dump *dump) +start_queue_dump(const struct netdev *netdev, struct queue_dump_state *state) { struct ofpbuf request; struct tcmsg *tcmsg; @@ -2195,11 +2200,20 @@ start_queue_dump(const struct netdev *netdev, struct nl_dump *dump) return false; 
} tcmsg->tcm_parent = 0; - nl_dump_start(dump, NETLINK_ROUTE, &request); + nl_dump_start(&state->dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); + + ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); return true; } +static int +finish_queue_dump(struct queue_dump_state *state) +{ + ofpbuf_uninit(&state->buf); + return nl_dump_done(&state->dump); +} + struct netdev_linux_queue_state { unsigned int *queues; size_t cur_queue; @@ -2283,17 +2297,17 @@ netdev_linux_dump_queue_stats(const struct netdev *netdev_, ovs_mutex_lock(&netdev->mutex); error = tc_query_qdisc(netdev_); if (!error) { - struct nl_dump dump; + struct queue_dump_state state; if (!netdev->tc->ops->class_dump_stats) { error = EOPNOTSUPP; - } else if (!start_queue_dump(netdev_, &dump)) { + } else if (!start_queue_dump(netdev_, &state)) { error = ENODEV; } else { struct ofpbuf msg; int retval; - while (nl_dump_next(&dump, &msg)) { + while (nl_dump_next(&state.dump, &msg, &state.buf)) { retval = netdev->tc->ops->class_dump_stats(netdev_, &msg, cb, aux); if (retval) { @@ -2301,7 +2315,7 @@ netdev_linux_dump_queue_stats(const struct netdev *netdev_, } } - retval = nl_dump_done(&dump); + retval = finish_queue_dump(&state); if (retval) { error = retval; } @@ -3079,7 +3093,7 @@ static int htb_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED) { struct ofpbuf msg; - struct nl_dump dump; + struct queue_dump_state state; struct htb_class hc; /* Get qdisc options. */ @@ -3088,17 +3102,17 @@ htb_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED) htb_install__(netdev, hc.max_rate); /* Get queues. 
*/ - if (!start_queue_dump(netdev, &dump)) { + if (!start_queue_dump(netdev, &state)) { return ENODEV; } - while (nl_dump_next(&dump, &msg)) { + while (nl_dump_next(&state.dump, &msg, &state.buf)) { unsigned int queue_id; if (!htb_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) { htb_update_queue__(netdev, queue_id, &hc); } } - nl_dump_done(&dump); + finish_queue_dump(&state); return 0; } @@ -3579,18 +3593,18 @@ static int hfsc_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED) { struct ofpbuf msg; - struct nl_dump dump; + struct queue_dump_state state; struct hfsc_class hc; hc.max_rate = 0; hfsc_query_class__(netdev, tc_make_handle(1, 0xfffe), 0, &hc, NULL); hfsc_install__(netdev, hc.max_rate); - if (!start_queue_dump(netdev, &dump)) { + if (!start_queue_dump(netdev, &state)) { return ENODEV; } - while (nl_dump_next(&dump, &msg)) { + while (nl_dump_next(&state.dump, &msg, &state.buf)) { unsigned int queue_id; if (!hfsc_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) { @@ -3598,7 +3612,7 @@ hfsc_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED) } } - nl_dump_done(&dump); + finish_queue_dump(&state); return 0; } diff --git a/lib/netdev.c b/lib/netdev.c index f5f92333d..6aca2f495 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -102,7 +102,7 @@ netdev_initialize(void) fatal_signal_add_hook(restore_all_flags, NULL, NULL, true); netdev_vport_patch_register(); -#ifdef LINUX_DATAPATH +#ifdef __linux__ netdev_register_provider(&netdev_linux_class); netdev_register_provider(&netdev_internal_class); netdev_register_provider(&netdev_tap_class); diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 8cb1b8eef..375772f5e 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -31,6 +31,7 @@ #include "ofpbuf.h" #include "ovs-thread.h" #include "poll-loop.h" +#include "seq.h" #include "socket-util.h" #include "util.h" #include "vlog.h" @@ -690,91 +691,110 @@ nl_sock_drain(struct nl_sock *sock) void nl_dump_start(struct nl_dump *dump, int protocol, 
const struct ofpbuf *request) { - ofpbuf_init(&dump->buffer, 4096); - dump->status = nl_pool_alloc(protocol, &dump->sock); - if (dump->status) { + int status = nl_pool_alloc(protocol, &dump->sock); + + if (status) { return; } nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK; - dump->status = nl_sock_send__(dump->sock, request, - nl_sock_allocate_seq(dump->sock, 1), true); + status = nl_sock_send__(dump->sock, request, + nl_sock_allocate_seq(dump->sock, 1), true); + atomic_init(&dump->status, status << 1); dump->nl_seq = nl_msg_nlmsghdr(request)->nlmsg_seq; + dump->status_seq = seq_create(); } -/* Helper function for nl_dump_next(). */ -static int -nl_dump_recv(struct nl_dump *dump) -{ - struct nlmsghdr *nlmsghdr; - int retval; - - retval = nl_sock_recv__(dump->sock, &dump->buffer, true); - if (retval) { - return retval == EINTR ? EAGAIN : retval; - } - - nlmsghdr = nl_msg_nlmsghdr(&dump->buffer); - if (dump->nl_seq != nlmsghdr->nlmsg_seq) { - VLOG_DBG_RL(&rl, "ignoring seq %#"PRIx32" != expected %#"PRIx32, - nlmsghdr->nlmsg_seq, dump->nl_seq); - return EAGAIN; - } - - if (nl_msg_nlmsgerr(&dump->buffer, &retval)) { - VLOG_INFO_RL(&rl, "netlink dump request error (%s)", - ovs_strerror(retval)); - return retval && retval != EAGAIN ? retval : EPROTO; - } - - return 0; -} - -/* Attempts to retrieve another reply from 'dump', which must have been - * initialized with nl_dump_start(). +/* Attempts to retrieve another reply from 'dump' into 'buffer'. 'dump' must + * have been initialized with nl_dump_start(), and 'buffer' must have been + * initialized. 'buffer' should be at least NL_DUMP_BUFSIZE bytes long. * * If successful, returns true and points 'reply->data' and 'reply->size' to - * the message that was retrieved. The caller must not modify 'reply' (because - * it points into the middle of a larger buffer). + * the message that was retrieved. 
The caller must not modify 'reply' (because + * it points within 'buffer', which will be used by future calls to this + * function). * * On failure, returns false and sets 'reply->data' to NULL and 'reply->size' * to 0. Failure might indicate an actual error or merely the end of replies. * An error status for the entire dump operation is provided when it is * completed by calling nl_dump_done(). + * + * Multiple threads may call this function, passing the same nl_dump, however + * each must provide independent buffers. This function may cache multiple + * replies in the buffer, and these will be processed before more replies are + * fetched. When this function returns false, other threads may continue to + * process replies in their buffers, but they will not fetch more replies. */ bool -nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply) +nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply, struct ofpbuf *buffer) { struct nlmsghdr *nlmsghdr; + int error = 0; reply->data = NULL; reply->size = 0; - if (dump->status) { - return false; - } - while (!dump->buffer.size) { - int retval = nl_dump_recv(dump); + /* If 'buffer' is empty, fetch another batch of nlmsgs. 
*/ + while (!buffer->size) { + unsigned int status; + int retval, seq; + + seq = seq_read(dump->status_seq); + atomic_read(&dump->status, &status); + if (status) { + return false; + } + + retval = nl_sock_recv__(dump->sock, buffer, false); if (retval) { - ofpbuf_clear(&dump->buffer); - if (retval != EAGAIN) { - dump->status = retval; - return false; + ofpbuf_clear(buffer); + if (retval == EAGAIN) { + nl_sock_wait(dump->sock, POLLIN); + seq_wait(dump->status_seq, seq); + poll_block(); + continue; + } else { + error = retval; + goto exit; } } + + nlmsghdr = nl_msg_nlmsghdr(buffer); + if (dump->nl_seq != nlmsghdr->nlmsg_seq) { + VLOG_DBG_RL(&rl, "ignoring seq %#"PRIx32" != expected %#"PRIx32, + nlmsghdr->nlmsg_seq, dump->nl_seq); + ofpbuf_clear(buffer); + continue; + } + + if (nl_msg_nlmsgerr(buffer, &retval) && retval) { + VLOG_INFO_RL(&rl, "netlink dump request error (%s)", + ovs_strerror(retval)); + error = retval == EAGAIN ? EPROTO : retval; + ofpbuf_clear(buffer); + goto exit; + } } - nlmsghdr = nl_msg_next(&dump->buffer, reply); + /* Fetch the next nlmsg in the current batch. */ + nlmsghdr = nl_msg_next(buffer, reply); if (!nlmsghdr) { VLOG_WARN_RL(&rl, "netlink dump reply contains message fragment"); - dump->status = EPROTO; - return false; + error = EPROTO; } else if (nlmsghdr->nlmsg_type == NLMSG_DONE) { - dump->status = EOF; - return false; + error = EOF; } - return true; +exit: + if (error == EOF) { + unsigned int old; + atomic_or(&dump->status, 1, &old); + seq_change(dump->status_seq); + } else if (error) { + atomic_store(&dump->status, error << 1); + seq_change(dump->status_seq); + } + return !error; } /* Completes Netlink dump operation 'dump', which must have been initialized @@ -783,19 +803,29 @@ nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply) int nl_dump_done(struct nl_dump *dump) { + int status; + /* Drain any remaining messages that the client didn't read. Otherwise the * kernel will continue to queue them up and waste buffer space. 
* * XXX We could just destroy and discard the socket in this case. */ - while (!dump->status) { - struct ofpbuf reply; - if (!nl_dump_next(dump, &reply)) { - ovs_assert(dump->status); + atomic_read(&dump->status, &status); + if (!status) { + uint64_t tmp_reply_stub[NL_DUMP_BUFSIZE / 8]; + struct ofpbuf reply, buf; + + ofpbuf_use_stub(&buf, tmp_reply_stub, sizeof tmp_reply_stub); + while (nl_dump_next(dump, &reply, &buf)) { + /* Nothing to do. */ } + atomic_read(&dump->status, &status); + ovs_assert(status); + ofpbuf_uninit(&buf); } + atomic_destroy(&dump->status); nl_pool_release(dump->sock); - ofpbuf_uninit(&dump->buffer); - return dump->status == EOF ? 0 : dump->status; + seq_destroy(dump->status_seq); + return status >> 1; } /* Causes poll_block() to wake up when any of the specified 'events' (which is diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h index 5fedfe957..dd3240907 100644 --- a/lib/netlink-socket.h +++ b/lib/netlink-socket.h @@ -35,13 +35,23 @@ * Thread-safety * ============= * - * Only a single thread may use a given nl_sock or nl_dump at one time. + * Most of the netlink functions are not fully thread-safe: Only a single + * thread may use a given nl_sock or nl_dump at one time. The exceptions are: + * + * - nl_sock_recv() is conditionally thread-safe: it may be called from + * different threads with the same nl_sock, but each caller must provide + * an independent receive buffer. + * + * - nl_dump_next() is conditionally thread-safe: it may be called from + * different threads with the same nl_dump, but each caller must provide + * independent buffers. */ #include #include #include #include "ofpbuf.h" +#include "ovs-atomic.h" struct nl_sock; @@ -96,16 +106,19 @@ int nl_transact(int protocol, const struct ofpbuf *request, void nl_transact_multiple(int protocol, struct nl_transaction **, size_t n); /* Table dumping. */ +#define NL_DUMP_BUFSIZE 4096 + struct nl_dump { struct nl_sock *sock; /* Socket being dumped. 
*/ uint32_t nl_seq; /* Expected nlmsg_seq for replies. */ - struct ofpbuf buffer; /* Receive buffer currently being iterated. */ - int status; /* 0=OK, EOF=done, or positive errno value. */ + atomic_uint status; /* Low bit set if we read final message. + * Other bits hold an errno (0 for success). */ + struct seq *status_seq; /* Tracks changes to the above 'status'. */ }; void nl_dump_start(struct nl_dump *, int protocol, const struct ofpbuf *request); -bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply); +bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply, struct ofpbuf *buf); int nl_dump_done(struct nl_dump *); /* Miscellaneous */ diff --git a/lib/odp-util.c b/lib/odp-util.c index e20564f29..463f0082f 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -872,7 +872,8 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key) tun_key_ofs = nl_msg_start_nested(a, OVS_KEY_ATTR_TUNNEL); - if (tun_key->flags & FLOW_TNL_F_KEY) { + /* tun_id != 0 without FLOW_TNL_F_KEY is valid if tun_key is a mask. */ + if (tun_key->tun_id || tun_key->flags & FLOW_TNL_F_KEY) { nl_msg_put_be64(a, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id); } if (tun_key->ip_src) { @@ -2646,8 +2647,8 @@ odp_key_from_pkt_metadata(struct ofpbuf *buf, const struct pkt_metadata *md) /* Add an ingress port attribute if 'odp_in_port' is not the magical * value "ODPP_NONE". 
*/ - if (md->in_port != ODPP_NONE) { - nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, md->in_port); + if (md->in_port.odp_port != ODPP_NONE) { + nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, md->in_port.odp_port); } } @@ -2662,8 +2663,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, 1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL | 1u << OVS_KEY_ATTR_IN_PORT; - memset(md, 0, sizeof *md); - md->in_port = ODPP_NONE; + *md = PKT_METADATA_INITIALIZER(ODPP_NONE); NL_ATTR_FOR_EACH (nla, left, key, key_len) { uint16_t type = nl_attr_type(nla); @@ -2690,7 +2690,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, wanted_attrs &= ~(1u << OVS_KEY_ATTR_TUNNEL); } } else if (type == OVS_KEY_ATTR_IN_PORT) { - md->in_port = nl_attr_get_odp_port(nla); + md->in_port.odp_port = nl_attr_get_odp_port(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_IN_PORT); } diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 4c89b3676..06e64f662 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -46,6 +46,7 @@ #include "packets.h" #include "type-props.h" #include "unaligned.h" +#include "odp-util.h" #include "util.h" static void ofp_print_queue_name(struct ds *string, uint32_t port); @@ -58,11 +59,12 @@ char * ofp_packet_to_string(const void *data, size_t len) { struct ds ds = DS_EMPTY_INITIALIZER; + const struct pkt_metadata md = PKT_METADATA_INITIALIZER(ODPP_NONE); struct ofpbuf buf; struct flow flow; ofpbuf_use_const(&buf, data, len); - flow_extract(&buf, 0, 0, NULL, NULL, &flow); + flow_extract(&buf, &md, &flow); flow_format(&ds, &flow); if (buf.l7) { diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c index 4dfccaf3e..b6b51c76a 100644 --- a/lib/ovs-thread.c +++ b/lib/ovs-thread.c @@ -117,6 +117,15 @@ UNLOCK_FUNCTION(rwlock, destroy); ovs_abort(error, "%s failed", #FUNCTION); \ } \ } +#define XPTHREAD_FUNC3(FUNCTION, PARAM1, PARAM2, PARAM3)\ + void \ + x##FUNCTION(PARAM1 arg1, PARAM2 arg2, PARAM3 arg3) \ + { \ + int error = FUNCTION(arg1, arg2, 
arg3); \ + if (OVS_UNLIKELY(error)) { \ + ovs_abort(error, "%s failed", #FUNCTION); \ + } \ + } XPTHREAD_FUNC1(pthread_mutex_lock, pthread_mutex_t *); XPTHREAD_FUNC1(pthread_mutex_unlock, pthread_mutex_t *); @@ -136,6 +145,10 @@ XPTHREAD_FUNC1(pthread_cond_destroy, pthread_cond_t *); XPTHREAD_FUNC1(pthread_cond_signal, pthread_cond_t *); XPTHREAD_FUNC1(pthread_cond_broadcast, pthread_cond_t *); +XPTHREAD_FUNC3(pthread_barrier_init, pthread_barrier_t *, + pthread_barrierattr_t *, unsigned int); +XPTHREAD_FUNC1(pthread_barrier_destroy, pthread_barrier_t *); + XPTHREAD_FUNC2(pthread_join, pthread_t, void **); typedef void destructor_func(void *); @@ -215,6 +228,19 @@ ovs_mutex_cond_wait(pthread_cond_t *cond, const struct ovs_mutex *mutex_) ovs_abort(error, "pthread_cond_wait failed"); } } + +int +xpthread_barrier_wait(pthread_barrier_t *barrier) +{ + int error; + + error = pthread_barrier_wait(barrier); + if (error && OVS_UNLIKELY(error != PTHREAD_BARRIER_SERIAL_THREAD)) { + ovs_abort(error, "pthread_barrier_wait failed"); + } + + return error; +} DEFINE_EXTERN_PER_THREAD_DATA(ovsthread_id, 0); diff --git a/lib/ovs-thread.h b/lib/ovs-thread.h index 2e9a937f5..8868c5115 100644 --- a/lib/ovs-thread.h +++ b/lib/ovs-thread.h @@ -146,6 +146,12 @@ void xpthread_cond_destroy(pthread_cond_t *); void xpthread_cond_signal(pthread_cond_t *); void xpthread_cond_broadcast(pthread_cond_t *); +/* Wrappers for pthread_barrier_*() that abort the process on any error. 
*/ +void xpthread_barrier_init(pthread_barrier_t *, pthread_barrierattr_t *, + unsigned int count); +int xpthread_barrier_wait(pthread_barrier_t *); +void xpthread_barrier_destroy(pthread_barrier_t *); + void xpthread_key_create(pthread_key_t *, void (*destructor)(void *)); void xpthread_key_delete(pthread_key_t); void xpthread_setspecific(pthread_key_t, const void *); diff --git a/lib/packets.c b/lib/packets.c index 7238f42e8..3f7d6ebfd 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -29,6 +29,7 @@ #include "dynamic-string.h" #include "ofpbuf.h" #include "ovs-thread.h" +#include "odp-util.h" #include "unaligned.h" const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT; @@ -991,3 +992,23 @@ packet_format_tcp_flags(struct ds *s, uint16_t tcp_flags) ds_put_cstr(s, "[800]"); } } + +void pkt_metadata_init(struct pkt_metadata *md, const struct flow_tnl *tnl, + const uint32_t skb_priority, + const uint32_t pkt_mark, + const union flow_in_port *in_port) +{ + + tnl ? memcpy(&md->tunnel, tnl, sizeof(md->tunnel)) + : memset(&md->tunnel, 0, sizeof(md->tunnel)); + + md->skb_priority = skb_priority; + md->pkt_mark = pkt_mark; + md->in_port.odp_port = in_port ? in_port->odp_port : ODPP_NONE; +} + +void pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow) +{ + pkt_metadata_init(md, &flow->tunnel, flow->skb_priority, + flow->pkt_mark, &flow->in_port); +} diff --git a/lib/packets.h b/lib/packets.h index 1855a1c5c..e6b330380 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -36,11 +36,17 @@ struct pkt_metadata { struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ uint32_t skb_priority; /* Packet priority for QoS. */ uint32_t pkt_mark; /* Packet mark. */ - odp_port_t in_port; /* Input port. */ + union flow_in_port in_port; /* Input port. 
*/ }; #define PKT_METADATA_INITIALIZER(PORT) \ - (struct pkt_metadata){ { 0, 0, 0, 0, 0, 0}, 0, 0, (PORT) } + (struct pkt_metadata){ { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} } + +void pkt_metadata_init(struct pkt_metadata *md, const struct flow_tnl *tnl, + const uint32_t skb_priority, + const uint32_t pkt_mark, + const union flow_in_port *in_port); +void pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow); bool dpid_from_string(const char *s, uint64_t *dpidp); diff --git a/lib/rconn.c b/lib/rconn.c index d339365b2..72688ba8d 100644 --- a/lib/rconn.c +++ b/lib/rconn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -592,7 +592,15 @@ rconn_run(struct rconn *rc) ovs_mutex_lock(&rc->mutex); if (rc->vconn) { + int error; + vconn_run(rc->vconn); + + error = vconn_get_status(rc->vconn); + if (error) { + report_error(rc, error); + disconnect(rc, error); + } } for (i = 0; i < rc->n_monitors; ) { struct ofpbuf *msg; diff --git a/lib/route-table.c b/lib/route-table.c index 1afc01d06..fdc21e8cd 100644 --- a/lib/route-table.c +++ b/lib/route-table.c @@ -223,7 +223,8 @@ route_table_reset(void) { struct nl_dump dump; struct rtgenmsg *rtmsg; - struct ofpbuf request, reply; + uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; + struct ofpbuf request, reply, buf; route_map_clear(); route_table_valid = true; @@ -238,13 +239,15 @@ route_table_reset(void) nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); - while (nl_dump_next(&dump, &reply)) { + ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); + while (nl_dump_next(&dump, &reply, &buf)) { struct route_table_msg msg; if (route_table_parse(&reply, &msg)) { route_table_handle_msg(&msg); } } + ofpbuf_uninit(&buf); return nl_dump_done(&dump); } @@ -407,7 +410,8 @@ 
name_table_reset(void) { struct nl_dump dump; struct rtgenmsg *rtmsg; - struct ofpbuf request, reply; + uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; + struct ofpbuf request, reply, buf; name_table_valid = true; name_map_clear(); @@ -420,7 +424,8 @@ name_table_reset(void) nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); - while (nl_dump_next(&dump, &reply)) { + ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); + while (nl_dump_next(&dump, &reply, &buf)) { struct rtnetlink_link_change change; if (rtnetlink_link_parse(&reply, &change) @@ -434,6 +439,7 @@ name_table_reset(void) hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0)); } } + ofpbuf_uninit(&buf); return nl_dump_done(&dump); } diff --git a/lib/socket-util.c b/lib/socket-util.c index 728c76e33..4c79cd6a6 100644 --- a/lib/socket-util.c +++ b/lib/socket-util.c @@ -39,7 +39,7 @@ #include "poll-loop.h" #include "util.h" #include "vlog.h" -#if AF_PACKET && LINUX_DATAPATH +#ifdef __linux__ #include #endif #ifdef HAVE_NETLINK @@ -51,9 +51,11 @@ VLOG_DEFINE_THIS_MODULE(socket_util); /* #ifdefs make it a pain to maintain code: you have to try to build both ways. * Thus, this file compiles all of the code regardless of the target, by - * writing "if (LINUX_DATAPATH)" instead of "#ifdef __linux__". */ -#ifndef LINUX_DATAPATH -#define LINUX_DATAPATH 0 + * writing "if (LINUX)" instead of "#ifdef __linux__". 
*/ +#ifdef __linux__ +#define LINUX 1 +#else +#define LINUX 0 #endif #ifndef O_DIRECTORY @@ -108,14 +110,31 @@ int set_dscp(int fd, uint8_t dscp) { int val; + bool success; if (dscp > 63) { return EINVAL; } + /* Note: this function is used for both of IPv4 and IPv6 sockets */ + success = false; val = dscp << 2; if (setsockopt(fd, IPPROTO_IP, IP_TOS, &val, sizeof val)) { - return sock_errno(); + if (sock_errno() != ENOPROTOOPT) { + return sock_errno(); + } + } else { + success = true; + } + if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &val, sizeof val)) { + if (sock_errno() != ENOPROTOOPT) { + return sock_errno(); + } + } else { + success = true; + } + if (!success) { + return ENOPROTOOPT; } return 0; @@ -275,7 +294,7 @@ drain_rcvbuf(int fd) * * On other Unix-like OSes, MSG_TRUNC has no effect in the flags * argument. */ - char buffer[LINUX_DATAPATH ? 1 : 2048]; + char buffer[LINUX ? 1 : 2048]; ssize_t n_bytes = recv(fd, buffer, sizeof buffer, MSG_TRUNC | MSG_DONTWAIT); if (n_bytes <= 0 || n_bytes >= rcvbuf) { @@ -333,7 +352,7 @@ shorten_name_via_proc(const char *name, char short_name[MAX_UN_LEN + 1], int dirfd; int len; - if (!LINUX_DATAPATH) { + if (!LINUX) { return ENAMETOOLONG; } @@ -753,7 +772,7 @@ inet_open_active(int style, const char *target, uint16_t default_port, * connect(), the handshake SYN frames will be sent with a TOS of 0. */ error = set_dscp(fd, dscp); if (error) { - VLOG_ERR("%s: socket: %s", target, sock_strerror(error)); + VLOG_ERR("%s: set_dscp: %s", target, sock_strerror(error)); goto exit; } @@ -890,7 +909,7 @@ inet_open_passive(int style, const char *target, int default_port, * connect(), the handshake SYN frames will be sent with a TOS of 0. 
*/ error = set_dscp(fd, dscp); if (error) { - VLOG_ERR("%s: socket: %s", target, sock_strerror(error)); + VLOG_ERR("%s: set_dscp: %s", target, sock_strerror(error)); goto error; } @@ -1030,6 +1049,7 @@ get_mtime(const char *file_name, struct timespec *mtime) } } +#ifndef _WIN32 void xpipe(int fds[2]) { @@ -1045,6 +1065,7 @@ xpipe_nonblocking(int fds[2]) xset_nonblocking(fds[0]); xset_nonblocking(fds[1]); } +#endif static int getsockopt_int(int fd, int level, int option, const char *optname, int *valuep) @@ -1127,7 +1148,7 @@ describe_sockaddr(struct ds *string, int fd, } } #endif -#if AF_PACKET && LINUX_DATAPATH +#if __linux__ else if (ss.ss_family == AF_PACKET) { struct sockaddr_ll sll; @@ -1157,7 +1178,7 @@ describe_sockaddr(struct ds *string, int fd, } -#ifdef LINUX_DATAPATH +#ifdef __linux__ static void put_fd_filename(struct ds *string, int fd) { @@ -1202,7 +1223,7 @@ describe_fd(int fd) : S_ISFIFO(s.st_mode) ? "FIFO" : S_ISLNK(s.st_mode) ? "symbolic link" : "unknown")); -#ifdef LINUX_DATAPATH +#ifdef __linux__ put_fd_filename(&string, fd); #endif } diff --git a/lib/socket-util.h b/lib/socket-util.h index 92f0c6f71..2acc97414 100644 --- a/lib/socket-util.h +++ b/lib/socket-util.h @@ -65,8 +65,10 @@ int write_fully(int fd, const void *, size_t, size_t *bytes_written); int fsync_parent_dir(const char *file_name); int get_mtime(const char *file_name, struct timespec *mtime); +#ifndef _WIN32 void xpipe(int fds[2]); void xpipe_nonblocking(int fds[2]); +#endif char *describe_fd(int fd); diff --git a/lib/stream-fd.c b/lib/stream-fd-unix.c similarity index 100% rename from lib/stream-fd.c rename to lib/stream-fd-unix.c diff --git a/lib/stream-fd-windows.c b/lib/stream-fd-windows.c new file mode 100644 index 000000000..7a4a9954f --- /dev/null +++ b/lib/stream-fd-windows.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "stream-fd.h" +#include +#include +#include +#include +#include +#include +#include +#include "fatal-signal.h" +#include "poll-loop.h" +#include "socket-util.h" +#include "stream.h" +#include "stream-provider.h" +#include "util.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(stream_fd_windows); + +/* Active file descriptor stream. */ + +struct stream_fd +{ + struct stream stream; + int fd; + HANDLE wevent; +}; + +static const struct stream_class stream_fd_class; + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25); + +/* Creates a new stream named 'name' that will send and receive data on 'fd' + * and stores a pointer to the stream in '*streamp'. Initial connection status + * 'connect_status' is interpreted as described for stream_init(). + * + * Returns 0 if successful, otherwise a positive errno value. (The current + * implementation never fails.) 
*/ +int +new_fd_stream(const char *name, int fd, int connect_status, + struct stream **streamp) +{ + struct stream_fd *s; + + s = xmalloc(sizeof *s); + stream_init(&s->stream, &stream_fd_class, connect_status, name); + s->fd = fd; + s->wevent = CreateEvent(NULL, FALSE, FALSE, NULL); + *streamp = &s->stream; + return 0; +} + +static struct stream_fd * +stream_fd_cast(struct stream *stream) +{ + stream_assert_class(stream, &stream_fd_class); + return CONTAINER_OF(stream, struct stream_fd, stream); +} + +static void +fd_close(struct stream *stream) +{ + struct stream_fd *s = stream_fd_cast(stream); + WSAEventSelect(s->fd, NULL, 0); + CloseHandle(s->wevent); + closesocket(s->fd); + free(s); +} + +static int +fd_connect(struct stream *stream) +{ + struct stream_fd *s = stream_fd_cast(stream); + return check_connection_completion(s->fd); +} + +static ssize_t +fd_recv(struct stream *stream, void *buffer, size_t n) +{ + struct stream_fd *s = stream_fd_cast(stream); + ssize_t retval; + + retval = recv(s->fd, buffer, n, 0); + if (retval < 0) { + retval = -sock_errno(); + } + if (retval == -WSAEWOULDBLOCK) { + return -EAGAIN; + } + return retval; +} + +static ssize_t +fd_send(struct stream *stream, const void *buffer, size_t n) +{ + struct stream_fd *s = stream_fd_cast(stream); + ssize_t retval; + + retval = send(s->fd, buffer, n, 0); + if (retval < 0) { + retval = -sock_errno(); + } + if (retval == -WSAEWOULDBLOCK) { + return -EAGAIN; + } + + return retval; +} + +static void +fd_wait(struct stream *stream, enum stream_wait_type wait) +{ + struct stream_fd *s = stream_fd_cast(stream); + switch (wait) { + case STREAM_CONNECT: + case STREAM_SEND: + poll_fd_wait_event(s->fd, s->wevent, POLLOUT); + break; + + case STREAM_RECV: + poll_fd_wait_event(s->fd, s->wevent, POLLIN); + break; + + default: + NOT_REACHED(); + } +} + +static const struct stream_class stream_fd_class = { + "fd", /* name */ + false, /* needs_probes */ + NULL, /* open */ + fd_close, /* close */ + fd_connect, /* 
connect */ + fd_recv, /* recv */ + fd_send, /* send */ + NULL, /* run */ + NULL, /* run_wait */ + fd_wait, /* wait */ +}; + +/* Passive file descriptor stream. */ + +struct fd_pstream +{ + struct pstream pstream; + int fd; + HANDLE wevent; + int (*accept_cb)(int fd, const struct sockaddr_storage *, size_t ss_len, + struct stream **); + int (*set_dscp_cb)(int fd, uint8_t dscp); + char *unlink_path; +}; + +static const struct pstream_class fd_pstream_class; + +static struct fd_pstream * +fd_pstream_cast(struct pstream *pstream) +{ + pstream_assert_class(pstream, &fd_pstream_class); + return CONTAINER_OF(pstream, struct fd_pstream, pstream); +} + +/* Creates a new pstream named 'name' that will accept new socket connections + * on 'fd' and stores a pointer to the stream in '*pstreamp'. + * + * When a connection has been accepted, 'accept_cb' will be called with the new + * socket fd 'fd' and the remote address of the connection 'sa' and 'sa_len'. + * accept_cb must return 0 if the connection is successful, in which case it + * must initialize '*streamp' to the new stream, or a positive errno value on + * error. In either case accept_cb takes ownership of the 'fd' passed in. + * + * When '*pstreamp' is closed, then 'unlink_path' (if nonnull) will be passed + * to fatal_signal_unlink_file_now() and freed with free(). + * + * Returns 0 if successful, otherwise a positive errno value. (The current + * implementation never fails.) 
*/ +int +new_fd_pstream(const char *name, int fd, + int (*accept_cb)(int fd, const struct sockaddr_storage *ss, + size_t ss_len, struct stream **streamp), + int (*set_dscp_cb)(int fd, uint8_t dscp), + char *unlink_path, struct pstream **pstreamp) +{ + struct fd_pstream *ps = xmalloc(sizeof *ps); + pstream_init(&ps->pstream, &fd_pstream_class, name); + ps->fd = fd; + ps->wevent = CreateEvent(NULL, FALSE, FALSE, NULL); + ps->accept_cb = accept_cb; + ps->set_dscp_cb = set_dscp_cb; + ps->unlink_path = unlink_path; + *pstreamp = &ps->pstream; + return 0; +} + +static void +pfd_close(struct pstream *pstream) +{ + struct fd_pstream *ps = fd_pstream_cast(pstream); + WSAEventSelect(ps->fd, NULL, 0); + CloseHandle(ps->wevent); + closesocket(ps->fd); + free(ps); +} + +static int +pfd_accept(struct pstream *pstream, struct stream **new_streamp) +{ + struct fd_pstream *ps = fd_pstream_cast(pstream); + struct sockaddr_storage ss; + socklen_t ss_len = sizeof ss; + int new_fd; + int retval; + + new_fd = accept(ps->fd, (struct sockaddr *) &ss, &ss_len); + if (new_fd < 0) { + retval = sock_errno(); + if (retval == WSAEWOULDBLOCK) { + return EAGAIN; + } + return retval; + } + + retval = set_nonblocking(new_fd); + if (retval) { + closesocket(new_fd); + return retval; + } + + return ps->accept_cb(new_fd, &ss, ss_len, new_streamp); +} + +static void +pfd_wait(struct pstream *pstream) +{ + struct fd_pstream *ps = fd_pstream_cast(pstream); + poll_fd_wait_event(ps->fd, ps->wevent, POLLIN); +} + +static int +pfd_set_dscp(struct pstream *pstream, uint8_t dscp) +{ + struct fd_pstream *ps = fd_pstream_cast(pstream); + if (ps->set_dscp_cb) { + return ps->set_dscp_cb(ps->fd, dscp); + } + return 0; +} + +static const struct pstream_class fd_pstream_class = { + "pstream", + false, + NULL, + pfd_close, + pfd_accept, + pfd_wait, + pfd_set_dscp, +}; diff --git a/lib/stream-fd.h b/lib/stream-fd.h index 8f595a908..9f138a734 100644 --- a/lib/stream-fd.h +++ b/lib/stream-fd.h @@ -12,6 +12,9 @@ * WITHOUT 
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + * + * Note on windows platform, stream fd can only handle sockets, on unix any + * fd is acceptable. */ #ifndef STREAM_FD_H diff --git a/lib/stream.c b/lib/stream.c index 2e7accb80..1dfecf060 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -51,7 +51,7 @@ enum stream_state { static const struct stream_class *stream_classes[] = { &tcp_stream_class, -#ifdef AF_UNIX +#ifndef _WIN32 &unix_stream_class, #endif #ifdef HAVE_OPENSSL @@ -61,7 +61,7 @@ static const struct stream_class *stream_classes[] = { static const struct pstream_class *pstream_classes[] = { &ptcp_pstream_class, -#ifdef AF_UNIX +#ifndef _WIN32 &punix_pstream_class, #endif #ifdef HAVE_OPENSSL diff --git a/lib/timeval.c b/lib/timeval.c index 691cf74e1..74efa59f2 100644 --- a/lib/timeval.c +++ b/lib/timeval.c @@ -40,6 +40,21 @@ VLOG_DEFINE_THIS_MODULE(timeval); +#ifdef _WIN32 +typedef unsigned int clockid_t; + +#ifndef CLOCK_MONOTONIC +#define CLOCK_MONOTONIC 1 +#endif + +#ifndef CLOCK_REALTIME +#define CLOCK_REALTIME 2 +#endif + +/* Number of 100 ns intervals from January 1, 1601 till January 1, 1970. */ +static ULARGE_INTEGER unix_epoch; +#endif /* _WIN32 */ + struct clock { clockid_t id; /* CLOCK_MONOTONIC or CLOCK_REALTIME. */ @@ -96,6 +111,16 @@ do_init_time(void) { struct timespec ts; +#ifdef _WIN32 + /* Calculate number of 100-nanosecond intervals till 01/01/1970. 
*/ + SYSTEMTIME unix_epoch_st = { 1970, 1, 0, 1, 0, 0, 0, 0}; + FILETIME unix_epoch_ft; + + SystemTimeToFileTime(&unix_epoch_st, &unix_epoch_ft); + unix_epoch.LowPart = unix_epoch_ft.dwLowDateTime; + unix_epoch.HighPart = unix_epoch_ft.dwHighDateTime; +#endif + coverage_init(); init_clock(&monotonic_clock, (!clock_gettime(CLOCK_MONOTONIC, &ts) @@ -282,7 +307,12 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED, #endif if (deadline <= time_msec()) { +#ifndef _WIN32 fatal_signal_handler(SIGALRM); +#else + VLOG_ERR("wake up from WaitForMultipleObjects after deadline"); + fatal_signal_handler(SIGTERM); +#endif if (retval < 0) { retval = 0; } @@ -320,12 +350,71 @@ time_boot_msec(void) return boot_time; } +#ifdef _WIN32 +static ULARGE_INTEGER +xgetfiletime(void) +{ + ULARGE_INTEGER current_time; + FILETIME current_time_ft; + + /* Returns current time in UTC as a 64-bit value representing the number + * of 100-nanosecond intervals since January 1, 1601 . */ + GetSystemTimePreciseAsFileTime(&current_time_ft); + current_time.LowPart = current_time_ft.dwLowDateTime; + current_time.HighPart = current_time_ft.dwHighDateTime; + + return current_time; +} + +static int +clock_gettime(clock_t id, struct timespec *ts) +{ + if (id == CLOCK_MONOTONIC) { + static LARGE_INTEGER freq; + LARGE_INTEGER count; + long long int ns; + + if (!freq.QuadPart) { + /* Number of counts per second. */ + QueryPerformanceFrequency(&freq); + } + /* Total number of counts from a starting point. */ + QueryPerformanceCounter(&count); + + /* Total nano seconds from a starting point. */ + ns = (double) count.QuadPart / freq.QuadPart * 1000000000; + + ts->tv_sec = count.QuadPart / freq.QuadPart; + ts->tv_nsec = ns % 1000000000; + } else if (id == CLOCK_REALTIME) { + ULARGE_INTEGER current_time = xgetfiletime(); + + /* Time from Epoch to now. 
*/ + ts->tv_sec = (current_time.QuadPart - unix_epoch.QuadPart) / 10000000; + ts->tv_nsec = ((current_time.QuadPart - unix_epoch.QuadPart) % + 10000000) * 100; + } else { + return -1; + } + + return 0; +} +#endif /* _WIN32 */ + void xgettimeofday(struct timeval *tv) { +#ifndef _WIN32 if (gettimeofday(tv, NULL) == -1) { VLOG_FATAL("gettimeofday failed (%s)", ovs_strerror(errno)); } +#else + ULARGE_INTEGER current_time = xgetfiletime(); + + tv->tv_sec = (current_time.QuadPart - unix_epoch.QuadPart) / 10000000; + tv->tv_usec = ((current_time.QuadPart - unix_epoch.QuadPart) % + 10000000) / 10; +#endif } void diff --git a/lib/timeval.h b/lib/timeval.h index c207f23ad..0bd74ccde 100644 --- a/lib/timeval.h +++ b/lib/timeval.h @@ -40,6 +40,11 @@ BUILD_ASSERT_DECL(TYPE_IS_SIGNED(time_t)); #define TIME_MAX TYPE_MAXIMUM(time_t) #define TIME_MIN TYPE_MINIMUM(time_t) +#ifdef _WIN32 +#define localtime_r(timep, result) localtime_s(result, timep) +#define gmtime_r(timep, result) gmtime_s(result, timep) +#endif /* _WIN32 */ + struct tm_msec { struct tm tm; int msec; diff --git a/lib/vconn.c b/lib/vconn.c index f0549d5da..d6d239fb8 100644 --- a/lib/vconn.c +++ b/lib/vconn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -293,6 +293,15 @@ vconn_run_wait(struct vconn *vconn) } } +/* Returns 0 if 'vconn' is healthy (connecting or connected), a positive errno + * value if the connection died abnormally (connection failed or aborted), or + * EOF if the connection was closed in a normal way. */ +int +vconn_get_status(const struct vconn *vconn) +{ + return vconn->error == EAGAIN ? 
0 : vconn->error; +} + int vconn_open_block(const char *name, uint32_t allowed_versions, uint8_t dscp, struct vconn **vconnp) diff --git a/lib/vconn.h b/lib/vconn.h index 86785818e..f6ba95531 100644 --- a/lib/vconn.h +++ b/lib/vconn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,6 +57,8 @@ int vconn_transact_multiple_noreply(struct vconn *, struct list *requests, void vconn_run(struct vconn *); void vconn_run_wait(struct vconn *); +int vconn_get_status(const struct vconn *); + int vconn_open_block(const char *name, uint32_t allowed_versions, uint8_t dscp, struct vconn **); int vconn_connect_block(struct vconn *); diff --git a/lib/vlandev.c b/lib/vlandev.c index b793f7748..382487cdc 100644 --- a/lib/vlandev.c +++ b/lib/vlandev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2013 Nicira, Inc. + * Copyright (c) 2011, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ struct vlandev_class { int (*vd_del)(const char *vlan_dev); }; -#ifdef LINUX_DATAPATH +#ifdef __linux__ static const struct vlandev_class vlandev_linux_class; #endif static const struct vlandev_class vlandev_stub_class; @@ -61,7 +61,7 @@ static const struct vlandev_class * vlandev_get_class(void) { if (!vd_class) { -#ifdef LINUX_DATAPATH +#if __linux__ vd_class = &vlandev_linux_class; #else vd_class = &vlandev_stub_class; @@ -161,7 +161,7 @@ vlandev_get_name(const char *real_dev_name, int vid) /* The Linux vlandev implementation. 
*/ -#ifdef LINUX_DATAPATH +#ifdef __linux__ #include "rtnetlink-link.h" #include #include diff --git a/manpages.mk b/manpages.mk index 5d5df0315..1d9ac4526 100644 --- a/manpages.mk +++ b/manpages.mk @@ -206,6 +206,7 @@ utilities/ovs-vlan-bugs.man: utilities/ovs-vsctl.8: \ utilities/ovs-vsctl.8.in \ + lib/common.man \ lib/ssl-bootstrap.man \ lib/ssl-peer-ca-cert.man \ lib/ssl.man \ @@ -218,6 +219,7 @@ utilities/ovs-vsctl.8: \ ovsdb/remote-passive.man \ ovsdb/remote-passive.man utilities/ovs-vsctl.8.in: +lib/common.man: lib/ssl-bootstrap.man: lib/ssl-peer-ca-cert.man: lib/ssl.man: @@ -262,6 +264,7 @@ ovsdb/remote-passive.man: vtep/vtep-ctl.8: \ vtep/vtep-ctl.8.in \ + lib/common.man \ lib/ssl-bootstrap.man \ lib/ssl-peer-ca-cert.man \ lib/ssl.man \ @@ -272,6 +275,7 @@ vtep/vtep-ctl.8: \ ovsdb/remote-passive.man \ ovsdb/remote-passive.man vtep/vtep-ctl.8.in: +lib/common.man: lib/ssl-bootstrap.man: lib/ssl-peer-ca-cert.man: lib/ssl.man: diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index a58e785a2..033ab7d5c 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1456,9 +1456,11 @@ static void schedule_packet_in(struct ofconn *, struct ofproto_packet_in, enum ofp_packet_in_reason wire_reason); /* Sends an OFPT_PORT_STATUS message with 'opp' and 'reason' to appropriate - * controllers managed by 'mgr'. */ + * controllers managed by 'mgr'. For messages caused by a controller + * OFPT_PORT_MOD, specify 'source' as the controller connection that sent the + * request; otherwise, specify 'source' as NULL. */ void -connmgr_send_port_status(struct connmgr *mgr, +connmgr_send_port_status(struct connmgr *mgr, struct ofconn *source, const struct ofputil_phy_port *pp, uint8_t reason) { /* XXX Should limit the number of queued port status change messages. 
*/ @@ -1471,6 +1473,30 @@ connmgr_send_port_status(struct connmgr *mgr, if (ofconn_receives_async_msg(ofconn, OAM_PORT_STATUS, reason)) { struct ofpbuf *msg; + /* Before 1.5, OpenFlow specified that OFPT_PORT_MOD should not + * generate OFPT_PORT_STATUS messages. That requirement was a + * relic of how OpenFlow originally supported a single controller, + * so that one could expect the controller to already know the + * changes it had made. + * + * EXT-338 changes OpenFlow 1.5 OFPT_PORT_MOD to send + * OFPT_PORT_STATUS messages to every controller. This is + * obviously more useful in the multi-controller case. We could + * always implement it that way in OVS, but that would risk + * confusing controllers that are intended for single-controller + * use only. (Imagine a controller that generates an OFPT_PORT_MOD + * in response to any OFPT_PORT_STATUS!) + * + * So this compromises: for OpenFlow 1.4 and earlier, it generates + * OFPT_PORT_STATUS for OFPT_PORT_MOD, but not back to the + * originating controller. In a single-controller environment, in + * particular, this means that it will never generate + * OFPT_PORT_STATUS for OFPT_PORT_MOD at all. */ + if (ofconn == source + && rconn_get_version(ofconn->rconn) < OFP15_VERSION) { + continue; + } + msg = ofputil_encode_port_status(&ps, ofconn_get_protocol(ofconn)); ofconn_send(ofconn, msg, NULL); } diff --git a/ofproto/connmgr.h b/ofproto/connmgr.h index 170d8721d..3c9216ffb 100644 --- a/ofproto/connmgr.h +++ b/ofproto/connmgr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -147,7 +147,7 @@ void ofconn_remove_opgroup(struct ofconn *, struct list *, const struct ofp_header *request, int error); /* Sending asynchronous messages. 
*/ -void connmgr_send_port_status(struct connmgr *, +void connmgr_send_port_status(struct connmgr *, struct ofconn *source, const struct ofputil_phy_port *, uint8_t reason); void connmgr_send_flow_removed(struct connmgr *, const struct ofputil_flow_removed *); diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index cad131088..0d5b25189 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -41,7 +41,6 @@ #define MAX_QUEUE_LENGTH 512 #define FLOW_MISS_MAX_BATCH 50 #define REVALIDATE_MAX_BATCH 50 -#define MAX_IDLE 1500 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall); @@ -230,6 +229,7 @@ static void *udpif_revalidator(void *); static uint64_t udpif_get_n_flows(struct udpif *); static void revalidate_udumps(struct revalidator *, struct list *udumps); static void revalidator_sweep(struct revalidator *); +static void revalidator_purge(struct revalidator *); static void upcall_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[], void *aux); static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc, @@ -332,7 +332,6 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, for (i = 0; i < udpif->n_revalidators; i++) { struct revalidator *revalidator = &udpif->revalidators[i]; struct udpif_flow_dump *udump, *next_udump; - struct udpif_key *ukey, *next_ukey; LIST_FOR_EACH_SAFE (udump, next_udump, list_node, &revalidator->udumps) { @@ -340,10 +339,9 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, free(udump); } - HMAP_FOR_EACH_SAFE (ukey, next_ukey, hmap_node, - &revalidator->ukeys) { - ukey_delete(revalidator, ukey); - } + /* Delete ukeys, and delete all flows from the datapath to prevent + * double-counting stats. */ + revalidator_purge(revalidator); hmap_destroy(&revalidator->ukeys); ovs_mutex_destroy(&revalidator->mutex); @@ -412,6 +410,22 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, } } +/* Waits for all ongoing upcall translations to complete. 
This ensures that + * there are no transient references to any removed ofprotos (or other + * objects). In particular, this should be called after an ofproto is removed + * (e.g. via xlate_remove_ofproto()) but before it is destroyed. */ +void +udpif_synchronize(struct udpif *udpif) +{ + /* This is stronger than necessary. It would be sufficient to ensure + * (somehow) that each handler and revalidator thread had passed through + * its main loop once. */ + size_t n_handlers = udpif->n_handlers; + size_t n_revalidators = udpif->n_revalidators; + udpif_set_threads(udpif, 0, 0); + udpif_set_threads(udpif, n_handlers, n_revalidators); +} + /* Notifies 'udpif' that something changed which may render previous * xlate_actions() results invalid. */ void @@ -537,6 +551,8 @@ udpif_flow_dumper(void *arg) bool need_revalidate; uint64_t reval_seq; size_t n_flows, i; + int error; + void *state = NULL; reval_seq = seq_read(udpif->reval_seq); need_revalidate = udpif->last_reval_seq != reval_seq; @@ -547,9 +563,14 @@ udpif_flow_dumper(void *arg) udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2; start_time = time_msec(); - dpif_flow_dump_start(&dump, udpif->dpif); - while (dpif_flow_dump_next(&dump, &key, &key_len, &mask, &mask_len, - NULL, NULL, &stats) + error = dpif_flow_dump_start(&dump, udpif->dpif); + if (error) { + VLOG_INFO("Failed to start flow dump (%s)", ovs_strerror(error)); + goto skip; + } + dpif_flow_dump_state_init(udpif->dpif, &state); + while (dpif_flow_dump_next(&dump, state, &key, &key_len, + &mask, &mask_len, NULL, NULL, &stats) && !latch_is_set(&udpif->exit_latch)) { struct udpif_flow_dump *udump = xmalloc(sizeof *udump); struct revalidator *revalidator; @@ -580,6 +601,7 @@ udpif_flow_dumper(void *arg) xpthread_cond_signal(&revalidator->wake_cond); ovs_mutex_unlock(&revalidator->mutex); } + dpif_flow_dump_state_uninit(udpif->dpif, state); dpif_flow_dump_done(&dump); /* Let all the revalidators finish and garbage collect. 
*/ @@ -622,7 +644,8 @@ udpif_flow_dumper(void *arg) duration); } - poll_timer_wait_until(start_time + MIN(MAX_IDLE, 500)); +skip: + poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500)); seq_wait(udpif->reval_seq, udpif->last_reval_seq); latch_wait(&udpif->exit_latch); poll_block(); @@ -968,9 +991,10 @@ handle_upcalls(struct handler *handler, struct list *upcalls) type = classify_upcall(upcall); if (type == MISS_UPCALL) { uint32_t hash; + struct pkt_metadata md; - flow_extract(packet, flow.skb_priority, flow.pkt_mark, - &flow.tunnel, &flow.in_port, &miss->flow); + pkt_metadata_from_flow(&md, &flow); + flow_extract(packet, &md, &miss->flow); hash = flow_hash(&miss->flow, 0); existing_miss = flow_miss_find(&misses, ofproto, &miss->flow, @@ -1252,6 +1276,22 @@ ukey_lookup(struct revalidator *revalidator, struct udpif_flow_dump *udump) return NULL; } +static struct udpif_key * +ukey_create(const struct nlattr *key, size_t key_len, long long int used) +{ + struct udpif_key *ukey = xmalloc(sizeof *ukey); + + ukey->key = (struct nlattr *) &ukey->key_buf; + memcpy(&ukey->key_buf, key, key_len); + ukey->key_len = key_len; + + ukey->mark = false; + ukey->created = used ? used : time_msec(); + memset(&ukey->stats, 0, sizeof ukey->stats); + + return ukey; +} + static void ukey_delete(struct revalidator *revalidator, struct udpif_key *ukey) { @@ -1360,20 +1400,106 @@ exit: return ok; } +struct dump_op { + struct udpif_key *ukey; + struct udpif_flow_dump *udump; + struct dpif_flow_stats stats; /* Stats for 'op'. */ + struct dpif_op op; /* Flow del operation. 
*/ +}; + static void -revalidate_udumps(struct revalidator *revalidator, struct list *udumps) +dump_op_init(struct dump_op *op, const struct nlattr *key, size_t key_len, + struct udpif_key *ukey, struct udpif_flow_dump *udump) +{ + op->ukey = ukey; + op->udump = udump; + op->op.type = DPIF_OP_FLOW_DEL; + op->op.u.flow_del.key = key; + op->op.u.flow_del.key_len = key_len; + op->op.u.flow_del.stats = &op->stats; +} + +static void +push_dump_ops(struct revalidator *revalidator, + struct dump_op *ops, size_t n_ops) { struct udpif *udpif = revalidator->udpif; + struct dpif_op *opsp[REVALIDATE_MAX_BATCH]; + size_t i; - struct { - struct dpif_flow_stats ukey_stats; /* Stats stored in the ukey. */ - struct dpif_flow_stats stats; /* Stats for 'op'. */ - struct dpif_op op; /* Flow del operation. */ - } ops[REVALIDATE_MAX_BATCH]; + ovs_assert(n_ops <= REVALIDATE_MAX_BATCH); + for (i = 0; i < n_ops; i++) { + opsp[i] = &ops[i].op; + } + dpif_operate(udpif->dpif, opsp, n_ops); - struct dpif_op *opsp[REVALIDATE_MAX_BATCH]; + for (i = 0; i < n_ops; i++) { + struct dump_op *op = &ops[i]; + struct dpif_flow_stats *push, *stats, push_buf; + + stats = op->op.u.flow_del.stats; + if (op->ukey) { + push = &push_buf; + push->used = MAX(stats->used, op->ukey->stats.used); + push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags; + push->n_packets = stats->n_packets - op->ukey->stats.n_packets; + push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes; + } else { + push = stats; + } + + if (push->n_packets || netflow_exists()) { + struct ofproto_dpif *ofproto; + struct netflow *netflow; + struct flow flow; + + if (!xlate_receive(udpif->backer, NULL, op->op.u.flow_del.key, + op->op.u.flow_del.key_len, &flow, &ofproto, + NULL, NULL, &netflow, NULL)) { + struct xlate_in xin; + + xlate_in_init(&xin, ofproto, &flow, NULL, push->tcp_flags, + NULL); + xin.resubmit_stats = push->n_packets ? 
push : NULL; + xin.may_learn = push->n_packets > 0; + xin.skip_wildcards = true; + xlate_actions_for_side_effects(&xin); + + if (netflow) { + netflow_expire(netflow, &flow); + netflow_flow_clear(netflow, &flow); + netflow_unref(netflow); + } + } + } + } + + for (i = 0; i < n_ops; i++) { + struct udpif_key *ukey; + + /* If there's a udump, this ukey came directly from a datapath flow + * dump. Sometimes a datapath can send duplicates in flow dumps, in + * which case we wouldn't want to double-free a ukey, so avoid that by + * looking up the ukey again. + * + * If there's no udump then we know what we're doing. */ + ukey = (ops[i].udump + ? ukey_lookup(revalidator, ops[i].udump) + : ops[i].ukey); + if (ukey) { + ukey_delete(revalidator, ukey); + } + } +} + +static void +revalidate_udumps(struct revalidator *revalidator, struct list *udumps) +{ + struct udpif *udpif = revalidator->udpif; + + struct dump_op ops[REVALIDATE_MAX_BATCH]; struct udpif_flow_dump *udump, *next_udump; - size_t n_ops, i, n_flows; + size_t n_ops, n_flows; unsigned int flow_limit; long long int max_idle; bool must_del; @@ -1383,7 +1509,7 @@ revalidate_udumps(struct revalidator *revalidator, struct list *udumps) n_flows = udpif_get_n_flows(udpif); must_del = false; - max_idle = MAX_IDLE; + max_idle = ofproto_max_idle; if (n_flows > flow_limit) { must_del = n_flows > 2 * flow_limit; max_idle = 100; @@ -1403,37 +1529,14 @@ revalidate_udumps(struct revalidator *revalidator, struct list *udumps) } if (must_del || (used && used < now - max_idle)) { - struct dpif_flow_stats *ukey_stats = &ops[n_ops].ukey_stats; - struct dpif_op *op = &ops[n_ops].op; - - op->type = DPIF_OP_FLOW_DEL; - op->u.flow_del.key = udump->key; - op->u.flow_del.key_len = udump->key_len; - op->u.flow_del.stats = &ops[n_ops].stats; - n_ops++; - - if (ukey) { - *ukey_stats = ukey->stats; - ukey_delete(revalidator, ukey); - } else { - memset(ukey_stats, 0, sizeof *ukey_stats); - } + struct dump_op *dop = &ops[n_ops++]; + 
dump_op_init(dop, udump->key, udump->key_len, ukey, udump); continue; } if (!ukey) { - ukey = xmalloc(sizeof *ukey); - - ukey->key = (struct nlattr *) &ukey->key_buf; - memcpy(ukey->key, udump->key, udump->key_len); - ukey->key_len = udump->key_len; - - ukey->created = used ? used : now; - memset(&ukey->stats, 0, sizeof ukey->stats); - - ukey->mark = false; - + ukey = ukey_create(udump->key, udump->key_len, used); hmap_insert(&revalidator->ukeys, &ukey->hmap_node, udump->key_hash); } @@ -1448,46 +1551,7 @@ revalidate_udumps(struct revalidator *revalidator, struct list *udumps) free(udump); } - for (i = 0; i < n_ops; i++) { - opsp[i] = &ops[i].op; - } - dpif_operate(udpif->dpif, opsp, n_ops); - - for (i = 0; i < n_ops; i++) { - struct dpif_flow_stats push, *stats, *ukey_stats; - - ukey_stats = &ops[i].ukey_stats; - stats = ops[i].op.u.flow_del.stats; - push.used = MAX(stats->used, ukey_stats->used); - push.tcp_flags = stats->tcp_flags | ukey_stats->tcp_flags; - push.n_packets = stats->n_packets - ukey_stats->n_packets; - push.n_bytes = stats->n_bytes - ukey_stats->n_bytes; - - if (push.n_packets || netflow_exists()) { - struct ofproto_dpif *ofproto; - struct netflow *netflow; - struct flow flow; - - if (!xlate_receive(udpif->backer, NULL, ops[i].op.u.flow_del.key, - ops[i].op.u.flow_del.key_len, &flow, - &ofproto, NULL, NULL, &netflow, NULL)) { - struct xlate_in xin; - - xlate_in_init(&xin, ofproto, &flow, NULL, push.tcp_flags, - NULL); - xin.resubmit_stats = push.n_packets ? 
&push : NULL; - xin.may_learn = push.n_packets > 0; - xin.skip_wildcards = true; - xlate_actions_for_side_effects(&xin); - - if (netflow) { - netflow_expire(netflow, &flow); - netflow_flow_clear(netflow, &flow); - netflow_unref(netflow); - } - } - } - } + push_dump_ops(revalidator, ops, n_ops); LIST_FOR_EACH_SAFE (udump, next_udump, list_node, udumps) { list_remove(&udump->list_node); @@ -1496,17 +1560,46 @@ revalidate_udumps(struct revalidator *revalidator, struct list *udumps) } static void -revalidator_sweep(struct revalidator *revalidator) +revalidator_sweep__(struct revalidator *revalidator, bool purge) { + struct dump_op ops[REVALIDATE_MAX_BATCH]; struct udpif_key *ukey, *next; + size_t n_ops; + + n_ops = 0; HMAP_FOR_EACH_SAFE (ukey, next, hmap_node, &revalidator->ukeys) { - if (ukey->mark) { + if (!purge && ukey->mark) { ukey->mark = false; } else { - ukey_delete(revalidator, ukey); + struct dump_op *op = &ops[n_ops++]; + + /* If we have previously seen a flow in the datapath, but didn't + * see it during the most recent dump, delete it. This allows us + * to clean up the ukey and keep the statistics consistent. */ + dump_op_init(op, ukey->key, ukey->key_len, ukey, NULL); + if (n_ops == REVALIDATE_MAX_BATCH) { + push_dump_ops(revalidator, ops, n_ops); + n_ops = 0; + } } } + + if (n_ops) { + push_dump_ops(revalidator, ops, n_ops); + } +} + +static void +revalidator_sweep(struct revalidator *revalidator) +{ + revalidator_sweep__(revalidator, false); +} + +static void +revalidator_purge(struct revalidator *revalidator) +{ + revalidator_sweep__(revalidator, true); } static void diff --git a/ofproto/ofproto-dpif-upcall.h b/ofproto/ofproto-dpif-upcall.h index d73ae4c9a..9eeee5b90 100644 --- a/ofproto/ofproto-dpif-upcall.h +++ b/ofproto/ofproto-dpif-upcall.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2013 Nicira, Inc. +/* Copyright (c) 2013, 2014 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ struct simap; struct udpif *udpif_create(struct dpif_backer *, struct dpif *); void udpif_set_threads(struct udpif *, size_t n_handlers, size_t n_revalidators); +void udpif_synchronize(struct udpif *); void udpif_destroy(struct udpif *); void udpif_revalidate(struct udpif *); void udpif_get_memory_usage(struct udpif *, struct simap *usage); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 89d92af58..eb4931ec3 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3184,12 +3184,11 @@ xlate_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet) struct xport *xport; struct ofpact_output output; struct flow flow; - union flow_in_port in_port_; ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output); /* Use OFPP_NONE as the in_port to avoid special packet processing. */ - in_port_.ofp_port = OFPP_NONE; - flow_extract(packet, 0, 0, NULL, &in_port_, &flow); + flow_extract(packet, NULL, &flow); + flow.in_port.ofp_port = OFPP_NONE; ovs_rwlock_rdlock(&xlate_rwlock); xport = xport_lookup(ofport); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 64e27473a..8c43ee997 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1182,9 +1182,9 @@ destruct(struct ofproto *ofproto_) xlate_remove_ofproto(ofproto); ovs_rwlock_unlock(&xlate_rwlock); - /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a - * use-after-free error. */ - udpif_revalidate(ofproto->backer->udpif); + /* Ensure that the upcall processing threads have no remaining references + * to the ofproto or anything in it. 
*/ + udpif_synchronize(ofproto->backer->udpif); hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node); @@ -2997,7 +2997,7 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, execute.md.tunnel = flow->tunnel; execute.md.skb_priority = flow->skb_priority; execute.md.pkt_mark = flow->pkt_mark; - execute.md.in_port = ofp_port_to_odp_port(ofproto, in_port); + execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port); execute.needs_help = (xout.slow & SLOW_ACTION) != 0; error = dpif_execute(ofproto->backer->dpif, &execute); @@ -3784,11 +3784,14 @@ parse_flow_and_packet(int argc, const char *argv[], flow_compose(packet, flow); } else { union flow_in_port in_port = flow->in_port; + struct pkt_metadata md; /* Use the metadata from the flow and the packet argument * to reconstruct the flow. */ - flow_extract(packet, flow->skb_priority, flow->pkt_mark, NULL, - &in_port, flow); + pkt_metadata_init(&md, NULL, flow->skb_priority, + flow->pkt_mark, &in_port); + + flow_extract(packet, &md, flow); } } @@ -4165,6 +4168,8 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, struct dpif_port dpif_port; struct dpif_port_dump port_dump; struct hmap portno_names; + void *state = NULL; + int error; ofproto = ofproto_dpif_lookup(argv[argc - 1]); if (!ofproto) { @@ -4182,9 +4187,14 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, } ds_init(&ds); - dpif_flow_dump_start(&flow_dump, ofproto->backer->dpif); - while (dpif_flow_dump_next(&flow_dump, &key, &key_len, &mask, &mask_len, - &actions, &actions_len, &stats)) { + error = dpif_flow_dump_start(&flow_dump, ofproto->backer->dpif); + if (error) { + goto exit; + } + dpif_flow_dump_state_init(ofproto->backer->dpif, &state); + while (dpif_flow_dump_next(&flow_dump, state, &key, &key_len, + &mask, &mask_len, &actions, &actions_len, + &stats)) { if (!ofproto_dpif_contains_flow(ofproto, key, key_len)) { continue; } @@ -4197,8 +4207,11 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, 
format_odp_actions(&ds, actions, actions_len); ds_put_char(&ds, '\n'); } + dpif_flow_dump_state_uninit(ofproto->backer->dpif, state); + error = dpif_flow_dump_done(&flow_dump); - if (dpif_flow_dump_done(&flow_dump)) { +exit: + if (error) { ds_clear(&ds); ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(errno)); unixctl_command_reply_error(conn, ds_cstr(&ds)); diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 2c72fbcf6..d116451bd 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -464,6 +464,11 @@ void rule_collection_destroy(struct rule_collection *); * ofproto-dpif implementation. */ extern unsigned ofproto_flow_limit; +/* Maximum idle time (in ms) for flows to be cached in the datapath. + * Revalidators may expire flows more quickly than the configured value based + * on system load and other factors. This variable is subject to change. */ +extern unsigned ofproto_max_idle; + /* Number of upcall handler and revalidator threads. Only affects the * ofproto-dpif implementation. */ extern size_t n_handlers, n_revalidators; diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 02e628ae6..e691bb93d 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -307,6 +307,7 @@ static size_t allocated_ofproto_classes; struct ovs_mutex ofproto_mutex = OVS_MUTEX_INITIALIZER; unsigned ofproto_flow_limit = OFPROTO_FLOW_LIMIT_DEFAULT; +unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT; size_t n_handlers, n_revalidators; @@ -697,6 +698,14 @@ ofproto_set_flow_limit(unsigned limit) ofproto_flow_limit = limit; } +/* Sets the maximum idle time for flows in the datapath before they are + * expired. */ +void +ofproto_set_max_idle(unsigned max_idle) +{ + ofproto_max_idle = max_idle; +} + /* If forward_bpdu is true, the NORMAL action will forward frames with * reserved (e.g. STP) destination Ethernet addresses. if forward_bpdu is false, * the NORMAL action will drop these frames. 
*/ @@ -2193,7 +2202,7 @@ ofport_install(struct ofproto *p, if (error) { goto error; } - connmgr_send_port_status(p->connmgr, pp, OFPPR_ADD); + connmgr_send_port_status(p->connmgr, NULL, pp, OFPPR_ADD); return; error: @@ -2210,7 +2219,7 @@ error: static void ofport_remove(struct ofport *ofport) { - connmgr_send_port_status(ofport->ofproto->connmgr, &ofport->pp, + connmgr_send_port_status(ofport->ofproto->connmgr, NULL, &ofport->pp, OFPPR_DELETE); ofport_destroy(ofport); } @@ -2245,7 +2254,8 @@ ofport_modified(struct ofport *port, struct ofputil_phy_port *pp) port->pp.curr_speed = pp->curr_speed; port->pp.max_speed = pp->max_speed; - connmgr_send_port_status(port->ofproto->connmgr, &port->pp, OFPPR_MODIFY); + connmgr_send_port_status(port->ofproto->connmgr, NULL, + &port->pp, OFPPR_MODIFY); } /* Update OpenFlow 'state' in 'port' and notify controller. */ @@ -2254,8 +2264,8 @@ ofproto_port_set_state(struct ofport *port, enum ofputil_port_state state) { if (port->pp.state != state) { port->pp.state = state; - connmgr_send_port_status(port->ofproto->connmgr, &port->pp, - OFPPR_MODIFY); + connmgr_send_port_status(port->ofproto->connmgr, NULL, + &port->pp, OFPPR_MODIFY); } } @@ -2712,11 +2722,10 @@ run_rule_executes(struct ofproto *ofproto) guarded_list_pop_all(&ofproto->rule_executes, &executes); LIST_FOR_EACH_SAFE (e, next, list_node, &executes) { - union flow_in_port in_port_; struct flow flow; - in_port_.ofp_port = e->in_port; - flow_extract(e->packet, 0, 0, NULL, &in_port_, &flow); + flow_extract(e->packet, NULL, &flow); + flow.in_port.ofp_port = e->in_port; ofproto->ofproto_class->rule_execute(e->rule, &flow, e->packet); rule_execute_destroy(e); @@ -2925,7 +2934,6 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) uint64_t ofpacts_stub[1024 / 8]; struct ofpbuf ofpacts; struct flow flow; - union flow_in_port in_port_; enum ofperr error; COVERAGE_INC(ofproto_packet_out); @@ -2959,8 +2967,8 @@ handle_packet_out(struct ofconn *ofconn, const struct 
ofp_header *oh) } /* Verify actions against packet, then send packet if successful. */ - in_port_.ofp_port = po.in_port; - flow_extract(payload, 0, 0, NULL, &in_port_, &flow); + flow_extract(payload, NULL, &flow); + flow.in_port.ofp_port = po.in_port; error = ofproto_check_ofpacts(p, po.ofpacts, po.ofpacts_len); if (!error) { error = p->ofproto_class->packet_out(p, payload, &flow, @@ -2975,26 +2983,27 @@ exit: } static void -update_port_config(struct ofport *port, +update_port_config(struct ofconn *ofconn, struct ofport *port, enum ofputil_port_config config, enum ofputil_port_config mask) { - enum ofputil_port_config old_config = port->pp.config; - enum ofputil_port_config toggle; + enum ofputil_port_config toggle = (config ^ port->pp.config) & mask; - toggle = (config ^ port->pp.config) & mask; - if (toggle & OFPUTIL_PC_PORT_DOWN) { - if (config & OFPUTIL_PC_PORT_DOWN) { - netdev_turn_flags_off(port->netdev, NETDEV_UP, NULL); - } else { - netdev_turn_flags_on(port->netdev, NETDEV_UP, NULL); - } + if (toggle & OFPUTIL_PC_PORT_DOWN + && (config & OFPUTIL_PC_PORT_DOWN + ? netdev_turn_flags_off(port->netdev, NETDEV_UP, NULL) + : netdev_turn_flags_on(port->netdev, NETDEV_UP, NULL))) { + /* We tried to bring the port up or down, but it failed, so don't + * update the "down" bit. 
*/ toggle &= ~OFPUTIL_PC_PORT_DOWN; } - port->pp.config ^= toggle; - if (port->pp.config != old_config) { + if (toggle) { + enum ofputil_port_config old_config = port->pp.config; + port->pp.config ^= toggle; port->ofproto->ofproto_class->port_reconfigured(port, old_config); + connmgr_send_port_status(port->ofproto->connmgr, ofconn, &port->pp, + OFPPR_MODIFY); } } @@ -3022,7 +3031,7 @@ handle_port_mod(struct ofconn *ofconn, const struct ofp_header *oh) } else if (!eth_addr_equals(port->pp.hw_addr, pm.hw_addr)) { return OFPERR_OFPPMFC_BAD_HW_ADDR; } else { - update_port_config(port, pm.config, pm.mask); + update_port_config(ofconn, port, pm.config, pm.mask); if (pm.advertise) { netdev_set_advertisements(port->netdev, pm.advertise); } diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 1f9cb1589..546827fa6 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -214,6 +214,7 @@ int ofproto_port_dump_done(struct ofproto_port_dump *); ) #define OFPROTO_FLOW_LIMIT_DEFAULT 200000 +#define OFPROTO_MAX_IDLE_DEFAULT 1500 const char *ofproto_port_open_type(const char *datapath_type, const char *port_type); @@ -236,6 +237,7 @@ void ofproto_set_extra_in_band_remotes(struct ofproto *, const struct sockaddr_in *, size_t n); void ofproto_set_in_band_queue(struct ofproto *, int queue_id); void ofproto_set_flow_limit(unsigned limit); +void ofproto_set_max_idle(unsigned max_idle); void ofproto_set_forward_bpdu(struct ofproto *, bool forward_bpdu); void ofproto_set_mac_table_config(struct ofproto *, unsigned idle_time, size_t max_entries); diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 38b782f6c..001045a53 100644 --- a/ofproto/tunnel.c +++ b/ofproto/tunnel.c @@ -324,10 +324,15 @@ tnl_xlate_init(const struct flow *base_flow, struct flow *flow, struct flow_wildcards *wc) { if (tnl_port_should_receive(flow)) { - memset(&wc->masks.tunnel, 0xff, sizeof wc->masks.tunnel); + wc->masks.tunnel.tun_id = OVS_BE64_MAX; + wc->masks.tunnel.ip_src = OVS_BE32_MAX; + 
wc->masks.tunnel.ip_dst = OVS_BE32_MAX; wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM | FLOW_TNL_F_KEY); + wc->masks.tunnel.ip_tos = UINT8_MAX; + wc->masks.tunnel.ip_ttl = UINT8_MAX; + memset(&wc->masks.pkt_mark, 0xff, sizeof wc->masks.pkt_mark); if (!tnl_ecn_ok(base_flow, flow)) { diff --git a/ovsdb/automake.mk b/ovsdb/automake.mk index 69e790098..dfb900a2b 100644 --- a/ovsdb/automake.mk +++ b/ovsdb/automake.mk @@ -40,7 +40,7 @@ MAN_FRAGMENTS += \ # ovsdb-tool bin_PROGRAMS += ovsdb/ovsdb-tool ovsdb_ovsdb_tool_SOURCES = ovsdb/ovsdb-tool.c -ovsdb_ovsdb_tool_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la $(SSL_LIBS) +ovsdb_ovsdb_tool_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la # ovsdb-tool.1 man_MANS += ovsdb/ovsdb-tool.1 DISTCLEANFILES += ovsdb/ovsdb-tool.1 @@ -49,7 +49,7 @@ MAN_ROOTS += ovsdb/ovsdb-tool.1.in # ovsdb-client bin_PROGRAMS += ovsdb/ovsdb-client ovsdb_ovsdb_client_SOURCES = ovsdb/ovsdb-client.c -ovsdb_ovsdb_client_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la $(SSL_LIBS) +ovsdb_ovsdb_client_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la # ovsdb-client.1 man_MANS += ovsdb/ovsdb-client.1 DISTCLEANFILES += ovsdb/ovsdb-client.1 @@ -58,7 +58,7 @@ MAN_ROOTS += ovsdb/ovsdb-client.1.in # ovsdb-server sbin_PROGRAMS += ovsdb/ovsdb-server ovsdb_ovsdb_server_SOURCES = ovsdb/ovsdb-server.c -ovsdb_ovsdb_server_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la $(SSL_LIBS) +ovsdb_ovsdb_server_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la # ovsdb-server.1 man_MANS += ovsdb/ovsdb-server.1 DISTCLEANFILES += ovsdb/ovsdb-server.1 diff --git a/ovsdb/ovsdb-client.c b/ovsdb/ovsdb-client.c index f149eec7d..d60d7ca84 100644 --- a/ovsdb/ovsdb-client.c +++ b/ovsdb/ovsdb-client.c @@ -31,6 +31,7 @@ #include "daemon.h" #include "dirs.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "json.h" #include "jsonrpc.h" #include "lib/table.h" @@ -88,7 +89,7 @@ main(int argc, char *argv[]) proctitle_init(argc, argv); set_program_name(argv[0]); 
parse_options(argc, argv); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); if (optind >= argc) { ovs_fatal(0, "missing command name; use --help for help"); diff --git a/ovsdb/ovsdb-doc b/ovsdb/ovsdb-doc index 46f1101a8..620091559 100755 --- a/ovsdb/ovsdb-doc +++ b/ovsdb/ovsdb-doc @@ -270,7 +270,7 @@ def docsToNroff(schemaFile, xmlFile, erFile, title=None, version=None): # Putting '\" p as the first line tells "man" that the manpage # needs to be preprocessed by "pic". s = r''''\" p -.TH "%s" 5 "%s" "Open vSwitch" "Open vSwitch Manual" +.TH "%s" 5 " DB Schema %s" "Open vSwitch %s" "Open vSwitch Manual" .\" -*- nroff -*- .de TQ . br @@ -286,7 +286,7 @@ def docsToNroff(schemaFile, xmlFile, erFile, title=None, version=None): .SH NAME %s \- %s database schema .PP -''' % (title, version, textToNroff(schema.name), schema.name) +''' % (title, schema.version, version, textToNroff(schema.name), schema.name) tables = "" introNodes = [] diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index ec1c65536..d680f7c6c 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -166,6 +166,8 @@ struct %(s)s *%(s)s_insert(struct ovsdb_idl_txn *); print "\nextern struct ovsdb_idl_class %sidl_class;" % prefix print "\nvoid %sinit(void);" % prefix + + print "\nconst char * %sget_db_version(void);" % prefix print "\n#endif /* %(prefix)sIDL_HEADER */" % {'prefix': prefix.upper()} def printEnum(members): @@ -652,6 +654,16 @@ void print " %s_columns_init();" % structName print "}" + print """ +/* Return the schema version. The caller must not free the returned value. 
*/ +const char * +%sget_db_version(void) +{ + return "%s"; +} +""" % (prefix, schema.version) + + def ovsdb_escape(string): def escape(match): diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index bdcdad922..6de77e44a 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -27,6 +27,7 @@ #include "dirs.h" #include "dummy.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "file.h" #include "hash.h" #include "json.h" @@ -137,7 +138,7 @@ main(int argc, char *argv[]) proctitle_init(argc, argv); set_program_name(argv[0]); service_start(&argc, &argv); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); process_init(); parse_options(&argc, &argv, &remotes, &unixctl_path, &run_command); diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 5e2b71b84..2ae782eba 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -27,6 +27,7 @@ #include "compiler.h" #include "dirs.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "file.h" #include "lockfile.h" #include "log.h" @@ -56,7 +57,7 @@ main(int argc, char *argv[]) { set_program_name(argv[0]); parse_options(argc, argv); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); run_command(argc - optind, argv + optind, get_all_commands()); return 0; } diff --git a/python/ovs/socket_util.py b/python/ovs/socket_util.py index be9fc95a0..8d34b7163 100644 --- a/python/ovs/socket_util.py +++ b/python/ovs/socket_util.py @@ -295,5 +295,18 @@ def set_nonblocking(sock): def set_dscp(sock, dscp): if dscp > 63: raise ValueError("Invalid dscp %d" % dscp) + + # Note: this function is used for both of IPv4 and IPv6 sockets + success = False val = dscp << 2 - sock.setsockopt(socket.IPPROTO_IP, socket.IP_TOS, val) + try: + sock.setsockopt(socket.IPPROTO_IP, socket.IP_TOS, val) + except socket.error, e: + if e.errno != errno.ENOPROTOOPT: + raise + success = True + try: + sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_TCLASS, val) + except socket.error, e: + if e.errno != errno.ENOPROTOOPT 
or not success: + raise diff --git a/tests/atlocal.in b/tests/atlocal.in index 5c0db2a78..06e738434 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -68,3 +68,13 @@ FreeBSD) esac export MALLOC_CONF esac + +# The name of loopback interface +case `uname` in +Linux) + LOOPBACK_INTERFACE=lo + ;; +FreeBSD|NetBSD) + LOOPBACK_INTERFACE=lo0 + ;; +esac diff --git a/tests/automake.mk b/tests/automake.mk index 0cf45a4b8..739d79e09 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -181,38 +181,38 @@ $(srcdir)/package.m4: $(top_srcdir)/configure.ac noinst_PROGRAMS += tests/test-aes128 tests_test_aes128_SOURCES = tests/test-aes128.c -tests_test_aes128_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_aes128_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-atomic tests_test_atomic_SOURCES = tests/test-atomic.c -tests_test_atomic_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_atomic_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-bundle tests_test_bundle_SOURCES = tests/test-bundle.c -tests_test_bundle_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_bundle_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-classifier tests_test_classifier_SOURCES = tests/test-classifier.c -tests_test_classifier_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_classifier_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-controller MAN_ROOTS += tests/test-controller.8.in DISTCLEANFILES += tests/test-controller.8 noinst_man_MANS += tests/test-controller.8 tests_test_controller_SOURCES = tests/test-controller.c -tests_test_controller_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_controller_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-csum tests_test_csum_SOURCES = tests/test-csum.c -tests_test_csum_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_csum_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-file_name tests_test_file_name_SOURCES = tests/test-file_name.c 
-tests_test_file_name_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_file_name_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-flows tests_test_flows_SOURCES = tests/test-flows.c -tests_test_flows_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_flows_LDADD = lib/libopenvswitch.la dist_check_SCRIPTS = tests/flowgen.pl noinst_PROGRAMS += tests/test-hash @@ -221,63 +221,63 @@ tests_test_hash_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-heap tests_test_heap_SOURCES = tests/test-heap.c -tests_test_heap_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_heap_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-hindex tests_test_hindex_SOURCES = tests/test-hindex.c -tests_test_hindex_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_hindex_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-hmap tests_test_hmap_SOURCES = tests/test-hmap.c -tests_test_hmap_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_hmap_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-json tests_test_json_SOURCES = tests/test-json.c -tests_test_json_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_json_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-jsonrpc tests_test_jsonrpc_SOURCES = tests/test-jsonrpc.c -tests_test_jsonrpc_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_jsonrpc_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-list tests_test_list_SOURCES = tests/test-list.c -tests_test_list_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_list_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-lockfile tests_test_lockfile_SOURCES = tests/test-lockfile.c -tests_test_lockfile_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_lockfile_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-multipath tests_test_multipath_SOURCES = tests/test-multipath.c -tests_test_multipath_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_multipath_LDADD = lib/libopenvswitch.la 
noinst_PROGRAMS += tests/test-packets tests_test_packets_SOURCES = tests/test-packets.c -tests_test_packets_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_packets_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-random tests_test_random_SOURCES = tests/test-random.c -tests_test_random_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_random_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-stp tests_test_stp_SOURCES = tests/test-stp.c -tests_test_stp_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_stp_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-sflow tests_test_sflow_SOURCES = tests/test-sflow.c -tests_test_sflow_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_sflow_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-netflow tests_test_netflow_SOURCES = tests/test-netflow.c -tests_test_netflow_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_netflow_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-unix-socket tests_test_unix_socket_SOURCES = tests/test-unix-socket.c -tests_test_unix_socket_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_unix_socket_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-odp tests_test_odp_SOURCES = tests/test-odp.c -tests_test_odp_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_odp_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-ovsdb tests_test_ovsdb_SOURCES = \ @@ -285,7 +285,7 @@ tests_test_ovsdb_SOURCES = \ tests/idltest.c \ tests/idltest.h EXTRA_DIST += tests/uuidfilt.pl tests/ovsdb-monitor-sort.pl -tests_test_ovsdb_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la $(SSL_LIBS) +tests_test_ovsdb_LDADD = ovsdb/libovsdb.la lib/libopenvswitch.la # idltest schema and IDL OVSIDL_BUILT += tests/idltest.c tests/idltest.h tests/idltest.ovsidl @@ -299,11 +299,11 @@ tests/idltest.c: tests/idltest.h noinst_PROGRAMS += tests/test-reconnect tests_test_reconnect_SOURCES = tests/test-reconnect.c -tests_test_reconnect_LDADD = 
lib/libopenvswitch.la $(SSL_LIBS) +tests_test_reconnect_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-sha1 tests_test_sha1_SOURCES = tests/test-sha1.c -tests_test_sha1_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_sha1_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-strtok_r tests_test_strtok_r_SOURCES = tests/test-strtok_r.c @@ -313,15 +313,15 @@ tests_test_type_props_SOURCES = tests/test-type-props.c noinst_PROGRAMS += tests/test-util tests_test_util_SOURCES = tests/test-util.c -tests_test_util_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_util_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-uuid tests_test_uuid_SOURCES = tests/test-uuid.c -tests_test_uuid_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_uuid_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-vconn tests_test_vconn_SOURCES = tests/test-vconn.c -tests_test_vconn_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +tests_test_vconn_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += tests/test-byte-order tests_test_byte_order_SOURCES = tests/test-byte-order.c diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 6d48e5a90..ad23e9df3 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -1143,6 +1143,7 @@ AT_CHECK([ovs-ofctl --protocols=OpenFlow12 monitor br0 65534 -m -P nxm --detach for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:00:00,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done +sleep 1 OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) AT_CHECK([cat ofctl_monitor.log | ofctl_strip], [0], [dnl @@ -1175,6 +1176,7 @@ AT_CHECK([ovs-ofctl --protocols=OpenFlow12 monitor br0 65534 -m -P nxm --detach for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:00:01,dst=50:54:00:00:00:07),eth_type(0x8847),mpls(label=10,tc=0,ttl=64,bos=1)' done +sleep 1 OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) 
AT_CHECK([cat ofctl_monitor.log | ofctl_strip], [0], [dnl @@ -1209,6 +1211,7 @@ AT_CHECK([ovs-ofctl --protocols=OpenFlow12 monitor br0 65534 -m -P nxm --detach for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:00:02,dst=50:54:00:00:00:07),eth_type(0x8847),mpls(label=10,tc=0,ttl=64,bos=1)' done +sleep 1 OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) AT_CHECK([cat ofctl_monitor.log | ofctl_strip], [0], [dnl @@ -1266,6 +1269,7 @@ AT_CHECK([ovs-ofctl --protocols=OpenFlow12 monitor br0 65534 -m -P nxm --detach for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:50,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done +sleep 1 OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) AT_CHECK([cat ofctl_monitor.log | ofctl_strip], [0], [dnl @@ -2366,7 +2370,7 @@ m4_define([CHECK_SFLOW_SAMPLING_PACKET], set Interface p2 options:ifindex=1003 -- \ set Bridge br0 sflow=@sf -- \ --id=@sf create sflow targets=\"$1:$SFLOW_PORT\" \ - header=128 sampling=1 polling=1 agent=lo + header=128 sampling=1 polling=1 agent=$LOOPBACK_INTERFACE dnl open with ARP packets to seed the bridge-learning. The output dnl ifIndex numbers should be reported predictably after that. 
@@ -2779,6 +2783,50 @@ m4_define([CHECK_NETFLOW_ACTIVE_EXPIRATION], CHECK_NETFLOW_ACTIVE_EXPIRATION([127.0.0.1], [IPv4]) CHECK_NETFLOW_ACTIVE_EXPIRATION([[[::1]]], [IPv6]) +AT_SETUP([ofproto-dpif - flow stats]) +OVS_VSWITCHD_START +AT_CHECK([ovs-ofctl add-flow br0 "ip,actions=NORMAL"]) +AT_CHECK([ovs-ofctl add-flow br0 "icmp,actions=NORMAL"]) + +ovs-appctl time/stop + +for i in `seq 1 10`; do + ovs-appctl netdev-dummy/receive br0 'in_port(0),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no)' +done + +ovs-appctl time/warp 1000 + +AT_CHECK([ovs-ofctl dump-flows br0], [0], [stdout]) +AT_CHECK([STRIP_XIDS stdout | sed -n 's/duration=[[0-9]]*\.[[0-9]]*s/duration=0.0s/p' | sort], [0], [dnl + cookie=0x0, duration=0.0s, table=0, n_packets=0, n_bytes=0, idle_age=1, icmp actions=NORMAL + cookie=0x0, duration=0.0s, table=0, n_packets=10, n_bytes=600, idle_age=1, ip actions=NORMAL +]) +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ofproto-dpif - flow stats, set-n-threads]) +OVS_VSWITCHD_START +AT_CHECK([ovs-ofctl add-flow br0 "ip,actions=NORMAL"]) +AT_CHECK([ovs-ofctl add-flow br0 "icmp,actions=NORMAL"]) + +ovs-appctl time/stop + +for i in `seq 1 10`; do + ovs-appctl netdev-dummy/receive br0 'in_port(0),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no)' +done + +ovs-appctl time/warp 100 +AT_CHECK([ovs-vsctl set Open_vSwitch . 
other-config:n-revalidator-threads=2]) +ovs-appctl time/warp 1000 + +AT_CHECK([ovs-ofctl dump-flows br0], [0], [stdout]) +AT_CHECK([STRIP_XIDS stdout | sed -n 's/duration=[[0-9]]*\.[[0-9]]*s/duration=0.0s/p' | sort], [0], [dnl + cookie=0x0, duration=0.0s, table=0, n_packets=0, n_bytes=0, idle_age=1, icmp actions=NORMAL + cookie=0x0, duration=0.0s, table=0, n_packets=10, n_bytes=600, idle_age=1, ip actions=NORMAL +]) +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([idle_age and hard_age increase over time]) OVS_VSWITCHD_START @@ -3054,10 +3102,16 @@ AT_CHECK([ovs-ofctl add-flow br1 actions=LOCAL,output:1,output:3]) for i in $(seq 1 10); do ovs-appctl netdev-dummy/receive br0 'in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' + if [[ $i -eq 1 ]]; then + sleep 1 + fi done for i in $(seq 1 5); do ovs-appctl netdev-dummy/receive br1 'in_port(101),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' + if [[ $i -eq 1 ]]; then + sleep 1 + fi done AT_CHECK([ovs-appctl time/warp 500], [0], diff --git a/tests/test-controller.c b/tests/test-controller.c index f487d8ce0..a615ab49c 100644 --- a/tests/test-controller.c +++ b/tests/test-controller.c @@ -27,6 +27,7 @@ #include "command-line.h" #include "compiler.h" #include "daemon.h" +#include "fatal-signal.h" #include "learning-switch.h" #include "ofp-parse.h" #include "ofp-version-opt.h" @@ -105,7 +106,7 @@ main(int argc, char *argv[]) proctitle_init(argc, argv); set_program_name(argv[0]); parse_options(argc, argv); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); if (argc - optind < 1) { ovs_fatal(0, "at least one vconn argument required; " diff --git a/tests/test-flows.c b/tests/test-flows.c index 291003527..a49814295 100644 --- a/tests/test-flows.c +++ b/tests/test-flows.c @@ -58,7 +58,6 @@ main(int argc 
OVS_UNUSED, char *argv[]) struct ofp10_match extracted_match; struct match match; struct flow flow; - union flow_in_port in_port_; n++; retval = ovs_pcap_read(pcap, &packet, NULL); @@ -68,8 +67,9 @@ main(int argc OVS_UNUSED, char *argv[]) ovs_fatal(retval, "error reading pcap file"); } - in_port_.ofp_port = u16_to_ofp(1); - flow_extract(packet, 0, 0, NULL, &in_port_, &flow); + flow_extract(packet, NULL, &flow); + flow.in_port.ofp_port = u16_to_ofp(1); + match_wc_init(&match, &flow); ofputil_match_to_ofp10_match(&match, &extracted_match); diff --git a/tests/test-vconn.c b/tests/test-vconn.c index f54a0dfbc..76757f4ff 100644 --- a/tests/test-vconn.c +++ b/tests/test-vconn.c @@ -22,6 +22,7 @@ #include #include #include "command-line.h" +#include "fatal-signal.h" #include "ofp-msgs.h" #include "ofp-util.h" #include "ofpbuf.h" @@ -436,7 +437,7 @@ main(int argc, char *argv[]) set_program_name(argv[0]); vlog_set_levels(NULL, VLF_ANY_FACILITY, VLL_EMER); vlog_set_levels(NULL, VLF_CONSOLE, VLL_DBG); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); time_alarm(10); diff --git a/utilities/automake.mk b/utilities/automake.mk index ffc48b184..ce1c8b232 100644 --- a/utilities/automake.mk +++ b/utilities/automake.mk @@ -97,32 +97,31 @@ man_MANS += \ dist_man_MANS += utilities/ovs-ctl.8 utilities_ovs_appctl_SOURCES = utilities/ovs-appctl.c -utilities_ovs_appctl_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +utilities_ovs_appctl_LDADD = lib/libopenvswitch.la utilities_ovs_dpctl_SOURCES = utilities/ovs-dpctl.c -utilities_ovs_dpctl_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +utilities_ovs_dpctl_LDADD = lib/libopenvswitch.la utilities_ovs_ofctl_SOURCES = utilities/ovs-ofctl.c utilities_ovs_ofctl_LDADD = \ ofproto/libofproto.la \ - lib/libopenvswitch.la \ - $(SSL_LIBS) + lib/libopenvswitch.la utilities_ovs_vsctl_SOURCES = utilities/ovs-vsctl.c -utilities_ovs_vsctl_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +utilities_ovs_vsctl_LDADD = lib/libopenvswitch.la -if LINUX_DATAPATH +if LINUX 
sbin_PROGRAMS += utilities/ovs-vlan-bug-workaround utilities_ovs_vlan_bug_workaround_SOURCES = utilities/ovs-vlan-bug-workaround.c -utilities_ovs_vlan_bug_workaround_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +utilities_ovs_vlan_bug_workaround_LDADD = lib/libopenvswitch.la noinst_PROGRAMS += utilities/nlmon utilities_nlmon_SOURCES = utilities/nlmon.c -utilities_nlmon_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +utilities_nlmon_LDADD = lib/libopenvswitch.la endif bin_PROGRAMS += utilities/ovs-benchmark utilities_ovs_benchmark_SOURCES = utilities/ovs-benchmark.c -utilities_ovs_benchmark_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +utilities_ovs_benchmark_LDADD = lib/libopenvswitch.la include utilities/bugtool/automake.mk diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c index 3b1dff1e5..4b00118ab 100644 --- a/utilities/ovs-dpctl.c +++ b/utilities/ovs-dpctl.c @@ -35,6 +35,7 @@ #include "dirs.h" #include "dpif.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "flow.h" #include "match.h" #include "netdev.h" @@ -73,7 +74,7 @@ main(int argc, char *argv[]) { set_program_name(argv[0]); parse_options(argc, argv); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); run_command(argc - optind, argv + optind, get_all_commands()); return 0; } @@ -760,9 +761,11 @@ dpctl_dump_flows(int argc, char *argv[]) size_t key_len; size_t mask_len; struct ds ds; - char *name, *error, *filter = NULL; + char *name, *filter = NULL; struct flow flow_filter; struct flow_wildcards wc_filter; + void *state = NULL; + int error; if (argc > 1 && !strncmp(argv[argc - 1], "filter=", 7)) { filter = xstrdup(argv[--argc] + 7); @@ -781,18 +784,22 @@ dpctl_dump_flows(int argc, char *argv[]) } if (filter) { - error = parse_ofp_exact_flow(&flow_filter, &wc_filter.masks, filter, - &names_portno); - if (error) { - ovs_fatal(0, "Failed to parse filter (%s)", error); + char *err = parse_ofp_exact_flow(&flow_filter, &wc_filter.masks, + filter, &names_portno); + if (err) { + ovs_fatal(0, 
"Failed to parse filter (%s)", err); } } ds_init(&ds); - dpif_flow_dump_start(&flow_dump, dpif); - while (dpif_flow_dump_next(&flow_dump, &key, &key_len, - &mask, &mask_len, - &actions, &actions_len, &stats)) { + error = dpif_flow_dump_start(&flow_dump, dpif); + if (error) { + goto exit; + } + dpif_flow_dump_state_init(dpif, &state); + while (dpif_flow_dump_next(&flow_dump, state, &key, &key_len, + &mask, &mask_len, &actions, &actions_len, + &stats)) { if (filter) { struct flow flow; struct flow_wildcards wc; @@ -823,8 +830,13 @@ dpctl_dump_flows(int argc, char *argv[]) format_odp_actions(&ds, actions, actions_len); printf("%s\n", ds_cstr(&ds)); } - dpif_flow_dump_done(&flow_dump); + dpif_flow_dump_state_uninit(dpif, state); + error = dpif_flow_dump_done(&flow_dump); +exit: + if (error) { + ovs_fatal(error, "Failed to dump flows from datapath"); + } free(filter); odp_portno_names_destroy(&portno_names); hmap_destroy(&portno_names); diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in index 029ed3bf5..48d0c36c8 100644 --- a/utilities/ovs-lib.in +++ b/utilities/ovs-lib.in @@ -54,7 +54,11 @@ ovs_ctl () { # of ovs-ctl. It is also useful to document the o/p in ovs-ctl.log. display=`"${datadir}/scripts/ovs-ctl" "$@" 2>&1` rc=$? 
- echo "${display}" | tee -a "${logdir}/ovs-ctl.log" + if test -w "${logdir}/ovs-ctl.log"; then + echo "${display}" | tee -a "${logdir}/ovs-ctl.log" + else + echo "${display}" + fi return ${rc} ;; *) diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 69dd34fa4..e62e64691 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -36,6 +36,7 @@ #include "compiler.h" #include "dirs.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "nx-match.h" #include "odp-util.h" #include "ofp-actions.h" @@ -113,7 +114,7 @@ main(int argc, char *argv[]) { set_program_name(argv[0]); parse_options(argc, argv); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); run_command(argc - optind, argv + optind, get_all_commands()); return 0; } @@ -1863,12 +1864,13 @@ ofctl_ofp_parse_pcap(int argc OVS_UNUSED, char *argv[]) struct ofpbuf *packet; long long int when; struct flow flow; + const struct pkt_metadata md = PKT_METADATA_INITIALIZER(ODPP_NONE); error = ovs_pcap_read(file, &packet, &when); if (error) { break; } - flow_extract(packet, 0, 0, NULL, NULL, &flow); + flow_extract(packet, &md, &flow); if (flow.dl_type == htons(ETH_TYPE_IP) && flow.nw_proto == IPPROTO_TCP && (is_openflow_port(flow.tp_src, argv + 2) || @@ -3207,6 +3209,7 @@ ofctl_parse_pcap(int argc OVS_UNUSED, char *argv[]) for (;;) { struct ofpbuf *packet; struct flow flow; + const struct pkt_metadata md = PKT_METADATA_INITIALIZER(ODPP_NONE); int error; error = ovs_pcap_read(pcap, &packet, NULL); @@ -3216,7 +3219,7 @@ ofctl_parse_pcap(int argc OVS_UNUSED, char *argv[]) ovs_fatal(error, "%s: read failed", argv[1]); } - flow_extract(packet, 0, 0, NULL, NULL, &flow); + flow_extract(packet, &md, &flow); flow_print(stdout, &flow); putchar('\n'); ofpbuf_delete(packet); diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in index 5db0a70cd..43b00bfd4 100644 --- a/utilities/ovs-vsctl.8.in +++ b/utilities/ovs-vsctl.8.in @@ -149,6 +149,7 @@ These options control the format of output from 
the \fBlist\fR and .so lib/ssl-bootstrap.man .so lib/ssl-peer-ca-cert.man .so lib/vlog.man +.so lib/common.man . .SH COMMANDS The commands implemented by \fBovs\-vsctl\fR are described in the diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index c563eee65..21ac777e2 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -31,6 +31,7 @@ #include "compiler.h" #include "dirs.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "hash.h" #include "json.h" #include "ovsdb-data.h" @@ -176,7 +177,7 @@ main(int argc, char *argv[]) char *args; set_program_name(argv[0]); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN); vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); ovsrec_init(); @@ -391,6 +392,7 @@ parse_options(int argc, char *argv[], struct shash *local_options) case 'V': ovs_print_version(0, 0); + printf("DB Schema %s\n", ovsrec_get_db_version()); exit(EXIT_SUCCESS); case 't': diff --git a/vswitchd/automake.mk b/vswitchd/automake.mk index b0a386b4e..c45d01dcb 100644 --- a/vswitchd/automake.mk +++ b/vswitchd/automake.mk @@ -14,8 +14,7 @@ vswitchd_ovs_vswitchd_SOURCES = \ vswitchd_ovs_vswitchd_LDADD = \ ofproto/libofproto.la \ lib/libsflow.la \ - lib/libopenvswitch.la \ - $(SSL_LIBS) + lib/libopenvswitch.la EXTRA_DIST += vswitchd/INTERNALS MAN_ROOTS += vswitchd/ovs-vswitchd.8.in diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index aa4ab3129..f7bd6572f 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -251,7 +251,7 @@ static struct iface *iface_lookup(const struct bridge *, const char *name); static struct iface *iface_find(const char *name); static struct iface *iface_from_ofp_port(const struct bridge *, ofp_port_t ofp_port); -static void iface_set_mac(struct iface *, const uint8_t *); +static void iface_set_mac(const struct bridge *, const struct port *, struct iface *); static void iface_set_ofport(const struct ovsrec_interface *, ofp_port_t ofport); static void 
iface_clear_db_record(const struct ovsrec_interface *if_cfg); static void iface_configure_qos(struct iface *, const struct ovsrec_qos *); @@ -493,6 +493,8 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) ofproto_set_flow_limit(smap_get_int(&ovs_cfg->other_config, "flow-limit", OFPROTO_FLOW_LIMIT_DEFAULT)); + ofproto_set_max_idle(smap_get_int(&ovs_cfg->other_config, "max-idle", + OFPROTO_MAX_IDLE_DEFAULT)); ofproto_set_threads( smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0), @@ -574,7 +576,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) iface_set_ofport(iface->cfg, iface->ofp_port); iface_configure_cfm(iface); iface_configure_qos(iface, port->cfg->qos); - iface_set_mac(iface, port->cfg->fake_bridge ? br->ea : NULL); + iface_set_mac(br, port, iface); ofproto_port_set_bfd(br->ofproto, iface->ofp_port, &iface->cfg->bfd); } @@ -1548,31 +1550,15 @@ bridge_configure_mac_table(struct bridge *br) } static void -bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], - struct iface **hw_addr_iface) +find_local_hw_addr(const struct bridge *br, uint8_t ea[ETH_ADDR_LEN], + const struct port *fake_br, struct iface **hw_addr_iface) { struct hmapx mirror_output_ports; - const char *hwaddr; struct port *port; bool found_addr = false; int error; int i; - *hw_addr_iface = NULL; - - /* Did the user request a particular MAC? */ - hwaddr = smap_get(&br->cfg->other_config, "hwaddr"); - if (hwaddr && eth_addr_from_string(hwaddr, ea)) { - if (eth_addr_is_multicast(ea)) { - VLOG_ERR("bridge %s: cannot set MAC address to multicast " - "address "ETH_ADDR_FMT, br->name, ETH_ADDR_ARGS(ea)); - } else if (eth_addr_is_zero(ea)) { - VLOG_ERR("bridge %s: cannot set MAC address to zero", br->name); - } else { - return; - } - } - /* Mirror output ports don't participate in picking the local hardware * address. 
ofproto can't help us find out whether a given port is a * mirror output because we haven't configured mirrors yet, so we need to @@ -1628,6 +1614,16 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], continue; } + /* For fake bridges we only choose from ports with the same tag */ + if (fake_br && fake_br->cfg && fake_br->cfg->tag) { + if (!port->cfg->tag) { + continue; + } + if (*port->cfg->tag != *fake_br->cfg->tag) { + continue; + } + } + /* Grab MAC. */ error = netdev_get_etheraddr(iface->netdev, iface_ea); if (error) { @@ -1656,6 +1652,30 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], hmapx_destroy(&mirror_output_ports); } +static void +bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], + struct iface **hw_addr_iface) +{ + const char *hwaddr; + *hw_addr_iface = NULL; + + /* Did the user request a particular MAC? */ + hwaddr = smap_get(&br->cfg->other_config, "hwaddr"); + if (hwaddr && eth_addr_from_string(hwaddr, ea)) { + if (eth_addr_is_multicast(ea)) { + VLOG_ERR("bridge %s: cannot set MAC address to multicast " + "address "ETH_ADDR_FMT, br->name, ETH_ADDR_ARGS(ea)); + } else if (eth_addr_is_zero(ea)) { + VLOG_ERR("bridge %s: cannot set MAC address to zero", br->name); + } else { + return; + } + } + + /* Find a local hw address */ + find_local_hw_addr(br, ea, NULL, hw_addr_iface); +} + /* Choose and returns the datapath ID for bridge 'br' given that the bridge * Ethernet address is 'bridge_ea'. If 'bridge_ea' is the Ethernet address of * an interface on 'br', then that interface must be passed in as @@ -3469,9 +3489,10 @@ iface_from_ofp_port(const struct bridge *br, ofp_port_t ofp_port) /* Set Ethernet address of 'iface', if one is specified in the configuration * file. 
*/ static void -iface_set_mac(struct iface *iface, const uint8_t *mac) +iface_set_mac(const struct bridge *br, const struct port *port, struct iface *iface) { - uint8_t ea[ETH_ADDR_LEN]; + uint8_t ea[ETH_ADDR_LEN], *mac = NULL; + struct iface *hw_addr_iface; if (strcmp(iface->type, "internal")) { return; @@ -3479,6 +3500,10 @@ iface_set_mac(struct iface *iface, const uint8_t *mac) if (iface->cfg->mac && eth_addr_from_string(iface->cfg->mac, ea)) { mac = ea; + } else if (port->cfg->fake_bridge) { + /* Fake bridge and no MAC set in the configuration. Pick a local one. */ + find_local_hw_addr(br, ea, port, &hw_addr_iface); + mac = ea; } if (mac) { diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index da18a0a39..ca76aef10 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -32,6 +32,7 @@ #include "dirs.h" #include "dpif.h" #include "dummy.h" +#include "fatal-signal.h" #include "memory.h" #include "netdev.h" #include "openflow/openflow.h" @@ -72,7 +73,7 @@ main(int argc, char *argv[]) set_program_name(argv[0]); service_start(&argc, &argv); remote = parse_options(argc, argv, &unixctl_path); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); ovsrec_init(); daemonize_start(); diff --git a/vswitchd/system-stats.c b/vswitchd/system-stats.c index 1d9cb78ed..ca98b03e5 100644 --- a/vswitchd/system-stats.c +++ b/vswitchd/system-stats.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2010, 2012, 2013 Nicira, Inc. +/* Copyright (c) 2010, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,12 +48,13 @@ VLOG_DEFINE_THIS_MODULE(system_stats); /* #ifdefs make it a pain to maintain code: you have to try to build both ways. * Thus, this file tries to compile as much of the code as possible regardless - * of the target, by writing "if (LINUX_DATAPATH)" instead of "#ifdef - * __linux__" where this is possible. 
*/ -#ifdef LINUX_DATAPATH + * of the target, by writing "if (LINUX)" instead of "#ifdef __linux__" where + * this is possible. */ +#ifdef __linux__ +#define LINUX 1 #include #else -#define LINUX_DATAPATH 0 +#define LINUX 0 #endif static void @@ -96,7 +97,7 @@ get_page_size(void) static void get_memory_stats(struct smap *stats) { - if (!LINUX_DATAPATH) { + if (!LINUX) { unsigned int pagesize = get_page_size(); #ifdef _SC_PHYS_PAGES long int phys_pages = sysconf(_SC_PHYS_PAGES); @@ -174,7 +175,7 @@ get_boot_time(void) static long long int cache_expiration = LLONG_MIN; static long long int boot_time; - ovs_assert(LINUX_DATAPATH); + ovs_assert(LINUX); if (time_msec() >= cache_expiration) { static const char stat_file[] = "/proc/stat"; @@ -207,7 +208,7 @@ get_boot_time(void) static unsigned long long int ticks_to_ms(unsigned long long int ticks) { - ovs_assert(LINUX_DATAPATH); + ovs_assert(LINUX); #ifndef USER_HZ #define USER_HZ 100 @@ -240,7 +241,7 @@ get_raw_process_info(pid_t pid, struct raw_process_info *raw) FILE *stream; int n; - ovs_assert(LINUX_DATAPATH); + ovs_assert(LINUX); sprintf(file_name, "/proc/%lu/stat", (unsigned long int) pid); stream = fopen(file_name, "r"); @@ -326,7 +327,7 @@ count_crashes(pid_t pid) int crashes = 0; FILE *stream; - ovs_assert(LINUX_DATAPATH); + ovs_assert(LINUX); sprintf(file_name, "/proc/%lu/cmdline", (unsigned long int) pid); stream = fopen(file_name, "r"); @@ -369,7 +370,7 @@ get_process_info(pid_t pid, struct process_info *pinfo) { struct raw_process_info child; - ovs_assert(LINUX_DATAPATH); + ovs_assert(LINUX); if (!get_raw_process_info(pid, &child)) { return false; } @@ -435,7 +436,7 @@ get_process_stats(struct smap *stats) key = xasprintf("process_%.*s", (int) (extension - de->d_name), de->d_name); if (!smap_get(stats, key)) { - if (LINUX_DATAPATH && get_process_info(pid, &pinfo)) { + if (LINUX && get_process_info(pid, &pinfo)) { smap_add_format(stats, key, "%lu,%lu,%lld,%d,%lld,%lld", pinfo.vsz, pinfo.rss, pinfo.cputime, 
pinfo.crashes, pinfo.booted, pinfo.uptime); diff --git a/vtep/automake.mk b/vtep/automake.mk index 008f5b435..f06148a40 100644 --- a/vtep/automake.mk +++ b/vtep/automake.mk @@ -11,7 +11,7 @@ man_MANS += \ vtep/vtep-ctl.8 vtep_vtep_ctl_SOURCES = vtep/vtep-ctl.c -vtep_vtep_ctl_LDADD = lib/libopenvswitch.la $(SSL_LIBS) +vtep_vtep_ctl_LDADD = lib/libopenvswitch.la # ovs-vtep scripts_SCRIPTS += \ @@ -50,6 +50,7 @@ $(srcdir)/vtep/vtep.5: \ $(OVSDB_DOC) \ --title="vtep" \ $(VTEP_DOT_DIAGRAM_ARG) \ + --version=$(VERSION) \ $(srcdir)/vtep/vtep.ovsschema \ $(srcdir)/vtep/vtep.xml > $@.tmp mv $@.tmp $@ diff --git a/vtep/vtep-ctl.8.in b/vtep/vtep-ctl.8.in index 5ffcc6206..320385417 100644 --- a/vtep/vtep-ctl.8.in +++ b/vtep/vtep-ctl.8.in @@ -100,6 +100,7 @@ These options control the format of output from the \fBlist\fR and .so lib/ssl-bootstrap.man .so lib/ssl-peer-ca-cert.man .so lib/vlog.man +.so lib/common.man . .SH COMMANDS The commands implemented by \fBvtep\-ctl\fR are described in the diff --git a/vtep/vtep-ctl.c b/vtep/vtep-ctl.c index 233367b70..25470ff1c 100644 --- a/vtep/vtep-ctl.c +++ b/vtep/vtep-ctl.c @@ -31,6 +31,7 @@ #include "compiler.h" #include "dirs.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "hash.h" #include "json.h" #include "ovsdb-data.h" @@ -167,7 +168,7 @@ main(int argc, char *argv[]) char *args; set_program_name(argv[0]); - signal(SIGPIPE, SIG_IGN); + fatal_ignore_sigpipe(); vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN); vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); vteprec_init(); @@ -369,6 +370,7 @@ parse_options(int argc, char *argv[], struct shash *local_options) case 'V': ovs_print_version(0, 0); + printf("DB Schema %s\n", vteprec_get_db_version()); exit(EXIT_SUCCESS); case 't':