/* Userspace communication. */
static DEFINE_SPINLOCK(brc_lock); /* Ensure atomic access to these vars. */
static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
- static int brc_err; /* Error code from userspace. */
+ static struct sk_buff *brc_reply; /* Reply from userspace. */
static u32 brc_seq; /* Sequence number for current op. */
- static int brc_send_command(const char *bridge, const char *port, int op);
+ static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
+ static int brc_send_simple_command(struct sk_buff *);
static int
get_dp_ifindices(int *indices, int num)
rcu_read_unlock();
}
+ /* Allocates a generic netlink message for command 'op' and starts filling
+  * it in.
+  *
+  * The message always carries 'bridge' as BRC_GENL_A_DP_NAME and, when 'port'
+  * is nonnull, 'port' as BRC_GENL_A_PORT_NAME.  The header is written with a
+  * sequence number of 0; brc_send_command() stamps the real one just before
+  * sending.
+  *
+  * Returns the new skb, owned by the caller, or NULL if the skb could not be
+  * allocated or the header or an attribute did not fit. */
+ static struct sk_buff *
+ brc_make_request(int op, const char *bridge, const char *port)
+ {
+ 	struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ 	if (!skb)
+ 		goto error;
+
+ 	/* Do not keep composing on top of a header that was never written. */
+ 	if (!genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op))
+ 		goto nla_put_failure;
+ 	NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
+ 	if (port)
+ 		NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
+ 	return skb;
+
+ nla_put_failure:
+ 	kfree_skb(skb);
+ error:
+ 	return NULL;
+ }
+
+ /* Sends 'request' through brc_send_command() for commands whose only
+  * interesting result is the error code in the reply.
+  *
+  * Returns the PTR_ERR() value if brc_send_command() failed; otherwise frees
+  * the reply and returns the negated BRC_GENL_A_ERR_CODE it carried. */
+ static int brc_send_simple_command(struct sk_buff *request)
+ {
+ 	struct nlattr *tb[BRC_GENL_A_MAX + 1];
+ 	struct sk_buff *rsp = brc_send_command(request, tb);
+ 	int code;
+
+ 	if (IS_ERR(rsp))
+ 		return PTR_ERR(rsp);
+
+ 	/* Read the code before the skb that backs 'tb' goes away. */
+ 	code = nla_get_u32(tb[BRC_GENL_A_ERR_CODE]);
+ 	kfree_skb(rsp);
+ 	return -code;
+ }
+
/* Copies a bridge name of up to IFNAMSIZ bytes from user space at 'uname',
 * forces it to be null-terminated, and sends a BRC_GENL_C_DP_ADD request
 * (if 'add' is nonzero) or a BRC_GENL_C_DP_DEL request (if 'add' is zero)
 * for that name.
 *
 * Returns -EFAULT if the name cannot be copied in, -ENOMEM if the request
 * cannot be built, and otherwise whatever brc_send_simple_command()
 * returns. */
static int brc_add_del_bridge(char __user *uname, int add)
{
+ struct sk_buff *request;
char name[IFNAMSIZ];
if (copy_from_user(name, uname, IFNAMSIZ))
return -EFAULT;
name[IFNAMSIZ - 1] = 0;
- return brc_send_command(name, NULL,
- add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL);
+ request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
+ name, NULL);
+ if (!request)
+ return -ENOMEM;
+
+ return brc_send_simple_command(request);
}
static int brc_get_bridges(int __user *uindices, int n)
static int
brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
{
+ struct sk_buff *request;
struct net_device *port;
- char dev_name[IFNAMSIZ], port_name[IFNAMSIZ];
int err;
port = __dev_get_by_index(&init_net, port_ifindex);
return -EINVAL;
/* Save name of dev and port because there's a race between the
- * rtnl_unlock() and the brc_send_command(). */
- strcpy(dev_name, dev->name);
- strcpy(port_name, port->name);
+ * rtnl_unlock() and the brc_send_simple_command(). */
+ request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
+ dev->name, port->name);
+ if (!request)
+ return -ENOMEM;
rtnl_unlock();
- err = brc_send_command(dev_name, port_name,
- add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL);
+ err = brc_send_simple_command(request);
rtnl_lock();
return err;
return num;
}
+ /*
+ * Format up to a page worth of forwarding table entries
+ * dev -- device whose name is sent as the datapath name in the query
+ * userbuf -- where to copy result
+ * maxnum -- maximum number of entries desired
+ * (limited to a page for sanity)
+ * offset -- number of records to skip
+ *
+ * Sends a BRC_GENL_C_FDB_QUERY request carrying 'maxnum' and 'offset' and
+ * copies the BRC_GENL_A_FDB_DATA payload of the reply to 'userbuf'.
+ *
+ * Returns the number of struct __fdb_entry records copied, or a negative
+ * errno value: -ENOMEM if the request cannot be built, the error from
+ * brc_send_command(), the negated error code carried in the reply, -EINVAL
+ * if the reply has no FDB data or its length is not a whole number of
+ * entries or exceeds 'maxnum' entries, or -EFAULT if the copy to 'userbuf'
+ * fails.
+ *
+ * The rtnl lock is dropped while the request is outstanding and retaken
+ * before returning.
+ */
+ static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
+ unsigned long maxnum, unsigned long offset)
+ {
+ struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+ struct sk_buff *request, *reply;
+ int retval;
+ int len;
+
+ /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
+ if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
+ maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
+
+ request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
+ if (!request)
+ return -ENOMEM;
+ NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
+ NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
+
+ rtnl_unlock(); /* Retaken at 'exit', which every later path reaches. */
+ reply = brc_send_command(request, attrs);
+ retval = PTR_ERR(reply); /* Only meaningful if IS_ERR(reply), tested next. */
+ if (IS_ERR(reply))
+ goto exit;
+
+ retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+ if (retval < 0)
+ goto exit_free_skb;
+
+ retval = -EINVAL;
+ if (!attrs[BRC_GENL_A_FDB_DATA])
+ goto exit_free_skb;
+ len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
+ if (len % sizeof(struct __fdb_entry) ||
+ len / sizeof(struct __fdb_entry) > maxnum)
+ goto exit_free_skb;
+
+ retval = len / sizeof(struct __fdb_entry);
+ if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
+ retval = -EFAULT;
+
+ exit_free_skb:
+ kfree_skb(reply);
+ exit:
+ rtnl_lock();
+ return retval;
+
+ nla_put_failure: /* Presumably the NLA_PUT_U64() failure target; runs before rtnl_unlock(), so the lock is left alone. */
+ kfree_skb(request);
+ return -ENOMEM;
+ }
+
/* Legacy ioctl's through SIOCDEVPRIVATE. Called with rtnl_lock. */
static int
old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
case BRCTL_GET_PORT_LIST:
return brc_get_port_list(dev, (int __user *)args[1], args[2]);
+
+ case BRCTL_GET_FDB_ENTRIES:
+ return brc_get_fdb_entries(dev, (void __user *)args[1],
+ args[2], args[3]);
}
return -EOPNOTSUPP;
/* Attribute policy: what each attribute may contain.  brc_send_command()
 * passes this table to nlmsg_parse() when it re-parses a reply. */
static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
[BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
+
[BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
[BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
[BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
+
+ [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC }, /* Length is checked by brc_get_fdb_entries(), not by policy. */
};
static int
if (!info->attrs[BRC_GENL_A_ERR_CODE])
return -EINVAL;
+ skb = skb_clone(skb, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
spin_lock_irqsave(&brc_lock, flags);
if (brc_seq == info->snd_seq) {
- brc_err = nla_get_u32(info->attrs[BRC_GENL_A_ERR_CODE]);
+ brc_seq++;
+
+ if (brc_reply)
+ kfree_skb(brc_reply);
+ brc_reply = skb;
+
complete(&brc_done);
err = 0;
} else {
+ kfree_skb(skb);
err = -ESTALE;
}
spin_unlock_irqrestore(&brc_lock, flags);
.dumpit = NULL
};
/* Sends 'request' to the brc multicast group and waits up to BRC_TIMEOUT
 * for the matching reply.
 *
 * The request is stamped with a freshly incremented sequence number before
 * it is sent.  Commands are serialized by 'brc_serial', so only one is
 * outstanding at a time.
 *
 * On success, returns the reply skb, which the caller must release with
 * kfree_skb(), and fills 'attrs' (which must have room for
 * BRC_GENL_A_MAX + 1 entries) with pointers into that skb.  On failure,
 * returns an ERR_PTR() holding the genlmsg_multicast() error or -ETIMEDOUT.
 *
 * NOTE(review): no caller in this file frees 'request' after this call, so
 * genlmsg_multicast() presumably takes ownership of it -- confirm. */
- static int brc_send_command(const char *bridge, const char *port, int op)
+ static struct sk_buff *brc_send_command(struct sk_buff *request, struct nlattr **attrs)
{
unsigned long int flags;
- struct sk_buff *skb;
- void *data;
+ struct sk_buff *reply;
int error;
mutex_lock(&brc_serial);
/* Increment sequence number first, so that we ignore any replies
* to stale requests. */
spin_lock_irqsave(&brc_lock, flags);
- brc_seq++;
+ nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
INIT_COMPLETION(brc_done);
spin_unlock_irqrestore(&brc_lock, flags);
- /* Compose message. */
- skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
- error = -ENOMEM;
- if (skb == NULL)
- goto exit_unlock;
- data = genlmsg_put(skb, 0, brc_seq, &brc_genl_family, 0, op);
-
- NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
- if (port)
- NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
-
- genlmsg_end(skb, data);
+ nlmsg_end(request, nlmsg_hdr(request));
/* Send message. */
- error = genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_KERNEL);
+ error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
if (error < 0)
- goto exit_unlock;
+ goto error;
/* Wait for reply. */
error = -ETIMEDOUT;
if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT))
- goto exit_unlock;
+ goto error;
- error = -brc_err;
- goto exit_unlock;
+ /* Grab reply. */
+ spin_lock_irqsave(&brc_lock, flags);
+ reply = brc_reply;
+ brc_reply = NULL;
+ spin_unlock_irqrestore(&brc_lock, flags);
- nla_put_failure:
- kfree_skb(skb);
- exit_unlock:
mutex_unlock(&brc_serial);
- return error;
+
+ /* Re-parse message. Can't fail, since it parsed correctly once
+ * already. */
+ error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
+ attrs, BRC_GENL_A_MAX, brc_genl_policy);
+ WARN_ON(error);
+
+ return reply;
+
+ error:
+ mutex_unlock(&brc_serial);
+ return ERR_PTR(error);
}
/* Takes a reference on this module on behalf of datapath 'dp' and registers
 * 'dp' with brc_sysfs_add_dp().
 *
 * Returns 0 on success, or -ENODEV if the module reference cannot be
 * obtained, in which case brc_sysfs_add_dp() is not called. */
int brc_add_dp(struct datapath *dp)
{
if (!try_module_get(THIS_MODULE))
return -ENODEV;
-#ifdef SUPPORT_SYSFS
brc_sysfs_add_dp(dp);
-#endif
return 0;
}
int brc_del_dp(struct datapath *dp)
{
-#ifdef SUPPORT_SYSFS
brc_sysfs_del_dp(dp);
-#endif
module_put(THIS_MODULE);
return 0;
dp_del_dp_hook = brc_del_dp;
/* Register hooks for interface adds and deletes */
-#ifdef SUPPORT_SYSFS
dp_add_if_hook = brc_sysfs_add_if;
dp_del_if_hook = brc_sysfs_del_if;
-#endif
/* Randomize the initial sequence number. This is not a security
* feature; it only helps avoid crossed wires between userspace and
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ return;
errout:
if (err < 0)
rtnl_set_sk_err(net, RTNLGRP_LINK, err);
dp = kzalloc(sizeof *dp, GFP_KERNEL);
if (dp == NULL)
goto err_put_module;
-
+ INIT_LIST_HEAD(&dp->port_list);
mutex_init(&dp->mutex);
dp->dp_idx = dp_idx;
for (i = 0; i < DP_N_QUEUES; i++)
skb_queue_head_init(&dp->queues[i]);
init_waitqueue_head(&dp->waitqueue);
+ /* Allocate table. */
+ err = -ENOMEM;
+ rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
+ if (!dp->table)
+ goto err_free_dp;
+
/* Setup our datapath device */
dp_dev = dp_dev_create(dp, devname, ODPP_LOCAL);
err = PTR_ERR(dp_dev);
if (IS_ERR(dp_dev))
- goto err_free_dp;
-
- err = -ENOMEM;
- rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
- if (!dp->table)
- goto err_destroy_dp_dev;
- INIT_LIST_HEAD(&dp->port_list);
+ goto err_destroy_table;
err = new_nbp(dp, dp_dev, ODPP_LOCAL);
- if (err)
+ if (err) {
+ dp_dev_destroy(dp_dev);
goto err_destroy_table;
+ }
dp->drop_frags = 0;
dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
return 0;
err_destroy_local_port:
- dp_del_port(dp->ports[ODPP_LOCAL], NULL);
+ dp_del_port(dp->ports[ODPP_LOCAL]);
err_destroy_table:
dp_table_destroy(dp->table, 0);
- err_destroy_dp_dev:
- dp_dev_destroy(dp_dev);
err_free_dp:
kfree(dp);
err_put_module:
return err;
}
- static void do_destroy_dp(struct datapath *dp, struct list_head *dp_devs)
+ static void do_destroy_dp(struct datapath *dp)
{
struct net_bridge_port *p, *n;
int i;
list_for_each_entry_safe (p, n, &dp->port_list, node)
if (p->port_no != ODPP_LOCAL)
- dp_del_port(p, dp_devs);
+ dp_del_port(p);
if (dp_del_dp_hook)
dp_del_dp_hook(dp);
rcu_assign_pointer(dps[dp->dp_idx], NULL);
- dp_del_port(dp->ports[ODPP_LOCAL], dp_devs);
+ dp_del_port(dp->ports[ODPP_LOCAL]);
dp_table_destroy(dp->table, 1);
static int destroy_dp(int dp_idx)
{
- struct dp_dev *dp_dev, *next;
struct datapath *dp;
- LIST_HEAD(dp_devs);
int err;
rtnl_lock();
if (!dp)
goto err_unlock;
- do_destroy_dp(dp, &dp_devs);
+ do_destroy_dp(dp);
err = 0;
err_unlock:
mutex_unlock(&dp_mutex);
rtnl_unlock();
- list_for_each_entry_safe (dp_dev, next, &dp_devs, list)
- free_netdev(dp_dev->dev);
return err;
}
if (copy_from_user(&port, portp, sizeof port))
goto out;
port.devname[IFNAMSIZ - 1] = '\0';
- port_no = port.port;
-
- err = -EINVAL;
- if (port_no < 0 || port_no >= DP_MAX_PORTS)
- goto out;
rtnl_lock();
dp = get_dp_locked(dp_idx);
if (!dp)
goto out_unlock_rtnl;
- err = -EEXIST;
- if (dp->ports[port_no])
- goto out_unlock_dp;
+ for (port_no = 1; port_no < DP_MAX_PORTS; port_no++)
+ if (!dp->ports[port_no])
+ goto got_port_no;
+ err = -EXFULL;
+ goto out_unlock_dp;
+got_port_no:
if (!(port.flags & ODP_PORT_INTERNAL)) {
err = -ENODEV;
dev = dev_get_by_name(&init_net, port.devname);
if (dp_add_if_hook)
dp_add_if_hook(dp->ports[port_no]);
+ err = __put_user(port_no, &port.port);
+
out_put:
dev_put(dev);
out_unlock_dp:
return err;
}
- int dp_del_port(struct net_bridge_port *p, struct list_head *dp_devs)
+ int dp_del_port(struct net_bridge_port *p)
{
ASSERT_RTNL();
-#ifdef SUPPORT_SYSFS
- if (p->port_no != ODPP_LOCAL && dp_del_if_hook)
+ if (p->port_no != ODPP_LOCAL && dp_del_if_hook) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
sysfs_remove_link(&p->dp->ifobj, p->dev->name);
+#else
+ sysfs_remove_link(p->dp->ifobj, p->dev->name);
#endif
+ }
dp_ifinfo_notify(RTM_DELLINK, p);
p->dp->n_ports--;
if (is_dp_dev(p->dev)) {
dp_dev_destroy(p->dev);
- if (dp_devs) {
- struct dp_dev *dp_dev = dp_dev_priv(p->dev);
- list_add(&dp_dev->list, dp_devs);
- }
}
if (p->port_no != ODPP_LOCAL && dp_del_if_hook) {
dp_del_if_hook(p);
static int del_port(int dp_idx, int port_no)
{
- struct dp_dev *dp_dev, *next;
struct net_bridge_port *p;
struct datapath *dp;
LIST_HEAD(dp_devs);
if (!p)
goto out_unlock_dp;
- err = dp_del_port(p, &dp_devs);
+ err = dp_del_port(p);
out_unlock_dp:
mutex_unlock(&dp->mutex);
out_unlock_rtnl:
rtnl_unlock();
out:
- list_for_each_entry_safe (dp_dev, next, &dp_devs, list)
- free_netdev(dp_dev->dev);
return err;
}
struct sw_flow *flow;
WARN_ON_ONCE(skb_shared(skb));
- WARN_ON_ONCE(skb->destructor);
/* BHs are off so we don't have to use get_cpu()/put_cpu() here. */
stats = percpu_ptr(dp->stats_percpu, smp_processor_id());
#endif
#ifdef CONFIG_XEN
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,18)
/* This code is copied verbatim from net/dev/core.c in Xen's
* linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. We can't call those functions
* directly because they aren't exported. */
out:
return -EPROTO;
}
-#endif
+#endif /* linux == 2.6.18 */
+#endif /* CONFIG_XEN */
int
dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no,
stats->n_bytes = flow->byte_count;
stats->ip_tos = flow->ip_tos;
stats->tcp_flags = flow->tcp_flags;
+ stats->error = 0;
}
static void clear_stats(struct sw_flow *flow)
if (!n_actions)
return 0;
- if (ufp->n_actions > INT_MAX / sizeof(union odp_action))
- return -EINVAL;
sf_acts = rcu_dereference(flow->sf_acts);
if (__put_user(sf_acts->n_actions, &ufp->n_actions) ||
return put_actions(flow, ufp);
}
-static int del_or_query_flow(struct datapath *dp,
- struct odp_flow __user *ufp,
- unsigned int cmd)
+static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
{
struct dp_table *table = rcu_dereference(dp->table);
struct odp_flow uf;
if (!flow)
goto error;
- if (cmd == ODP_FLOW_DEL) {
- /* XXX redundant lookup */
- error = dp_table_delete(table, flow);
- if (error)
- goto error;
+ /* XXX redundant lookup */
+ error = dp_table_delete(table, flow);
+ if (error)
+ goto error;
- /* XXX These statistics might lose a few packets, since other
- * CPUs can be using this flow. We used to synchronize_rcu()
- * to make sure that we get completely accurate stats, but that
- * blows our performance, badly. */
- dp->n_flows--;
- error = answer_query(flow, ufp);
- flow_deferred_free(flow);
- } else {
- error = answer_query(flow, ufp);
- }
+ /* XXX These statistics might lose a few packets, since other CPUs can
+ * be using this flow. We used to synchronize_rcu() to make sure that
+ * we get completely accurate stats, but that blows our performance,
+ * badly. */
+ dp->n_flows--;
+ error = answer_query(flow, ufp);
+ flow_deferred_free(flow);
error:
return error;
}
-static int query_multiple_flows(struct datapath *dp,
- const struct odp_flowvec *flowvec)
+static int query_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
{
struct dp_table *table = rcu_dereference(dp->table);
int i;
flow = dp_table_lookup(table, &uf.key);
if (!flow)
- error = __clear_user(&ufp->stats, sizeof ufp->stats);
+ error = __put_user(ENOENT, &ufp->stats.error);
else
error = answer_query(flow, ufp);
if (error)
return err;
}
-static int
-get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
+static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
{
struct odp_stats stats;
int i;
break;
}
}
- return put_user(idx, &pvp->n_ports);
+ return put_user(dp->n_ports, &pvp->n_ports);
}
/* RCU callback for freeing a dp_port_group */
/* Handle commands with special locking requirements up front. */
switch (cmd) {
case ODP_DP_CREATE:
- return create_dp(dp_idx, (char __user *)argp);
+ err = create_dp(dp_idx, (char __user *)argp);
+ goto exit;
case ODP_DP_DESTROY:
- return destroy_dp(dp_idx);
+ err = destroy_dp(dp_idx);
+ goto exit;
case ODP_PORT_ADD:
- return add_port(dp_idx, (struct odp_port __user *)argp);
+ err = add_port(dp_idx, (struct odp_port __user *)argp);
+ goto exit;
case ODP_PORT_DEL:
err = get_user(port_no, (int __user *)argp);
- if (err)
- break;
- return del_port(dp_idx, port_no);
+ if (!err)
+ err = del_port(dp_idx, port_no);
+ goto exit;
}
dp = get_dp_locked(dp_idx);
+ err = -ENODEV;
if (!dp)
- return -ENODEV;
+ goto exit;
switch (cmd) {
case ODP_DP_STATS:
break;
case ODP_FLOW_DEL:
- case ODP_FLOW_GET:
- err = del_or_query_flow(dp, (struct odp_flow __user *)argp,
- cmd);
+ err = del_flow(dp, (struct odp_flow __user *)argp);
break;
- case ODP_FLOW_GET_MULTIPLE:
- err = do_flowvec_ioctl(dp, argp, query_multiple_flows);
+ case ODP_FLOW_GET:
+ err = do_flowvec_ioctl(dp, argp, query_flows);
break;
case ODP_FLOW_LIST:
break;
}
mutex_unlock(&dp->mutex);
+exit:
return err;
}
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
+#include <linux/version.h>
#include "flow.h"
#include "brc_sysfs.h"
- struct sk_buff;
-
/* Mask for the priority bits in a vlan header. If we ever merge upstream
* then this should go into include/linux/if_vlan.h. */
#define VLAN_PCP_MASK 0xe000
struct mutex mutex;
int dp_idx;
-#ifdef SUPPORT_SYSFS
+#ifdef CONFIG_SYSFS
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
struct kobject ifobj;
+#else
+ struct kobject *ifobj;
+#endif
#endif
int drop_frags;
u16 port_no;
struct datapath *dp;
struct net_device *dev;
-#ifdef SUPPORT_SYSFS
+#ifdef CONFIG_SYSFS
struct kobject kobj;
#endif
struct list_head node; /* Element in datapath.ports. */
void *aux);
void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *);
- int dp_del_port(struct net_bridge_port *, struct list_head *);
- int dp_output_port(struct datapath *, struct sk_buff *, int out_port,
- int ignore_no_fwd);
+ int dp_del_port(struct net_bridge_port *);
int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
- void dp_set_origin(struct datapath *, u16, struct sk_buff *);
struct datapath *get_dp(int dp_idx);
#include "datapath.h"
#include "dp_dev.h"
+ struct pcpu_lstats { /* One CPU's traffic counters for a datapath device; allocated per CPU in dp_dev_init(). */
+ unsigned long rx_packets; /* Incremented by dp_dev_recv(). */
+ unsigned long rx_bytes; /* Grows by the packet length in dp_dev_recv(). */
+ unsigned long tx_packets; /* Incremented by dp_dev_xmit(). */
+ unsigned long tx_bytes; /* Grows by skb->len in dp_dev_xmit(). */
+ };
+
struct datapath *dp_dev_get_dp(struct net_device *netdev)
{
return dp_dev_priv(netdev)->dp;
/* Rebuilds and returns the statistics for 'netdev'.
 *
 * Zeroes dp_dev->stats, then adds up the rx/tx packet and byte counters from
 * every possible CPU's struct pcpu_lstats.  The returned pointer refers to
 * dp_dev->stats itself, so a later call overwrites what it points to.
 *
 * NOTE(review): the shared structure is cleared and refilled with no locking
 * visible here, so a concurrent caller could observe partial totals --
 * confirm whether callers are serialized. */
static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev)
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
- return &dp_dev->stats;
+ struct net_device_stats *stats;
+ int i;
+
+ stats = &dp_dev->stats;
+ memset(stats, 0, sizeof *stats);
+ for_each_possible_cpu(i) {
+ const struct pcpu_lstats *lb_stats;
+
+ lb_stats = per_cpu_ptr(dp_dev->lstats, i);
+ stats->rx_bytes += lb_stats->rx_bytes;
+ stats->rx_packets += lb_stats->rx_packets;
+ stats->tx_bytes += lb_stats->tx_bytes;
+ stats->tx_packets += lb_stats->tx_packets;
+ }
+ return stats;
}
int dp_dev_recv(struct net_device *netdev, struct sk_buff *skb)
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
+ struct pcpu_lstats *lb_stats;
int len;
len = skb->len;
skb->pkt_type = PACKET_HOST;
else
netif_rx_ni(skb);
netdev->last_rx = jiffies;
- dp_dev->stats.rx_packets++;
- dp_dev->stats.rx_bytes += len;
+ lb_stats = per_cpu_ptr(dp_dev->lstats, smp_processor_id());
+ lb_stats->rx_packets++;
+ lb_stats->rx_bytes += len;
return len;
}
return 0;
}
+ /* Transmit hook for a datapath device.  Counts 'skb' in the current CPU's
+ * tx statistics and hands it directly to dp_process_received_packet() for
+ * the port numbered dp_dev->port_no, inside an rcu_read_lock_bh() section.
+ *
+ * Always returns 0.  If skb_share_check() returns NULL, the packet is
+ * neither counted nor processed.
+ *
+ * Not reentrant (because it is called with BHs disabled), but may be called
+ * simultaneously on different CPUs. */
static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
+ struct pcpu_lstats *lb_stats;
- /* By orphaning 'skb' we will screw up socket accounting slightly, but
- * the effect is limited to the device queue length. If we don't
- * do this, then the sk_buff will be destructed eventually, but it is
- * harder to predict when. */
- skb_orphan(skb);
-
- /* We are going to modify 'skb', by sticking it on &dp_dev->xmit_queue,
- * so we need to have our own clone. (At any rate, fwd_port_input()
- * will need its own clone, so there's no benefit to queuing any other
- * way.) */
+ /* dp_process_received_packet() needs its own clone. */
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
return 0;
- dp_dev->stats.tx_packets++;
- dp_dev->stats.tx_bytes += skb->len;
-
- if (skb_queue_len(&dp_dev->xmit_queue) >= netdev->tx_queue_len) {
- /* Queue overflow. Stop transmitter. */
- netif_stop_queue(netdev);
-
- /* We won't see all dropped packets individually, so overrun
- * error is appropriate. */
- dp_dev->stats.tx_fifo_errors++;
- }
- skb_queue_tail(&dp_dev->xmit_queue, skb);
- netdev->trans_start = jiffies;
+ lb_stats = per_cpu_ptr(dp_dev->lstats, smp_processor_id());
+ lb_stats->tx_packets++;
+ lb_stats->tx_bytes += skb->len;
- schedule_work(&dp_dev->xmit_work);
+ skb_reset_mac_header(skb);
+ rcu_read_lock_bh();
+ dp_process_received_packet(skb, dp_dev->dp->ports[dp_dev->port_no]);
+ rcu_read_unlock_bh();
return 0;
}
- static void dp_dev_do_xmit(struct work_struct *work)
- {
- struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work);
- struct datapath *dp = dp_dev->dp;
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
- skb_reset_mac_header(skb);
- rcu_read_lock_bh();
- dp_process_received_packet(skb, dp->ports[dp_dev->port_no]);
- rcu_read_unlock_bh();
- }
- netif_wake_queue(dp_dev->dev);
- }
-
static int dp_dev_open(struct net_device *netdev)
{
netif_start_queue(netdev);
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
strcpy(info->driver, "openvswitch");
- sprintf(info->bus_info, "%d", dp_dev->dp->dp_idx);
+ sprintf(info->bus_info, "%d.%d", dp_dev->dp->dp_idx, dp_dev->port_no);
}
static struct ethtool_ops dp_ethtool_ops = {
.get_tso = ethtool_op_get_tso,
};
+ /* Allocates the per-CPU struct pcpu_lstats counters for 'netdev' and stores
+  * them in its dp_dev private data.
+  *
+  * Returns 0 on success or -ENOMEM if the per-CPU allocation fails. */
+ static int dp_dev_init(struct net_device *netdev)
+ {
+ 	struct dp_dev *priv = dp_dev_priv(netdev);
+
+ 	priv->lstats = alloc_percpu(struct pcpu_lstats);
+ 	return priv->lstats ? 0 : -ENOMEM;
+ }
+
+ /* Releases the per-CPU counters that dp_dev_init() allocated for 'netdev',
+  * then frees 'netdev' itself with free_netdev(). */
+ static void dp_dev_free(struct net_device *netdev)
+ {
+ 	/* Must run first: the counters pointer lives in netdev's private
+ 	 * area. */
+ 	free_percpu(dp_dev_priv(netdev)->lstats);
+ 	free_netdev(netdev);
+ }
+
static void
do_setup(struct net_device *netdev)
{
netdev->open = dp_dev_open;
SET_ETHTOOL_OPS(netdev, &dp_ethtool_ops);
netdev->stop = dp_dev_stop;
- netdev->tx_queue_len = 100;
+ netdev->tx_queue_len = 0;
netdev->set_mac_address = dp_dev_mac_addr;
+ netdev->init = dp_dev_init;
+ netdev->destructor = dp_dev_free;
netdev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ netdev->features = NETIF_F_LLTX; /* XXX other features? */
random_ether_addr(netdev->dev_addr);
dp_dev->dp = dp;
dp_dev->port_no = port_no;
dp_dev->dev = netdev;
- skb_queue_head_init(&dp_dev->xmit_queue);
- INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit);
return netdev;
}
/* Unregisters 'netdev' with unregister_netdevice().  No transmit queue is
 * drained here any longer.
 *
 * Called with RTNL lock and dp_mutex.
 *
 * NOTE(review): the device memory is presumably released later through
 * netdev->destructor rather than here -- confirm. */
void dp_dev_destroy(struct net_device *netdev)
{
- struct dp_dev *dp_dev = dp_dev_priv(netdev);
-
- netif_tx_disable(netdev);
- synchronize_net();
- skb_queue_purge(&dp_dev->xmit_queue);
unregister_netdevice(netdev);
}
static void show_flows(struct rconn *);
static void show_dpid_ip(struct rconn *, const struct dict *);
-static void show_secchan_state(const struct dict *);
+static void show_ofproto_state(const struct dict *);
static void show_fail_open_state(const struct dict *);
static void show_discovery_state(const struct dict *);
static void show_remote_state(const struct dict *);
if (!show_reboot_state()) {
show_flows(rconn);
show_dpid_ip(rconn, dict);
- show_secchan_state(dict);
+ show_ofproto_state(dict);
show_fail_open_state(dict);
show_discovery_state(dict);
show_remote_state(dict);
const char *is_connected, *local_ip;
dict_lookup(dict, "local.is-connected", &is_connected);
- dict_lookup(dict, "in-band.local-ip", &local_ip);
+ dict_lookup(dict, "remote.local-ip", &local_ip);
if (!is_connected && !local_ip) {
/* If we're not connected to the datapath and don't have a local IP,
* then we won't have anything useful to show anyhow. */
}
static void
-show_secchan_state(const struct dict *dict)
+show_ofproto_state(const struct dict *dict)
{
static struct message *msg;
const char *is_connected;
static bool inited = false;
dict_lookup(dict, "local.is-connected", &is_connected);
- dict_lookup(dict, "in-band.local-ip", &local_ip);
+ dict_lookup(dict, "remote.local-ip", &local_ip);
if (!is_connected && !local_ip) {
/* If we're not connected to the datapath and don't have a local IP,
* then we won't have anything useful to show anyhow. */
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "list.h"
+#include "netdev-linux.h"
#include "netlink.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
+#include "shash.h"
#include "socket-util.h"
#include "svec.h"
peer ? peer : &dummy[3]);
}
+/* Set the features advertised by 'netdev' to 'advertise'. */
int
netdev_set_advertisements(struct netdev *netdev, uint32_t advertise)
{
/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if
* 'in4' is non-null) and returns true. Otherwise, returns false. */
bool
- netdev_get_in4(const struct netdev *netdev, struct in_addr *in4)
+ netdev_nodev_get_in4(const char *netdev_name, struct in_addr *in4)
{
struct ifreq ifr;
struct in_addr ip = { INADDR_ANY };
- strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name);
+ init_netdev();
+
+ strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
ifr.ifr_addr.sa_family = AF_INET;
COVERAGE_INC(netdev_get_in4);
if (ioctl(af_inet_sock, SIOCGIFADDR, &ifr) == 0) {
ip = sin->sin_addr;
} else {
VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFADDR) failed: %s",
- netdev->name, strerror(errno));
+ netdev_name, strerror(errno));
}
if (in4) {
*in4 = ip;
return ip.s_addr != INADDR_ANY;
}
+ /* Same as netdev_nodev_get_in4(), but identifies the device by an open
+  * 'netdev' instead of by name. */
+ bool
+ netdev_get_in4(const struct netdev *netdev, struct in_addr *in4)
+ {
+     const char *name = netdev->name;
+
+     return netdev_nodev_get_in4(name, in4);
+ }
+
static void
make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
{
* returns 0. Otherwise, it returns a positive errno value; in particular,
* ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. */
int
- netdev_arp_lookup(const struct netdev *netdev,
- uint32_t ip, uint8_t mac[ETH_ADDR_LEN])
+ netdev_nodev_arp_lookup(const char *netdev_name, uint32_t ip,
+ uint8_t mac[ETH_ADDR_LEN])
{
struct arpreq r;
struct sockaddr_in *pa;
int retval;
+ init_netdev();
+
memset(&r, 0, sizeof r);
pa = (struct sockaddr_in *) &r.arp_pa;
pa->sin_family = AF_INET;
pa->sin_port = 0;
r.arp_ha.sa_family = ARPHRD_ETHER;
r.arp_flags = 0;
- strncpy(r.arp_dev, netdev->name, sizeof r.arp_dev);
+ strncpy(r.arp_dev, netdev_name, sizeof r.arp_dev);
COVERAGE_INC(netdev_arp_lookup);
retval = ioctl(af_inet_sock, SIOCGARP, &r) < 0 ? errno : 0;
if (!retval) {
memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN);
} else if (retval != ENXIO) {
VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s",
- netdev->name, IP_ARGS(&ip), strerror(retval));
+ netdev_name, IP_ARGS(&ip), strerror(retval));
}
return retval;
}
+ /* Same as netdev_nodev_arp_lookup(), but identifies the device by an open
+  * 'netdev' instead of by name. */
+ int
+ netdev_arp_lookup(const struct netdev *netdev, uint32_t ip,
+                   uint8_t mac[ETH_ADDR_LEN])
+ {
+     const char *name = netdev->name;
+
+     return netdev_nodev_arp_lookup(name, ip, mac);
+ }
+
static int
get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
{
return ENODEV;
}
+/* Sets 'carrier' to true if carrier is active (link light is on) on
+ * 'netdev'. */
int
netdev_get_carrier(const struct netdev *netdev, bool *carrier)
{
return error;
}
+/* Retrieves current device stats for 'netdev'. */
int
netdev_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
{
}
}
+ /* Attempts to locate a device based on its IPv4 address 'in4'.
+  *
+  * The caller may provide a hint by setting '*netdev_name' to a likely
+  * device name.  That string must be malloc'd, because it is freed if it
+  * does not name a device with address 'in4'.  If there is no hint,
+  * '*netdev_name' must be NULL.
+  *
+  * If the device is found, returns true and '*netdev_name' holds the
+  * device's name: either the untouched hint or a newly malloc'd string.
+  * The caller is responsible for freeing it.  If the device is not found,
+  * returns false and '*netdev_name' is NULL. */
+ bool
+ netdev_find_dev_by_in4(const struct in_addr *in4, char **netdev_name)
+ {
+     struct in_addr dev_in4;
+     struct svec dev_list;
+     size_t i;
+
+     /* Check the hint first. */
+     if (*netdev_name && netdev_nodev_get_in4(*netdev_name, &dev_in4)
+         && dev_in4.s_addr == in4->s_addr) {
+         return true;
+     }
+
+     /* The hint was absent or wrong; discard it and scan every device. */
+     free(*netdev_name);
+     *netdev_name = NULL;
+     netdev_enumerate(&dev_list);
+
+     for (i = 0; i < dev_list.n; i++) {
+         if (netdev_nodev_get_in4(dev_list.names[i], &dev_in4)
+             && dev_in4.s_addr == in4->s_addr) {
+             /* Copy before the list that owns the name is destroyed. */
+             *netdev_name = xstrdup(dev_list.names[i]);
+             svec_destroy(&dev_list);
+             return true;
+         }
+     }
+
+     svec_destroy(&dev_list);
+     return false;
+ }
+
/* Obtains the current flags for the network device named 'netdev_name' and
* stores them into '*flagsp'. Returns 0 if successful, otherwise a positive
* errno value. On error, stores 0 into '*flagsp'.
return error;
}
\f
+struct netdev_monitor { /* Tracks change notifications for a chosen set of network devices. */
+ struct linux_netdev_notifier notifier; /* Registered with netdev_monitor_change() as its callback. */
+ struct shash polled_netdevs; /* Names added with netdev_monitor_add(); data is always NULL. */
+ struct shash changed_netdevs; /* Polled names with a change not yet returned by netdev_monitor_poll(). */
+};
+
+static void netdev_monitor_change(const struct linux_netdev_change *change,
+ void *monitor);
+
+/* Creates a new monitor with no devices in it and registers its change
+ * notifier.
+ *
+ * On success, stores the monitor in '*monitorp' and returns 0.  If the
+ * notifier cannot be registered, frees the monitor, leaves '*monitorp'
+ * untouched, and returns the registration error. */
+int
+netdev_monitor_create(struct netdev_monitor **monitorp)
+{
+    struct netdev_monitor *m = xmalloc(sizeof *m);
+    int retval = linux_netdev_notifier_register(&m->notifier,
+                                                netdev_monitor_change, m);
+
+    if (!retval) {
+        shash_init(&m->polled_netdevs);
+        shash_init(&m->changed_netdevs);
+        *monitorp = m;
+        return 0;
+    }
+
+    free(m);
+    return retval;
+}
+
+/* Destroys 'monitor': unregisters its notifier, releases both of its name
+ * sets, and frees the monitor itself.  Does nothing if 'monitor' is null. */
+void
+netdev_monitor_destroy(struct netdev_monitor *monitor)
+{
+    if (!monitor) {
+        return;
+    }
+
+    linux_netdev_notifier_unregister(&monitor->notifier);
+    shash_destroy(&monitor->polled_netdevs);
+    /* Changes that were queued by netdev_monitor_change() but never polled
+     * are still held here; release them too so they are not leaked. */
+    shash_destroy(&monitor->changed_netdevs);
+    free(monitor);
+}
+
+/* Starts watching 'netdev' with 'monitor'.  Has no effect if a device of
+ * the same name is already being watched. */
+void
+netdev_monitor_add(struct netdev_monitor *monitor, struct netdev *netdev)
+{
+    if (shash_find(&monitor->polled_netdevs, netdev_get_name(netdev))) {
+        return;
+    }
+    shash_add(&monitor->polled_netdevs, netdev_get_name(netdev), NULL);
+}
+
+/* Stops watching 'netdev' with 'monitor' and discards any change already
+ * queued for it.  Has no effect if 'netdev' was not being watched. */
+void
+netdev_monitor_remove(struct netdev_monitor *monitor, struct netdev *netdev)
+{
+    struct shash_node *polled;
+    struct shash_node *pending;
+
+    polled = shash_find(&monitor->polled_netdevs, netdev_get_name(netdev));
+    if (!polled) {
+        return;
+    }
+    shash_delete(&monitor->polled_netdevs, polled);
+
+    pending = shash_find(&monitor->changed_netdevs, netdev_get_name(netdev));
+    if (pending) {
+        shash_delete(&monitor->changed_netdevs, pending);
+    }
+}
+
+/* Reports one watched device that has changed since it was last reported.
+ *
+ * If the notifier reports an error, drops every queued change, sets
+ * '*devnamep' to NULL, and returns that error.  Otherwise, if no change is
+ * queued, sets '*devnamep' to NULL and returns EAGAIN.  Otherwise removes
+ * one queued name, stores a malloc'd copy of it in '*devnamep' for the
+ * caller to free, and returns 0. */
+int
+netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep)
+{
+    struct shash_node *pending;
+    int retval = linux_netdev_notifier_get_error(&monitor->notifier);
+
+    *devnamep = NULL;
+    if (retval) {
+        shash_clear(&monitor->changed_netdevs);
+        return retval;
+    }
+
+    pending = shash_first(&monitor->changed_netdevs);
+    if (!pending) {
+        return EAGAIN;
+    }
+
+    /* Copy the name before the node that owns it is deleted. */
+    *devnamep = xstrdup(pending->name);
+    shash_delete(&monitor->changed_netdevs, pending);
+    return retval;
+}
+
+/* Arranges for the poll loop to wake up when netdev_monitor_poll() has
+ * something to report: immediately if a change is already queued or the
+ * notifier has an error pending, otherwise when the notifier has news. */
+void
+netdev_monitor_poll_wait(const struct netdev_monitor *monitor)
+{
+    if (shash_is_empty(&monitor->changed_netdevs)
+        && !linux_netdev_notifier_peek_error(&monitor->notifier)) {
+        linux_netdev_notifier_wait();
+    } else {
+        poll_immediate_wake();
+    }
+}
+
+/* Notifier callback.  Queues the device named in 'change' for reporting by
+ * netdev_monitor_poll(), provided that 'monitor_' is watching it and it is
+ * not queued already. */
+static void
+netdev_monitor_change(const struct linux_netdev_change *change, void *monitor_)
+{
+    struct netdev_monitor *m = monitor_;
+
+    if (!shash_find(&m->polled_netdevs, change->ifname)) {
+        return;
+    }
+    if (shash_find(&m->changed_netdevs, change->ifname)) {
+        return;
+    }
+    shash_add(&m->changed_netdevs, change->ifname, NULL);
+}
+\f
static void restore_all_flags(void *aux);
/* Set up a signal hook to restore network device flags on program
enum netdev_flags {
NETDEV_UP = 0x0001, /* Device enabled? */
- NETDEV_PROMISC = 0x0002 /* Promiscuous mode? */
+ NETDEV_PROMISC = 0x0002, /* Promiscuous mode? */
+ NETDEV_LOOPBACK = 0x0004 /* This is a loopback device. */
};
enum netdev_pseudo_ethertype {
uint32_t kbits_burst);
void netdev_enumerate(struct svec *);
+ bool netdev_find_dev_by_in4(const struct in_addr *in4, char **netdev_name);
int netdev_nodev_get_flags(const char *netdev_name, enum netdev_flags *);
+ bool netdev_nodev_get_in4(const char *netdev_name, struct in_addr *);
int netdev_nodev_set_etheraddr(const char *name, const uint8_t mac[6]);
int netdev_nodev_get_etheraddr(const char *netdev_name, uint8_t mac[6]);
int netdev_nodev_set_policing(const char *netdev_name, uint32_t kbits_rate,
uint32_t kbits_burst);
+ int netdev_nodev_arp_lookup(const char *netdev_name, uint32_t ip,
+ uint8_t mac[6]);
int netdev_nodev_get_carrier(const char *netdev_name, bool *carrier);
int netdev_get_vlan_vid(const char *netdev_name, int *vlan_vid);
+struct netdev_monitor;
+int netdev_monitor_create(struct netdev_monitor **);
+void netdev_monitor_destroy(struct netdev_monitor *);
+void netdev_monitor_add(struct netdev_monitor *, struct netdev *);
+void netdev_monitor_remove(struct netdev_monitor *, struct netdev *);
+int netdev_monitor_poll(struct netdev_monitor *, char **devnamep);
+void netdev_monitor_poll_wait(const struct netdev_monitor *);
+
#endif /* netdev.h */
time_t last_admitted;
/* These values are simply for statistics reporting, not used directly by
- * anything internal to the rconn (or the secchan for that matter). */
+ * anything internal to the rconn (or ofproto for that matter). */
unsigned int packets_received;
unsigned int n_attempted_connections, n_successful_connections;
time_t creation_time;
/* Returns the IP address of the peer, or 0 if the peer is not connected over
* an IP-based protocol or if its IP address is not known. */
uint32_t
- rconn_get_ip(const struct rconn *rconn)
+ rconn_get_remote_ip(const struct rconn *rconn)
{
- return rconn->vconn ? vconn_get_ip(rconn->vconn) : 0;
+ return rconn->vconn ? vconn_get_remote_ip(rconn->vconn) : 0;
+ }
+
+ /* Returns the peer's transport port as reported by the underlying vconn, or
+  * 0 when 'rconn' currently has no vconn. */
+ uint16_t
+ rconn_get_remote_port(const struct rconn *rconn)
+ {
+     if (!rconn->vconn) {
+         return 0;
+     }
+     return vconn_get_remote_port(rconn->vconn);
+ }
+
+ /* Returns the local IP address of the connection as reported by the
+  * underlying vconn, or 0 when 'rconn' currently has no vconn. */
+ uint32_t
+ rconn_get_local_ip(const struct rconn *rconn)
+ {
+     if (!rconn->vconn) {
+         return 0;
+     }
+     return vconn_get_local_ip(rconn->vconn);
+ }
+
+ /* Returns the local transport port of the connection as reported by the
+  * underlying vconn, or 0 when 'rconn' currently has no vconn. */
+ uint16_t
+ rconn_get_local_port(const struct rconn *rconn)
+ {
+     if (!rconn->vconn) {
+         return 0;
+     }
+     return vconn_get_local_port(rconn->vconn);
}
/* If 'rconn' can't connect to the peer, it could be for any number of reasons.
#include <poll.h>
#include <stddef.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/un.h>
: htonl(0)); /* ??? */
}
+/* Opens a non-blocking TCP socket and connects to 'target', which should be a
+ * string in the format "<host>[:<port>]", where <host> is required and <port>
+ * is optional, with 'default_port' assumed if <port> is omitted (or if it
+ * parses as 0).
+ *
+ * On success, returns 0 (indicating connection complete) or EAGAIN (indicating
+ * connection in progress), in which case the new file descriptor is stored
+ * into '*fdp'.  On failure, returns a positive errno value other than EAGAIN
+ * and stores -1 into '*fdp'.
+ *
+ * If 'sinp' is non-null, then on success the target address is stored into
+ * '*sinp'. */
+int
+tcp_open_active(const char *target_, uint16_t default_port,
+                struct sockaddr_in *sinp, int *fdp)
+{
+    char *target = xstrdup(target_);
+    char *save_ptr = NULL;
+    const char *host_name;
+    const char *port_string;
+    struct sockaddr_in sin;
+    int fd = -1;
+    int error;
+
+    /* Defaults. */
+    memset(&sin, 0, sizeof sin);
+    sin.sin_family = AF_INET;
+    sin.sin_port = htons(default_port);
+
+    /* Tokenize. */
+    host_name = strtok_r(target, ":", &save_ptr);
+    port_string = strtok_r(NULL, ":", &save_ptr);
+    if (!host_name) {
+        ovs_error(0, "%s: bad peer name format", target_);
+        error = EAFNOSUPPORT;
+        goto exit;
+    }
+
+    /* Look up IP, port. */
+    error = lookup_ip(host_name, &sin.sin_addr);
+    if (error) {
+        goto exit;
+    }
+    if (port_string && atoi(port_string)) {
+        sin.sin_port = htons(atoi(port_string));
+    }
+
+    /* Create non-blocking socket. */
+    fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (fd < 0) {
+        /* Capture errno before logging: the logging call may overwrite it,
+         * and a clobbered value of 0 would be reported as success. */
+        error = errno;
+        VLOG_ERR("%s: socket: %s", target_, strerror(error));
+        goto exit;
+    }
+    error = set_nonblocking(fd);
+    if (error) {
+        goto exit_close;
+    }
+
+    /* Connect. */
+    error = connect(fd, (struct sockaddr *) &sin, sizeof sin) == 0 ? 0 : errno;
+    if (error == EINPROGRESS) {
+        error = EAGAIN;
+    } else if (error && error != EAGAIN) {
+        goto exit_close;
+    }
+
+    /* Success: error is 0 or EAGAIN. */
+    goto exit;
+
+exit_close:
+    close(fd);
+exit:
+    if (!error || error == EAGAIN) {
+        if (sinp) {
+            *sinp = sin;
+        }
+        *fdp = fd;
+    } else {
+        *fdp = -1;
+    }
+    free(target);
+    return error;
+}
+
+/* Creates a non-blocking, listening TCP socket described by 'target', a
+ * string of the form "[<port>][:<ip>]".  A missing <port> (or one that parses
+ * as 0) means 'default_port'; a missing or empty <ip> means the wildcard
+ * address.  SO_REUSEADDR is enabled on the socket before it is bound.
+ *
+ * Returns the new file descriptor (non-negative) on success, or a negative
+ * errno value on failure. */
+int
+tcp_open_passive(const char *target_, uint16_t default_port)
+{
+    char *target = xstrdup(target_);
+    char *cursor = target;
+    const char *port_field;
+    const char *ip_field;
+    struct sockaddr_in sin;
+    unsigned int reuse = 1;
+    int error;
+    int fd;
+
+    /* Start from the wildcard address and the default port. */
+    memset(&sin, 0, sizeof sin);
+    sin.sin_family = AF_INET;
+    sin.sin_addr.s_addr = htonl(INADDR_ANY);
+    sin.sin_port = htons(default_port);
+
+    /* First field: optional port number. */
+    port_field = strsep(&cursor, ":");
+    if (port_field) {
+        int port = atoi(port_field);
+        if (port) {
+            sin.sin_port = htons(port);
+        }
+    }
+
+    /* Second field: optional IP address to bind to. */
+    ip_field = strsep(&cursor, ":");
+    if (ip_field && ip_field[0]) {
+        error = lookup_ip(ip_field, &sin.sin_addr);
+        if (error) {
+            goto exit;
+        }
+    }
+
+    /* Non-blocking socket with SO_REUSEADDR set. */
+    fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (fd < 0) {
+        error = errno;
+        VLOG_ERR("%s: socket: %s", target_, strerror(error));
+        goto exit;
+    }
+    error = set_nonblocking(fd);
+    if (error) {
+        goto exit_close;
+    }
+    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof reuse) < 0) {
+        error = errno;
+        VLOG_ERR("%s: setsockopt(SO_REUSEADDR): %s", target_, strerror(error));
+        goto exit_close;
+    }
+
+    /* Bind, then listen. */
+    if (bind(fd, (struct sockaddr *) &sin, sizeof sin) < 0) {
+        error = errno;
+        VLOG_ERR("%s: bind: %s", target_, strerror(error));
+        goto exit_close;
+    }
+    if (listen(fd, 10) < 0) {
+        error = errno;
+        VLOG_ERR("%s: listen: %s", target_, strerror(error));
+        goto exit_close;
+    }
+
+    /* Success: skip the close below and hand 'fd' to the caller. */
+    error = 0;
+    goto exit;
+
+exit_close:
+    close(fd);
+exit:
+    free(target);
+    return error ? -error : fd;
+}
+
+ /* Returns a file descriptor for /dev/null opened for reading and writing,
+  * or a negative errno value if it cannot be opened.  The descriptor is
+  * opened once and cached, so every successful call returns the same fd and
+  * callers must not close it. */
+ int
+ get_null_fd(void)
+ {
+     static int null_fd = -1;
+
+     if (null_fd >= 0) {
+         /* Already opened by an earlier call. */
+         return null_fd;
+     }
+
+     null_fd = open("/dev/null", O_RDWR);
+     if (null_fd < 0) {
+         int error = errno;
+         VLOG_ERR("could not open /dev/null: %s", strerror(error));
+         return -error;
+     }
+     return null_fd;
+ }
+
int
read_fully(int fd, void *p_, size_t size, size_t *bytes_read)
{
const char *bind_path, const char *connect_path);
int get_unix_name_len(socklen_t sun_len);
uint32_t guess_netmask(uint32_t ip);
+ int get_null_fd(void);
+int tcp_open_active(const char *target, uint16_t default_port,
+ struct sockaddr_in *sinp, int *fdp);
+int tcp_open_passive(const char *target, uint16_t default_port);
+
int read_fully(int fd, void *, size_t, size_t *bytes_read);
int write_fully(int fd, const void *, size_t, size_t *bytes_written);
static int
new_ssl_vconn(const char *name, int fd, enum session_type type,
- enum ssl_state state, const struct sockaddr_in *sin,
+ enum ssl_state state, const struct sockaddr_in *remote,
struct vconn **vconnp)
{
+ struct sockaddr_in local;
+ socklen_t local_len = sizeof local;
struct ssl_vconn *sslv;
SSL *ssl = NULL;
int on = 1;
goto error;
}
+ /* Get the local IP and port information */
+ retval = getsockname(fd, (struct sockaddr *) &local, &local_len);
+ if (retval) {
+ memset(&local, 0, sizeof local);
+ }
+
/* Disable Nagle. */
retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on);
if (retval) {
/* Create and return the ssl_vconn. */
sslv = xmalloc(sizeof *sslv);
- vconn_init(&sslv->vconn, &ssl_vconn_class, EAGAIN, sin->sin_addr.s_addr,
- name, true);
+ vconn_init(&sslv->vconn, &ssl_vconn_class, EAGAIN, name, true);
+ vconn_set_remote_ip(&sslv->vconn, remote->sin_addr.s_addr);
+ vconn_set_remote_port(&sslv->vconn, remote->sin_port);
+ vconn_set_local_ip(&sslv->vconn, local.sin_addr.s_addr);
+ vconn_set_local_port(&sslv->vconn, local.sin_port);
sslv->state = state;
sslv->type = type;
sslv->fd = fd;
static int
ssl_open(const char *name, char *suffix, struct vconn **vconnp)
{
- char *save_ptr, *host_name, *port_string;
struct sockaddr_in sin;
- int retval;
- int fd;
-
- retval = ssl_init();
- if (retval) {
- return retval;
- }
-
- host_name = strtok_r(suffix, ":", &save_ptr);
- port_string = strtok_r(NULL, ":", &save_ptr);
- if (!host_name) {
- ovs_error(0, "%s: bad peer name format", name);
- return EAFNOSUPPORT;
- }
-
- memset(&sin, 0, sizeof sin);
- sin.sin_family = AF_INET;
- if (lookup_ip(host_name, &sin.sin_addr)) {
- return ENOENT;
- }
- sin.sin_port = htons(port_string && *port_string ? atoi(port_string)
- : OFP_SSL_PORT);
+ int error, fd;
- /* Create socket. */
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- VLOG_ERR("%s: socket: %s", name, strerror(errno));
- return errno;
- }
- retval = set_nonblocking(fd);
- if (retval) {
- close(fd);
- return retval;
+ error = ssl_init();
+ if (error) {
+ return error;
}
- /* Connect socket. */
- retval = connect(fd, (struct sockaddr *) &sin, sizeof sin);
- if (retval < 0) {
- if (errno == EINPROGRESS) {
- return new_ssl_vconn(name, fd, CLIENT, STATE_TCP_CONNECTING,
- &sin, vconnp);
- } else {
- int error = errno;
- VLOG_ERR("%s: connect: %s", name, strerror(error));
- close(fd);
- return error;
- }
+ error = tcp_open_active(suffix, OFP_SSL_PORT, &sin, &fd);
+ if (fd >= 0) {
+ int state = error ? STATE_TCP_CONNECTING : STATE_SSL_CONNECTING;
+ return new_ssl_vconn(name, fd, CLIENT, state, &sin, vconnp);
} else {
- return new_ssl_vconn(name, fd, CLIENT, STATE_SSL_CONNECTING,
- &sin, vconnp);
+ VLOG_ERR("%s: connect: %s", name, strerror(error));
+ return error;
}
}
static int
pssl_open(const char *name, char *suffix, struct pvconn **pvconnp)
{
- struct sockaddr_in sin;
struct pssl_pvconn *pssl;
int retval;
int fd;
- unsigned int yes = 1;
retval = ssl_init();
if (retval) {
return retval;
}
- /* Create socket. */
- fd = socket(AF_INET, SOCK_STREAM, 0);
+ fd = tcp_open_passive(suffix, OFP_SSL_PORT);
if (fd < 0) {
- int error = errno;
- VLOG_ERR("%s: socket: %s", name, strerror(error));
- return error;
- }
-
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) < 0) {
- int error = errno;
- VLOG_ERR("%s: setsockopt(SO_REUSEADDR): %s", name, strerror(errno));
- return error;
- }
-
- memset(&sin, 0, sizeof sin);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = htonl(INADDR_ANY);
- sin.sin_port = htons(atoi(suffix) ? atoi(suffix) : OFP_SSL_PORT);
- retval = bind(fd, (struct sockaddr *) &sin, sizeof sin);
- if (retval < 0) {
- int error = errno;
- VLOG_ERR("%s: bind: %s", name, strerror(error));
- close(fd);
- return error;
- }
-
- retval = listen(fd, 10);
- if (retval < 0) {
- int error = errno;
- VLOG_ERR("%s: listen: %s", name, strerror(error));
- close(fd);
- return error;
- }
-
- retval = set_nonblocking(fd);
- if (retval) {
- close(fd);
- return retval;
+ return -fd;
}
pssl = xmalloc(sizeof *pssl);
int
new_stream_vconn(const char *name, int fd, int connect_status,
- uint32_t ip, bool reconnectable, struct vconn **vconnp)
+ bool reconnectable, struct vconn **vconnp)
{
struct stream_vconn *s;
s = xmalloc(sizeof *s);
- vconn_init(&s->vconn, &stream_vconn_class, connect_status, ip, name,
- reconnectable);
+ vconn_init(&s->vconn, &stream_vconn_class, connect_status,
+ name, reconnectable);
s->fd = fd;
s->txbuf = NULL;
s->tx_waiter = NULL;
size_t sa_len, struct vconn **),
struct pvconn **pvconnp)
{
- struct pstream_pvconn *ps;
- int retval;
-
- retval = set_nonblocking(fd);
- if (retval) {
- close(fd);
- return retval;
- }
-
- if (listen(fd, 10) < 0) {
- int error = errno;
- VLOG_ERR("%s: listen: %s", name, strerror(error));
- close(fd);
- return error;
- }
-
- ps = xmalloc(sizeof *ps);
+ struct pstream_pvconn *ps = xmalloc(sizeof *ps);
pvconn_init(&ps->pvconn, &pstream_pvconn_class, name);
ps->fd = fd;
ps->accept_cb = accept_cb;
+ /* Wraps the TCP socket 'fd' in a new stream vconn named 'name', stored in
+  * '*vconnp'.  Sets TCP_NODELAY on 'fd', then records 'remote' as the peer
+  * address and the result of getsockname() as the local address (all zeros
+  * if getsockname() fails).  Returns new_stream_vconn()'s result, or an
+  * errno value if setsockopt() fails. */
static int
new_tcp_vconn(const char *name, int fd, int connect_status,
- const struct sockaddr_in *sin, struct vconn **vconnp)
+ const struct sockaddr_in *remote, struct vconn **vconnp)
{
+ struct sockaddr_in local;
+ socklen_t local_len = sizeof local;
int on = 1;
int retval;
+ /* Get the local IP and port information */
+ retval = getsockname(fd, (struct sockaddr *)&local, &local_len);
+ if (retval) {
+ memset(&local, 0, sizeof local);
+ }
+
retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on);
if (retval) {
+ /* NOTE(review): errno is re-read after VLOG_ERR(), which may have
+  * changed it, and 'fd' is not closed on this path -- confirm which
+  * side owns 'fd' when this function fails. */
VLOG_ERR("%s: setsockopt(TCP_NODELAY): %s", name, strerror(errno));
return errno;
}
- return new_stream_vconn(name, fd, connect_status, sin->sin_addr.s_addr,
- true, vconnp);
+ retval = new_stream_vconn(name, fd, connect_status, true, vconnp);
+ if (!retval) {
+ struct vconn *vconn = *vconnp;
+ vconn_set_remote_ip(vconn, remote->sin_addr.s_addr);
+ vconn_set_remote_port(vconn, remote->sin_port);
+ vconn_set_local_ip(vconn, local.sin_addr.s_addr);
+ vconn_set_local_port(vconn, local.sin_port);
+ }
+ return retval;
}
+ /* Opens an active TCP vconn.  'suffix' is handed to tcp_open_active(), which
+  * parses it as "<host>[:<port>]" with OFP_TCP_PORT as the default port.  If
+  * a socket is obtained, returns the result of new_tcp_vconn(); otherwise
+  * logs and returns the errno value reported by tcp_open_active(). */
static int
tcp_open(const char *name, char *suffix, struct vconn **vconnp)
{
- char *save_ptr;
- const char *host_name;
- const char *port_string;
struct sockaddr_in sin;
- int retval;
- int fd;
-
- host_name = strtok_r(suffix, ":", &save_ptr);
- port_string = strtok_r(NULL, ":", &save_ptr);
- if (!host_name) {
- ovs_error(0, "%s: bad peer name format", name);
- return EAFNOSUPPORT;
- }
-
- memset(&sin, 0, sizeof sin);
- sin.sin_family = AF_INET;
- if (lookup_ip(host_name, &sin.sin_addr)) {
- return ENOENT;
- }
- sin.sin_port = htons(port_string ? atoi(port_string) : OFP_TCP_PORT);
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- VLOG_ERR("%s: socket: %s", name, strerror(errno));
- return errno;
- }
-
- retval = set_nonblocking(fd);
- if (retval) {
- close(fd);
- return retval;
- }
+ int fd, error;
- retval = connect(fd, (struct sockaddr *) &sin, sizeof sin);
- if (retval < 0) {
- if (errno == EINPROGRESS) {
- return new_tcp_vconn(name, fd, EAGAIN, &sin, vconnp);
- } else {
- int error = errno;
- VLOG_ERR("%s: connect: %s", name, strerror(error));
- close(fd);
- return error;
- }
+ /* Request the resolved peer address: new_tcp_vconn() reads 'sin' to record
+  * the remote IP and port, so it must be filled in rather than left
+  * uninitialized. */
+ error = tcp_open_active(suffix, OFP_TCP_PORT, &sin, &fd);
+ if (fd >= 0) {
+ return new_tcp_vconn(name, fd, error, &sin, vconnp);
} else {
- return new_tcp_vconn(name, fd, 0, &sin, vconnp);
+ VLOG_ERR("%s: connect: %s", name, strerror(error));
+ return error;
}
}
struct vconn **vconnp);
static int
-ptcp_open(const char *name, char *suffix, struct pvconn **pvconnp)
+ptcp_open(const char *name UNUSED, char *suffix, struct pvconn **pvconnp)
{
- struct sockaddr_in sin;
- int retval;
int fd;
- unsigned int yes = 1;
- fd = socket(AF_INET, SOCK_STREAM, 0);
+ fd = tcp_open_passive(suffix, OFP_TCP_PORT);
if (fd < 0) {
- VLOG_ERR("%s: socket: %s", name, strerror(errno));
- return errno;
- }
-
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) < 0) {
- VLOG_ERR("%s: setsockopt(SO_REUSEADDR): %s", name, strerror(errno));
- return errno;
- }
-
- memset(&sin, 0, sizeof sin);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = htonl(INADDR_ANY);
- sin.sin_port = htons(atoi(suffix) ? atoi(suffix) : OFP_TCP_PORT);
- retval = bind(fd, (struct sockaddr *) &sin, sizeof sin);
- if (retval < 0) {
- int error = errno;
- VLOG_ERR("%s: bind: %s", name, strerror(error));
- close(fd);
- return error;
+ return -fd;
+ } else {
+ return new_pstream_pvconn("ptcp", fd, ptcp_accept, pvconnp);
}
-
- return new_pstream_pvconn("ptcp", fd, ptcp_accept, pvconnp);
}
static int
}
return new_stream_vconn(name, fd, check_connection_completion(fd),
- 0, true, vconnp);
+ true, vconnp);
}
struct vconn_class unix_vconn_class = {
static int
punix_open(const char *name UNUSED, char *suffix, struct pvconn **pvconnp)
{
- int fd;
+ int fd, error;
fd = make_unix_socket(SOCK_STREAM, true, true, suffix, NULL);
if (fd < 0) {
return errno;
}
+ error = set_nonblocking(fd);
+ if (error) {
+ close(fd);
+ return error;
+ }
+
+ if (listen(fd, 10) < 0) {
+ error = errno;
+ VLOG_ERR("%s: listen: %s", name, strerror(error));
+ close(fd);
+ return error;
+ }
+
return new_pstream_pvconn("punix", fd, punix_accept, pvconnp);
}
} else {
strcpy(name, "unix");
}
- return new_stream_vconn(name, fd, 0, 0, true, vconnp);
+ return new_stream_vconn(name, fd, 0, true, vconnp);
}
struct pvconn_class punix_pvconn_class = {
if (passive) {
printf("Passive OpenFlow connection methods:\n");
- printf(" ptcp:[PORT] "
- "listen to TCP PORT (default: %d)\n",
+ printf(" ptcp:[PORT][:IP] "
+ "listen to TCP PORT (default: %d) on IP\n",
OFP_TCP_PORT);
#ifdef HAVE_OPENSSL
- printf(" pssl:[PORT] "
- "listen for SSL on PORT (default: %d)\n",
+ printf(" pssl:[PORT][:IP] "
+ "listen for SSL on PORT (default: %d) on IP\n",
OFP_SSL_PORT);
#endif
printf(" punix:FILE "
/* Returns the IP address of the peer, or 0 if the peer is not connected over
* an IP-based protocol or if its IP address is not yet known. */
uint32_t
- vconn_get_ip(const struct vconn *vconn)
+ vconn_get_remote_ip(const struct vconn *vconn)
{
- return vconn->ip;
+ return vconn->remote_ip;
+ }
+
+ /* Returns the transport port of the peer, or 0 if the connection does not
+ * contain a port or if the port is not yet known. */
+ uint16_t
+ vconn_get_remote_port(const struct vconn *vconn)
+ {
+ return vconn->remote_port;
+ }
+
+ /* Returns the IP address used to connect to the peer, or 0 if the
+ * connection is not an IP-based protocol or if its IP address is not
+ * yet known. */
+ uint32_t
+ vconn_get_local_ip(const struct vconn *vconn)
+ {
+ return vconn->local_ip;
+ }
+
+ /* Returns the transport port used to connect to the peer, or 0 if the
+ * connection does not contain a port or if the port is not yet known. */
+ uint16_t
+ vconn_get_local_port(const struct vconn *vconn)
+ {
+ return vconn->local_port;
}
static void
if (retval != EAGAIN) {
vconn->state = VCS_DISCONNECTED;
- vconn->error = retval;
+ vconn->error = retval == EOF ? ECONNRESET : retval;
}
}
static int
do_recv(struct vconn *vconn, struct ofpbuf **msgp)
{
- int retval;
-
-again:
- retval = (vconn->class->recv)(vconn, msgp);
+ int retval = (vconn->class->recv)(vconn, msgp);
if (!retval) {
struct ofp_header *oh;
&& oh->type != OFPT_VENDOR)
{
if (vconn->version < 0) {
- if (oh->type == OFPT_PACKET_IN
- || oh->type == OFPT_FLOW_EXPIRED
- || oh->type == OFPT_PORT_STATUS) {
- /* The kernel datapath is stateless and doesn't really
- * support version negotiation, so it can end up sending
- * these asynchronous message before version negotiation
- * is complete. Just ignore them.
- *
- * (After we move OFPT_PORT_STATUS messages from the kernel
- * into secchan, we won't get those here, since secchan
- * does proper version negotiation.) */
- ofpbuf_delete(*msgp);
- goto again;
- }
VLOG_ERR_RL(&bad_ofmsg_rl,
"%s: received OpenFlow message type %"PRIu8" "
"before version negotiation complete",
void
vconn_init(struct vconn *vconn, struct vconn_class *class, int connect_status,
- uint32_t ip, const char *name, bool reconnectable)
+ const char *name, bool reconnectable)
{
vconn->class = class;
vconn->state = (connect_status == EAGAIN ? VCS_CONNECTING
vconn->error = connect_status;
vconn->version = -1;
vconn->min_version = -1;
- vconn->ip = ip;
+ vconn->remote_ip = 0;
+ vconn->remote_port = 0;
+ vconn->local_ip = 0;
+ vconn->local_port = 0;
vconn->name = xstrdup(name);
vconn->reconnectable = reconnectable;
}
+ /* Records 'ip' as the peer IP address of 'vconn'.  Callers visible in this
+  * patch pass a sockaddr_in 'sin_addr.s_addr' value unchanged, so the value
+  * is presumably kept in network byte order -- confirm. */
+ void
+ vconn_set_remote_ip(struct vconn *vconn, uint32_t ip)
+ {
+ vconn->remote_ip = ip;
+ }
+
+ /* Records 'port' as the peer transport port of 'vconn'.  Callers visible in
+  * this patch pass a sockaddr_in 'sin_port' value unchanged, so the value is
+  * presumably kept in network byte order -- confirm. */
+ void
+ vconn_set_remote_port(struct vconn *vconn, uint16_t port)
+ {
+ vconn->remote_port = port;
+ }
+
+ /* Records 'ip' as the local IP address of 'vconn'.  Callers visible in this
+  * patch pass a sockaddr_in 'sin_addr.s_addr' value unchanged, so the value
+  * is presumably kept in network byte order -- confirm. */
+ void
+ vconn_set_local_ip(struct vconn *vconn, uint32_t ip)
+ {
+ vconn->local_ip = ip;
+ }
+
+ /* Records 'port' as the local transport port of 'vconn'.  Callers visible in
+  * this patch pass a sockaddr_in 'sin_port' value unchanged, so the value is
+  * presumably kept in network byte order -- confirm. */
+ void
+ vconn_set_local_port(struct vconn *vconn, uint16_t port)
+ {
+ vconn->local_port = port;
+ }
+
void
pvconn_init(struct pvconn *pvconn, struct pvconn_class *class,
const char *name)
};
/* File descriptors for waking up when a child dies. */
- static int signal_fds[2];
-
- /* File descriptor for /dev/null. */
- static int null_fd = -1;
+ static int signal_fds[2] = {-1, -1};
static void send_child_status(struct rconn *, uint32_t xid, uint32_t status,
const void *data, size_t size);
/* Returns true if 'cmd' is allowed by 'acl', which is a command-separated
* access control list in the format described for --command-acl in
- * secchan(8). */
+ * ovs-openflowd(8). */
static bool
executer_is_permitted(const char *acl_, const char *cmd)
{
* subprocesses at once? Would also want to catch fatal signals and
* kill them at the same time though. */
fatal_signal_fork();
- dup2(null_fd, 0);
+ dup2(get_null_fd(), 0);
dup2(output_fds[1], 1);
- dup2(null_fd, 2);
+ dup2(get_null_fd(), 2);
max_fds = get_max_fds();
for (i = 3; i < max_fds; i++) {
close(i);
struct sigaction sa;
*executerp = NULL;
- if (null_fd == -1) {
+ if (signal_fds[0] == -1) {
+ /* Make sure we can get a fd for /dev/null. */
+ int null_fd = get_null_fd();
+ if (null_fd < 0) {
+ return -null_fd;
+ }
+
/* Create pipe for notifying us that SIGCHLD was invoked. */
if (pipe(signal_fds)) {
VLOG_ERR("pipe failed: %s", strerror(errno));
}
set_nonblocking(signal_fds[0]);
set_nonblocking(signal_fds[1]);
-
- /* Open /dev/null. */
- null_fd = open("/dev/null", O_RDWR);
- if (null_fd < 0) {
- int error = errno;
- VLOG_ERR("could not open /dev/null: %s", strerror(error));
- close(signal_fds[0]);
- close(signal_fds[1]);
- return error;
- }
}
/* Set up signal handler. */
#include <inttypes.h>
#include <net/if.h>
#include <string.h>
- #include "dpif.h"
+ #include <stdlib.h>
#include "flow.h"
#include "mac-learning.h"
#include "netdev.h"
#define IB_BASE_PRIORITY 18181800
enum {
- IBR_FROM_LOCAL_PORT, /* Sent by ofproto local port. */
- IBR_TO_LOCAL_PORT, /* Sent to ofproto local port. */
+ IBR_FROM_LOCAL_PORT, /* Sent by the local port. */
+ IBR_OFP_TO_LOCAL, /* Sent to secure channel on local port. */
+ IBR_ARP_FROM_LOCAL, /* ARP from the local port. */
IBR_ARP_FROM_CTL, /* ARP from the controller. */
IBR_TO_CTL_OFP_SRC, /* To controller, OpenFlow source port. */
IBR_TO_CTL_OFP_DST, /* To controller, OpenFlow dest port. */
struct in_band {
struct ofproto *ofproto;
- struct netdev *netdev;
struct rconn *controller;
struct status_category *ss_cat;
uint32_t last_ip; /* Last known IP, 0 if never known. */
uint8_t mac[ETH_ADDR_LEN]; /* Current MAC, 0 if unknown. */
uint8_t last_mac[ETH_ADDR_LEN]; /* Last known MAC, 0 if never known */
+ char *dev_name;
time_t next_refresh; /* Next time to refresh MAC address. */
/* Keeping track of the local port's MAC address. */
get_controller_mac(struct in_band *ib)
{
time_t now = time_now();
- uint32_t ip;
+ uint32_t controller_ip;
- ip = rconn_get_ip(ib->controller);
- if (ip != ib->ip || now >= ib->next_refresh) {
+ controller_ip = rconn_get_remote_ip(ib->controller);
+ if (controller_ip != ib->ip || now >= ib->next_refresh) {
bool have_mac;
- ib->ip = ip;
+ ib->ip = controller_ip;
/* Look up MAC address. */
memset(ib->mac, 0, sizeof ib->mac);
if (ib->ip) {
- int retval = netdev_arp_lookup(ib->netdev, ib->ip, ib->mac);
- if (retval) {
- VLOG_DBG_RL(&rl, "cannot look up controller hw address "
- "("IP_FMT"): %s",
- IP_ARGS(&ib->ip), strerror(retval));
+ uint32_t local_ip = rconn_get_local_ip(ib->controller);
+ struct in_addr in4;
+ int retval;
+
+ in4.s_addr = local_ip;
+ if (netdev_find_dev_by_in4(&in4, &ib->dev_name)) {
+ retval = netdev_nodev_arp_lookup(ib->dev_name, ib->ip,
+ ib->mac);
+ if (retval) {
+ VLOG_DBG_RL(&rl, "cannot look up controller MAC address "
+ "("IP_FMT"): %s",
+ IP_ARGS(&ib->ip), strerror(retval));
+ }
+ } else {
+ VLOG_DBG_RL(&rl, "cannot find device with IP address "IP_FMT,
+ IP_ARGS(&local_ip));
}
}
have_mac = !eth_addr_is_zero(ib->mac);
time_t now = time_now();
if (now >= ib->next_local_refresh) {
uint8_t ea[ETH_ADDR_LEN];
- if (!netdev_nodev_get_etheraddr(netdev_get_name(ib->netdev), ea)) {
+ if (ib->dev_name && (!netdev_nodev_get_etheraddr(ib->dev_name, ea))) {
memcpy(ib->local_mac, ea, ETH_ADDR_LEN);
}
ib->next_local_refresh = now + 1;
in_band_status_cb(struct status_reply *sr, void *in_band_)
{
struct in_band *in_band = in_band_;
- struct in_addr local_ip;
const uint8_t *local_mac;
- uint32_t controller_ip;
const uint8_t *controller_mac;
- if (netdev_get_in4(in_band->netdev, &local_ip)) {
- status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip.s_addr));
- }
local_mac = get_local_mac(in_band);
if (local_mac) {
status_reply_put(sr, "local-mac="ETH_ADDR_FMT,
ETH_ADDR_ARGS(local_mac));
}
- controller_ip = rconn_get_ip(in_band->controller);
- if (controller_ip) {
- status_reply_put(sr, "controller-ip="IP_FMT,
- IP_ARGS(&controller_ip));
- }
controller_mac = get_controller_mac(in_band);
if (controller_mac) {
status_reply_put(sr, "controller-mac="ETH_ADDR_FMT,
controller_mac = get_controller_mac(in_band);
local_mac = get_local_mac(in_band);
- /* Switch traffic sent from the local port. */
+ /* Switch traffic sent by the local port. */
memset(&flow, 0, sizeof flow);
flow.in_port = ODPP_LOCAL;
setup_flow(in_band, IBR_FROM_LOCAL_PORT, &flow, OFPFW_IN_PORT,
OFPP_NORMAL);
- /* Deliver traffic sent to the local port. */
if (local_mac) {
+ /* Deliver traffic sent to the connection's interface. */
memset(&flow, 0, sizeof flow);
memcpy(flow.dl_dst, local_mac, ETH_ADDR_LEN);
- setup_flow(in_band, IBR_TO_LOCAL_PORT, &flow, OFPFW_DL_DST,
- OFPP_NORMAL);
+ setup_flow(in_band, IBR_OFP_TO_LOCAL, &flow, OFPFW_DL_DST,
+ OFPP_NORMAL);
+
+ /* Allow the connection's interface to be the source of ARP traffic. */
+ memset(&flow, 0, sizeof flow);
+ flow.dl_type = htons(ETH_TYPE_ARP);
+ memcpy(flow.dl_src, local_mac, ETH_ADDR_LEN);
+ setup_flow(in_band, IBR_ARP_FROM_LOCAL, &flow,
+ OFPFW_DL_TYPE | OFPFW_DL_SRC, OFPP_NORMAL);
} else {
- drop_flow(in_band, IBR_TO_LOCAL_PORT);
+ drop_flow(in_band, IBR_OFP_TO_LOCAL);
+ drop_flow(in_band, IBR_ARP_FROM_LOCAL);
}
if (controller_mac) {
}
}
- int
- in_band_create(struct ofproto *ofproto,
- struct dpif *dpif, struct switch_status *ss,
+ void
+ in_band_create(struct ofproto *ofproto, struct switch_status *ss,
struct rconn *controller, struct in_band **in_bandp)
{
struct in_band *in_band;
- struct netdev *netdev;
- char local_name[IF_NAMESIZE];
- int error;
-
- *in_bandp = NULL;
- error = dpif_port_get_name(dpif, ODPP_LOCAL,
- local_name, sizeof local_name);
- if (error) {
- return error;
- }
-
- error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &netdev);
- if (error) {
- VLOG_ERR("failed to open %s network device: %s",
- local_name, strerror(error));
- return error;
- }
in_band = xcalloc(1, sizeof *in_band);
in_band->ofproto = ofproto;
- in_band->netdev = netdev;
in_band->controller = controller;
in_band->ss_cat = switch_status_register(ss, "in-band",
in_band_status_cb, in_band);
in_band->next_refresh = TIME_MIN;
in_band->next_local_refresh = TIME_MIN;
+ in_band->dev_name = NULL;
*in_bandp = in_band;
- return 0;
}
void
in_band_destroy(struct in_band *in_band)
{
if (in_band) {
- netdev_close(in_band->netdev);
switch_status_unregister(in_band->ss_cat);
/* We don't own the rconn. */
}
struct in_band;
struct ofproto;
struct rconn;
-struct secchan;
struct settings;
struct switch_status;
- int in_band_create(struct ofproto *, struct dpif *, struct switch_status *,
- struct rconn *controller, struct in_band **);
+ void in_band_create(struct ofproto *, struct switch_status *,
+ struct rconn *controller, struct in_band **);
void in_band_destroy(struct in_band *);
void in_band_run(struct in_band *);
void in_band_wait(struct in_band *);
return true;
}
- /* Rules with priority higher than UINT16_MAX are set up by secchan itself
+ /* Rules with priority higher than UINT16_MAX are set up by ofproto itself
* (e.g. by in-band control) and are intentionally hidden from the
* controller. */
if (rule->cr.priority > UINT16_MAX) {
char *serial; /* Serial number. */
/* Datapath. */
- struct dpif dpif;
- struct dpifmon *dpifmon;
+ struct dpif *dpif;
+ struct netdev_monitor *netdev_monitor;
struct port_array ports; /* Index is ODP port nr; ofport->opp.port_no is
* OFP port nr. */
struct shash port_by_name;
ofproto_create(const char *datapath, const struct ofhooks *ofhooks, void *aux,
struct ofproto **ofprotop)
{
- struct dpifmon *dpifmon;
+ struct netdev_monitor *netdev_monitor;
struct odp_stats stats;
struct ofproto *p;
- struct dpif dpif;
+ struct dpif *dpif;
int error;
*ofprotop = NULL;
VLOG_ERR("failed to open datapath %s: %s", datapath, strerror(error));
return error;
}
- error = dpif_get_dp_stats(&dpif, &stats);
+ error = dpif_get_dp_stats(dpif, &stats);
if (error) {
VLOG_ERR("failed to obtain stats for datapath %s: %s",
datapath, strerror(error));
- dpif_close(&dpif);
+ dpif_close(dpif);
return error;
}
- error = dpif_set_listen_mask(&dpif, ODPL_MISS | ODPL_ACTION);
+ error = dpif_recv_set_mask(dpif, ODPL_MISS | ODPL_ACTION);
if (error) {
VLOG_ERR("failed to listen on datapath %s: %s",
datapath, strerror(error));
- dpif_close(&dpif);
+ dpif_close(dpif);
return error;
}
- dpif_flow_flush(&dpif);
- dpif_purge(&dpif);
+ dpif_flow_flush(dpif);
+ dpif_recv_purge(dpif);
- /* Start monitoring datapath ports for status changes. */
- error = dpifmon_create(datapath, &dpifmon);
+ /* Arrange to monitor datapath ports for status changes. */
+ error = netdev_monitor_create(&netdev_monitor);
if (error) {
VLOG_ERR("failed to starting monitoring datapath %s: %s",
datapath, strerror(error));
- dpif_close(&dpif);
+ dpif_close(dpif);
return error;
}
/* Initialize settings. */
p = xcalloc(1, sizeof *p);
p->fallback_dpid = pick_fallback_dpid();
- p->datapath_id = pick_datapath_id(&dpif, p->fallback_dpid);
+ p->datapath_id = pick_datapath_id(dpif, p->fallback_dpid);
VLOG_INFO("using datapath ID %012"PRIx64, p->datapath_id);
p->manufacturer = xstrdup("Nicira Networks, Inc.");
p->hardware = xstrdup("Reference Implementation");
/* Initialize datapath. */
p->dpif = dpif;
- p->dpifmon = dpifmon;
+ p->netdev_monitor = netdev_monitor;
port_array_init(&p->ports);
shash_init(&p->port_by_name);
p->max_ports = stats.max_ports;
uint64_t old_dpid = p->datapath_id;
p->datapath_id = (datapath_id
? datapath_id
- : pick_datapath_id(&p->dpif, p->fallback_dpid));
+ : pick_datapath_id(p->dpif, p->fallback_dpid));
if (p->datapath_id != old_dpid) {
VLOG_INFO("datapath ID changed to %012"PRIx64, p->datapath_id);
rconn_reconnect(p->controller->rconn);
{
if (in_band != (p->in_band != NULL)) {
if (in_band) {
- return in_band_create(p, p->dpif, p->switch_status,
- p->controller->rconn, &p->in_band);
+ in_band_create(p, p->switch_status, p->controller->rconn,
+ &p->in_band);
+ return 0;
} else {
ofproto_set_discovery(p, false, NULL, true);
in_band_destroy(p->in_band);
return error;
}
error = discovery_create(re, update_resolv_conf,
- &p->dpif, p->switch_status,
+ p->dpif, p->switch_status,
&p->discovery);
if (error) {
return error;
return ofproto->datapath_id;
}
+ /* Returns the value of the 'mgmt_id' member of 'ofproto'. */
+ uint64_t
+ ofproto_get_mgmt_id(const struct ofproto *ofproto)
+ {
+ return ofproto->mgmt_id;
+ }
+
int
ofproto_get_probe_interval(const struct ofproto *ofproto)
{
ofconn_destroy(ofconn, p);
}
- dpif_close(&p->dpif);
- dpifmon_destroy(p->dpifmon);
+ dpif_close(p->dpif);
+ netdev_monitor_destroy(p->netdev_monitor);
PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) {
ofport_free(ofport);
}
return error;
}
+/* Handles one result from a port-change poll on 'ofproto'.  ENOBUFS triggers
+ * reinit_ports(); success (0) calls update_port() for 'devname' and then
+ * frees 'devname'.  Any other 'error' value is ignored and 'devname' is left
+ * untouched. */
+static void
+process_port_change(struct ofproto *ofproto, int error, char *devname)
+{
+    switch (error) {
+    case ENOBUFS:
+        reinit_ports(ofproto);
+        break;
+
+    case 0:
+        update_port(ofproto, devname);
+        free(devname);
+        break;
+
+    default:
+        break;
+    }
+}
+
int
ofproto_run1(struct ofproto *p)
{
struct ofpbuf *buf;
int error;
- error = dpif_recv(&p->dpif, &buf);
+ error = dpif_recv(p->dpif, &buf);
if (error) {
if (error == ENODEV) {
/* Someone destroyed the datapath behind our back. The caller
* better destroy us and give up, because we're just going to
* spin from here on out. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- VLOG_ERR_RL(&rl, "dp%u: datapath was destroyed externally",
- dpif_id(&p->dpif));
+ VLOG_ERR_RL(&rl, "%s: datapath was destroyed externally",
+ dpif_name(p->dpif));
return ENODEV;
}
break;
handle_odp_msg(p, buf);
}
- while ((error = dpifmon_poll(p->dpifmon, &devname)) != EAGAIN) {
- if (error == ENOBUFS) {
- reinit_ports(p);
- } else if (!error) {
- update_port(p, devname);
- free(devname);
- }
+ while ((error = dpif_port_poll(p->dpif, &devname)) != EAGAIN) {
+ process_port_change(p, error, devname);
+ }
+ while ((error = netdev_monitor_poll(p->netdev_monitor,
+ &devname)) != EAGAIN) {
+ process_port_change(p, error, devname);
}
if (p->in_band) {
struct ofconn *ofconn;
size_t i;
- dpif_recv_wait(&p->dpif);
- dpifmon_wait(p->dpifmon);
+ dpif_recv_wait(p->dpif);
+ dpif_port_poll_wait(p->dpif);
+ netdev_monitor_poll_wait(p->netdev_monitor);
LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
ofconn_wait(ofconn);
}
/* XXX Should we translate the dpif_execute() errno value into an OpenFlow
* error code? */
- dpif_execute(&p->dpif, flow->in_port, odp_actions.actions,
+ dpif_execute(p->dpif, flow->in_port, odp_actions.actions,
odp_actions.n_actions, packet);
return 0;
}
{
COVERAGE_INC(ofproto_flush);
classifier_for_each(&ofproto->cls, CLS_INC_ALL, destroy_rule, ofproto);
- dpif_flow_flush(&ofproto->dpif);
+ dpif_flow_flush(ofproto->dpif);
if (ofproto->in_band) {
in_band_flushed(ofproto->in_band);
}
PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) {
svec_add (&devnames, (char *) ofport->opp.name);
}
- dpif_port_list(&p->dpif, &odp_ports, &n_odp_ports);
+ dpif_port_list(p->dpif, &odp_ports, &n_odp_ports);
for (i = 0; i < n_odp_ports; i++) {
svec_add (&devnames, odp_ports[i].devname);
}
ports[n_ports++] = port_no;
}
}
- dpif_port_group_set(&p->dpif, group, ports, n_ports);
+ dpif_port_group_set(p->dpif, group, ports, n_ports);
free(ports);
}
static void
ofport_install(struct ofproto *p, struct ofport *ofport)
{
+ netdev_monitor_add(p->netdev_monitor, ofport->netdev);
port_array_set(&p->ports, ofp_port_to_odp_port(ofport->opp.port_no),
ofport);
shash_add(&p->port_by_name, (char *) ofport->opp.name, ofport);
static void
ofport_remove(struct ofproto *p, struct ofport *ofport)
{
+ netdev_monitor_remove(p->netdev_monitor, ofport->netdev);
port_array_set(&p->ports, ofp_port_to_odp_port(ofport->opp.port_no), NULL);
shash_delete(&p->port_by_name,
shash_find(&p->port_by_name, (char *) ofport->opp.name));
COVERAGE_INC(ofproto_update_port);
ofport = shash_find_data(&p->port_by_name, devname);
- error = dpif_port_query_by_name(&p->dpif, devname, &odp_port);
+ error = dpif_port_query_by_name(p->dpif, devname, &odp_port);
if (!error) {
if (!ofport) {
/* New port. */
size_t i;
int error;
- error = dpif_port_list(&p->dpif, &ports, &n_ports);
+ error = dpif_port_list(p->dpif, &ports, &n_ports);
if (error) {
return error;
}
}
/* Execute the ODP actions. */
- if (!dpif_execute(&ofproto->dpif, flow->in_port,
+ if (!dpif_execute(ofproto->dpif, flow->in_port,
actions, n_actions, packet)) {
struct odp_flow_stats stats;
flow_extract_stats(flow, packet, &stats);
put->flow.actions = rule->odp_actions;
put->flow.n_actions = rule->n_odp_actions;
put->flags = flags;
- return dpif_flow_put(&ofproto->dpif, put);
+ return dpif_flow_put(ofproto->dpif, put);
}
static void
odp_flow.key = rule->cr.flow;
odp_flow.actions = NULL;
odp_flow.n_actions = 0;
- if (!dpif_flow_del(&p->dpif, &odp_flow)) {
+ if (!dpif_flow_del(p->dpif, &odp_flow)) {
update_stats(rule, &odp_flow.stats);
}
rule->installed = false;
bool drop_frags;
/* Figure out flags. */
- dpif_get_drop_frags(&p->dpif, &drop_frags);
+ dpif_get_drop_frags(p->dpif, &drop_frags);
flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL;
if (ofconn->send_flow_exp) {
flags |= OFPC_SEND_FLOW_EXP;
if (ofconn == p->controller) {
switch (flags & OFPC_FRAG_MASK) {
case OFPC_FRAG_NORMAL:
- dpif_set_drop_frags(&p->dpif, false);
+ dpif_set_drop_frags(p->dpif, false);
break;
case OFPC_FRAG_DROP:
- dpif_set_drop_frags(&p->dpif, true);
+ dpif_set_drop_frags(p->dpif, true);
break;
default:
VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")",
return error;
}
- dpif_execute(&p->dpif, flow.in_port, actions.actions, actions.n_actions,
+ dpif_execute(p->dpif, flow.in_port, actions.actions, actions.n_actions,
&payload);
ofpbuf_delete(buffer);
n_wild = classifier_count(&p->cls) - classifier_count_exact(&p->cls);
/* Hash table. */
- dpif_get_dp_stats(&p->dpif, &dpstats);
+ dpif_get_dp_stats(p->dpif, &dpstats);
ots = append_stats_reply(sizeof *ots, ofconn, &msg);
memset(ots, 0, sizeof *ots);
ots->table_id = TABLEID_HASH;
packet_count = rule->packet_count;
byte_count = rule->byte_count;
- if (!dpif_flow_get_multiple(&p->dpif, odp_flows, n_odp_flows)) {
+ if (!dpif_flow_get_multiple(p->dpif, odp_flows, n_odp_flows)) {
size_t i;
for (i = 0; i < n_odp_flows; i++) {
struct odp_flow *odp_flow = &odp_flows[i];
size_t i;
int error;
- error = dpif_flow_list_all(&p->dpif, &flows, &n_flows);
+ error = dpif_flow_list_all(p->dpif, &flows, &n_flows);
if (error) {
return;
}
classifier_find_rule_exactly(&p->cls, &f->key, 0, UINT16_MAX));
if (!rule || !rule->installed) {
COVERAGE_INC(ofproto_unexpected_rule);
- dpif_flow_del(&p->dpif, f);
+ dpif_flow_del(p->dpif, f);
continue;
}
uint8_t ea[ETH_ADDR_LEN];
int error;
- error = dpif_get_name(dpif, local_name, sizeof local_name);
+ error = dpif_port_get_name(dpif, ODPP_LOCAL,
+ local_name, sizeof local_name);
if (!error) {
error = netdev_nodev_get_etheraddr(local_name, ea);
if (!error) {
/* Configuration querying. */
uint64_t ofproto_get_datapath_id(const struct ofproto *);
+ uint64_t ofproto_get_mgmt_id(const struct ofproto *);
int ofproto_get_probe_interval(const struct ofproto *);
int ofproto_get_max_backoff(const struct ofproto *);
bool ofproto_get_in_band(const struct ofproto *);
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
+ #include <inttypes.h>
#include <stdlib.h>
#include <unistd.h>
#include "dynamic-string.h"
#include "ofpbuf.h"
#include "ofproto.h"
#include "openflow/nicira-ext.h"
+ #include "packets.h"
#include "rconn.h"
#include "svec.h"
#include "timeval.h"
{
struct rconn *rconn = rconn_;
time_t now = time_now();
+ uint32_t remote_ip = rconn_get_remote_ip(rconn);
+ uint32_t local_ip = rconn_get_local_ip(rconn);
status_reply_put(sr, "name=%s", rconn_get_name(rconn));
+ if (remote_ip) {
+ status_reply_put(sr, "remote-ip="IP_FMT, IP_ARGS(&remote_ip));
+ status_reply_put(sr, "remote-port=%d",
+ ntohs(rconn_get_remote_port(rconn)));
+ status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip));
+ status_reply_put(sr, "local-port=%d",
+ ntohs(rconn_get_local_port(rconn)));
+ }
status_reply_put(sr, "state=%s", rconn_get_state(rconn));
status_reply_put(sr, "backoff=%d", rconn_get_backoff(rconn));
status_reply_put(sr, "probe-interval=%d", rconn_get_probe_interval(rconn));
config_status_cb(struct status_reply *sr, void *ofproto_)
{
const struct ofproto *ofproto = ofproto_;
+ uint64_t datapath_id, mgmt_id;
struct svec listeners;
int probe_interval, max_backoff;
size_t i;
+ datapath_id = ofproto_get_datapath_id(ofproto);
+ if (datapath_id) {
+ status_reply_put(sr, "datapath-id=%"PRIx64, datapath_id);
+ }
+
+ mgmt_id = ofproto_get_mgmt_id(ofproto);
+ if (mgmt_id) {
+ status_reply_put(sr, "mgmt-id=%"PRIx64, mgmt_id);
+ }
+
svec_init(&listeners);
ofproto_get_listeners(ofproto, &listeners);
for (i = 0; i < listeners.n; i++) {
-.TH ovs\-ofctl 8 "March 2009" "Open vSwitch" "Open vSwitch Manual"
+.TH ovs\-ofctl 8 "June 2009" "Open vSwitch" "Open vSwitch Manual"
.ds PN ovs\-ofctl
.SH NAME
\fBmonitor \fIswitch\fR [\fImiss-len\fR [\fIsend-exp]]
Connects to \fIswitch\fR and prints to the console all OpenFlow
messages received. Usually, \fIswitch\fR should specify a connection
-named on \fBsecchan\fR(8)'s \fB-l\fR or \fB--listen\fR command line
+named on \fBovs\-openflowd\fR(8)'s \fB-l\fR or \fB--listen\fR command line
option.
If \fImiss-len\fR is provided, \fBovs\-ofctl\fR sends an OpenFlow ``set
configuration'' message at connection setup time that requests
\fImiss-len\fR bytes of each packet that misses the flow table. The
- OpenFlow reference implementation not send these messages to the
+ OpenFlow reference implementation does not send these messages to the
\fBovs\-ofctl monitor\fR client connection unless a nonzero value is
specified on this argument.
displayed by \fBovs\-ofctl show\fR.
.IP \fBdl_vlan=\fIvlan\fR
-Matches IEEE 802.1q virtual LAN tag \fIvlan\fR. Specify \fB0xffff\fR
-as \fIvlan\fR to match packets that are not tagged with a virtual LAN;
+Matches IEEE 802.1q Virtual LAN tag \fIvlan\fR. Specify \fB0xffff\fR
+as \fIvlan\fR to match packets that are not tagged with a Virtual LAN;
otherwise, specify a number between 0 and 4095, inclusive, as the
12-bit VLAN ID to match.
.IP \fBlocal\fR
Outputs the packet on the ``local port,'' which corresponds to the
\fBof\fIn\fR network device (see \fBCONTACTING THE CONTROLLER\fR in
-\fBsecchan\fR(8) for information on the \fBof\fIn\fR network device).
+\fBovs\-openflowd\fR(8) for information on the \fBof\fIn\fR network device).
.IP \fBdrop\fR
Discards the packet, so no further processing or forwarding takes place.
host has been configured to listen for management connections on a
Unix domain socket named \fB@RUNDIR@/openflow.sock\fR, e.g. by
specifying \fB--listen=punix:@RUNDIR@/openflow.sock\fR on the
-\fBsecchan\fR(8) command line.
+\fBovs\-openflowd\fR(8) command line.
.TP
\fBovs\-ofctl dump-tables unix:@RUNDIR@/openflow.sock\fR
static void
open_vconn(const char *name, struct vconn **vconnp)
{
- struct dpif dpif;
+ struct dpif *dpif;
struct stat s;
if (strstr(name, ":")) {
char *socket_name;
char *vconn_name;
- run(dpif_get_name(&dpif, dpif_name, sizeof dpif_name),
+ run(dpif_port_get_name(dpif, ODPP_LOCAL, dpif_name, sizeof dpif_name),
"obtaining name of %s", dpif_name);
- dpif_close(&dpif);
+ dpif_close(dpif);
if (strcmp(dpif_name, name)) {
VLOG_INFO("datapath %s is named %s", name, dpif_name);
}
+ /* Parses 'str' as an Ethernet address with sscanf() and ETH_ADDR_SCAN_FMT,
+ * storing the result in 'mac'.  Calls ovs_fatal() with "invalid mac address"
+ * unless exactly ETH_ADDR_SCAN_COUNT fields are converted. */
static void
str_to_mac(const char *str, uint8_t mac[6])
{
- if (sscanf(str, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8,
- &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) != 6) {
+ if (sscanf(str, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+ != ETH_ADDR_SCAN_COUNT) {
ovs_fatal(0, "invalid mac address %s", str);
}
}
* packet to the controller. */
if (arg && (strspn(act, "0123456789") == strlen(act))) {
oao->max_len = htons(str_to_u32(arg));
+ } else {
+ oao->max_len = htons(UINT16_MAX);
}
} else if (parse_port_name(act, &port)) {
put_output_action(b, port);
#include "odp-util.h"
#include "ofp-print.h"
#include "ofpbuf.h"
+#include "ofproto/ofproto.h"
#include "packets.h"
#include "poll-loop.h"
#include "port-array.h"
#include "proc-net-compat.h"
#include "process.h"
-#include "secchan/ofproto.h"
#include "socket-util.h"
#include "stp.h"
#include "svec.h"
struct ofproto *ofproto; /* OpenFlow switch. */
/* Kernel datapath information. */
- struct dpif dpif; /* Kernel datapath. */
+ struct dpif *dpif; /* Datapath. */
struct port_array ifaces; /* Indexed by kernel datapath port number. */
/* Bridge ports. */
const char *devname);
static uint64_t dpid_from_hash(const void *, size_t nbytes);
+ static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args);
+
static void bond_init(void);
static void bond_run(struct bridge *);
static void bond_wait(struct bridge *);
for (j = 0; j < port->n_ifaces; j++) {
struct iface *iface = port->ifaces[j];
if (iface->dp_ifidx < 0) {
- VLOG_ERR("%s interface not in dp%u, ignoring",
- iface->name, dpif_id(&br->dpif));
+ VLOG_ERR("%s interface not in datapath %s, ignoring",
+ iface->name, dpif_name(br->dpif));
} else {
if (iface->dp_ifidx != ODPP_LOCAL) {
svec_add(svec, iface->name);
void
bridge_init(void)
{
- int retval;
- int i;
-
- bond_init();
+ struct svec dpif_names;
+ size_t i;
- for (i = 0; i < DP_MAX; i++) {
- struct dpif dpif;
- char devname[16];
+ unixctl_command_register("fdb/show", bridge_unixctl_fdb_show);
+
+ dp_enumerate(&dpif_names);
+ for (i = 0; i < dpif_names.n; i++) {
+ const char *dpif_name = dpif_names.names[i];
+ struct dpif *dpif;
+ int retval;
- sprintf(devname, "dp%d", i);
- retval = dpif_open(devname, &dpif);
+ retval = dpif_open(dpif_name, &dpif);
if (!retval) {
- char dpif_name[IF_NAMESIZE];
- if (dpif_get_name(&dpif, dpif_name, sizeof dpif_name)
- || !cfg_has("bridge.%s.port", dpif_name)) {
- dpif_delete(&dpif);
+ struct svec all_names;
+ size_t j;
+
+ svec_init(&all_names);
+ dpif_get_all_names(dpif, &all_names);
+ for (j = 0; j < all_names.n; j++) {
+ if (cfg_has("bridge.%s.port", all_names.names[j])) {
+ goto found;
+ }
}
- dpif_close(&dpif);
- } else if (retval != ENODEV) {
- VLOG_ERR("failed to delete datapath dp%d: %s",
- i, strerror(retval));
+ dpif_delete(dpif);
+ found:
+ svec_destroy(&all_names);
+ dpif_close(dpif);
}
}
+ bond_init();
bridge_reconfigure();
}
* the old certificate will still be trusted until vSwitch is
* restarted. We may want to address this in vconn's SSL library. */
if (config_string_change("ssl.ca-cert", &cacert_file)
- || (stat(cacert_file, &s) && errno == ENOENT)) {
+ || (cacert_file && stat(cacert_file, &s) && errno == ENOENT)) {
vconn_ssl_set_ca_cert_file(cacert_file,
cfg_get_bool(0, "ssl.bootstrap-ca-cert"));
}
void
bridge_reconfigure(void)
{
- struct svec old_br, new_br, raw_new_br;
+ struct svec old_br, new_br;
struct bridge *br, *next;
size_t i, j;
COVERAGE_INC(bridge_reconfigure);
- /* Collect old bridges. */
+ /* Collect old and new bridges. */
svec_init(&old_br);
+ svec_init(&new_br);
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
svec_add(&old_br, br->name);
}
-
- /* Collect new bridges. */
- svec_init(&raw_new_br);
- cfg_get_subsections(&raw_new_br, "bridge");
- svec_init(&new_br);
- for (i = 0; i < raw_new_br.n; i++) {
- const char *name = raw_new_br.names[i];
- if ((!strncmp(name, "dp", 2) && isdigit(name[2])) ||
- (!strncmp(name, "nl:", 3) && isdigit(name[3]))) {
- VLOG_ERR("%s is not a valid bridge name (bridges may not be "
- "named \"dp\" or \"nl:\" followed by a digit)", name);
- } else {
- svec_add(&new_br, name);
- }
- }
- svec_destroy(&raw_new_br);
+ cfg_get_subsections(&new_br, "bridge");
/* Get rid of deleted bridges and add new bridges. */
svec_sort(&old_br);
size_t n_dpif_ports;
struct svec want_ifaces;
- dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+ dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
bridge_get_all_ifaces(br, &want_ifaces);
for (i = 0; i < n_dpif_ports; i++) {
const struct odp_port *p = &dpif_ports[i];
if (!svec_contains(&want_ifaces, p->devname)
&& strcmp(p->devname, br->name)) {
- int retval = dpif_port_del(&br->dpif, p->port);
+ int retval = dpif_port_del(br->dpif, p->port);
if (retval) {
- VLOG_ERR("failed to remove %s interface from dp%u: %s",
- p->devname, dpif_id(&br->dpif), strerror(retval));
+ VLOG_ERR("failed to remove %s interface from %s: %s",
+ p->devname, dpif_name(br->dpif),
+ strerror(retval));
}
}
}
struct odp_port *dpif_ports;
size_t n_dpif_ports;
struct svec cur_ifaces, want_ifaces, add_ifaces;
- int next_port_no;
- dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+ dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
svec_init(&cur_ifaces);
for (i = 0; i < n_dpif_ports; i++) {
svec_add(&cur_ifaces, dpif_ports[i].devname);
bridge_get_all_ifaces(br, &want_ifaces);
svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL);
- next_port_no = 1;
for (i = 0; i < add_ifaces.n; i++) {
const char *if_name = add_ifaces.names[i];
- for (;;) {
- int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
- int error = dpif_port_add(&br->dpif, if_name, next_port_no++,
- internal ? ODP_PORT_INTERNAL : 0);
- if (error != EEXIST) {
- if (next_port_no >= 256) {
- VLOG_ERR("ran out of valid port numbers on dp%u",
- dpif_id(&br->dpif));
- goto out;
- }
- if (error) {
- VLOG_ERR("failed to add %s interface to dp%u: %s",
- if_name, dpif_id(&br->dpif), strerror(error));
- }
- break;
- }
+ int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
+ int flags = internal ? ODP_PORT_INTERNAL : 0;
+ int error = dpif_port_add(br->dpif, if_name, flags, NULL);
+ if (error == EXFULL) {
+ VLOG_ERR("ran out of valid port numbers on %s",
+ dpif_name(br->dpif));
+ break;
+ } else if (error) {
+ VLOG_ERR("failed to add %s interface to %s: %s",
+ if_name, dpif_name(br->dpif), strerror(error));
}
}
- out:
svec_destroy(&cur_ifaces);
svec_destroy(&want_ifaces);
svec_destroy(&add_ifaces);
uint64_t dpid;
struct iface *local_iface = NULL;
const char *devname;
- uint8_t engine_type = br->dpif.minor;
- uint8_t engine_id = br->dpif.minor;
+ uint8_t engine_type, engine_id;
bool add_id_to_iface = false;
struct svec nf_hosts;
for (j = 0; j < port->n_ifaces; ) {
struct iface *iface = port->ifaces[j];
if (iface->dp_ifidx < 0) {
- VLOG_ERR("%s interface not in dp%u, dropping",
- iface->name, dpif_id(&br->dpif));
+ VLOG_ERR("%s interface not in %s, dropping",
+ iface->name, dpif_name(br->dpif));
iface_destroy(iface);
} else {
if (iface->dp_ifidx == ODPP_LOCAL) {
local_iface = iface;
}
- VLOG_DBG("dp%u has interface %s on port %d",
- dpif_id(&br->dpif), iface->name, iface->dp_ifidx);
+ VLOG_DBG("%s has interface %s on port %d",
+ dpif_name(br->dpif),
+ iface->name, iface->dp_ifidx);
j++;
}
}
ofproto_set_datapath_id(br->ofproto, dpid);
/* Set NetFlow configuration on this bridge. */
+ dpif_get_netflow_ids(br->dpif, &engine_type, &engine_id);
if (cfg_has("netflow.%s.engine-type", br->name)) {
engine_type = cfg_get_int(0, "netflow.%s.engine-type",
br->name);
}
}
\f
+ /* Bridge unixctl user interface functions. */
+ /* Handler for the "fdb/show" unixctl command.
+ *
+ * 'args' is passed to bridge_lookup() as the bridge name.  If no bridge is
+ * found, replies on 'conn' with code 501 and "no such bridge".  Otherwise
+ * replies with code 200 and a table: a header line followed by one line per
+ * entry on the bridge's 'ml->lrus' list giving port, VLAN, MAC address, and
+ * mac_entry_age().  If the bridge has no 'ml', only the header is sent. */
+ static void
+ bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
+ {
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ const struct bridge *br;
+
+ br = bridge_lookup(args);
+ if (!br) {
+ unixctl_command_reply(conn, 501, "no such bridge");
+ return;
+ }
+
+ ds_put_cstr(&ds, " port VLAN MAC Age\n");
+ if (br->ml) {
+ const struct mac_entry *e;
+ LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+ ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n",
+ e->port, e->vlan, ETH_ADDR_ARGS(e->mac),
+ mac_entry_age(e));
+ }
+ }
+ unixctl_command_reply(conn, 200, ds_cstr(&ds));
+ ds_destroy(&ds);
+ }
+ \f
/* Bridge reconfiguration functions. */
static struct bridge *
br = xcalloc(1, sizeof *br);
error = dpif_create(name, &br->dpif);
- if (error == EEXIST) {
+ if (error == EEXIST || error == EBUSY) {
error = dpif_open(name, &br->dpif);
if (error) {
VLOG_ERR("datapath %s already exists but cannot be opened: %s",
free(br);
return NULL;
}
- dpif_flow_flush(&br->dpif);
+ dpif_flow_flush(br->dpif);
} else if (error) {
VLOG_ERR("failed to create datapath %s: %s", name, strerror(error));
free(br);
error = ofproto_create(name, &bridge_ofhooks, br, &br->ofproto);
if (error) {
VLOG_ERR("failed to create switch %s: %s", name, strerror(error));
- dpif_delete(&br->dpif);
- dpif_close(&br->dpif);
+ dpif_delete(br->dpif);
+ dpif_close(br->dpif);
free(br);
return NULL;
}
list_push_back(&all_bridges, &br->node);
- VLOG_INFO("created bridge %s on dp%u", br->name, dpif_id(&br->dpif));
+ VLOG_INFO("created bridge %s on %s", br->name, dpif_name(br->dpif));
return br;
}
port_destroy(br->ports[br->n_ports - 1]);
}
list_remove(&br->node);
- error = dpif_delete(&br->dpif);
+ error = dpif_delete(br->dpif);
if (error && error != ENOENT) {
- VLOG_ERR("failed to delete dp%u: %s",
- dpif_id(&br->dpif), strerror(error));
+ VLOG_ERR("failed to delete %s: %s",
+ dpif_name(br->dpif), strerror(error));
}
- dpif_close(&br->dpif);
+ dpif_close(br->dpif);
ofproto_destroy(br->ofproto);
free(br->controller);
mac_learning_destroy(br->ml);
svec_init(&new_ports);
cfg_get_all_keys(&new_ports, "bridge.%s.port", br->name);
svec_sort(&new_ports);
- if (bridge_get_controller(br) && !svec_contains(&new_ports, br->name)) {
- svec_add(&new_ports, br->name);
- svec_sort(&new_ports);
+ if (bridge_get_controller(br)) {
+ char local_name[IF_NAMESIZE];
+ int error;
+
+ error = dpif_port_get_name(br->dpif, ODPP_LOCAL,
+ local_name, sizeof local_name);
+ if (!error && !svec_contains(&new_ports, local_name)) {
+ svec_add(&new_ports, local_name);
+ svec_sort(&new_ports);
+ }
}
if (!svec_is_unique(&new_ports)) {
VLOG_WARN("bridge %s: %s specified twice as bridge port",
int rate_limit, burst_limit;
if (!strcmp(controller, "discover")) {
+ bool update_resolv_conf = true;
+
+ if (cfg_has("%s.update-resolv.conf", pfx)) {
+ update_resolv_conf = cfg_get_bool(0, "%s.update-resolv.conf",
+ pfx);
+ }
ofproto_set_discovery(br->ofproto, true,
cfg_get_string(0, "%s.accept-regex", pfx),
- cfg_get_bool(0, "%s.update-resolv.conf",
- pfx));
+ update_resolv_conf);
} else {
+ char local_name[IF_NAMESIZE];
struct netdev *netdev;
bool in_band;
int error;
ofproto_set_discovery(br->ofproto, false, NULL, NULL);
ofproto_set_in_band(br->ofproto, in_band);
- error = netdev_open(br->name, NETDEV_ETH_TYPE_NONE, &netdev);
+ error = dpif_port_get_name(br->dpif, ODPP_LOCAL,
+ local_name, sizeof local_name);
+ if (!error) {
+ error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &netdev);
+ }
if (!error) {
if (cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) {
struct in_addr ip, mask, gateway;
}
port_array_clear(&br->ifaces);
- dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+ dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
for (i = 0; i < n_dpif_ports; i++) {
struct odp_port *p = &dpif_ports[i];
struct iface *iface = iface_lookup(br, p->devname);
if (iface) {
if (iface->dp_ifidx >= 0) {
- VLOG_WARN("dp%u reported interface %s twice",
- dpif_id(&br->dpif), p->devname);
+ VLOG_WARN("%s reported interface %s twice",
+ dpif_name(br->dpif), p->devname);
} else if (iface_from_dp_ifidx(br, p->port)) {
- VLOG_WARN("dp%u reported interface %"PRIu16" twice",
- dpif_id(&br->dpif), p->port);
+ VLOG_WARN("%s reported interface %"PRIu16" twice",
+ dpif_name(br->dpif), p->port);
} else {
port_array_set(&br->ifaces, p->port, iface);
iface->dp_ifidx = p->port;
iface->delay_expires = LLONG_MAX;
VLOG_INFO_RL(&rl, "interface %s: will not be %s",
iface->name, carrier ? "disabled" : "enabled");
+ } else if (carrier && port->updelay && port->active_iface < 0) {
+ iface->delay_expires = time_msec();
+ VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
+ "other interface is up", iface->name, port->updelay);
} else {
int delay = carrier ? port->updelay : port->downdelay;
iface->delay_expires = time_msec() + delay;
iface->enabled = enable;
if (!iface->enabled) {
- VLOG_WARN("interface %s: enabled", iface->name);
+ VLOG_WARN("interface %s: disabled", iface->name);
ofproto_revalidate(br->ofproto, iface->tag);
if (iface->port_ifidx == port->active_iface) {
ofproto_revalidate(br->ofproto,
}
bond_send_learning_packets(port);
} else {
- VLOG_WARN("interface %s: disabled", iface->name);
+ VLOG_WARN("interface %s: enabled", iface->name);
if (port->active_iface < 0) {
ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
bond_choose_active_iface(port);
return;
}
- if (sscanf(hash_s, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8,
- &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6) {
+ if (sscanf(hash_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+ == ETH_ADDR_SCAN_COUNT) {
hash = bond_hash(mac);
} else if (strspn(hash_s, "0123456789") == strlen(hash_s)) {
hash = atoi(hash_s) & BOND_MASK;
\fBovs\-vswitchd\fR to reload its configuration file.
.PP
.SH OPTIONS
- .IP "\fB--reload-command=\fIcommand\fR"
- Sets the command that \fBovs\-brcompatd\fR runs to force \fBovs\-vswitchd\fR to
- reload its configuration file to \fIcommand\fR. The command is run in
- a subshell, so it may contain arbitrary shell metacharacters, etc.
- The \fB--help\fR option displays the default reload command.
+ .IP "\fB--appctl-command=\fIcommand\fR"
+ Sets the command that \fBovs\-brcompatd\fR runs to communicate with
+ \fBovs\-vswitchd\fR. The command is run in \fB/bin/sh\fR as a shell
+ command, so \fIcommand\fR may contain arbitrary shell metacharacters,
+ etc. The \fB--help\fR option displays the default command.
+ .IP
+ \fIcommand\fR must contain exactly one instance of \fB%s\fR, which
+ \fBovs\-brcompatd\fR replaces by a command from the set understood by
+ \fBovs\-vswitchd\fR. Any instances of \fB%%\fR in \fIcommand\fR are
+ replaced by a single \fB%\fR. The \fB%\fR character may not otherwise
+ appear in \fIcommand\fR.
+ .IP
+ The commands that are substituted into \fIcommand\fR are those that
+ can be listed by passing \fB-e help\fR to \fBovs\-appctl\fR with
+ \fBovs\-vswitchd\fR as target. The command that is substituted may
+ include white space-separated arguments, so \fIcommand\fR should include
+ shell quotes around \fB%s\fR.
+ .IP
+ \fIcommand\fR must not redirect \fBovs\-appctl\fR's standard output or
+ standard error streams, because \fBovs\-brcompatd\fR expects to read
+ both of these streams separately.
.TP
\fB--prune-timeout=\fIsecs\fR
.
.BR ovs\-appctl (8),
.BR ovs\-vswitchd (8),
.BR ovs\-vswitchd.conf (5),
-\fBINSTALL\fR in the Open vSwitch distribution.
+\fBINSTALL.bridge\fR in the Open vSwitch distribution.
. RE
. PP
..
-.TH ovs\-vswitchd.conf 5 "April 2009" "Open vSwitch" "OpenVSwitch Manual"
+.TH ovs\-vswitchd.conf 5 "June 2009" "Open vSwitch" "Open vSwitch Manual"
.
.SH NAME
ovs\-vswitchd.conf \- configuration file for \fBovs\-vswitchd\fR
.
.SH DESCRIPTION
This manual page describes the syntax for the configuration file used
-by \fBovs\-vswitchd\fR(8), the virtual switch daemon.
+by \fBovs\-vswitchd\fR(8), the Open vSwitch daemon.
.PP
The configuration file is based on key-value pairs, which are given
one per line in the form \fIkey\fB=\fIvalue\fR. Each \fIkey\fR
.SS "Bridge Configuration"
A bridge (switch) with a given \fIname\fR is configured by specifying
the names of its network devices as values for key
-\fBbridge.\fIname\fB.port\fR. (The specified \fIname\fR may not begin
-with \fBdp\fR or \fBnl:\fR followed by a digit.)
+\fBbridge.\fIname\fB.port\fR.
.PP
The names given on \fBbridge.\fIname\fB.port\fR must be the names of
existing network devices, except for ``internal ports.'' An internal
port is a simulated network device that receives traffic only
-through the virtual switch and switches any traffic sent it through
-virtual switch. An internal port may configured with an IP address,
+through the switch and switches any traffic sent it through the
+switch. An internal port may be configured with an IP address,
etc. using the usual system tools (e.g. \fBifconfig\fR, \fBip\fR). To
designate network device \fInetdev\fR as an internal port, add
\fBiface.\fInetdev\fB.internal=true\fR to the configuration file.
\fBbonding.\fIname\fB.updelay\fR or
\fBbonding.\fIname\fB.downdelay\fR, respectively, to a positive
integer, interpreted in milliseconds.
+ The \fBupdelay\fR setting is honored only when at least one bonded
+ interface is already enabled. When no interfaces are enabled, then
+ the first bond interface to come up is enabled immediately. The
+ \fBdowndelay\fR setting is always honored.
.PP
The following syntax bonds \fBeth0\fR and \fBeth1\fR into a bonding
device named \fBbond0\fR, which is added to bridge \fBmybr\fR along
\fBnetflow.\fIbridge\fB.engine-id\fR, respectively. Each takes a value
between 0 and 255, inclusive.
-Many NetFlow collectors do not expect multiple virtual switches to be
+Many NetFlow collectors do not expect multiple switches to be
sending messages from the same host, and they do not store the engine
information which could be used to disambiguate the traffic. To prevent
flows from multiple switches appearing as if they came on the interface,
.TP
\fBdiscover\fR
Use controller discovery to find the local OpenFlow controller.
-Refer to \fBsecchan\fR(8) for information on how to configure a DHCP
+Refer to \fBovs\-openflowd\fR(8) for information on how to configure a DHCP
server to support controller discovery. The following additional
options control the discovery process:
.
.IP
The default regular expression is \fBssl:.*\fR, meaning that only SSL
controller connections will be accepted, when SSL is configured (see
-\fBSSL Configuration\fR), and \fB.*\fR otherwise, meaning that any
-controller will be accepted.
+\fBSSL Configuration\fR), and \fBtcp:.*\fR otherwise, meaning that only
+TCP controller connections will be accepted.
.IP
The regular expression is implicitly anchored at the beginning of the
controller location string, as if it begins with \fB^\fR.
By default, or if this is set to \fBtrue\fR, \fBovs\-vswitchd\fR connects
to the controller in-band. If this is set to \fBfalse\fR,
\fBovs\-vswitchd\fR connects to the controller out-of-band. Refer to
-\fBsecchan\fR(8) for a description of in-band and out-of-band control.
+\fBovs\-openflowd\fR(8) for a description of in-band and out-of-band control.
.IP "\fBbridge.\fIname\fB.controller.ip=\fIip\fR"
If specified, the IP address to configure on the bridge's local port.
.IP "\fBbridge.\fIname\fB.controller.netmask=\fInetmask\fR"
The minimum value of \fIsecs\fR is 5 seconds. The default is taken
from \fBmgmt.inactivity-probe\fR (see above).
.IP
-When the virtual switch is connected to the controller, it waits for a
+When the switch is connected to the controller, it waits for a
message to be received from the controller for \fIsecs\fR seconds
before it sends an inactivity probe to the controller.  After sending
the inactivity probe, if no response is received for an additional
-\fIsecs\fR seconds, the secure channel assumes that the connection has
+\fIsecs\fR seconds, \fBovs\-vswitchd\fR assumes that the connection has
been broken and attempts to reconnect.
.IP
Changing the inactivity probe interval also changes the interval
.IP "\fBbridge.\fIname\fB.controller.fail-mode=\fBstandalone\fR|\fBsecure\fR"
.IQ "\fBmgmt.fail-mode=standalone\fR|\fBsecure\fR"
When a controller is configured, it is, ordinarily, responsible for
-setting up all flows on the virtual switch. Thus, if the connection to
+setting up all flows on the switch. Thus, if the connection to
the controller fails, no new network connections can be set up. If
the connection to the controller stays down long enough, no packets
can pass through the switch at all.
attempt until it reaches the maximum. The default maximum backoff
time is taken from \fBmgmt.max-backoff\fR.
.ST "Controller Rate-Limiting"
-These settings configure how the virtual switch applies a ``token
+These settings configure how the switch applies a ``token
bucket'' to limit the rate at which packets in unknown flows are
forwarded to the OpenFlow controller for flow-setup processing. This
feature prevents a single bridge from overwhelming a controller.
for controller connectivity, the following settings are required:
.TP
\fBssl.private-key=\fIprivkey.pem\fR
-Specifies a PEM file containing the private key used as the virtual
+Specifies a PEM file containing the private key used as the
switch's identity for SSL connections to the controller.
.TP
\fBssl.certificate=\fIcert.pem\fR
Specifies a PEM file containing a certificate, signed by the
certificate authority (CA) used by the controller and manager, that
-certifies the virtual switch's private key, identifying a trustworthy
+certifies the switch's private key, identifying a trustworthy
switch.
.TP
\fBssl.ca-cert=\fIcacert.pem\fR
Specifies a PEM file containing the CA certificate used to verify that
-the virtual switch is connected to a trustworthy controller.
+the switch is connected to a trustworthy controller.
.PP
These files are read only once, at \fBovs\-vswitchd\fR startup time. If
their contents change, \fBovs\-vswitchd\fR must be killed and restarted.
.PP
-These SSL settings apply to all SSL connections made by the virtual
-switch.
+These SSL settings apply to all SSL connections made by the switch.
.ST "CA Certificate Bootstrap"
Ordinarily, all of the files named in the SSL configuration must exist
when \fBovs\-vswitchd\fR starts. However, if \fBssl.bootstrap-ca-cert\fR
Listens for SSL connections on \fIport\fR (default: 6633). SSL must
be configured when this form is used (see \fBSSL Configuration\fR,
above).
-.IP "\fBptcp:\fR[\fIport\fR]"
+.IP "\fBptcp:\fR[\fIport\fR][\fB:\fIip\fR]"
Listens for TCP connections on \fIport\fR (default: 6633).
+By default, \fBovs\-vswitchd\fR listens for connections to any local
+IP address, but \fIip\fR may be specified to limit connections to the
+specified local \fIip\fR.
.RE
To entirely disable listening for management connections, set
\fBbridge.\fIname\fB.openflow.listeners\fR to the single value