/*
- * Copyright (c) 2009, 2010 Nicira Networks.
- * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007-2012 Nicira, Inc.
*
- * Significant portions of this file may be copied from parts of the Linux
- * kernel, by Linus Torvalds and others.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
*/
-/* Interface exported by openvswitch_mod. */
-
#ifndef DATAPATH_H
#define DATAPATH_H 1
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
-#include <linux/workqueue.h>
-#include <linux/seqlock.h>
#include <linux/skbuff.h>
-#include <linux/version.h>
-#include "flow.h"
-#include "dp_sysfs.h"
+#include <linux/u64_stats_sync.h>
-struct vport;
-struct dp_port;
-
-/* Mask for the priority bits in a vlan header. If we ever merge upstream
- * then this should go into include/linux/if_vlan.h. */
-#define VLAN_PCP_MASK 0xe000
-#define VLAN_PCP_SHIFT 13
+#include "checksum.h"
+#include "compat.h"
+#include "flow.h"
+#include "tunnel.h"
+#include "vlan.h"
+#include "vport.h"
-#define DP_MAX_PORTS 1024
-#define DP_MAX_GROUPS 16
+#define DP_MAX_PORTS USHRT_MAX
+#define DP_VPORT_HASH_BUCKETS 1024
-#define DP_N_QUEUES 3
-#define DP_MAX_QUEUE_LEN 100
+#define SAMPLE_ACTION_DEPTH 3
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
* datapath.
- * @n_frags: Number of IP fragments processed by datapath.
* @n_hit: Number of received packets for which a matching flow was found in
* the flow table.
* @n_miss: Number of received packets that had no matching flow in the flow
* one of the datapath's queues).
*/
struct dp_stats_percpu {
- u64 n_frags;
u64 n_hit;
u64 n_missed;
u64 n_lost;
- seqcount_t seqlock;
-};
-
-struct dp_port_group {
- struct rcu_head rcu;
- int n_ports;
- u16 ports[];
+ struct u64_stats_sync sync;
};
/**
* struct datapath - datapath for flow-based packet switching
- * @mutex: Mutual exclusion for ioctls.
- * @dp_idx: Datapath number (index into the dps[] array in datapath.c).
- * @ifobj: Represents /sys/class/net/<devname>/brif.
- * @drop_frags: Drop all IP fragments if nonzero.
- * @queues: %DP_N_QUEUES sets of queued packets for userspace to handle.
- * @waitqueue: Waitqueue, for waiting for new packets in @queues.
- * @n_flows: Number of flows currently in flow table.
- * @table: Current flow table (RCU protected).
- * @groups: Port groups, used by ODPAT_OUTPUT_GROUP action (RCU protected).
- * @n_ports: Number of ports currently in @ports.
- * @ports: Map from port number to &struct dp_port. %ODPP_LOCAL port
- * always exists, other ports may be %NULL.
- * @port_list: List of all ports in @ports in arbitrary order.
+ * @rcu: RCU callback head for deferred destruction.
+ * @list_node: Element in global 'dps' list.
+ * @table: Current flow table. Protected by ovs_mutex and RCU.
+ * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
+ * ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
- * @sflow_probability: Number of packets out of UINT_MAX to sample to the
- * %ODPL_SFLOW queue, e.g. (@sflow_probability/UINT_MAX) is the probability of
- * sampling a given packet.
+ * @net: Reference to net namespace.
+ *
+ * Context: See the comment on locking at the top of datapath.c for additional
+ * locking information.
*/
struct datapath {
- struct mutex mutex;
- int dp_idx;
- struct kobject ifobj;
-
- int drop_frags;
-
- /* Queued data. */
- struct sk_buff_head queues[DP_N_QUEUES];
- wait_queue_head_t waitqueue;
+ struct rcu_head rcu;
+ struct list_head list_node;
/* Flow table. */
- struct tbl *table;
-
- /* Port groups. */
- struct dp_port_group *groups[DP_MAX_GROUPS];
+ struct flow_table __rcu *table;
/* Switch ports. */
- unsigned int n_ports;
- struct dp_port *ports[DP_MAX_PORTS];
- struct list_head port_list;
+ struct hlist_head *ports;
/* Stats. */
- struct dp_stats_percpu *stats_percpu;
-
- /* sFlow Sampling */
- unsigned int sflow_probability;
-};
-
-/**
- * struct dp_port - one port within a datapath
- * @port_no: Index into @dp's @ports array.
- * @dp: Datapath to which this port belongs.
- * @vport: The network device attached to this port. The contents depends on
- * the device and should be accessed only through the vport_* functions.
- * @kobj: Represents /sys/class/net/<devname>/brport.
- * @linkname: The name of the link from /sys/class/net/<datapath>/brif to this
- * &struct dp_port. (We keep this around so that we can delete it if the
- * device gets renamed.) Set to the null string when no link exists.
- * @node: Element in @dp's @port_list.
- * @sflow_pool: Number of packets that were candidates for sFlow sampling,
- * regardless of whether they were actually chosen and sent down to userspace.
- */
-struct dp_port {
- u16 port_no;
- struct datapath *dp;
- struct vport *vport;
- struct kobject kobj;
- char linkname[IFNAMSIZ];
- struct list_head node;
- atomic_t sflow_pool;
-};
+ struct dp_stats_percpu __percpu *stats_percpu;
-enum csum_type {
- OVS_CSUM_NONE = 0,
- OVS_CSUM_UNNECESSARY = 1,
- OVS_CSUM_COMPLETE = 2,
- OVS_CSUM_PARTIAL = 3,
+#ifdef CONFIG_NET_NS
+ /* Network namespace ref. */
+ struct net *net;
+#endif
};
/**
* struct ovs_skb_cb - OVS data in skb CB
- * @dp_port: The datapath port on which the skb entered the switch.
+ * @flow: The flow associated with this packet. May be %NULL if no flow.
+ * @pkt_key: The flow information extracted from the packet. Must be nonnull.
+ * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
+ * packet is not being tunneled.
* @ip_summed: Consistently stores L4 checksumming status across different
* kernel versions.
- * @tun_id: ID (in network byte order) of the tunnel that encapsulated this
- * packet. It is 0 if the packet was not received on a tunnel.
- * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise.
+ * @csum_start: Stores the offset from which to start checksumming independent
+ * of the transport header on all kernel versions.
+ * @vlan_tci: Provides a substitute for the skb->vlan_tci field on kernels
+ * before 2.6.27.
*/
struct ovs_skb_cb {
- struct dp_port *dp_port;
+ struct sw_flow *flow;
+ struct sw_flow_key *pkt_key;
+ struct ovs_key_ipv4_tunnel *tun_key;
+#ifdef NEED_CSUM_NORMALIZE
enum csum_type ip_summed;
- __be32 tun_id;
- bool is_frag;
+ u16 csum_start;
+#endif
+#ifdef NEED_VLAN_FIELD
+ u16 vlan_tci;
+#endif
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
-extern struct notifier_block dp_device_notifier;
-extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
+/**
+ * struct dp_upcall_info - metadata to include with a packet to send to userspace
+ * @cmd: One of %OVS_PACKET_CMD_*.
+ * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
+ * @userdata: If nonnull, its variable-length value is passed to userspace as
+ * %OVS_PACKET_ATTR_USERDATA.
+ * @portid: Netlink PID to which packet should be sent. If @portid is 0 then no
+ * packet is sent and the packet is accounted in the datapath's @n_lost
+ * counter.
+ */
+struct dp_upcall_info {
+ u8 cmd;
+ const struct sw_flow_key *key;
+ const struct nlattr *userdata;
+ u32 portid;
+};
-void dp_process_received_packet(struct dp_port *, struct sk_buff *);
-int dp_detach_port(struct dp_port *, int may_delete);
-int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
-int dp_min_mtu(const struct datapath *dp);
-void set_internal_devs_mtu(const struct datapath *dp);
+/**
+ * struct ovs_net - Per net-namespace data for ovs.
+ * @dps: List of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ * @vport_net: Per network namespace data for vport.
+ * @dp_notify_work: Work item kept per namespace. NOTE(review): presumably the
+ * one serviced by ovs_dp_notify_wq() - confirm against its definition.
+ */
+struct ovs_net {
+ struct list_head dps;
+ struct vport_net vport_net;
+ struct work_struct dp_notify_work;
+};
-struct datapath *get_dp(int dp_idx);
-const char *dp_name(const struct datapath *dp);
+extern int ovs_net_id;
+void ovs_lock(void);
+void ovs_unlock(void);
-#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
-int vswitch_skb_checksum_setup(struct sk_buff *skb);
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void);
#else
-static inline int vswitch_skb_checksum_setup(struct sk_buff *skb)
+#define lockdep_ovsl_is_held() 1
+#endif
+
+/* Warn when lockdep_ovsl_is_held() reports that the OVS lock is not held.
+ * Without CONFIG_LOCKDEP that check is the constant 1, so this never fires. */
+#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
+/* rcu_dereference_protected() on @p, passing lockdep_ovsl_is_held() as the
+ * condition that justifies the access. */
+#define ovsl_dereference(p) \
+ rcu_dereference_protected(p, lockdep_ovsl_is_held())
+
+/* Return the network namespace recorded in @dp, read through read_pnet(). */
+static inline struct net *ovs_dp_get_net(struct datapath *dp)
{
- return 0;
+ return read_pnet(&dp->net);
}
-#endif
-void compute_ip_summed(struct sk_buff *skb, bool xmit);
-void forward_ip_summed(struct sk_buff *skb);
+/* Record @net as the network namespace of @dp, stored through write_pnet(). */
+static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
+{
+ write_pnet(&dp->net, net);
+}
+
+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
+
+/* Look up port @port_no of @dp through ovs_lookup_vport(), for callers that
+ * are inside an RCU read-side critical section: warns once when
+ * rcu_read_lock_held() reports otherwise.  @port_no is an int here but is
+ * converted to the u16 that ovs_lookup_vport() takes. */
+static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+/* Look up port @port_no of @dp through ovs_lookup_vport(), for callers that
+ * hold either protection: warns once only when rcu_read_lock_held() and
+ * lockdep_ovsl_is_held() are both false.  @port_no is an int here but is
+ * converted to the u16 that ovs_lookup_vport() takes. */
+static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+/* Look up port @port_no of @dp through ovs_lookup_vport(), for callers that
+ * hold the OVS lock: ASSERT_OVSL() warns when lockdep_ovsl_is_held() is
+ * false.  @port_no is an int here but is converted to the u16 that
+ * ovs_lookup_vport() takes. */
+static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
+{
+ ASSERT_OVSL();
+ return ovs_lookup_vport(dp, port_no);
+}
+
+extern struct notifier_block ovs_dp_device_notifier;
+extern struct genl_multicast_group ovs_dp_vport_multicast_group;
+
+void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_detach_port(struct vport *);
+int ovs_dp_upcall(struct datapath *, struct sk_buff *,
+ const struct dp_upcall_info *);
+
+const char *ovs_dp_name(const struct datapath *dp);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 portid, u32 seq,
+ u8 cmd);
+
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+void ovs_dp_notify_wq(struct work_struct *work);
+
+/* Log a netlink-related message at most once through pr_info_once(), with
+ * "netlink: " placed in front of the caller's format string.  @fmt must be a
+ * string literal because the prefix is attached by literal concatenation. */
+#define OVS_NLERR(fmt, ...) \
+ pr_info_once("netlink: " fmt, ##__VA_ARGS__)
#endif /* datapath.h */