memset(upcall, 0, sizeof *upcall);
upcall->type = type;
- upcall->packet = buf;
- upcall->packet->data = CONST_CAST(struct nlattr *,
- nl_attr_get(a[OVS_PACKET_ATTR_PACKET]));
- upcall->packet->size = nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]);
upcall->key = CONST_CAST(struct nlattr *,
nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
+
+ /* Allow overwriting the netlink attribute header without reallocating. */
+ ofpbuf_use_stub(&upcall->packet,
+ CONST_CAST(struct nlattr *,
+ nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
+ nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
+ sizeof(struct nlattr));
+ upcall->packet.data = (char *)upcall->packet.data + sizeof(struct nlattr);
+ upcall->packet.size -= sizeof(struct nlattr);
+
*dp_ifindex = ovs_header->dp_ifindex;
return 0;
static int do_del_port(struct dp_netdev *, odp_port_t port_no);
static int dpif_netdev_open(const struct dpif_class *, const char *name,
bool create, struct dpif **);
-static int dp_netdev_output_userspace(struct dp_netdev *, const struct ofpbuf *,
+static int dp_netdev_output_userspace(struct dp_netdev *, struct ofpbuf *,
int queue_no, const struct flow *,
const struct nlattr *userdata);
static void dp_netdev_execute_actions(struct dp_netdev *, const struct flow *,
while (q->tail != q->head) {
struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
+ ofpbuf_uninit(&u->upcall.packet);
ofpbuf_uninit(&u->buf);
}
}
/* Get packet metadata. */
error = dpif_netdev_flow_from_nlattrs(execute->key, execute->key_len, &md);
if (!error) {
- struct ofpbuf *copy;
struct flow key;
- /* Make a deep copy of 'packet', because we might modify its data. */
- copy = ofpbuf_clone_with_headroom(execute->packet, DP_NETDEV_HEADROOM);
-
/* Extract flow key. */
- flow_extract(copy, md.skb_priority, md.pkt_mark, &md.tunnel,
+ flow_extract(execute->packet, md.skb_priority, md.pkt_mark, &md.tunnel,
&md.in_port, &key);
ovs_mutex_lock(&dp_netdev_mutex);
- dp_netdev_execute_actions(dp, &key, copy,
+ dp_netdev_execute_actions(dp, &key, execute->packet,
execute->actions, execute->actions_len);
ovs_mutex_unlock(&dp_netdev_mutex);
- ofpbuf_delete(copy);
}
return error;
}
struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
*upcall = u->upcall;
- upcall->packet = buf;
ofpbuf_uninit(buf);
*buf = u->buf;
struct dp_netdev_port *port;
struct dp_netdev *dp;
struct ofpbuf packet;
+ size_t buf_size;
ovs_mutex_lock(&dp_netdev_mutex);
dp = get_dp_netdev(dpif);
- ofpbuf_init(&packet,
- DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + dp->max_mtu);
+ ofpbuf_init(&packet, 0);
+
+ buf_size = DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + dp->max_mtu;
LIST_FOR_EACH (port, node, &dp->port_list) {
int error;
- /* Reset packet contents. */
+ /* Reset packet contents. Packet data may have been stolen. */
ofpbuf_clear(&packet);
- ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
+ ofpbuf_reserve_with_tailroom(&packet, DP_NETDEV_HEADROOM, buf_size);
error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP;
if (!error) {
}
static int
-dp_netdev_output_userspace(struct dp_netdev *dp, const struct ofpbuf *packet,
+dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
int queue_no, const struct flow *flow,
const struct nlattr *userdata)
{
upcall->type = queue_no;
/* Allocate buffer big enough for everything. */
- buf_size = ODPUTIL_FLOW_KEY_BYTES + 2 + packet->size;
+ buf_size = ODPUTIL_FLOW_KEY_BYTES;
if (userdata) {
buf_size += NLA_ALIGN(userdata->nla_len);
}
NLA_ALIGN(userdata->nla_len));
}
- /* Put packet.
- *
- * We adjust 'data' and 'size' in 'buf' so that only the packet itself
- * is visible in 'upcall->packet'. The ODP flow and (if present)
- * userdata become part of the headroom. */
- ofpbuf_put_zeros(buf, 2);
- buf->data = ofpbuf_put(buf, packet->data, packet->size);
- buf->size = packet->size;
- upcall->packet = buf;
+ /* Steal packet data. */
+ ovs_assert(packet->source == OFPBUF_MALLOC);
+ upcall->packet = *packet;
+ ofpbuf_use(packet, NULL, 0);
seq_change(dp->queue_seq);
static void
dp_netdev_action_userspace(void *aux_, struct ofpbuf *packet,
const struct flow *flow OVS_UNUSED,
- const struct nlattr *a)
+ const struct nlattr *a, bool may_steal)
{
struct dp_netdev_execute_aux *aux = aux_;
const struct nlattr *userdata;
userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
+
+ /* Make a copy if we are not allowed to steal the packet's data. */
+ if (!may_steal) {
+ packet = ofpbuf_clone_with_headroom(packet, DP_NETDEV_HEADROOM);
+ }
dp_netdev_output_userspace(aux->dp, packet, DPIF_UC_ACTION, aux->key,
userdata);
+ if (!may_steal) {
+ ofpbuf_uninit(packet);
+ }
}
static void
* '*upcall', using 'buf' for storage. Should only be called if 'recv_set'
* has been used to enable receiving packets from 'dpif'.
*
- * The implementation should point 'upcall->packet' and 'upcall->key' into
- * data in the caller-provided 'buf'. If necessary to make room, the
- * implementation may expand the data in 'buf'. (This is hardly a great
- * way to do things but it works out OK for the dpif providers that exist
- * so far.)
+ * The implementation should point 'upcall->key' and 'upcall->userdata'
+ * (if any) into data in the caller-provided 'buf'. The implementation may
+ * also use 'buf' for storing the data of 'upcall->packet'. If necessary
+ * to make room, the implementation may reallocate the data in 'buf'.
+ *
+ * The caller owns the data of 'upcall->packet' and may modify it. If
+ * packet's headroom is exhausted as it is manipulated, 'upcall->packet'
+ * will be reallocated. This requires the data of 'upcall->packet' to be
+ * released with ofpbuf_uninit() before 'upcall' is destroyed.
*
* This function must not block. If no upcall is pending when it is
* called, it should return EAGAIN without blocking. */
static void
dpif_execute_helper_userspace_cb(void *aux, struct ofpbuf *packet,
const struct flow *flow,
- const struct nlattr *action)
+ const struct nlattr *action,
+ bool may_steal OVS_UNUSED)
{
dpif_execute_helper_execute__(aux, packet, flow,
action, NLA_ALIGN(action->nla_len));
{
struct dpif_execute_helper_aux aux;
enum odp_key_fitness fit;
- struct ofpbuf *packet;
struct flow flow;
COVERAGE_INC(dpif_execute_with_help);
aux.dpif = dpif;
aux.error = 0;
- packet = ofpbuf_clone_with_headroom(execute->packet, VLAN_HEADER_LEN);
- odp_execute_actions(&aux, packet, &flow,
+ odp_execute_actions(&aux, execute->packet, &flow,
execute->actions, execute->actions_len,
dpif_execute_helper_output_cb,
dpif_execute_helper_userspace_cb);
- ofpbuf_delete(packet);
-
return aux.error;
}
dpif_execute(struct dpif *dpif,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
- const struct ofpbuf *buf,
- bool needs_help)
+ struct ofpbuf *buf, bool needs_help)
{
struct dpif_execute execute;
* '*upcall', using 'buf' for storage. Should only be called if
* dpif_recv_set() has been used to enable receiving packets on 'dpif'.
*
- * 'upcall->packet' and 'upcall->key' point into data in the caller-provided
- * 'buf', so their memory cannot be freed separately from 'buf'. (This is
- * hardly a great way to do things but it works out OK for the dpif providers
- * and clients that exist so far.)
+ * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided
+ * 'buf', so their memory cannot be freed separately from 'buf'.
*
* Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
* if no upcall is immediately available. */
struct ds flow;
char *packet;
- packet = ofp_packet_to_string(upcall->packet->data,
- upcall->packet->size);
+ packet = ofp_packet_to_string(upcall->packet.data,
+ upcall->packet.size);
ds_init(&flow);
odp_flow_key_format(upcall->key, upcall->key_len, &flow);
* The terms written in quotes below are defined in later sections.
*
* When a datapath "port" receives a packet, it extracts the headers (the
- * "flow"). If the datapath's "flow table" contains a "flow entry" whose flow
- * is the same as the packet's, then it executes the "actions" in the flow
- * entry and increments the flow's statistics. If there is no matching flow
- * entry, the datapath instead appends the packet to an "upcall" queue.
+ * "flow"). If the datapath's "flow table" contains a "flow entry" matching
+ * the packet, then it executes the "actions" in the flow entry and increments
+ * the flow's statistics. If there is no matching flow entry, the datapath
+ * instead appends the packet to an "upcall" queue.
*
*
* Ports
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
+#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "netdev.h"
#include "util.h"
struct ds;
struct flow;
struct nlattr;
-struct ofpbuf;
struct sset;
struct dpif_class;
int dpif_execute(struct dpif *,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
- const struct ofpbuf *,
- bool needs_help);
+ struct ofpbuf *, bool needs_help);
\f
/* Operation batching interface.
*
size_t key_len; /* Length of 'key' in bytes. */
const struct nlattr *actions; /* Actions to execute on packet. */
size_t actions_len; /* Length of 'actions' in bytes. */
- const struct ofpbuf *packet; /* Packet to execute. */
+ struct ofpbuf *packet; /* Packet to execute. */
/* Some dpif providers do not implement every action. The Linux kernel
* datapath, in particular, does not implement ARP field modification.
/* A packet passed up from the datapath to userspace.
*
- * If 'key', 'actions', or 'userdata' is nonnull, then it points into data
- * owned by 'packet', so their memory cannot be freed separately. (This is
- * hardly a great way to do things but it works out OK for the dpif providers
- * and clients that exist so far.)
+ * The 'packet', 'key' and 'userdata' may point into data in a buffer
+ * provided by the caller, so the buffer should be released only after the
+ * upcall processing has been finished.
+ *
+ * While being processed, the 'packet' may be reallocated, so the packet must
+ * be separately released with ofpbuf_uninit().
*/
struct dpif_upcall {
/* All types. */
enum dpif_upcall_type type;
- struct ofpbuf *packet; /* Packet data. */
+ struct ofpbuf packet; /* Packet data. */
struct nlattr *key; /* Flow key. */
size_t key_len; /* Length of 'key' in bytes. */
}
}
+static void
+odp_execute_actions__(void *dp, struct ofpbuf *packet, struct flow *key,
+ const struct nlattr *actions, size_t actions_len,
+ odp_output_cb output, odp_userspace_cb userspace,
+ bool more_actions);
+
static void
odp_execute_sample(void *dp, struct ofpbuf *packet, struct flow *key,
- const struct nlattr *action,
- void (*output)(void *dp, struct ofpbuf *packet,
- const struct flow *key,
- odp_port_t out_port),
- void (*userspace)(void *dp, struct ofpbuf *packet,
- const struct flow *key,
- const struct nlattr *action))
+ const struct nlattr *action, odp_output_cb output,
+ odp_userspace_cb userspace, bool more_actions)
{
const struct nlattr *subactions = NULL;
const struct nlattr *a;
}
}
- odp_execute_actions(dp, packet, key, nl_attr_get(subactions),
- nl_attr_get_size(subactions), output, userspace);
+ odp_execute_actions__(dp, packet, key, nl_attr_get(subactions),
+ nl_attr_get_size(subactions), output, userspace,
+ more_actions);
}
-void
-odp_execute_actions(void *dp, struct ofpbuf *packet, struct flow *key,
- const struct nlattr *actions, size_t actions_len,
- void (*output)(void *dp, struct ofpbuf *packet,
- const struct flow *key,
- odp_port_t out_port),
- void (*userspace)(void *dp, struct ofpbuf *packet,
- const struct flow *key,
- const struct nlattr *action))
+static void
+odp_execute_actions__(void *dp, struct ofpbuf *packet, struct flow *key,
+ const struct nlattr *actions, size_t actions_len,
+ odp_output_cb output, odp_userspace_cb userspace,
+ bool more_actions)
{
const struct nlattr *a;
unsigned int left;
case OVS_ACTION_ATTR_USERSPACE: {
if (userspace) {
- userspace(dp, packet, key, a);
+ /* Allow 'userspace' to steal the packet data if we do not
+ * need it any more. */
+ bool steal = !more_actions && left <= NLA_ALIGN(a->nla_len);
+ userspace(dp, packet, key, a, steal);
}
break;
}
break;
case OVS_ACTION_ATTR_SAMPLE:
- odp_execute_sample(dp, packet, key, a, output, userspace);
+ odp_execute_sample(dp, packet, key, a, output, userspace,
+ more_actions || left > NLA_ALIGN(a->nla_len));
break;
case OVS_ACTION_ATTR_UNSPEC:
}
}
}
+
+void
+odp_execute_actions(void *dp, struct ofpbuf *packet, struct flow *key,
+ const struct nlattr *actions, size_t actions_len,
+ odp_output_cb output, odp_userspace_cb userspace)
+{
+ odp_execute_actions__(dp, packet, key, actions, actions_len, output,
+ userspace, false);
+}
#ifndef EXECUTE_ACTIONS_H
#define EXECUTE_ACTIONS_H 1
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "openvswitch/types.h"
struct nlattr;
struct ofpbuf;
+typedef void (*odp_output_cb)(void *dp, struct ofpbuf *packet,
+ const struct flow *key, odp_port_t out_port);
+typedef void (*odp_userspace_cb)(void *dp, struct ofpbuf *packet,
+ const struct flow *key,
+ const struct nlattr *action, bool may_steal);
+
void
odp_execute_actions(void *dp, struct ofpbuf *packet, struct flow *key,
const struct nlattr *actions, size_t actions_len,
- void (*output)(void *dp, struct ofpbuf *packet,
- const struct flow *key,
- odp_port_t out_port),
- void (*userspace)(void *dp, struct ofpbuf *packet,
- const struct flow *key,
- const struct nlattr *action));
+ odp_output_cb output, odp_userspace_cb userspace);
#endif
b->data = (char*)b->data + size;
}
+/* Reserves 'size' bytes of headroom so that they can be later allocated with
+ * ofpbuf_push_uninit() without reallocating the ofpbuf. */
+void
+ofpbuf_reserve_with_tailroom(struct ofpbuf *b, size_t headroom,
+ size_t tailroom)
+{
+ ovs_assert(!b->size);
+ ofpbuf_prealloc_tailroom(b, headroom + tailroom);
+ b->data = (char*)b->data + headroom;
+}
+
/* Prefixes 'size' bytes to the head end of 'b', reallocating and copying its
* data if necessary. Returns a pointer to the first byte of the data's
* location in the ofpbuf. The new data is left uninitialized. */
void *ofpbuf_put(struct ofpbuf *, const void *, size_t);
char *ofpbuf_put_hex(struct ofpbuf *, const char *s, size_t *n);
void ofpbuf_reserve(struct ofpbuf *, size_t);
+void ofpbuf_reserve_with_tailroom(struct ofpbuf *b, size_t headroom,
+ size_t tailroom);
void *ofpbuf_push_uninit(struct ofpbuf *b, size_t);
void *ofpbuf_push_zeros(struct ofpbuf *, size_t);
void *ofpbuf_push(struct ofpbuf *b, const void *, size_t);
upcall_destroy(struct upcall *upcall)
{
if (upcall) {
+ ofpbuf_uninit(&upcall->dpif_upcall.packet);
ofpbuf_uninit(&upcall->upcall_buf);
free(upcall);
}
n_misses = 0;
LIST_FOR_EACH_SAFE (upcall, next, list_node, upcalls) {
struct dpif_upcall *dupcall = &upcall->dpif_upcall;
- struct ofpbuf *packet = dupcall->packet;
+ struct ofpbuf *packet = &dupcall->packet;
struct flow_miss *miss = &fmb->miss_buf[n_misses];
struct flow_miss *existing_miss;
struct ofproto_dpif *ofproto;
memset(&cookie, 0, sizeof cookie);
memcpy(&cookie, nl_attr_get(dupcall->userdata),
sizeof cookie.sflow);
- dpif_sflow_received(sflow, dupcall->packet, &flow, odp_in_port,
+ dpif_sflow_received(sflow, packet, &flow, odp_in_port,
&cookie);
}
break;
case IPFIX_UPCALL:
if (ipfix) {
- dpif_ipfix_bridge_sample(ipfix, dupcall->packet, &flow);
+ dpif_ipfix_bridge_sample(ipfix, packet, &flow);
}
break;
case FLOW_SAMPLE_UPCALL:
/* The flow reflects exactly the contents of the packet.
* Sample the packet using it. */
- dpif_ipfix_flow_sample(ipfix, dupcall->packet, &flow,
+ dpif_ipfix_flow_sample(ipfix, packet, &flow,
cookie.flow_sample.collector_set_id,
cookie.flow_sample.probability,
cookie.flow_sample.obs_domain_id,
n_ops = 0;
LIST_FOR_EACH (upcall, list_node, upcalls) {
struct flow_miss *miss = upcall->flow_miss;
- struct ofpbuf *packet = upcall->dpif_upcall.packet;
+ struct ofpbuf *packet = &upcall->dpif_upcall.packet;
if (miss->xout.slow) {
struct xlate_in xin;
}
}
- /* Execute batch. */
- for (i = 0; i < n_ops; i++) {
- opsp[i] = &ops[i];
- }
- dpif_operate(udpif->dpif, opsp, n_ops);
-
/* Special case for fail-open mode.
*
* If we are in fail-open mode, but we are connected to a controller too,
* then we should send the packet up to the controller in the hope that it
* will try to set up a flow and thereby allow us to exit fail-open.
*
- * See the top-level comment in fail-open.c for more information. */
+ * See the top-level comment in fail-open.c for more information.
+ *
+ * Copy packets before they are modified by execution. */
if (fail_open) {
LIST_FOR_EACH (upcall, list_node, upcalls) {
struct flow_miss *miss = upcall->flow_miss;
- struct ofpbuf *packet = upcall->dpif_upcall.packet;
+ struct ofpbuf *packet = &upcall->dpif_upcall.packet;
struct ofproto_packet_in *pin;
pin = xmalloc(sizeof *pin);
}
}
+ /* Execute batch. */
+ for (i = 0; i < n_ops; i++) {
+ opsp[i] = &ops[i];
+ }
+ dpif_operate(udpif->dpif, opsp, n_ops);
+
list_move(&fmb->upcalls, upcalls);
if (fmb->reval_seq != seq_read(udpif->reval_seq)) {
/* Make the packet resemble the flow, so that it gets sent to
* an OpenFlow controller properly, so that it looks correct
* for sFlow, and so that flow_extract() will get the correct
- * vlan_tci if it is called on 'packet'.
- *
- * The allocated space inside 'packet' probably also contains
- * 'key', that is, both 'packet' and 'key' are probably part of
- * a struct dpif_upcall (see the large comment on that
- * structure definition), so pushing data on 'packet' is in
- * general not a good idea since it could overwrite 'key' or
- * free it as a side effect. However, it's OK in this special
- * case because we know that 'packet' is inside a Netlink
- * attribute: pushing 4 bytes will just overwrite the 4-byte
- * "struct nlattr", which is fine since we don't need that
- * header anymore. */
+ * vlan_tci if it is called on 'packet'. */
eth_push_vlan(packet, flow->vlan_tci);
}
/* We can't reproduce 'key' from 'flow'. */