/*
- * Copyright (c) 2009, 2010 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <netinet/in.h>
#include <sys/socket.h>
#include <net/if.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "hmap.h"
#include "list.h"
#include "netdev.h"
+#include "netlink.h"
#include "odp-util.h"
#include "ofp-print.h"
#include "ofpbuf.h"
#include "packets.h"
#include "poll-loop.h"
-#include "queue.h"
#include "shash.h"
#include "timeval.h"
#include "util.h"
bool destroyed;
bool drop_frags; /* Drop all IP fragments, if true. */
- struct ovs_queue queues[N_QUEUES]; /* Messages queued for dpif_recv(). */
+ struct list queues[N_QUEUES]; /* Contain ofpbufs queued for dpif_recv(). */
+ size_t queue_len[N_QUEUES]; /* Number of packets in each queue. */
struct hmap flow_table; /* Flow table. */
/* Statistics. */
uint16_t tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */
/* Actions. */
- union odp_action *actions;
- unsigned int n_actions;
+ struct nlattr *actions;
+ size_t actions_len;
};
/* Interface to netdev-based datapath. */
static int dpif_netdev_open(const struct dpif_class *, const char *name,
bool create, struct dpif **);
static int dp_netdev_output_control(struct dp_netdev *, const struct ofpbuf *,
- int queue_no, int port_no, uint32_t arg);
+ int queue_no, int port_no, uint64_t arg);
static int dp_netdev_execute_actions(struct dp_netdev *,
struct ofpbuf *, struct flow *,
- const union odp_action *, int n);
+ const struct nlattr *actions,
+ size_t actions_len);
static struct dpif_class dpif_dummy_class;
dp->open_cnt = 0;
dp->drop_frags = false;
for (i = 0; i < N_QUEUES; i++) {
- queue_init(&dp->queues[i]);
+ list_init(&dp->queues[i]);
}
hmap_init(&dp->flow_table);
list_init(&dp->port_list);
do_del_port(dp, port->port_no);
}
for (i = 0; i < N_QUEUES; i++) {
- queue_destroy(&dp->queues[i]);
+ ofpbuf_list_delete(&dp->queues[i]);
}
hmap_destroy(&dp->flow_table);
free(dp->name);
odp_flow->stats.tcp_flags = TCP_FLAGS(flow->tcp_ctl);
odp_flow->stats.reserved = 0;
odp_flow->stats.error = 0;
- if (odp_flow->n_actions > 0) {
- unsigned int n = MIN(odp_flow->n_actions, flow->n_actions);
+ if (odp_flow->actions_len > 0) {
memcpy(odp_flow->actions, flow->actions,
- n * sizeof *odp_flow->actions);
- odp_flow->n_actions = flow->n_actions;
+ MIN(odp_flow->actions_len, flow->actions_len));
+ odp_flow->actions_len = flow->actions_len;
}
if (query_flags & ODPFF_ZERO_TCP_FLAGS) {
}
static int
-dpif_netdev_validate_actions(const union odp_action *actions, int n_actions,
- bool *mutates)
+dpif_netdev_validate_actions(const struct nlattr *actions,
+ size_t actions_len, bool *mutates)
{
- unsigned int i;
+ const struct nlattr *a;
+ unsigned int left;
*mutates = false;
- for (i = 0; i < n_actions; i++) {
- const union odp_action *a = &actions[i];
- switch (a->type) {
+ NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
+ uint16_t type = nl_attr_type(a);
+ int len = odp_action_len(type);
+
+ if (len != nl_attr_get_size(a)) {
+ return EINVAL;
+ }
+
+ switch (type) {
case ODPAT_OUTPUT:
- if (a->output.port >= MAX_PORTS) {
+ if (nl_attr_get_u32(a) >= MAX_PORTS) {
return EINVAL;
}
break;
case ODPAT_CONTROLLER:
+ case ODPAT_DROP_SPOOFED_ARP:
break;
case ODPAT_SET_DL_TCI:
*mutates = true;
- if (a->dl_tci.tci & htons(VLAN_CFI)) {
+ if (nl_attr_get_be16(a) & htons(VLAN_CFI)) {
return EINVAL;
}
break;
case ODPAT_SET_NW_TOS:
*mutates = true;
- if (a->nw_tos.nw_tos & IP_ECN_MASK) {
+ if (nl_attr_get_u8(a) & IP_ECN_MASK) {
return EINVAL;
}
break;
*mutates = true;
break;
+ case ODPAT_SET_TUNNEL:
+ case ODPAT_SET_PRIORITY:
+ case ODPAT_POP_PRIORITY:
default:
return EOPNOTSUPP;
}
/* Validates the actions carried in 'odp_flow' and, if they pass, replaces the
 * actions stored in 'flow' with a private copy of them.
 *
 * Returns 0 on success.  If dpif_netdev_validate_actions() reports an error,
 * that error is returned and 'flow' is not modified.
 *
 * 'mutates' exists only because the validator requires an output argument;
 * its value is not used here. */
static int
set_flow_actions(struct dp_netdev_flow *flow, struct odp_flow *odp_flow)
{
- size_t n_bytes;
bool mutates;
int error;
- if (odp_flow->n_actions >= 4096 / sizeof *odp_flow->actions) {
- return EINVAL;
- }
error = dpif_netdev_validate_actions(odp_flow->actions,
- odp_flow->n_actions, &mutates);
+ odp_flow->actions_len, &mutates);
if (error) {
return error;
}
/* In the new code 'actions_len' is used directly as the byte count for both
 * the reallocation and the copy (the old code multiplied an element count by
 * the element size). */
- n_bytes = odp_flow->n_actions * sizeof *flow->actions;
- flow->actions = xrealloc(flow->actions, n_bytes);
- flow->n_actions = odp_flow->n_actions;
- memcpy(flow->actions, odp_flow->actions, n_bytes);
+ flow->actions = xrealloc(flow->actions, odp_flow->actions_len);
+ flow->actions_len = odp_flow->actions_len;
+ memcpy(flow->actions, odp_flow->actions, odp_flow->actions_len);
return 0;
}
static int
dpif_netdev_execute(struct dpif *dpif,
- const union odp_action actions[], int n_actions,
+ const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *packet)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
return EINVAL;
}
- error = dpif_netdev_validate_actions(actions, n_actions, &mutates);
+ error = dpif_netdev_validate_actions(actions, actions_len, &mutates);
if (error) {
return error;
}
/* We need a deep copy of 'packet' since we're going to modify its
* data. */
ofpbuf_init(&copy, DP_NETDEV_HEADROOM + packet->size);
- copy.data = (char*)copy.base + DP_NETDEV_HEADROOM;
+ ofpbuf_reserve(&copy, DP_NETDEV_HEADROOM);
ofpbuf_put(&copy, packet->data, packet->size);
} else {
/* We still need a shallow copy of 'packet', even though we won't
copy = *packet;
}
flow_extract(&copy, 0, -1, &key);
- error = dp_netdev_execute_actions(dp, &copy, &key, actions, n_actions);
+ error = dp_netdev_execute_actions(dp, &copy, &key, actions, actions_len);
if (mutates) {
ofpbuf_uninit(&copy);
}
}
}
/* Scans the queues of the datapath behind 'dpif' in ascending index order and
 * returns the index of the first one that is non-empty and whose bit
 * (1u << index) is set in 'mask'.  Returns -1 if no queue qualifies.
 *
 * Before this change the function returned a pointer to the queue (or NULL);
 * it now returns the index so that callers can also reach the matching
 * 'queue_len' counter.
 *
 * NOTE(review): the declarations of 'dp' and 'mask' are not visible in this
 * excerpt. */
-static struct ovs_queue *
+static int
find_nonempty_queue(struct dpif *dpif)
{
struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
int i;
for (i = 0; i < N_QUEUES; i++) {
- struct ovs_queue *q = &dp->queues[i];
- if (q->n && mask & (1u << i)) {
- return q;
/* '&' binds tighter than '&&', so the test reads:
 * queue is non-empty AND (mask has bit i set). */
+ struct list *queue = &dp->queues[i];
+ if (!list_is_empty(queue) && mask & (1u << i)) {
+ return i;
}
}
- return NULL;
+ return -1;
}
static int
dpif_netdev_recv(struct dpif *dpif, struct ofpbuf **bufp)
{
- struct ovs_queue *q = find_nonempty_queue(dpif);
- if (q) {
- *bufp = queue_pop_head(q);
+ int queue_idx = find_nonempty_queue(dpif);
+ if (queue_idx >= 0) {
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+
+ *bufp = ofpbuf_from_list(list_pop_front(&dp->queues[queue_idx]));
+ dp->queue_len[queue_idx]--;
+
return 0;
} else {
return EAGAIN;
static void
dpif_netdev_recv_wait(struct dpif *dpif)
{
- struct ovs_queue *q = find_nonempty_queue(dpif);
- if (q) {
+ if (find_nonempty_queue(dpif) >= 0) {
poll_immediate_wake();
} else {
/* No messages ready to be received, and dp_wait() will ensure that we
if (flow) {
dp_netdev_flow_used(flow, &key, packet);
dp_netdev_execute_actions(dp, packet, &key,
- flow->actions, flow->n_actions);
+ flow->actions, flow->actions_len);
dp->n_hit++;
} else {
dp->n_missed++;
int error;
/* Reset packet contents. */
- packet.data = (char*)packet.base + DP_NETDEV_HEADROOM;
- packet.size = 0;
+ ofpbuf_clear(&packet);
+ ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
error = netdev_recv(port->netdev, &packet);
if (!error) {
dp_netdev_port_input(dp, port, &packet);
} else if (error != EAGAIN && error != EOPNOTSUPP) {
- struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
netdev_get_name(port->netdev), strerror(error));
}
memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN);
tmp.eth_type = veh->veth_next_type;
- packet->size -= VLAN_HEADER_LEN;
- packet->data = (char*)packet->data + VLAN_HEADER_LEN;
+ ofpbuf_pull(packet, VLAN_HEADER_LEN);
packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN;
memcpy(packet->data, &tmp, sizeof tmp);
}
}
/* If 'packet' is an IP packet (as determined by is_ip()), overwrites its IP
 * source or destination address with the 32-bit value carried in Netlink
 * attribute 'a'.  An attribute of type ODPAT_SET_NW_SRC selects the source
 * address; any other type selects the destination address.
 *
 * The IP header checksum is updated incrementally, and so is the TCP or UDP
 * checksum when 'key' says the packet is TCP or UDP and 'packet->l7' is set.
 * Non-IP packets are left untouched. */
static void
-dp_netdev_set_nw_addr(struct ofpbuf *packet, struct flow *key,
- const struct odp_action_nw_addr *a)
+dp_netdev_set_nw_addr(struct ofpbuf *packet, const struct flow *key,
+ const struct nlattr *a)
{
if (is_ip(packet, key)) {
struct ip_header *nh = packet->l3;
+ ovs_be32 ip = nl_attr_get_be32(a);
+ uint16_t type = nl_attr_type(a);
uint32_t *field;
- field = a->type == ODPAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst;
+ field = type == ODPAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst;
/* The transport checksum is adjusted first, while '*field' still holds the
 * old address. */
if (key->nw_proto == IP_TYPE_TCP && packet->l7) {
struct tcp_header *th = packet->l4;
- th->tcp_csum = recalc_csum32(th->tcp_csum, *field, a->nw_addr);
+ th->tcp_csum = recalc_csum32(th->tcp_csum, *field, ip);
} else if (key->nw_proto == IP_TYPE_UDP && packet->l7) {
struct udp_header *uh = packet->l4;
/* A UDP checksum field of 0 is left alone, and a recomputed value of 0 is
 * stored as 0xffff (in UDP a zero checksum field means "no checksum"; see
 * RFC 768). */
if (uh->udp_csum) {
- uh->udp_csum = recalc_csum32(uh->udp_csum, *field, a->nw_addr);
+ uh->udp_csum = recalc_csum32(uh->udp_csum, *field, ip);
if (!uh->udp_csum) {
uh->udp_csum = 0xffff;
}
}
}
/* Finally fix up the IP header checksum and store the new address. */
- nh->ip_csum = recalc_csum32(nh->ip_csum, *field, a->nw_addr);
- *field = a->nw_addr;
+ nh->ip_csum = recalc_csum32(nh->ip_csum, *field, ip);
+ *field = ip;
}
}
/* If 'packet' is an IP packet (as determined by is_ip()), replaces its IP TOS
 * byte with 'nw_tos' OR'd with the ECN bits (IP_ECN_MASK) already present in
 * the packet, and incrementally updates the IP header checksum to match.
 * Non-IP packets are left untouched.
 *
 * The new code computes the checksum delta against 'new', the byte that is
 * actually written, whereas the removed line used the action's raw TOS value
 * without the preserved ECN bits. */
static void
-dp_netdev_set_nw_tos(struct ofpbuf *packet, struct flow *key,
- const struct odp_action_nw_tos *a)
+dp_netdev_set_nw_tos(struct ofpbuf *packet, const struct flow *key,
+ uint8_t nw_tos)
{
if (is_ip(packet, key)) {
struct ip_header *nh = packet->l3;
uint8_t *field = &nh->ip_tos;
/* Set the DSCP bits and preserve the ECN bits. */
- uint8_t new = a->nw_tos | (nh->ip_tos & IP_ECN_MASK);
+ uint8_t new = nw_tos | (nh->ip_tos & IP_ECN_MASK);
/* Old and new TOS bytes are widened to 16 bits and passed through htons()
 * before being handed to recalc_csum16(). */
nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field),
- htons((uint16_t)a->nw_tos));
+ htons((uint16_t) new));
*field = new;
}
}
static void
-dp_netdev_set_tp_port(struct ofpbuf *packet, struct flow *key,
- const struct odp_action_tp_port *a)
+dp_netdev_set_tp_port(struct ofpbuf *packet, const struct flow *key,
+ const struct nlattr *a)
{
if (is_ip(packet, key)) {
+ uint16_t type = nl_attr_type(a);
+ ovs_be16 port = nl_attr_get_be16(a);
uint16_t *field;
+
if (key->nw_proto == IPPROTO_TCP && packet->l7) {
struct tcp_header *th = packet->l4;
- field = a->type == ODPAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst;
- th->tcp_csum = recalc_csum16(th->tcp_csum, *field, a->tp_port);
- *field = a->tp_port;
+ field = type == ODPAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst;
+ th->tcp_csum = recalc_csum16(th->tcp_csum, *field, port);
+ *field = port;
} else if (key->nw_proto == IPPROTO_UDP && packet->l7) {
struct udp_header *uh = packet->l4;
- field = a->type == ODPAT_SET_TP_SRC ? &uh->udp_src : &uh->udp_dst;
- uh->udp_csum = recalc_csum16(uh->udp_csum, *field, a->tp_port);
- *field = a->tp_port;
+ field = type == ODPAT_SET_TP_SRC ? &uh->udp_src : &uh->udp_dst;
+ uh->udp_csum = recalc_csum16(uh->udp_csum, *field, port);
+ *field = port;
} else {
return;
}
/* Queues a message for 'packet' on 'dp->queues[queue_no]', tagged with
 * 'port_no' and 'arg' in its odp_msg header, and bumps the matching
 * 'dp->queue_len' counter.
 *
 * Returns 0 on success.  If the queue already holds MAX_QUEUE_LEN messages,
 * increments 'dp->n_lost' and returns ENOBUFS without queuing anything.
 *
 * NOTE(review): the code that allocates 'msg' and sets up 'header' (and
 * assigns 'msg_size') is not visible in this excerpt. */
static int
dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet,
- int queue_no, int port_no, uint32_t arg)
+ int queue_no, int port_no, uint64_t arg)
{
- struct ovs_queue *q = &dp->queues[queue_no];
struct odp_msg *header;
struct ofpbuf *msg;
size_t msg_size;
/* The queue length is now tracked in 'dp->queue_len[]' rather than inside
 * the queue object itself. */
- if (q->n >= MAX_QUEUE_LEN) {
+ if (dp->queue_len[queue_no] >= MAX_QUEUE_LEN) {
dp->n_lost++;
return ENOBUFS;
}
header->port = port_no;
header->arg = arg;
/* The packet's bytes are appended to 'msg' via ofpbuf_put(). */
ofpbuf_put(msg, packet->data, packet->size);
- queue_push_tail(q, msg);
+ list_push_back(&dp->queues[queue_no], &msg->list_node);
+ dp->queue_len[queue_no]++;
return 0;
}
static int
dp_netdev_execute_actions(struct dp_netdev *dp,
struct ofpbuf *packet, struct flow *key,
- const union odp_action *actions, int n_actions)
+ const struct nlattr *actions,
+ size_t actions_len)
{
- int i;
- for (i = 0; i < n_actions; i++) {
- const union odp_action *a = &actions[i];
+ const struct nlattr *a;
+ unsigned int left;
- switch (a->type) {
+ NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
+ switch (nl_attr_type(a)) {
case ODPAT_OUTPUT:
- dp_netdev_output_port(dp, packet, a->output.port);
+ dp_netdev_output_port(dp, packet, nl_attr_get_u32(a));
break;
case ODPAT_CONTROLLER:
dp_netdev_output_control(dp, packet, _ODPL_ACTION_NR,
- key->in_port, a->controller.arg);
+ key->in_port, nl_attr_get_u64(a));
break;
case ODPAT_SET_DL_TCI:
- dp_netdev_set_dl_tci(packet, a->dl_tci.tci);
+ dp_netdev_set_dl_tci(packet, nl_attr_get_be16(a));
break;
case ODPAT_STRIP_VLAN:
break;
case ODPAT_SET_DL_SRC:
- dp_netdev_set_dl_src(packet, a->dl_addr.dl_addr);
+ dp_netdev_set_dl_src(packet, nl_attr_get_unspec(a, ETH_ADDR_LEN));
break;
case ODPAT_SET_DL_DST:
- dp_netdev_set_dl_dst(packet, a->dl_addr.dl_addr);
+ dp_netdev_set_dl_dst(packet, nl_attr_get_unspec(a, ETH_ADDR_LEN));
break;
case ODPAT_SET_NW_SRC:
case ODPAT_SET_NW_DST:
- dp_netdev_set_nw_addr(packet, key, &a->nw_addr);
+ dp_netdev_set_nw_addr(packet, key, a);
break;
case ODPAT_SET_NW_TOS:
- dp_netdev_set_nw_tos(packet, key, &a->nw_tos);
+ dp_netdev_set_nw_tos(packet, key, nl_attr_get_u8(a));
break;
case ODPAT_SET_TP_SRC:
case ODPAT_SET_TP_DST:
- dp_netdev_set_tp_port(packet, key, &a->tp_port);
+ dp_netdev_set_tp_port(packet, key, a);
break;
case ODPAT_DROP_SPOOFED_ARP: