2 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-linux.h"
27 #include <linux/types.h>
28 #include <linux/ethtool.h>
29 #include <linux/pkt_sched.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/sockios.h>
33 #include <sys/ioctl.h>
37 #include "dpif-provider.h"
39 #include "netdev-vport.h"
40 #include "netlink-socket.h"
44 #include "openvswitch/tunnel.h"
46 #include "poll-loop.h"
47 #include "rtnetlink.h"
48 #include "rtnetlink-link.h"
51 #include "unaligned.h"
55 VLOG_DEFINE_THIS_MODULE(dpif_linux);
57 struct dpif_linux_dp {
58 /* Generic Netlink header. */
61 /* struct odp_header. */
65 const char *name; /* ODP_DP_ATTR_NAME. */
66 struct odp_stats stats; /* ODP_DP_ATTR_STATS. */
67 enum odp_frag_handling ipv4_frags; /* ODP_DP_ATTR_IPV4_FRAGS. */
68 const uint32_t *sampling; /* ODP_DP_ATTR_SAMPLING. */
69 uint32_t mcgroups[DPIF_N_UC_TYPES]; /* ODP_DP_ATTR_MCGROUPS. */
72 static void dpif_linux_dp_init(struct dpif_linux_dp *);
73 static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *,
74 const struct ofpbuf *);
75 static void dpif_linux_dp_dump_start(struct nl_dump *);
76 static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
77 struct dpif_linux_dp *reply,
78 struct ofpbuf **bufp);
79 static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
80 struct ofpbuf **bufp);
82 struct dpif_linux_flow {
83 /* ioctl command argument. */
86 /* struct odp_flow header. */
87 unsigned int nlmsg_flags;
92 * The 'stats', 'used', and 'state' members point to 64-bit data that might
93 * only be aligned on 32-bit boundaries, so get_unaligned_u64() should be
94 * used to access their values. */
95 const struct nlattr *key; /* ODP_FLOW_ATTR_KEY. */
97 const struct nlattr *actions; /* ODP_FLOW_ATTR_ACTIONS. */
99 const struct odp_flow_stats *stats; /* ODP_FLOW_ATTR_STATS. */
100 const uint8_t *tcp_flags; /* ODP_FLOW_ATTR_TCP_FLAGS. */
101 const uint64_t *used; /* ODP_FLOW_ATTR_USED. */
102 bool clear; /* ODP_FLOW_ATTR_CLEAR. */
103 const uint64_t *state; /* ODP_FLOW_ATTR_STATE. */
106 static void dpif_linux_flow_init(struct dpif_linux_flow *);
107 static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
108 struct dpif_linux_flow *reply,
109 struct ofpbuf **bufp);
110 static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
111 struct dpif_flow_stats *);
113 /* Datapath interface for the openvswitch Linux kernel module. */
118 /* Multicast group messages. */
119 struct nl_sock *mc_sock;
120 uint32_t mcgroups[DPIF_N_UC_TYPES];
121 unsigned int listen_mask;
123 /* Used by dpif_linux_get_all_names(). */
127 /* Change notification. */
128 int local_ifindex; /* Ifindex of local port. */
129 struct shash changed_ports; /* Ports that have changed. */
130 struct rtnetlink_notifier port_notifier;
134 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
136 /* Generic Netlink family numbers for ODP. */
137 static int odp_datapath_family;
138 static int odp_packet_family;
140 /* Generic Netlink socket. */
141 static struct nl_sock *genl_sock;
143 static int dpif_linux_init(void);
144 static int open_dpif(const struct dpif_linux_dp *,
145 const struct dpif_linux_vport *local_vport,
147 static int get_openvswitch_major(void);
148 static int open_minor(int minor, int *fdp);
149 static int make_openvswitch_device(int minor, char **fnp);
150 static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
153 static struct dpif_linux *
154 dpif_linux_cast(const struct dpif *dpif)
156 dpif_assert_class(dpif, &dpif_linux_class);
157 return CONTAINER_OF(dpif, struct dpif_linux, dpif);
161 dpif_linux_enumerate(struct svec *all_dps)
168 error = dpif_linux_init();
173 /* Check that the Open vSwitch module is loaded. */
174 major = get_openvswitch_major();
179 dpif_linux_dp_dump_start(&dump);
180 while (nl_dump_next(&dump, &msg)) {
181 struct dpif_linux_dp dp;
183 if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) {
184 svec_add(all_dps, dp.name);
187 return nl_dump_done(&dump);
191 dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
192 bool create, struct dpif **dpifp)
194 struct dpif_linux_vport vport_request, vport;
195 struct dpif_linux_dp dp_request, dp;
200 error = dpif_linux_init();
205 minor = !strncmp(name, "dp", 2)
206 && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1;
208 /* Create or look up datapath. */
209 dpif_linux_dp_init(&dp_request);
210 dp_request.cmd = create ? ODP_DP_CMD_NEW : ODP_DP_CMD_GET;
211 dp_request.dp_idx = minor;
212 dp_request.name = minor < 0 ? name : NULL;
213 error = dpif_linux_dp_transact(&dp_request, &dp, &buf);
217 ofpbuf_delete(buf); /* Pointers inside 'dp' are now invalid! */
219 /* Look up local port. */
220 dpif_linux_vport_init(&vport_request);
221 vport_request.cmd = ODP_VPORT_GET;
222 vport_request.dp_idx = dp.dp_idx;
223 vport_request.port_no = ODPP_LOCAL;
224 vport_request.name = minor < 0 ? name : NULL;
225 error = dpif_linux_vport_transact(&vport_request, &vport, &buf);
228 } else if (vport.port_no != ODPP_LOCAL) {
229 /* This is an Open vSwitch device but not the local port. We
230 * intentionally support only using the name of the local port as the
231 * name of a datapath; otherwise, it would be too difficult to
232 * enumerate all the names of a datapath. */
235 error = open_dpif(&dp, &vport, dpifp);
242 open_dpif(const struct dpif_linux_dp *dp,
243 const struct dpif_linux_vport *local_vport, struct dpif **dpifp)
245 int dp_idx = local_vport->dp_idx;
246 struct dpif_linux *dpif;
252 error = open_minor(dp_idx, &fd);
257 dpif = xmalloc(sizeof *dpif);
258 error = rtnetlink_link_notifier_register(&dpif->port_notifier,
259 dpif_linux_port_changed, dpif);
264 name = xasprintf("dp%d", dp_idx);
265 dpif_init(&dpif->dpif, &dpif_linux_class, name, dp_idx, dp_idx);
269 dpif->mc_sock = NULL;
270 for (i = 0; i < DPIF_N_UC_TYPES; i++) {
271 dpif->mcgroups[i] = dp->mcgroups[i];
273 dpif->listen_mask = 0;
274 dpif->local_ifname = xstrdup(local_vport->name);
275 dpif->local_ifindex = local_vport->ifindex;
276 dpif->minor = dp_idx;
277 shash_init(&dpif->changed_ports);
278 dpif->change_error = false;
279 *dpifp = &dpif->dpif;
291 dpif_linux_close(struct dpif *dpif_)
293 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
294 rtnetlink_link_notifier_unregister(&dpif->port_notifier);
295 shash_destroy(&dpif->changed_ports);
296 free(dpif->local_ifname);
302 dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names)
304 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
306 svec_add_nocopy(all_names, xasprintf("dp%d", dpif->minor));
307 svec_add(all_names, dpif->local_ifname);
312 dpif_linux_destroy(struct dpif *dpif_)
314 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
315 struct dpif_linux_dp dp;
317 dpif_linux_dp_init(&dp);
318 dp.cmd = ODP_DP_CMD_DEL;
319 dp.dp_idx = dpif->minor;
320 return dpif_linux_dp_transact(&dp, NULL, NULL);
324 dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
326 struct dpif_linux_dp dp;
330 error = dpif_linux_dp_get(dpif_, &dp, &buf);
339 dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
341 struct dpif_linux_dp dp;
345 error = dpif_linux_dp_get(dpif_, &dp, &buf);
347 *drop_fragsp = dp.ipv4_frags == ODP_DP_FRAG_DROP;
354 dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
356 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
357 struct dpif_linux_dp dp;
359 dpif_linux_dp_init(&dp);
360 dp.cmd = ODP_DP_CMD_SET;
361 dp.dp_idx = dpif->minor;
362 dp.ipv4_frags = drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO;
363 return dpif_linux_dp_transact(&dp, NULL, NULL);
367 dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
370 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
371 const char *name = netdev_get_name(netdev);
372 const char *type = netdev_get_type(netdev);
373 struct dpif_linux_vport request, reply;
374 const struct ofpbuf *options;
378 dpif_linux_vport_init(&request);
379 request.cmd = ODP_VPORT_NEW;
380 request.dp_idx = dpif->minor;
381 request.type = netdev_vport_get_vport_type(netdev);
382 if (request.type == ODP_VPORT_TYPE_UNSPEC) {
383 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
384 "unsupported type `%s'",
385 dpif_name(dpif_), name, type);
390 options = netdev_vport_get_options(netdev);
391 if (options && options->size) {
392 request.options = options->data;
393 request.options_len = options->size;
396 error = dpif_linux_vport_transact(&request, &reply, &buf);
398 *port_nop = reply.port_no;
406 dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no)
408 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
409 struct dpif_linux_vport vport;
411 dpif_linux_vport_init(&vport);
412 vport.cmd = ODP_VPORT_DEL;
413 vport.dp_idx = dpif->minor;
414 vport.port_no = port_no;
415 return dpif_linux_vport_transact(&vport, NULL, NULL);
419 dpif_linux_port_query__(const struct dpif *dpif, uint32_t port_no,
420 const char *port_name, struct dpif_port *dpif_port)
422 struct dpif_linux_vport request;
423 struct dpif_linux_vport reply;
427 dpif_linux_vport_init(&request);
428 request.cmd = ODP_VPORT_GET;
429 request.dp_idx = dpif_linux_cast(dpif)->minor;
430 request.port_no = port_no;
431 request.name = port_name;
433 error = dpif_linux_vport_transact(&request, &reply, &buf);
435 dpif_port->name = xstrdup(reply.name);
436 dpif_port->type = xstrdup(netdev_vport_get_netdev_type(&reply));
437 dpif_port->port_no = reply.port_no;
/* Looks up the port numbered 'port_no' on 'dpif' and stores the result in
 * '*dpif_port'.  This is dpif_linux_port_query__() with no port name. */
static int
dpif_linux_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
                                struct dpif_port *dpif_port)
{
    const char *no_name = NULL;
    int error;

    error = dpif_linux_port_query__(dpif, port_no, no_name, dpif_port);
    return error;
}
/* Looks up the port named 'devname' on 'dpif' and stores the result in
 * '*dpif_port'.  This is dpif_linux_port_query__() with 'devname' as the name
 * and 0 as the port number argument. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
                              struct dpif_port *dpif_port)
{
    int error;

    error = dpif_linux_port_query__(dpif, 0, devname, dpif_port);
    return error;
}
458 dpif_linux_get_max_ports(const struct dpif *dpif OVS_UNUSED)
460 /* If the datapath increases its range of supported ports, then it should
461 * start reporting that. */
466 dpif_linux_flow_flush(struct dpif *dpif_)
468 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
469 return ioctl(dpif->fd, ODP_FLOW_FLUSH, dpif->minor) ? errno : 0;
472 struct dpif_linux_port_state {
478 dpif_linux_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
480 *statep = xzalloc(sizeof(struct dpif_linux_port_state));
485 dpif_linux_port_dump_next(const struct dpif *dpif, void *state_,
486 struct dpif_port *dpif_port)
488 struct dpif_linux_port_state *state = state_;
489 struct dpif_linux_vport request, reply;
493 ofpbuf_delete(state->buf);
496 dpif_linux_vport_init(&request);
497 request.cmd = ODP_VPORT_DUMP;
498 request.dp_idx = dpif_linux_cast(dpif)->minor;
499 request.port_no = state->next;
500 error = dpif_linux_vport_transact(&request, &reply, &buf);
502 return error == ENODEV ? EOF : error;
504 dpif_port->name = (char *) reply.name;
505 dpif_port->type = (char *) netdev_vport_get_netdev_type(&reply);
506 dpif_port->port_no = reply.port_no;
508 state->next = reply.port_no + 1;
514 dpif_linux_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
516 struct dpif_linux_port_state *state = state_;
517 ofpbuf_delete(state->buf);
523 dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
525 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
527 if (dpif->change_error) {
528 dpif->change_error = false;
529 shash_clear(&dpif->changed_ports);
531 } else if (!shash_is_empty(&dpif->changed_ports)) {
532 struct shash_node *node = shash_first(&dpif->changed_ports);
533 *devnamep = shash_steal(&dpif->changed_ports, node);
541 dpif_linux_port_poll_wait(const struct dpif *dpif_)
543 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
544 if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) {
545 poll_immediate_wake();
547 rtnetlink_link_notifier_wait();
552 dpif_linux_flow_get(const struct dpif *dpif_,
553 const struct nlattr *key, size_t key_len,
554 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
556 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
557 struct dpif_linux_flow request, reply;
561 dpif_linux_flow_init(&request);
562 request.cmd = ODP_FLOW_GET;
563 request.dp_idx = dpif->minor;
565 request.key_len = key_len;
566 error = dpif_linux_flow_transact(&request, &reply, &buf);
569 dpif_linux_flow_get_stats(&reply, stats);
572 buf->data = (void *) reply.actions;
573 buf->size = reply.actions_len;
583 dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags,
584 const struct nlattr *key, size_t key_len,
585 const struct nlattr *actions, size_t actions_len,
586 struct dpif_flow_stats *stats)
588 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
589 struct dpif_linux_flow request, reply;
593 dpif_linux_flow_init(&request);
594 request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_NEW : ODP_FLOW_SET;
595 request.dp_idx = dpif->minor;
597 request.key_len = key_len;
598 request.actions = actions;
599 request.actions_len = actions_len;
600 if (flags & DPIF_FP_ZERO_STATS) {
601 request.clear = true;
603 request.nlmsg_flags = flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
604 error = dpif_linux_flow_transact(&request,
605 stats ? &reply : NULL,
606 stats ? &buf : NULL);
607 if (!error && stats) {
608 dpif_linux_flow_get_stats(&reply, stats);
615 dpif_linux_flow_del(struct dpif *dpif_,
616 const struct nlattr *key, size_t key_len,
617 struct dpif_flow_stats *stats)
619 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
620 struct dpif_linux_flow request, reply;
624 dpif_linux_flow_init(&request);
625 request.cmd = ODP_FLOW_DEL;
626 request.dp_idx = dpif->minor;
628 request.key_len = key_len;
629 error = dpif_linux_flow_transact(&request,
630 stats ? &reply : NULL,
631 stats ? &buf : NULL);
632 if (!error && stats) {
633 dpif_linux_flow_get_stats(&reply, stats);
639 struct dpif_linux_flow_state {
640 struct dpif_linux_flow flow;
642 struct dpif_flow_stats stats;
646 dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
648 *statep = xzalloc(sizeof(struct dpif_linux_flow_state));
653 dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
654 const struct nlattr **key, size_t *key_len,
655 const struct nlattr **actions, size_t *actions_len,
656 const struct dpif_flow_stats **stats)
658 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
659 struct dpif_linux_flow_state *state = state_;
660 struct ofpbuf *old_buf = state->buf;
661 struct dpif_linux_flow request;
664 dpif_linux_flow_init(&request);
665 request.cmd = ODP_FLOW_DUMP;
666 request.dp_idx = dpif->minor;
667 request.state = state->flow.state;
668 error = dpif_linux_flow_transact(&request, &state->flow, &state->buf);
669 ofpbuf_delete(old_buf);
673 *key = state->flow.key;
674 *key_len = state->flow.key_len;
677 *actions = state->flow.actions;
678 *actions_len = state->flow.actions_len;
681 dpif_linux_flow_get_stats(&state->flow, &state->stats);
682 *stats = &state->stats;
685 return error == ENODEV ? EOF : error;
689 dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
691 struct dpif_linux_flow_state *state = state_;
693 ofpbuf_delete(state->buf);
699 dpif_linux_execute(struct dpif *dpif_,
700 const struct nlattr *actions, size_t actions_len,
701 const struct ofpbuf *packet)
703 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
704 struct odp_header *execute;
708 buf = ofpbuf_new(128 + actions_len + packet->size);
710 nl_msg_put_genlmsghdr(buf, 0, odp_packet_family, NLM_F_REQUEST,
711 ODP_PACKET_CMD_EXECUTE, 1);
713 execute = ofpbuf_put_uninit(buf, sizeof *execute);
714 execute->dp_idx = dpif->minor;
716 nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, packet->data, packet->size);
717 nl_msg_put_unspec(buf, ODP_PACKET_ATTR_ACTIONS, actions, actions_len);
719 error = nl_sock_transact(genl_sock, buf, NULL);
725 dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask)
727 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
728 *listen_mask = dpif->listen_mask;
733 dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
735 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
739 if (listen_mask == dpif->listen_mask) {
741 } else if (!listen_mask) {
742 nl_sock_destroy(dpif->mc_sock);
743 dpif->mc_sock = NULL;
744 dpif->listen_mask = 0;
746 } else if (!dpif->mc_sock) {
747 error = nl_sock_create(NETLINK_GENERIC, &dpif->mc_sock);
753 /* Unsubscribe from old groups. */
754 for (i = 0; i < DPIF_N_UC_TYPES; i++) {
755 if (dpif->listen_mask & (1u << i)) {
756 nl_sock_leave_mcgroup(dpif->mc_sock, dpif->mcgroups[i]);
760 /* Update listen_mask. */
761 dpif->listen_mask = listen_mask;
763 /* Subscribe to new groups. */
765 for (i = 0; i < DPIF_N_UC_TYPES; i++) {
766 if (dpif->listen_mask & (1u << i)) {
769 retval = nl_sock_join_mcgroup(dpif->mc_sock, dpif->mcgroups[i]);
779 dpif_linux_get_sflow_probability(const struct dpif *dpif_,
780 uint32_t *probability)
782 struct dpif_linux_dp dp;
786 error = dpif_linux_dp_get(dpif_, &dp, &buf);
788 *probability = dp.sampling ? *dp.sampling : 0;
795 dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
797 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
798 struct dpif_linux_dp dp;
800 dpif_linux_dp_init(&dp);
801 dp.cmd = ODP_DP_CMD_SET;
802 dp.dp_idx = dpif->minor;
803 dp.sampling = &probability;
804 return dpif_linux_dp_transact(&dp, NULL, NULL);
808 dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
809 uint32_t queue_id, uint32_t *priority)
811 if (queue_id < 0xf000) {
812 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
820 parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
823 static const struct nl_policy odp_packet_policy[] = {
824 /* Always present. */
825 [ODP_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
826 .min_len = ETH_HEADER_LEN },
827 [ODP_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
829 /* ODP_PACKET_CMD_ACTION only. */
830 [ODP_PACKET_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true },
832 /* ODP_PACKET_CMD_SAMPLE only. */
833 [ODP_PACKET_ATTR_SAMPLE_POOL] = { .type = NL_A_U32, .optional = true },
834 [ODP_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
837 struct odp_header *odp_header;
838 struct nlattr *a[ARRAY_SIZE(odp_packet_policy)];
839 struct nlmsghdr *nlmsg;
840 struct genlmsghdr *genl;
844 ofpbuf_use_const(&b, buf->data, buf->size);
846 nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
847 genl = ofpbuf_try_pull(&b, sizeof *genl);
848 odp_header = ofpbuf_try_pull(&b, sizeof *odp_header);
849 if (!nlmsg || !genl || !odp_header
850 || nlmsg->nlmsg_type != odp_packet_family
851 || !nl_policy_parse(&b, 0, odp_packet_policy, a,
852 ARRAY_SIZE(odp_packet_policy))) {
856 type = (genl->cmd == ODP_PACKET_CMD_MISS ? DPIF_UC_MISS
857 : genl->cmd == ODP_PACKET_CMD_ACTION ? DPIF_UC_ACTION
858 : genl->cmd == ODP_PACKET_CMD_SAMPLE ? DPIF_UC_SAMPLE
864 memset(upcall, 0, sizeof *upcall);
866 upcall->packet = buf;
867 upcall->packet->data = (void *) nl_attr_get(a[ODP_PACKET_ATTR_PACKET]);
868 upcall->packet->size = nl_attr_get_size(a[ODP_PACKET_ATTR_PACKET]);
869 upcall->key = (void *) nl_attr_get(a[ODP_PACKET_ATTR_KEY]);
870 upcall->key_len = nl_attr_get_size(a[ODP_PACKET_ATTR_KEY]);
871 upcall->userdata = (a[ODP_PACKET_ATTR_USERDATA]
872 ? nl_attr_get_u64(a[ODP_PACKET_ATTR_USERDATA])
874 upcall->sample_pool = (a[ODP_PACKET_ATTR_SAMPLE_POOL]
875 ? nl_attr_get_u32(a[ODP_PACKET_ATTR_SAMPLE_POOL])
877 if (a[ODP_PACKET_ATTR_ACTIONS]) {
878 upcall->actions = (void *) nl_attr_get(a[ODP_PACKET_ATTR_ACTIONS]);
879 upcall->actions_len = nl_attr_get_size(a[ODP_PACKET_ATTR_ACTIONS]);
882 *dp_idx = odp_header->dp_idx;
888 dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
890 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
895 if (!dpif->mc_sock) {
899 for (i = 0; i < 50; i++) {
902 error = nl_sock_recv(dpif->mc_sock, &buf, false);
907 error = parse_odp_packet(buf, upcall, &dp_idx);
909 && dp_idx == dpif->minor
910 && dpif->listen_mask & (1u << upcall->type)) {
924 dpif_linux_recv_wait(struct dpif *dpif_)
926 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
928 nl_sock_wait(dpif->mc_sock, POLLIN);
933 dpif_linux_recv_purge(struct dpif *dpif_)
935 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
938 nl_sock_drain(dpif->mc_sock);
942 const struct dpif_class dpif_linux_class = {
946 dpif_linux_enumerate,
949 dpif_linux_get_all_names,
951 dpif_linux_get_stats,
952 dpif_linux_get_drop_frags,
953 dpif_linux_set_drop_frags,
956 dpif_linux_port_query_by_number,
957 dpif_linux_port_query_by_name,
958 dpif_linux_get_max_ports,
959 dpif_linux_port_dump_start,
960 dpif_linux_port_dump_next,
961 dpif_linux_port_dump_done,
962 dpif_linux_port_poll,
963 dpif_linux_port_poll_wait,
967 dpif_linux_flow_flush,
968 dpif_linux_flow_dump_start,
969 dpif_linux_flow_dump_next,
970 dpif_linux_flow_dump_done,
972 dpif_linux_recv_get_mask,
973 dpif_linux_recv_set_mask,
974 dpif_linux_get_sflow_probability,
975 dpif_linux_set_sflow_probability,
976 dpif_linux_queue_to_priority,
978 dpif_linux_recv_wait,
979 dpif_linux_recv_purge,
982 static int get_major(const char *target);
985 dpif_linux_init(void)
987 static int error = -1;
990 error = nl_lookup_genl_family(ODP_DATAPATH_FAMILY,
991 &odp_datapath_family);
993 error = nl_lookup_genl_family(ODP_PACKET_FAMILY,
997 error = nl_sock_create(NETLINK_GENERIC, &genl_sock);
1005 dpif_linux_is_internal_device(const char *name)
1007 struct dpif_linux_vport reply;
1011 error = dpif_linux_vport_get(name, &reply, &buf);
1014 } else if (error != ENODEV) {
1015 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
1016 name, strerror(error));
1019 return reply.type == ODP_VPORT_TYPE_INTERNAL;
1023 make_openvswitch_device(int minor, char **fnp)
1025 const char dirname[] = "/dev/net";
1033 major = get_openvswitch_major();
1037 dev = makedev(major, minor);
1039 sprintf(fn, "%s/dp%d", dirname, minor);
1040 if (!stat(fn, &s)) {
1041 if (!S_ISCHR(s.st_mode)) {
1042 VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
1044 } else if (s.st_rdev != dev) {
1045 VLOG_WARN_RL(&error_rl,
1046 "%s is device %u:%u but should be %u:%u, fixing",
1047 fn, major(s.st_rdev), minor(s.st_rdev),
1048 major(dev), minor(dev));
1053 VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
1054 fn, strerror(errno));
1057 } else if (errno == ENOENT) {
1058 if (stat(dirname, &s)) {
1059 if (errno == ENOENT) {
1060 if (mkdir(dirname, 0755)) {
1061 VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
1062 dirname, strerror(errno));
1066 VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
1067 dirname, strerror(errno));
1072 VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno));
1076 /* The device needs to be created. */
1077 if (mknod(fn, S_IFCHR | 0700, dev)) {
1078 VLOG_WARN_RL(&error_rl,
1079 "%s: creating character device %u:%u failed (%s)",
1080 fn, major(dev), minor(dev), strerror(errno));
/* Returns the major device number of the Open vSwitch device.  If it cannot
 * be determined, returns a negative errno value instead.
 *
 * A nonnegative result from get_major() is remembered in a static variable
 * and returned directly by later calls; while the remembered value is
 * negative, every call repeats the lookup. */
static int
get_openvswitch_major(void)
{
    static int cached_major = -1;

    if (cached_major >= 0) {
        return cached_major;
    }

    cached_major = get_major("openvswitch");
    return cached_major;
}
1102 get_major(const char *target)
1104 const char fn[] = "/proc/devices";
1109 file = fopen(fn, "r");
1111 VLOG_ERR("opening %s failed (%s)", fn, strerror(errno));
1115 for (ln = 1; fgets(line, sizeof line, file); ln++) {
1119 if (!strncmp(line, "Character", 9) || line[0] == '\0') {
1120 /* Nothing to do. */
1121 } else if (!strncmp(line, "Block", 5)) {
1122 /* We only want character devices, so skip the rest of the file. */
1124 } else if (sscanf(line, "%d %63s", &major, name)) {
1125 if (!strcmp(name, target)) {
1130 VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
1136 VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
1141 open_minor(int minor, int *fdp)
1146 error = make_openvswitch_device(minor, &fn);
1151 *fdp = open(fn, O_RDONLY | O_NONBLOCK);
1154 VLOG_WARN("%s: open failed (%s)", fn, strerror(error));
1163 dpif_linux_port_changed(const struct rtnetlink_link_change *change,
1166 struct dpif_linux *dpif = dpif_;
1169 if (change->master_ifindex == dpif->local_ifindex
1170 && (change->nlmsg_type == RTM_NEWLINK
1171 || change->nlmsg_type == RTM_DELLINK))
1173 /* Our datapath changed, either adding a new port or deleting an
1175 shash_add_once(&dpif->changed_ports, change->ifname, NULL);
1178 dpif->change_error = true;
1183 get_dp0_fd(int *dp0_fdp)
1185 static int dp0_fd = -1;
1190 error = open_minor(0, &fd);
1200 /* Parses the contents of 'buf', which contains a "struct odp_vport" followed
1201 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
1202 * positive errno value.
1204 * 'vport' will contain pointers into 'buf', so the caller should not free
1205 * 'buf' while 'vport' is still in use. */
1207 dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport,
1208 const struct ofpbuf *buf)
1210 static const struct nl_policy odp_vport_policy[] = {
1211 [ODP_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
1212 [ODP_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
1213 [ODP_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1214 [ODP_VPORT_ATTR_STATS] = { .type = NL_A_UNSPEC,
1215 .min_len = sizeof(struct rtnl_link_stats64),
1216 .max_len = sizeof(struct rtnl_link_stats64),
1218 [ODP_VPORT_ATTR_ADDRESS] = { .type = NL_A_UNSPEC,
1219 .min_len = ETH_ADDR_LEN,
1220 .max_len = ETH_ADDR_LEN,
1222 [ODP_VPORT_ATTR_MTU] = { .type = NL_A_U32, .optional = true },
1223 [ODP_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
1224 [ODP_VPORT_ATTR_IFINDEX] = { .type = NL_A_U32, .optional = true },
1225 [ODP_VPORT_ATTR_IFLINK] = { .type = NL_A_U32, .optional = true },
1228 struct odp_vport *odp_vport;
1229 struct nlattr *a[ARRAY_SIZE(odp_vport_policy)];
1231 dpif_linux_vport_init(vport);
1233 if (!nl_policy_parse(buf, sizeof *odp_vport, odp_vport_policy,
1234 a, ARRAY_SIZE(odp_vport_policy))) {
1237 odp_vport = buf->data;
1239 vport->dp_idx = odp_vport->dp_idx;
1240 vport->port_no = nl_attr_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1241 vport->type = nl_attr_get_u32(a[ODP_VPORT_ATTR_TYPE]);
1242 vport->name = nl_attr_get_string(a[ODP_VPORT_ATTR_NAME]);
1243 if (a[ODP_VPORT_ATTR_STATS]) {
1244 vport->stats = nl_attr_get(a[ODP_VPORT_ATTR_STATS]);
1246 if (a[ODP_VPORT_ATTR_ADDRESS]) {
1247 vport->address = nl_attr_get(a[ODP_VPORT_ATTR_ADDRESS]);
1249 if (a[ODP_VPORT_ATTR_MTU]) {
1250 vport->mtu = nl_attr_get_u32(a[ODP_VPORT_ATTR_MTU]);
1252 if (a[ODP_VPORT_ATTR_OPTIONS]) {
1253 vport->options = nl_attr_get(a[ODP_VPORT_ATTR_OPTIONS]);
1254 vport->options_len = nl_attr_get_size(a[ODP_VPORT_ATTR_OPTIONS]);
1256 if (a[ODP_VPORT_ATTR_IFINDEX]) {
1257 vport->ifindex = nl_attr_get_u32(a[ODP_VPORT_ATTR_IFINDEX]);
1259 if (a[ODP_VPORT_ATTR_IFLINK]) {
1260 vport->iflink = nl_attr_get_u32(a[ODP_VPORT_ATTR_IFLINK]);
1265 /* Appends to 'buf' (which must initially be empty) a "struct odp_vport"
1266 * followed by Netlink attributes corresponding to 'vport'. */
1268 dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport,
1271 struct odp_vport *odp_vport;
1273 ofpbuf_reserve(buf, sizeof odp_vport);
1275 if (vport->port_no != UINT32_MAX) {
1276 nl_msg_put_u32(buf, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
1279 if (vport->type != ODP_VPORT_TYPE_UNSPEC) {
1280 nl_msg_put_u32(buf, ODP_VPORT_ATTR_TYPE, vport->type);
1284 nl_msg_put_string(buf, ODP_VPORT_ATTR_NAME, vport->name);
1288 nl_msg_put_unspec(buf, ODP_VPORT_ATTR_STATS,
1289 vport->stats, sizeof *vport->stats);
1292 if (vport->address) {
1293 nl_msg_put_unspec(buf, ODP_VPORT_ATTR_ADDRESS,
1294 vport->address, ETH_ADDR_LEN);
1298 nl_msg_put_u32(buf, ODP_VPORT_ATTR_MTU, vport->mtu);
1301 if (vport->options) {
1302 nl_msg_put_nested(buf, ODP_VPORT_ATTR_OPTIONS,
1303 vport->options, vport->options_len);
1306 if (vport->ifindex) {
1307 nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFINDEX, vport->ifindex);
1310 if (vport->iflink) {
1311 nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFLINK, vport->iflink);
1314 odp_vport = ofpbuf_push_uninit(buf, sizeof *odp_vport);
1315 odp_vport->dp_idx = vport->dp_idx;
1316 odp_vport->len = buf->size;
1317 odp_vport->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
1320 /* Clears 'vport' to "empty" values. */
1322 dpif_linux_vport_init(struct dpif_linux_vport *vport)
1324 memset(vport, 0, sizeof *vport);
1325 vport->dp_idx = UINT32_MAX;
1326 vport->port_no = UINT32_MAX;
1329 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1330 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1331 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1332 * result of the command is expected to be an odp_vport also, which is decoded
1333 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1334 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1336 dpif_linux_vport_transact(const struct dpif_linux_vport *request,
1337 struct dpif_linux_vport *reply,
1338 struct ofpbuf **bufp)
1340 struct ofpbuf *buf = NULL;
1344 assert((reply != NULL) == (bufp != NULL));
1346 error = get_dp0_fd(&fd);
1351 buf = ofpbuf_new(1024);
1352 dpif_linux_vport_to_ofpbuf(request, buf);
1354 error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
1360 buf->size = ((struct odp_vport *) buf->data)->len;
1361 error = dpif_linux_vport_from_ofpbuf(reply, buf);
1374 memset(reply, 0, sizeof *reply);
1380 /* Obtains information about the kernel vport named 'name' and stores it into
1381 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
1382 * longer needed ('reply' will contain pointers into '*bufp'). */
1384 dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply,
1385 struct ofpbuf **bufp)
1387 struct dpif_linux_vport request;
1389 dpif_linux_vport_init(&request);
1390 request.cmd = ODP_VPORT_GET;
1391 request.name = name;
1393 return dpif_linux_vport_transact(&request, reply, bufp);
1396 /* Parses the contents of 'buf', which contains a "struct odp_header" followed
1397 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
1398 * positive errno value.
1400 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
1401 * while 'dp' is still in use. */
1403 dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
/* Policy for the attributes that follow the headers. A STATS payload has
 * to be exactly sizeof(struct odp_stats) bytes, because its min_len equals
 * its max_len. */
1405 static const struct nl_policy odp_datapath_policy[] = {
1406 [ODP_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1407 [ODP_DP_ATTR_STATS] = { .type = NL_A_UNSPEC,
1408 .min_len = sizeof(struct odp_stats),
1409 .max_len = sizeof(struct odp_stats),
1411 [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
1412 [ODP_DP_ATTR_SAMPLING] = { .type = NL_A_U32, .optional = true },
1413 [ODP_DP_ATTR_MCGROUPS] = { .type = NL_A_NESTED, .optional = true },
1416 struct nlattr *a[ARRAY_SIZE(odp_datapath_policy)];
1417 struct odp_header *odp_header;
1418 struct nlmsghdr *nlmsg;
1419 struct genlmsghdr *genl;
/* Reset 'dp' up front, before any parsing is attempted. */
1422 dpif_linux_dp_init(dp);
/* 'b' is set up over the same bytes as 'buf', and the three headers are
 * pulled off 'b' in order: Netlink header, Generic Netlink header, then
 * struct odp_header. 'buf' itself is const and is not modified. */
1424 ofpbuf_use_const(&b, buf->data, buf->size);
1425 nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
1426 genl = ofpbuf_try_pull(&b, sizeof *genl);
1427 odp_header = ofpbuf_try_pull(&b, sizeof *odp_header);
/* Validation fails if any of the three pulls produced a null pointer, if
 * the message type is not 'odp_datapath_family', or if the remaining
 * attributes do not satisfy the policy above. */
1428 if (!nlmsg || !genl || !odp_header
1429 || nlmsg->nlmsg_type != odp_datapath_family
1430 || !nl_policy_parse(&b, 0, odp_datapath_policy, a,
1431 ARRAY_SIZE(odp_datapath_policy))) {
1435 dp->cmd = genl->cmd;
1436 dp->dp_idx = odp_header->dp_idx;
/* ODP_DP_ATTR_NAME carries no '.optional' marker in the policy and is read
 * here without a null check, unlike the attributes handled below;
 * presumably a successful policy parse guarantees it is present --
 * confirm against nl_policy_parse(). */
1437 dp->name = nl_attr_get_string(a[ODP_DP_ATTR_NAME]);
1438 if (a[ODP_DP_ATTR_STATS]) {
1439 /* Can't use structure assignment because Netlink doesn't ensure
1440 * sufficient alignment for 64-bit members. */
1441 memcpy(&dp->stats, nl_attr_get(a[ODP_DP_ATTR_STATS]),
1444 if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
1445 dp->ipv4_frags = nl_attr_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
1447 if (a[ODP_DP_ATTR_SAMPLING]) {
/* 'sampling' is kept as a pointer to the attribute payload inside the
 * message, whereas 'ipv4_frags' just above is copied by value. */
1448 dp->sampling = nl_attr_get(a[ODP_DP_ATTR_SAMPLING]);
1451 if (a[ODP_DP_ATTR_MCGROUPS]) {
/* The nested attribute is indexed by ODP_PACKET_CMD_*. Each value found
 * is copied into dp->mcgroups[], which is indexed by DPIF_UC_*. An entry
 * whose nested attribute is absent keeps the value left by
 * dpif_linux_dp_init(). */
1452 static const struct nl_policy odp_mcgroup_policy[] = {
1453 [ODP_PACKET_CMD_MISS] = { .type = NL_A_U32, .optional = true },
1454 [ODP_PACKET_CMD_ACTION] = { .type = NL_A_U32, .optional = true },
1455 [ODP_PACKET_CMD_SAMPLE] = { .type = NL_A_U32, .optional = true },
1458 struct nlattr *mcgroups[ARRAY_SIZE(odp_mcgroup_policy)];
1460 if (!nl_parse_nested(a[ODP_DP_ATTR_MCGROUPS], odp_mcgroup_policy,
1461 mcgroups, ARRAY_SIZE(odp_mcgroup_policy))) {
1465 if (mcgroups[ODP_PACKET_CMD_MISS]) {
1466 dp->mcgroups[DPIF_UC_MISS]
1467 = nl_attr_get_u32(mcgroups[ODP_PACKET_CMD_MISS]);
1469 if (mcgroups[ODP_PACKET_CMD_ACTION]) {
1470 dp->mcgroups[DPIF_UC_ACTION]
1471 = nl_attr_get_u32(mcgroups[ODP_PACKET_CMD_ACTION]);
1473 if (mcgroups[ODP_PACKET_CMD_SAMPLE]) {
1474 dp->mcgroups[DPIF_UC_SAMPLE]
1475 = nl_attr_get_u32(mcgroups[ODP_PACKET_CMD_SAMPLE]);
1482 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
1484 dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
1486 struct odp_header *odp_header;
/* The message is laid out in the same order that
 * dpif_linux_dp_from_ofpbuf() consumes it: Netlink and Generic Netlink
 * headers first, then struct odp_header, then the attributes. The request
 * is sent with NLM_F_REQUEST | NLM_F_ECHO and carries 'dp->cmd' as its
 * command. */
1488 nl_msg_put_genlmsghdr(buf, 0, odp_datapath_family,
1489 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd, 1);
/* The odp_header space is appended uninitialized and then filled in. */
1491 odp_header = ofpbuf_put_uninit(buf, sizeof *odp_header);
1492 odp_header->dp_idx = dp->dp_idx;
1495 nl_msg_put_string(buf, ODP_DP_ATTR_NAME, dp->name);
1498 /* Skip ODP_DP_ATTR_STATS since we never have a reason to serialize it. */
/* A zero 'ipv4_frags' is treated as "not set": the attribute is left out
 * of the message. */
1500 if (dp->ipv4_frags) {
1501 nl_msg_put_u32(buf, ODP_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags);
/* 'sampling' is a pointer member of struct dpif_linux_dp, so the value
 * written is the uint32_t it points to. */
1505 nl_msg_put_u32(buf, ODP_DP_ATTR_SAMPLING, *dp->sampling);
1509 /* Clears 'dp' to "empty" values. */
1511 dpif_linux_dp_init(struct dpif_linux_dp *dp)
/* Zero-fill the whole structure. A zero 'ipv4_frags', for example, makes
 * dpif_linux_dp_to_ofpbuf() leave ODP_DP_ATTR_IPV4_FRAGS out of the
 * message. */
1513 memset(dp, 0, sizeof *dp);
1518 dpif_linux_dp_dump_start(struct nl_dump *dump)
1520 struct dpif_linux_dp request;
/* Begins a Netlink dump on 'dump' using an ODP_DP_CMD_GET request. 'cmd'
 * is the only request member set here; compare dpif_linux_dp_get(), which
 * also sets 'dp_idx' to select a single datapath. */
1523 dpif_linux_dp_init(&request);
1524 request.cmd = ODP_DP_CMD_GET;
/* Serialize the request and start the dump on 'genl_sock', the same
 * socket that dpif_linux_dp_transact() uses. */
1526 buf = ofpbuf_new(1024);
1527 dpif_linux_dp_to_ofpbuf(&request, buf);
1528 nl_dump_start(dump, genl_sock, buf);
1532 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1533 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1534 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1535 * result of the command is expected to be of the same form, which is decoded
1536 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1537 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1539 dpif_linux_dp_transact(const struct dpif_linux_dp *request,
1540 struct dpif_linux_dp *reply, struct ofpbuf **bufp)
1542 struct ofpbuf *request_buf;
/* 'reply' and 'bufp' must be either both null or both nonnull. */
1545 assert((reply != NULL) == (bufp != NULL));
/* Serialize 'request' into a temporary buffer. The buffer is deleted as
 * soon as nl_sock_transact() returns, before 'error' is examined. */
1547 request_buf = ofpbuf_new(1024);
1548 dpif_linux_dp_to_ofpbuf(request, request_buf);
1549 error = nl_sock_transact(genl_sock, request_buf, bufp);
1550 ofpbuf_delete(request_buf);
/* Decode the kernel's reply into '*reply'. The decoded members point into
 * '*bufp'. */
1554 error = dpif_linux_dp_from_ofpbuf(reply, *bufp);
/* Cleanup: 'reply' is reset to empty values and the reply buffer is
 * deleted, so that 'reply' is not left pointing into freed memory. */
1557 dpif_linux_dp_init(reply);
1558 ofpbuf_delete(*bufp);
1565 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
1566 * The caller must free '*bufp' when the reply is no longer needed ('reply'
1567 * will contain pointers into '*bufp'). */
1569 dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
1570 struct ofpbuf **bufp)
1572 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
1573 struct dpif_linux_dp request;
/* Build an ODP_DP_CMD_GET request in which the datapath is identified by
 * its index, taken from 'dpif->minor'. */
1575 dpif_linux_dp_init(&request);
1576 request.cmd = ODP_DP_CMD_GET;
1577 request.dp_idx = dpif->minor;
/* The result of dpif_linux_dp_transact(), including any error, is returned
 * unchanged. */
1579 return dpif_linux_dp_transact(&request, reply, bufp);
1582 /* Parses the contents of 'buf', which contains a "struct odp_flow" followed by
1583 * Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
1584 * positive errno value.
1586 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
1587 * while 'flow' is still in use. */
1589 dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
1590 const struct ofpbuf *buf)
/* Policy for the attributes that follow struct odp_flow.
 * ODP_FLOW_ATTR_KEY carries no '.optional' marker. A STATS payload has to
 * be exactly sizeof(struct odp_flow_stats) bytes, because its min_len
 * equals its max_len. */
1592 static const struct nl_policy odp_flow_policy[] = {
1593 [ODP_FLOW_ATTR_KEY] = { .type = NL_A_NESTED },
1594 [ODP_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
1595 [ODP_FLOW_ATTR_STATS] = { .type = NL_A_UNSPEC,
1596 .min_len = sizeof(struct odp_flow_stats),
1597 .max_len = sizeof(struct odp_flow_stats),
1599 [ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
1600 [ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
1601 /* The kernel never uses ODP_FLOW_ATTR_CLEAR. */
1602 [ODP_FLOW_ATTR_STATE] = { .type = NL_A_U64, .optional = true },
1605 struct odp_flow *odp_flow;
1606 struct nlattr *a[ARRAY_SIZE(odp_flow_policy)];
/* Reset 'flow' up front, before any parsing is attempted. */
1608 dpif_linux_flow_init(flow);
/* Attribute parsing starts 'sizeof *odp_flow' bytes into 'buf', that is,
 * immediately after the fixed struct odp_flow header. */
1610 if (!nl_policy_parse(buf, sizeof *odp_flow, odp_flow_policy,
1611 a, ARRAY_SIZE(odp_flow_policy))) {
/* The fixed header sits at the very start of the buffer. */
1614 odp_flow = buf->data;
1616 flow->nlmsg_flags = odp_flow->nlmsg_flags;
1617 flow->dp_idx = odp_flow->dp_idx;
/* 'key', 'actions', 'stats', 'tcp_flags' and 'state' are stored as
 * pointers to attribute payloads inside 'buf'; nothing is copied. The key
 * is read without a null check, unlike the attributes that follow. */
1618 flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]);
1619 flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]);
1620 if (a[ODP_FLOW_ATTR_ACTIONS]) {
1621 flow->actions = nl_attr_get(a[ODP_FLOW_ATTR_ACTIONS]);
1622 flow->actions_len = nl_attr_get_size(a[ODP_FLOW_ATTR_ACTIONS]);
1624 if (a[ODP_FLOW_ATTR_STATS]) {
1625 flow->stats = nl_attr_get(a[ODP_FLOW_ATTR_STATS]);
1627 if (a[ODP_FLOW_ATTR_TCP_FLAGS]) {
1628 flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]);
/* NOTE(review): the policy above accepts ODP_FLOW_ATTR_USED, and
 * dpif_linux_flow_get_stats() reads 'flow->used', but nothing in this
 * function stores a[ODP_FLOW_ATTR_USED] into 'flow'. As written, 'used'
 * keeps the value from dpif_linux_flow_init() -- confirm whether a
 * "flow->used = nl_attr_get(a[ODP_FLOW_ATTR_USED])" branch is missing. */
1630 if (a[ODP_FLOW_ATTR_STATE]) {
1631 flow->state = nl_attr_get(a[ODP_FLOW_ATTR_STATE]);
1636 /* Appends to 'buf' (which must initially be empty) a "struct odp_flow"
1637 * followed by Netlink attributes corresponding to 'flow'. */
1639 dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
1642 struct odp_flow *odp_flow;
/* Reserve headroom so that the fixed header can be pushed in front of the
 * attributes once they have all been written.
 *
 * NOTE(review): 'odp_flow' is a pointer, so 'sizeof odp_flow' is the size
 * of a pointer, whereas the header pushed at the end of this function is
 * 'sizeof *odp_flow' bytes. 'sizeof *odp_flow' was probably intended --
 * confirm. */
1644 ofpbuf_reserve(buf, sizeof odp_flow);
/* A zero-length key or zero-length actions list is not emitted at all. */
1646 if (flow->key_len) {
1647 nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len);
1650 if (flow->actions_len) {
1651 nl_msg_put_unspec(buf, ODP_FLOW_ATTR_ACTIONS,
1652 flow->actions, flow->actions_len);
1655 /* We never need to send these to the kernel. */
1656 assert(!flow->stats);
1657 assert(!flow->tcp_flags);
1658 assert(!flow->used);
1661 nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR);
/* 'state' is a pointer that may refer to unaligned message data, hence the
 * unaligned 64-bit read. */
1665 nl_msg_put_u64(buf, ODP_FLOW_ATTR_STATE,
1666 get_unaligned_u64(flow->state));
/* Prepend the fixed header now that the attributes are in place. 'len' is
 * the buffer size including the header just pushed. 'total_len' is the
 * distance from the start of the data to ofpbuf_end(); presumably this
 * tells the kernel how much room it has for a reply -- confirm. */
1669 odp_flow = ofpbuf_push_uninit(buf, sizeof *odp_flow);
1670 odp_flow->nlmsg_flags = flow->nlmsg_flags;
1671 odp_flow->dp_idx = flow->dp_idx;
1672 odp_flow->len = buf->size;
1673 odp_flow->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
1676 /* Clears 'flow' to "empty" values. */
1678 dpif_linux_flow_init(struct dpif_linux_flow *flow)
/* Zero-fill the whole structure. The other dpif_linux_flow_*() functions
 * test members such as 'key_len', 'actions_len', 'used' and 'tcp_flags'
 * against zero to decide whether an attribute is present. */
1680 memset(flow, 0, sizeof *flow);
1683 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1684 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1685 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1686 * result of the command is expected to be an odp_flow also, which is decoded
1687 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1688 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1690 dpif_linux_flow_transact(const struct dpif_linux_flow *request,
1691 struct dpif_linux_flow *reply, struct ofpbuf **bufp)
1693 struct ofpbuf *buf = NULL;
/* 'reply' and 'bufp' must be either both null or both nonnull. */
1697 assert((reply != NULL) == (bufp != NULL));
/* Flow commands are issued as ioctls on the file descriptor obtained from
 * get_dp0_fd(), whereas the dpif_linux_dp_*() functions go through
 * 'genl_sock'. */
1699 error = get_dp0_fd(&fd);
1704 buf = ofpbuf_new(1024);
1705 dpif_linux_flow_to_ofpbuf(request, buf);
/* The ioctl result is normalized to 0 on success, or to the value of errno
 * when ioctl() returns nonzero. */
1707 error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
/* The reply is read back from the same buffer that carried the request:
 * the 'len' member of its struct odp_flow header becomes the buffer size
 * before decoding. */
1713 buf->size = ((struct odp_flow *) buf->data)->len;
1714 error = dpif_linux_flow_from_ofpbuf(reply, buf);
/* Zero 'reply', which is the same state that dpif_linux_flow_init()
 * produces. */
1727 memset(reply, 0, sizeof *reply);
1734 dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow,
1735 struct dpif_flow_stats *stats)
/* Copies the statistics referenced by 'flow' into 'stats'. The 64-bit
 * counters are fetched with get_unaligned_u64() because 'flow->stats'
 * points at a Netlink attribute payload, and Netlink does not guarantee
 * 64-bit alignment. */
1738 stats->n_packets = get_unaligned_u64(&flow->stats->n_packets);
1739 stats->n_bytes = get_unaligned_u64(&flow->stats->n_bytes);
1741 stats->n_packets = 0;
/* 'used' and 'tcp_flags' are optional pointers; a null pointer is reported
 * as 0. */
1744 stats->used = flow->used ? get_unaligned_u64(flow->used) : 0;
1745 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;