2 * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "netdev-vport.h"
23 #include <sys/socket.h>
24 #include <linux/openvswitch.h>
25 #include <linux/rtnetlink.h>
27 #include <sys/ioctl.h>
29 #include "byte-order.h"
33 #include "dpif-linux.h"
37 #include "netdev-linux.h"
38 #include "netdev-provider.h"
40 #include "netlink-notifier.h"
41 #include "netlink-socket.h"
43 #include "openvswitch/tunnel.h"
45 #include "route-table.h"
47 #include "socket-util.h"
48 #include "unaligned.h"
51 VLOG_DEFINE_THIS_MODULE(netdev_vport);
53 /* Default to the OTV port, per the VXLAN IETF draft. */
54 #define VXLAN_DST_PORT 8472
/* TTL that set_tunnel_config() stores in a tunnel configuration (the
 * condition guarding that assignment is not visible in this excerpt).
 * get_tunnel_config() emits "ttl" only when the configured value differs
 * from this one. */
56 #define DEFAULT_TTL 64
/* Per-device state for a vport netdev.
 *
 * NOTE(review): other code in this file also reads and writes a 'peer'
 * member of this structure; its declaration and the closing brace are not
 * visible in this excerpt. */
58 struct netdev_dev_vport {
/* Embedded base object; netdev_dev_vport_cast() recovers the containing
 * structure from a pointer to this member. */
59 struct netdev_dev netdev_dev;
/* Value returned by netdev_vport_change_seq(). */
60 unsigned int change_seq;
/* Randomized by netdev_vport_create(), then read and written through the
 * get/set etheraddr callbacks. */
61 uint8_t etheraddr[ETH_ADDR_LEN];
/* Counters accumulated by netdev_vport_inc_rx()/netdev_vport_inc_tx() and
 * copied out by get_stats(). */
62 struct netdev_stats stats;
/* Tunnel configuration written by set_tunnel_config(). */
65 struct netdev_tunnel_config tnl_cfg;
/* Members of the structure that vport_class_cast() refers to as
 * 'struct vport_class' (its opening line is not visible in this excerpt). */
/* Datapath vport type reported by netdev_vport_get_vport_type(). */
72 enum ovs_vport_type type;
/* Datapath port name reported by netdev_vport_get_dpif_port(); when it is
 * null that function falls back to the netdev's own name. */
73 const char *dpif_port;
/* Embedded netdev class; vport_class_cast() recovers the containing
 * structure from a pointer to this member. */
74 struct netdev_class netdev_class;
/* Forward declarations: these functions are referenced by the helpers that
 * follow before their own definitions appear further down. */
77 static int netdev_vport_create(const struct netdev_class *, const char *,
78 struct netdev_dev **);
79 static int get_patch_config(struct netdev_dev *, struct smap *args);
80 static void netdev_vport_poll_notify(struct netdev_dev_vport *);
/* Tests whether 'class' is a vport class.  A class is recognized as such
 * when its 'create' callback is netdev_vport_create(). */
83 is_vport_class(const struct netdev_class *class)
85 return class->create == netdev_vport_create;
/* Returns the struct vport_class that embeds 'class' as its 'netdev_class'
 * member.  Asserts that 'class' is a vport class. */
88 static const struct vport_class *
89 vport_class_cast(const struct netdev_class *class)
91 ovs_assert(is_vport_class(class));
92 return CONTAINER_OF(class, struct vport_class, netdev_class);
/* Returns the struct netdev_dev_vport that embeds 'netdev_dev' as its
 * 'netdev_dev' member.  Asserts that the device's class is a vport class. */
95 static struct netdev_dev_vport *
96 netdev_dev_vport_cast(const struct netdev_dev *netdev_dev)
98 ovs_assert(is_vport_class(netdev_dev_get_class(netdev_dev)));
99 return CONTAINER_OF(netdev_dev, struct netdev_dev_vport, netdev_dev);
/* Returns the vport device state behind 'netdev'.  The class assertion in
 * netdev_dev_vport_cast() applies, so 'netdev' must belong to a vport
 * class. */
102 static struct netdev_dev_vport *
103 netdev_vport_get_dev(const struct netdev *netdev)
105 return netdev_dev_vport_cast(netdev_get_dev(netdev));
/* Returns a pointer to the tunnel configuration stored inside 'netdev_dev'.
 * The result points into the device itself; it is not a copy. */
108 static const struct netdev_tunnel_config *
109 get_netdev_tunnel_config(const struct netdev_dev *netdev_dev)
111 return &netdev_dev_vport_cast(netdev_dev)->tnl_cfg;
/* Maps the class of 'netdev' to an OVS_VPORT_TYPE_* value:
 *
 *   - a vport class yields the 'type' stored in its struct vport_class;
 *   - netdev_internal_class yields OVS_VPORT_TYPE_INTERNAL;
 *   - netdev_linux_class and netdev_tap_class yield OVS_VPORT_TYPE_NETDEV;
 *   - any other class yields OVS_VPORT_TYPE_UNSPEC. */
115 netdev_vport_get_vport_type(const struct netdev *netdev)
117 const struct netdev_dev *dev = netdev_get_dev(netdev);
118 const struct netdev_class *class = netdev_dev_get_class(dev);
120 return (is_vport_class(class) ? vport_class_cast(class)->type
121 : class == &netdev_internal_class ? OVS_VPORT_TYPE_INTERNAL
122 : (class == &netdev_linux_class ||
123 class == &netdev_tap_class) ? OVS_VPORT_TYPE_NETDEV
124 : OVS_VPORT_TYPE_UNSPEC);
/* Tests whether 'netdev' is a patch port.  Patch ports are recognized by
 * their class using get_patch_config() as the 'get_config' callback. */
128 netdev_vport_is_patch(const struct netdev *netdev)
130 const struct netdev_dev *dev = netdev_get_dev(netdev);
131 const struct netdev_class *class = netdev_dev_get_class(dev);
133 return class->get_config == get_patch_config;
/* Returns the datapath port name to use for 'netdev'.  For a vport class
 * that defines a 'dpif_port' this is that string; whenever the selected
 * value is null the netdev's own name is returned instead.
 *
 * NOTE(review): the else-arm of the conditional below is not visible in
 * this excerpt; presumably it yields NULL for non-vport classes -- confirm
 * against the full source. */
137 netdev_vport_get_dpif_port(const struct netdev *netdev)
139 const struct netdev_dev *dev = netdev_get_dev(netdev);
140 const struct netdev_class *class = netdev_dev_get_class(dev);
141 const char *dpif_port;
143 dpif_port = (is_vport_class(class)
144 ? vport_class_cast(class)->dpif_port
146 return dpif_port ? dpif_port : netdev_get_name(netdev);
/* 'create' callback shared by every vport class (is_vport_class() relies on
 * that).  Allocates a zero-filled struct netdev_dev_vport, initializes its
 * embedded base device with 'name' and 'netdev_class', assigns it a random
 * Ethernet address, stores the base device in '*netdev_devp', and registers
 * with the route table.  The return statement is not visible in this
 * excerpt. */
150 netdev_vport_create(const struct netdev_class *netdev_class, const char *name,
151 struct netdev_dev **netdev_devp)
153 struct netdev_dev_vport *dev;
155 dev = xzalloc(sizeof *dev);
156 netdev_dev_init(&dev->netdev_dev, name, netdev_class);
158 eth_addr_random(dev->etheraddr);
160 *netdev_devp = &dev->netdev_dev;
161 route_table_register();
/* Tears down a device made by netdev_vport_create(): drops the route table
 * registration taken there and frees the patch peer name.
 *
 * NOTE(review): the release of the device structure itself is not visible
 * in this excerpt -- confirm against the full source. */
167 netdev_vport_destroy(struct netdev_dev *netdev_dev_)
169 struct netdev_dev_vport *netdev_dev = netdev_dev_vport_cast(netdev_dev_);
171 route_table_unregister();
172 free(netdev_dev->peer);
/* Allocates a plain struct netdev, stores it in '*netdevp' and initializes
 * it as a handle on 'netdev_dev'.  The return statement is not visible in
 * this excerpt. */
177 netdev_vport_open(struct netdev_dev *netdev_dev, struct netdev **netdevp)
179 *netdevp = xmalloc(sizeof **netdevp);
180 netdev_init(*netdevp, netdev_dev);
/* NOTE(review): the body is not visible in this excerpt.  Presumably it
 * releases the struct netdev allocated by netdev_vport_open() -- confirm
 * against the full source. */
185 netdev_vport_close(struct netdev *netdev)
/* Copies the ETH_ADDR_LEN bytes of 'mac' into the device behind 'netdev'
 * and then calls netdev_vport_poll_notify() to signal the change.  The
 * return statement is not visible in this excerpt. */
191 netdev_vport_set_etheraddr(struct netdev *netdev,
192 const uint8_t mac[ETH_ADDR_LEN])
194 struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev);
195 memcpy(dev->etheraddr, mac, ETH_ADDR_LEN);
196 netdev_vport_poll_notify(dev);
/* Copies the Ethernet address stored in the device behind 'netdev' into
 * 'mac' (ETH_ADDR_LEN bytes).  The return statement is not visible in this
 * excerpt. */
201 netdev_vport_get_etheraddr(const struct netdev *netdev,
202 uint8_t mac[ETH_ADDR_LEN])
204 memcpy(mac, netdev_vport_get_dev(netdev)->etheraddr, ETH_ADDR_LEN);
208 /* Copies 'src' into 'dst', performing format conversion in the process.
210 * 'src' is allowed to be misaligned. */
212 netdev_stats_from_ovs_vport_stats(struct netdev_stats *dst,
213 const struct ovs_vport_stats *src)
/* Counters taken from 'src'.  Each one is read with get_unaligned_u64()
 * because 'src' may be misaligned. */
215 dst->rx_packets = get_unaligned_u64(&src->rx_packets);
216 dst->tx_packets = get_unaligned_u64(&src->tx_packets);
217 dst->rx_bytes = get_unaligned_u64(&src->rx_bytes);
218 dst->tx_bytes = get_unaligned_u64(&src->tx_bytes);
219 dst->rx_errors = get_unaligned_u64(&src->rx_errors);
220 dst->tx_errors = get_unaligned_u64(&src->tx_errors);
221 dst->rx_dropped = get_unaligned_u64(&src->rx_dropped);
222 dst->tx_dropped = get_unaligned_u64(&src->tx_dropped);
/* Counters that are not read from 'src'; they are reported as zero. */
225 dst->rx_length_errors = 0;
226 dst->rx_over_errors = 0;
227 dst->rx_crc_errors = 0;
228 dst->rx_frame_errors = 0;
229 dst->rx_fifo_errors = 0;
230 dst->rx_missed_errors = 0;
231 dst->tx_aborted_errors = 0;
232 dst->tx_carrier_errors = 0;
233 dst->tx_fifo_errors = 0;
234 dst->tx_heartbeat_errors = 0;
235 dst->tx_window_errors = 0;
/* Looks up the datapath vport that has the same name as 'netdev' with
 * dpif_linux_vport_get() and converts the statistics carried in the reply
 * into 'stats'.
 *
 * What happens when the lookup fails, or when the reply carries no
 * statistics, is not visible in this excerpt; only the tests for those two
 * cases are. */
239 netdev_vport_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
241 struct dpif_linux_vport reply;
245 error = dpif_linux_vport_get(netdev_get_name(netdev), &reply, &buf);
248 } else if (!reply.stats) {
253 netdev_stats_from_ovs_vport_stats(stats, reply.stats);
/* Adds tunnel status entries to 'smap'.
 *
 * The tunnel's destination address is looked up with
 * route_table_get_name().  When that succeeds, the resulting interface
 * name is added as "tunnel_egress_iface".  If that interface can
 * additionally be opened as a "system" netdev, its carrier state is added
 * as "tunnel_egress_iface_carrier" with the value "up" or "down", and the
 * netdev is closed again. */
261 tunnel_get_status(const struct netdev *netdev, struct smap *smap)
/* 'iface' has static storage, so the same buffer is shared by every call. */
263 static char iface[IFNAMSIZ];
266 route = netdev_vport_get_dev(netdev)->tnl_cfg.ip_dst;
267 if (route_table_get_name(route, iface)) {
268 struct netdev *egress_netdev;
270 smap_add(smap, "tunnel_egress_iface", iface);
/* The carrier entry is added only when netdev_open() returns zero. */
272 if (!netdev_open(iface, "system", &egress_netdev)) {
273 smap_add(smap, "tunnel_egress_iface_carrier",
274 netdev_get_carrier(egress_netdev) ? "up" : "down");
275 netdev_close(egress_netdev);
/* 'update_flags' callback.  Reports NETDEV_UP | NETDEV_PROMISC through
 * '*old_flagsp'.  'netdev' and 'on' are unused.
 *
 * A request to turn off NETDEV_UP or NETDEV_PROMISC enters the branch at
 * the top; NOTE(review): its body is not visible in this excerpt --
 * presumably it rejects the request; confirm against the full source. */
283 netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
284 enum netdev_flags off, enum netdev_flags on OVS_UNUSED,
285 enum netdev_flags *old_flagsp)
287 if (off & (NETDEV_UP | NETDEV_PROMISC)) {
291 *old_flagsp = NETDEV_UP | NETDEV_PROMISC;
/* Returns the 'change_seq' value stored in the device behind 'netdev'. */
296 netdev_vport_change_seq(const struct netdev *netdev)
298 return netdev_vport_get_dev(netdev)->change_seq;
/* NOTE(review): the body is not visible in this excerpt, so what periodic
 * work this function performs cannot be stated from here. */
302 netdev_vport_run(void)
/* NOTE(review): the body is not visible in this excerpt, so what this
 * function waits on cannot be stated from here. */
308 netdev_vport_wait(void)
313 /* Helper functions. */
/* Called after a change to 'ndv' (the Ethernet address setter and
 * set_tunnel_config() both call it).
 *
 * Only the test for 'change_seq' being zero is visible in this excerpt.
 * NOTE(review): presumably the counter is advanced and the value zero is
 * skipped -- confirm against the full source. */
316 netdev_vport_poll_notify(struct netdev_dev_vport *ndv)
319 if (!ndv->change_seq) {
324 /* Code specific to tunnel types. */
/* Parses a tunnel key option from 'args'.  The option called 'name' is
 * looked up, and the generic "key" option is consulted as well (the
 * condition that selects between the two is not visible in this excerpt).
 *
 * The value "flow" is treated specially.  Any other value is converted with
 * strtoull() using base 0, so decimal, octal and hexadecimal notations are
 * accepted, and is returned in network byte order.
 *
 * '*present' and '*flow' are output parameters; the statements that assign
 * them are not visible in this excerpt. */
327 parse_key(const struct smap *args, const char *name,
328 bool *present, bool *flow)
335 s = smap_get(args, name);
337 s = smap_get(args, "key");
345 if (!strcmp(s, "flow")) {
/* NOTE(review): strtoull() is called without an end pointer, so trailing
 * garbage in the value is not detected here. */
349 return htonll(strtoull(s, NULL, 0));
/* Parses the tunnel options in 'args' into a local netdev_tunnel_config
 * and, at the end, stores it in the device and calls
 * netdev_vport_poll_notify().
 *
 * Options recognized in the loop: "remote_ip", "local_ip", "tos", "ttl",
 * "dst_port" (only for type "vxlan"), "csum" (only for types whose name
 * contains "gre"), "df_default", and -- for types whose name contains
 * "ipsec" -- "peer_cert", "psk", "certificate", "private_key" and
 * "use_ssl_cert".  "key", "in_key" and "out_key" are accepted by the loop
 * and parsed afterwards by parse_key().  Any other option is logged as a
 * warning.
 *
 * Several problems are logged with VLOG_ERR (missing remote_ip, conflicting
 * or missing IPsec credentials, missing ovs-monitor-ipsec daemon).  The
 * return statements, and therefore the error codes, are not visible in this
 * excerpt. */
354 set_tunnel_config(struct netdev_dev *dev_, const struct smap *args)
356 struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
357 const char *name = netdev_dev_get_name(dev_);
358 const char *type = netdev_dev_get_type(dev_);
359 bool ipsec_mech_set, needs_dst_port, has_csum;
360 struct netdev_tunnel_config tnl_cfg;
361 struct smap_node *node;
/* Checksum support is inferred from "gre" occurring in the type name. */
363 has_csum = strstr(type, "gre");
364 ipsec_mech_set = false;
365 memset(&tnl_cfg, 0, sizeof tnl_cfg);
367 if (!strcmp(type, "capwap")) {
368 VLOG_WARN_ONCE("CAPWAP tunnel support is deprecated.");
/* Only the exact type "vxlan" takes a destination port.  IPsec is inferred
 * from "ipsec" occurring in the type name.  Fragmentation is disallowed
 * unless "df_default" is given as "false". */
371 needs_dst_port = !strcmp(type, "vxlan");
372 tnl_cfg.ipsec = strstr(type, "ipsec");
373 tnl_cfg.dont_fragment = true;
375 SMAP_FOR_EACH (node, args) {
376 if (!strcmp(node->key, "remote_ip")) {
377 struct in_addr in_addr;
/* A non-zero result from lookup_ip() is treated as a bad address. */
378 if (lookup_ip(node->value, &in_addr)) {
379 VLOG_WARN("%s: bad %s 'remote_ip'", name, type);
381 tnl_cfg.ip_dst = in_addr.s_addr;
383 } else if (!strcmp(node->key, "local_ip")) {
384 struct in_addr in_addr;
385 if (lookup_ip(node->value, &in_addr)) {
386 VLOG_WARN("%s: bad %s 'local_ip'", name, type);
388 tnl_cfg.ip_src = in_addr.s_addr;
390 } else if (!strcmp(node->key, "tos")) {
391 if (!strcmp(node->value, "inherit")) {
392 tnl_cfg.tos_inherit = true;
/* A numeric TOS must parse completely and may only have bits set that lie
 * within IP_DSCP_MASK; anything else is reported as invalid. */
396 tos = strtol(node->value, &endptr, 0);
397 if (*endptr == '\0' && tos == (tos & IP_DSCP_MASK)) {
400 VLOG_WARN("%s: invalid TOS %s", name, node->value);
403 } else if (!strcmp(node->key, "ttl")) {
404 if (!strcmp(node->value, "inherit")) {
405 tnl_cfg.ttl_inherit = true;
/* NOTE(review): atoi() reports no errors, so a non-numeric value silently
 * becomes 0, and the result is stored without a visible range check. */
407 tnl_cfg.ttl = atoi(node->value);
409 } else if (!strcmp(node->key, "dst_port") && needs_dst_port) {
/* NOTE(review): same atoi() caveat as for "ttl"; values outside the 16-bit
 * port range are not rejected here. */
410 tnl_cfg.dst_port = htons(atoi(node->value));
411 } else if (!strcmp(node->key, "csum") && has_csum) {
412 if (!strcmp(node->value, "true")) {
415 } else if (!strcmp(node->key, "df_default")) {
416 if (!strcmp(node->value, "false")) {
417 tnl_cfg.dont_fragment = false;
419 } else if (!strcmp(node->key, "peer_cert") && tnl_cfg.ipsec) {
420 if (smap_get(args, "certificate")) {
421 ipsec_mech_set = true;
423 const char *use_ssl_cert;
425 /* If the "use_ssl_cert" is true, then "certificate" and
426 * "private_key" will be pulled from the SSL table. The
427 * use of this option is strongly discouraged, since it
428 * will likely be removed when multiple SSL configurations
429 * are supported by OVS.
431 use_ssl_cert = smap_get(args, "use_ssl_cert");
432 if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) {
433 VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument",
437 ipsec_mech_set = true;
439 } else if (!strcmp(node->key, "psk") && tnl_cfg.ipsec) {
440 ipsec_mech_set = true;
441 } else if (tnl_cfg.ipsec
442 && (!strcmp(node->key, "certificate")
443 || !strcmp(node->key, "private_key")
444 || !strcmp(node->key, "use_ssl_cert"))) {
445 /* Ignore options not used by the netdev. */
446 } else if (!strcmp(node->key, "key") ||
447 !strcmp(node->key, "in_key") ||
448 !strcmp(node->key, "out_key")) {
449 /* Handled separately below. */
451 VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->key);
455 /* Add a default destination port for VXLAN if none specified. */
456 if (needs_dst_port && !tnl_cfg.dst_port) {
457 tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
/* IPsec checks.  The pidfile of ovs-monitor-ipsec in ovs_rundir() is read
 * and the result is kept in the static 'pid'; then the credential options
 * are validated: "peer_cert" and "psk" are mutually exclusive and one of
 * them must have been accepted.  The conditions enclosing these statements
 * are not visible in this excerpt. */
461 static pid_t pid = 0;
463 char *file_name = xasprintf("%s/%s", ovs_rundir(),
464 "ovs-monitor-ipsec.pid");
465 pid = read_pidfile(file_name);
470 VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
475 if (smap_get(args, "peer_cert") && smap_get(args, "psk")) {
476 VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name);
480 if (!ipsec_mech_set) {
481 VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument",
/* A zero destination address means "remote_ip" was absent or did not
 * resolve; this is reported as an error. */
487 if (!tnl_cfg.ip_dst) {
488 VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
493 if (tnl_cfg.ip_src) {
494 if (ip_is_multicast(tnl_cfg.ip_dst)) {
495 VLOG_WARN("%s: remote_ip is multicast, ignoring local_ip", name);
501 tnl_cfg.ttl = DEFAULT_TTL;
/* Keys are parsed from the whole of 'args' rather than inside the loop. */
504 tnl_cfg.in_key = parse_key(args, "in_key",
505 &tnl_cfg.in_key_present,
506 &tnl_cfg.in_key_flow);
508 tnl_cfg.out_key = parse_key(args, "out_key",
509 &tnl_cfg.out_key_present,
510 &tnl_cfg.out_key_flow);
/* Commit the parsed configuration to the device and signal the change. */
512 dev->tnl_cfg = tnl_cfg;
513 netdev_vport_poll_notify(dev);
/* Writes the tunnel configuration stored in 'dev' into 'args', using the
 * same option names that set_tunnel_config() parses.
 *
 * Options that hold their default are left out: a zero remote or local
 * address, a TTL equal to DEFAULT_TTL, a zero TOS, a destination port equal
 * to VXLAN_DST_PORT, and "df_default" while fragmentation is disallowed.
 * The return statement is not visible in this excerpt. */
519 get_tunnel_config(struct netdev_dev *dev, struct smap *args)
521 const struct netdev_tunnel_config *tnl_cfg =
522 &netdev_dev_vport_cast(dev)->tnl_cfg;
524 if (tnl_cfg->ip_dst) {
525 smap_add_format(args, "remote_ip", IP_FMT, IP_ARGS(tnl_cfg->ip_dst));
528 if (tnl_cfg->ip_src) {
529 smap_add_format(args, "local_ip", IP_FMT, IP_ARGS(tnl_cfg->ip_src));
/* Keys: a single "key" entry is written when both directions are "flow",
 * or when both are present with the same value.  The per-direction
 * "in_key"/"out_key" entries follow; the line that joins them to the
 * branches above is not visible in this excerpt. */
532 if (tnl_cfg->in_key_flow && tnl_cfg->out_key_flow) {
533 smap_add(args, "key", "flow");
534 } else if (tnl_cfg->in_key_present && tnl_cfg->out_key_present
535 && tnl_cfg->in_key == tnl_cfg->out_key) {
536 smap_add_format(args, "key", "%"PRIu64, ntohll(tnl_cfg->in_key));
538 if (tnl_cfg->in_key_flow) {
539 smap_add(args, "in_key", "flow");
540 } else if (tnl_cfg->in_key_present) {
541 smap_add_format(args, "in_key", "%"PRIu64,
542 ntohll(tnl_cfg->in_key));
545 if (tnl_cfg->out_key_flow) {
546 smap_add(args, "out_key", "flow");
547 } else if (tnl_cfg->out_key_present) {
548 smap_add_format(args, "out_key", "%"PRIu64,
549 ntohll(tnl_cfg->out_key));
553 if (tnl_cfg->ttl_inherit) {
554 smap_add(args, "ttl", "inherit");
555 } else if (tnl_cfg->ttl != DEFAULT_TTL) {
556 smap_add_format(args, "ttl", "%"PRIu8, tnl_cfg->ttl);
559 if (tnl_cfg->tos_inherit) {
560 smap_add(args, "tos", "inherit");
561 } else if (tnl_cfg->tos) {
562 smap_add_format(args, "tos", "0x%x", tnl_cfg->tos);
565 if (tnl_cfg->dst_port) {
566 uint16_t dst_port = ntohs(tnl_cfg->dst_port);
567 if (dst_port != VXLAN_DST_PORT) {
568 smap_add_format(args, "dst_port", "%d", dst_port);
/* The condition guarding the "csum" entry is not visible in this excerpt. */
573 smap_add(args, "csum", "true");
576 if (!tnl_cfg->dont_fragment) {
577 smap_add(args, "df_default", "false");
583 /* Code specific to patch ports. */
/* Returns the peer name stored in the device behind 'netdev' when 'netdev'
 * is a patch port.  The returned pointer is the device's own string, not a
 * copy.
 *
 * NOTE(review): the else-arm for non-patch netdevs is not visible in this
 * excerpt; presumably it yields NULL -- confirm against the full source. */
586 netdev_vport_patch_peer(const struct netdev *netdev)
588 return netdev_vport_is_patch(netdev)
589 ? netdev_vport_get_dev(netdev)->peer
/* Adds the packet and byte counts in 'stats' to the receive counters kept
 * in the device behind 'netdev'.  The update is made only when 'netdev'
 * belongs to a vport class; no other branch is visible here. */
594 netdev_vport_inc_rx(const struct netdev *netdev,
595 const struct dpif_flow_stats *stats)
597 if (is_vport_class(netdev_dev_get_class(netdev_get_dev(netdev)))) {
598 struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev);
599 dev->stats.rx_packets += stats->n_packets;
600 dev->stats.rx_bytes += stats->n_bytes;
/* Adds the packet and byte counts in 'stats' to the transmit counters kept
 * in the device behind 'netdev'.  The update is made only when 'netdev'
 * belongs to a vport class; no other branch is visible here. */
605 netdev_vport_inc_tx(const struct netdev *netdev,
606 const struct dpif_flow_stats *stats)
608 if (is_vport_class(netdev_dev_get_class(netdev_get_dev(netdev)))) {
609 struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev);
610 dev->stats.tx_packets += stats->n_packets;
611 dev->stats.tx_bytes += stats->n_bytes;
/* 'get_config' callback for patch ports (netdev_vport_is_patch() keys on
 * it).  Adds the device's peer name to 'args' under "peer".
 *
 * NOTE(review): any guard against a null 'peer', and the return statement,
 * are not visible in this excerpt. */
616 get_patch_config(struct netdev_dev *dev_, struct smap *args)
618 struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
621 smap_add(args, "peer", dev->peer);
/* Configures a patch port from 'args'.  Three conditions are logged as
 * errors: a missing "peer" option, any option besides "peer", and a peer
 * equal to the device's own name.  Otherwise a copy of the peer name is
 * stored in the device.
 *
 * NOTE(review): the return statements are not visible in this excerpt, and
 * neither is any release of a previously stored 'peer' before it is
 * overwritten -- confirm against the full source. */
627 set_patch_config(struct netdev_dev *dev_, const struct smap *args)
629 struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_);
630 const char *name = netdev_dev_get_name(dev_);
633 peer = smap_get(args, "peer");
635 VLOG_ERR("%s: patch type requires valid 'peer' argument", name);
/* "peer" is known to be present at this point, so a count above one means
 * some other option was supplied as well. */
639 if (smap_count(args) > 1) {
640 VLOG_ERR("%s: patch type takes only a 'peer' argument", name);
644 if (!strcmp(name, peer)) {
645 VLOG_ERR("%s: patch peer must not be self", name);
650 dev->peer = xstrdup(peer);
/* Copies the counters accumulated in the device behind 'netdev' (see
 * netdev_vport_inc_rx() and netdev_vport_inc_tx()) into '*stats'.  The
 * return statement is not visible in this excerpt. */
656 get_stats(const struct netdev *netdev, struct netdev_stats *stats)
658 struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev);
659 memcpy(stats, &dev->stats, sizeof *stats);
/* Expands to the list of callback initializers that the class tables in
 * this file place after the class name in a struct netdev_class.  The
 * create, destroy, close, Ethernet address, update_flags and change_seq
 * slots are filled with the netdev_vport_*() functions defined in this
 * file; slots written as NULL are callbacks that vport classes do not
 * provide.
 *
 * GET_CONFIG, SET_CONFIG, GET_TUNNEL_CONFIG and GET_STATUS are the
 * class-specific callbacks.  The lines where they are substituted are not
 * visible in this excerpt. */
663 #define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG, \
664 GET_TUNNEL_CONFIG, GET_STATUS) \
669 netdev_vport_create, \
670 netdev_vport_destroy, \
676 netdev_vport_close, \
680 NULL, /* recv_wait */ \
684 NULL, /* send_wait */ \
686 netdev_vport_set_etheraddr, \
687 netdev_vport_get_etheraddr, \
688 NULL, /* get_mtu */ \
689 NULL, /* set_mtu */ \
690 NULL, /* get_ifindex */ \
691 NULL, /* get_carrier */ \
692 NULL, /* get_carrier_resets */ \
693 NULL, /* get_miimon */ \
695 NULL, /* set_stats */ \
697 NULL, /* get_features */ \
698 NULL, /* set_advertisements */ \
700 NULL, /* set_policing */ \
701 NULL, /* get_qos_types */ \
702 NULL, /* get_qos_capabilities */ \
703 NULL, /* get_qos */ \
704 NULL, /* set_qos */ \
705 NULL, /* get_queue */ \
706 NULL, /* set_queue */ \
707 NULL, /* delete_queue */ \
708 NULL, /* get_queue_stats */ \
709 NULL, /* dump_queues */ \
710 NULL, /* dump_queue_stats */ \
712 NULL, /* get_in4 */ \
713 NULL, /* set_in4 */ \
714 NULL, /* get_in6 */ \
715 NULL, /* add_router */ \
716 NULL, /* get_next_hop */ \
718 NULL, /* arp_lookup */ \
720 netdev_vport_update_flags, \
722 netdev_vport_change_seq
/* Expands to a struct vport_class initializer for a tunnel type: VPORT_TYPE
 * and DPIF_PORT fill the 'type' and 'dpif_port' members, and NAME becomes
 * the name of the embedded netdev class.  The class uses
 * get_tunnel_config(), get_netdev_tunnel_config() and tunnel_get_status()
 * as its callbacks; the line that supplies the SET_CONFIG argument is not
 * visible in this excerpt. */
724 #define TUNNEL_CLASS(NAME, VPORT_TYPE, DPIF_PORT) \
725 { VPORT_TYPE, DPIF_PORT, \
726 { NAME, VPORT_FUNCTIONS(get_tunnel_config, \
728 get_netdev_tunnel_config, \
729 tunnel_get_status) }}
732 netdev_vport_register(void)
734 static const struct vport_class vport_classes[] = {
735 TUNNEL_CLASS("gre", OVS_VPORT_TYPE_GRE, "gre_system"),
736 TUNNEL_CLASS("ipsec_gre", OVS_VPORT_TYPE_GRE, "gre_system"),
737 TUNNEL_CLASS("gre64", OVS_VPORT_TYPE_GRE64, "gre64_system"),
738 TUNNEL_CLASS("ipsec_gre64", OVS_VPORT_TYPE_GRE64, "gre64_system"),
739 TUNNEL_CLASS("capwap", OVS_VPORT_TYPE_CAPWAP, "capwap_system"),
740 TUNNEL_CLASS("vxlan", OVS_VPORT_TYPE_VXLAN, "vxlan_system"),
742 { OVS_VPORT_TYPE_UNSPEC, NULL,
743 { "patch", VPORT_FUNCTIONS(get_patch_config,
751 for (i = 0; i < ARRAY_SIZE(vport_classes); i++) {
752 netdev_register_provider(&vport_classes[i].netdev_class);