2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "netdev-linux.h"
23 #include <arpa/inet.h>
25 #include <linux/filter.h>
26 #include <linux/gen_stats.h>
27 #include <linux/if_ether.h>
28 #include <linux/if_tun.h>
29 #include <linux/types.h>
30 #include <linux/ethtool.h>
31 #include <linux/mii.h>
32 #include <linux/pkt_cls.h>
33 #include <linux/pkt_sched.h>
34 #include <linux/rtnetlink.h>
35 #include <linux/sockios.h>
36 #include <sys/types.h>
37 #include <sys/ioctl.h>
38 #include <sys/socket.h>
39 #include <netpacket/packet.h>
41 #include <net/if_arp.h>
42 #include <net/if_packet.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
51 #include "dpif-linux.h"
52 #include "dpif-netdev.h"
53 #include "dynamic-string.h"
54 #include "fatal-signal.h"
57 #include "netdev-provider.h"
58 #include "netdev-vport.h"
59 #include "netlink-notifier.h"
60 #include "netlink-socket.h"
63 #include "openflow/openflow.h"
64 #include "ovs-atomic.h"
66 #include "poll-loop.h"
67 #include "rtnetlink-link.h"
69 #include "socket-util.h"
72 #include "unaligned.h"
75 VLOG_DEFINE_THIS_MODULE(netdev_linux);
77 COVERAGE_DEFINE(netdev_set_policing);
78 COVERAGE_DEFINE(netdev_arp_lookup);
79 COVERAGE_DEFINE(netdev_get_ifindex);
80 COVERAGE_DEFINE(netdev_get_hwaddr);
81 COVERAGE_DEFINE(netdev_set_hwaddr);
82 COVERAGE_DEFINE(netdev_get_ethtool);
83 COVERAGE_DEFINE(netdev_set_ethtool);
86 /* These were introduced in Linux 2.6.14, so they might be missing if we have
88 #ifndef ADVERTISED_Pause
89 #define ADVERTISED_Pause (1 << 13)
91 #ifndef ADVERTISED_Asym_Pause
92 #define ADVERTISED_Asym_Pause (1 << 14)
95 /* These were introduced in Linux 2.6.24, so they might be missing if we
96 * have old headers. */
97 #ifndef ETHTOOL_GFLAGS
98 #define ETHTOOL_GFLAGS 0x00000025 /* Get flags bitmap(ethtool_value) */
100 #ifndef ETHTOOL_SFLAGS
101 #define ETHTOOL_SFLAGS 0x00000026 /* Set flags bitmap(ethtool_value) */
104 /* This was introduced in Linux 2.6.25, so it might be missing if we have old
107 #define TC_RTAB_SIZE 1024
110 /* Linux 2.6.21 introduced struct tpacket_auxdata.
111 * Linux 2.6.27 added the tp_vlan_tci member.
112 * Linux 3.0 defined TP_STATUS_VLAN_VALID.
113 * Linux 3.13 repurposed a padding member for tp_vlan_tpid and defined
114 * TP_STATUS_VLAN_TPID_VALID.
116 * With all this churn it's easiest to unconditionally define a replacement
117 * structure that has everything we want.
119 #ifndef PACKET_AUXDATA
120 #define PACKET_AUXDATA 8
122 #ifndef TP_STATUS_VLAN_VALID
123 #define TP_STATUS_VLAN_VALID (1 << 4)
125 #ifndef TP_STATUS_VLAN_TPID_VALID
126 #define TP_STATUS_VLAN_TPID_VALID (1 << 6)
128 #undef tpacket_auxdata
129 #define tpacket_auxdata rpl_tpacket_auxdata
130 struct tpacket_auxdata {
136 uint16_t tp_vlan_tci;
137 uint16_t tp_vlan_tpid;
141 VALID_IFINDEX = 1 << 0,
142 VALID_ETHERADDR = 1 << 1,
146 VALID_POLICING = 1 << 5,
147 VALID_VPORT_STAT_ERROR = 1 << 6,
148 VALID_DRVINFO = 1 << 7,
149 VALID_FEATURES = 1 << 8,
152 /* Traffic control. */
154 /* An instance of a traffic control class. Always associated with a particular
157 * Each TC implementation subclasses this with whatever additional data it
160 const struct tc_ops *ops;
161 struct hmap queues; /* Contains "struct tc_queue"s.
162 * Read by generic TC layer.
163 * Written only by TC implementation. */
166 #define TC_INITIALIZER(TC, OPS) { OPS, HMAP_INITIALIZER(&(TC)->queues) }
168 /* One traffic control queue.
170 * Each TC implementation subclasses this with whatever additional data it
173 struct hmap_node hmap_node; /* In struct tc's "queues" hmap. */
174 unsigned int queue_id; /* OpenFlow queue ID. */
175 long long int created; /* Time queue was created, in msecs. */
178 /* A particular kind of traffic control. Each implementation generally maps to
179 * one particular Linux qdisc class.
181 * The functions below return 0 if successful or a positive errno value on
182 * failure, except where otherwise noted. All of them must be provided, except
183 * where otherwise noted. */
185 /* Name used by kernel in the TCA_KIND attribute of tcmsg, e.g. "htb".
186 * This is null for tc_ops_default and tc_ops_other, for which there are no
187 * appropriate values. */
188 const char *linux_name;
190 /* Name used in OVS database, e.g. "linux-htb". Must be nonnull. */
191 const char *ovs_name;
193 /* Number of supported OpenFlow queues, 0 for qdiscs that have no
194 * queues. The queues are numbered 0 through n_queues - 1. */
195 unsigned int n_queues;
197 /* Called to install this TC class on 'netdev'. The implementation should
198 * make the Netlink calls required to set up 'netdev' with the right qdisc
199 * and configure it according to 'details'. The implementation may assume
200 * that the current qdisc is the default; that is, there is no need for it
201 * to delete the current qdisc before installing itself.
203 * The contents of 'details' should be documented as valid for 'ovs_name'
204 * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
205 * (which is built as ovs-vswitchd.conf.db(8)).
207 * This function must return 0 if and only if it sets 'netdev->tc' to an
208 * initialized 'struct tc'.
210 * (This function is null for tc_ops_other, which cannot be installed. For
211 * other TC classes it should always be nonnull.) */
212 int (*tc_install)(struct netdev *netdev, const struct smap *details);
214 /* Called when the netdev code determines (through a Netlink query) that
215 * this TC class's qdisc is installed on 'netdev', but we didn't install
216 * it ourselves and so don't know any of the details.
218 * 'nlmsg' is the kernel reply to a RTM_GETQDISC Netlink message for
219 * 'netdev'. The TCA_KIND attribute of 'nlmsg' is 'linux_name'. The
220 * implementation should parse the other attributes of 'nlmsg' as
221 * necessary to determine its configuration. If necessary it should also
222 * use Netlink queries to determine the configuration of queues on
225 * This function must return 0 if and only if it sets 'netdev->tc' to an
226 * initialized 'struct tc'. */
227 int (*tc_load)(struct netdev *netdev, struct ofpbuf *nlmsg);
229 /* Destroys the data structures allocated by the implementation as part of
230 * 'tc'. (This includes destroying 'tc->queues' by calling
233 * The implementation should not need to perform any Netlink calls. If
234 * desirable, the caller is responsible for deconfiguring the kernel qdisc.
235 * (But it may not be desirable.)
237 * This function may be null if 'tc' is trivial. */
238 void (*tc_destroy)(struct tc *tc);
240 /* Retrieves details of 'netdev->tc' configuration into 'details'.
242 * The implementation should not need to perform any Netlink calls, because
243 * the 'tc_install' or 'tc_load' that instantiated 'netdev->tc' should have
244 * cached the configuration.
246 * The contents of 'details' should be documented as valid for 'ovs_name'
247 * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
248 * (which is built as ovs-vswitchd.conf.db(8)).
250 * This function may be null if 'tc' is not configurable.
252 int (*qdisc_get)(const struct netdev *netdev, struct smap *details);
254 /* Reconfigures 'netdev->tc' according to 'details', performing any
255 * required Netlink calls to complete the reconfiguration.
257 * The contents of 'details' should be documented as valid for 'ovs_name'
258 * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
259 * (which is built as ovs-vswitchd.conf.db(8)).
261 * This function may be null if 'tc' is not configurable.
263 int (*qdisc_set)(struct netdev *, const struct smap *details);
265 /* Retrieves details of 'queue' on 'netdev->tc' into 'details'. 'queue' is
266 * one of the 'struct tc_queue's within 'netdev->tc->queues'.
268 * The contents of 'details' should be documented as valid for 'ovs_name'
269 * in the "other_config" column in the "Queue" table in
270 * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)).
272 * The implementation should not need to perform any Netlink calls, because
273 * the 'tc_install' or 'tc_load' that instantiated 'netdev->tc' should have
274 * cached the queue configuration.
276 * This function may be null if 'tc' does not have queues ('n_queues' is
278 int (*class_get)(const struct netdev *netdev, const struct tc_queue *queue,
279 struct smap *details);
281 /* Configures or reconfigures 'queue_id' on 'netdev->tc' according to
282 * 'details', perfoming any required Netlink calls to complete the
283 * reconfiguration. The caller ensures that 'queue_id' is less than
286 * The contents of 'details' should be documented as valid for 'ovs_name'
287 * in the "other_config" column in the "Queue" table in
288 * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)).
290 * This function may be null if 'tc' does not have queues or its queues are
291 * not configurable. */
292 int (*class_set)(struct netdev *, unsigned int queue_id,
293 const struct smap *details);
295 /* Deletes 'queue' from 'netdev->tc'. 'queue' is one of the 'struct
296 * tc_queue's within 'netdev->tc->queues'.
298 * This function may be null if 'tc' does not have queues or its queues
299 * cannot be deleted. */
300 int (*class_delete)(struct netdev *, struct tc_queue *queue);
302 /* Obtains stats for 'queue' from 'netdev->tc'. 'queue' is one of the
303 * 'struct tc_queue's within 'netdev->tc->queues'.
305 * On success, initializes '*stats'.
307 * This function may be null if 'tc' does not have queues or if it cannot
308 * report queue statistics. */
309 int (*class_get_stats)(const struct netdev *netdev,
310 const struct tc_queue *queue,
311 struct netdev_queue_stats *stats);
313 /* Extracts queue stats from 'nlmsg', which is a response to a
314 * RTM_GETTCLASS message, and passes them to 'cb' along with 'aux'.
316 * This function may be null if 'tc' does not have queues or if it cannot
317 * report queue statistics. */
318 int (*class_dump_stats)(const struct netdev *netdev,
319 const struct ofpbuf *nlmsg,
320 netdev_dump_queue_stats_cb *cb, void *aux);
/* Initializes 'tc' for use with traffic-control operations 'ops', creating an
 * empty 'queues' hmap.
 * NOTE(review): excerpt appears truncated -- the braces and (presumably) the
 * 'tc->ops = ops;' assignment are not visible; confirm against upstream. */
324 tc_init(struct tc *tc, const struct tc_ops *ops)
327 hmap_init(&tc->queues);
/* Releases the generic-TC resources of 'tc': destroys the 'queues' hmap.
 * Implementation-specific teardown happens in each tc_ops' tc_destroy. */
331 tc_destroy(struct tc *tc)
333 hmap_destroy(&tc->queues);
336 static const struct tc_ops tc_ops_htb;
337 static const struct tc_ops tc_ops_hfsc;
338 static const struct tc_ops tc_ops_default;
339 static const struct tc_ops tc_ops_other;
341 static const struct tc_ops *const tcs[] = {
342 &tc_ops_htb, /* Hierarchy token bucket (see tc-htb(8)). */
343 &tc_ops_hfsc, /* Hierarchical fair service curve. */
344 &tc_ops_default, /* Default qdisc (see tc-pfifo_fast(8)). */
345 &tc_ops_other, /* Some other qdisc. */
349 static unsigned int tc_make_handle(unsigned int major, unsigned int minor);
350 static unsigned int tc_get_major(unsigned int handle);
351 static unsigned int tc_get_minor(unsigned int handle);
353 static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks);
354 static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
355 static unsigned int tc_buffer_per_jiffy(unsigned int rate);
357 static struct tcmsg *tc_make_request(const struct netdev *, int type,
358 unsigned int flags, struct ofpbuf *);
359 static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
360 static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add);
361 static int tc_add_policer(struct netdev *netdev, int kbits_rate,
364 static int tc_parse_qdisc(const struct ofpbuf *, const char **kind,
365 struct nlattr **options);
366 static int tc_parse_class(const struct ofpbuf *, unsigned int *queue_id,
367 struct nlattr **options,
368 struct netdev_queue_stats *);
369 static int tc_query_class(const struct netdev *,
370 unsigned int handle, unsigned int parent,
371 struct ofpbuf **replyp);
372 static int tc_delete_class(const struct netdev *, unsigned int handle);
374 static int tc_del_qdisc(struct netdev *netdev);
375 static int tc_query_qdisc(const struct netdev *netdev);
377 static int tc_calc_cell_log(unsigned int mtu);
378 static void tc_fill_rate(struct tc_ratespec *rate, uint64_t bps, int mtu);
379 static void tc_put_rtab(struct ofpbuf *, uint16_t type,
380 const struct tc_ratespec *rate);
381 static int tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes);
383 struct netdev_linux {
386 /* Protects all members below. */
387 struct ovs_mutex mutex;
389 unsigned int cache_valid;
391 bool miimon; /* Link status of last poll. */
392 long long int miimon_interval; /* Miimon Poll rate. Disabled if <= 0. */
393 struct timer miimon_timer;
395 /* The following are figured out "on demand" only. They are only valid
396 * when the corresponding VALID_* bit in 'cache_valid' is set. */
398 uint8_t etheraddr[ETH_ADDR_LEN];
399 struct in_addr address, netmask;
402 unsigned int ifi_flags;
403 long long int carrier_resets;
404 uint32_t kbits_rate; /* Policing data. */
405 uint32_t kbits_burst;
406 int vport_stats_error; /* Cached error code from vport_get_stats().
407 0 or an errno value. */
408 int netdev_mtu_error; /* Cached error code from SIOCGIFMTU or SIOCSIFMTU. */
409 int ether_addr_error; /* Cached error code from set/get etheraddr. */
410 int netdev_policing_error; /* Cached error code from set policing. */
411 int get_features_error; /* Cached error code from ETHTOOL_GSET. */
412 int get_ifindex_error; /* Cached error code from SIOCGIFINDEX. */
414 enum netdev_features current; /* Cached from ETHTOOL_GSET. */
415 enum netdev_features advertised; /* Cached from ETHTOOL_GSET. */
416 enum netdev_features supported; /* Cached from ETHTOOL_GSET. */
418 struct ethtool_drvinfo drvinfo; /* Cached from ETHTOOL_GDRVINFO. */
421 /* For devices of class netdev_tap_class only. */
425 struct netdev_rxq_linux {
426 struct netdev_rxq up;
431 /* This is set pretty low because we probably won't learn anything from the
432 * additional log messages. */
433 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
435 /* Polling miimon status for all ports causes performance degradation when
436 * handling a large number of ports. If there are no devices using miimon, then
437 * we skip netdev_linux_miimon_run() and netdev_linux_miimon_wait(). */
438 static atomic_int miimon_cnt = ATOMIC_VAR_INIT(0);
440 static void netdev_linux_run(void);
442 static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
443 int cmd, const char *cmd_name);
444 static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *,
445 int cmd, const char *cmd_name);
446 static int get_flags(const struct netdev *, unsigned int *flags);
447 static int set_flags(const char *, unsigned int flags);
448 static int update_flags(struct netdev_linux *netdev, enum netdev_flags off,
449 enum netdev_flags on, enum netdev_flags *old_flagsp)
450 OVS_REQUIRES(netdev->mutex);
451 static int do_get_ifindex(const char *netdev_name);
452 static int get_ifindex(const struct netdev *, int *ifindexp);
453 static int do_set_addr(struct netdev *netdev,
454 int ioctl_nr, const char *ioctl_name,
455 struct in_addr addr);
456 static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]);
457 static int set_etheraddr(const char *netdev_name, const uint8_t[ETH_ADDR_LEN]);
458 static int get_stats_via_netlink(const struct netdev *, struct netdev_stats *);
459 static int af_packet_sock(void);
460 static bool netdev_linux_miimon_enabled(void);
461 static void netdev_linux_miimon_run(void);
462 static void netdev_linux_miimon_wait(void);
463 static int netdev_linux_get_mtu__(struct netdev_linux *netdev, int *mtup);
/* Returns true if 'netdev_class' is one of the Linux-backed netdev classes,
 * identified by its 'run' callback being netdev_linux_run. */
466 is_netdev_linux_class(const struct netdev_class *netdev_class)
468 return netdev_class->run == netdev_linux_run;
/* Returns true if 'netdev' belongs to the tap device class specifically
 * (as opposed to the system or internal Linux classes). */
472 is_tap_netdev(const struct netdev *netdev)
474 return netdev_get_class(netdev) == &netdev_tap_class;
/* Downcasts 'netdev' to its containing struct netdev_linux.  Asserts that the
 * netdev really is of a Linux class first, so a bad cast fails loudly. */
477 static struct netdev_linux *
478 netdev_linux_cast(const struct netdev *netdev)
480 ovs_assert(is_netdev_linux_class(netdev_get_class(netdev)));
482 return CONTAINER_OF(netdev, struct netdev_linux, up);
/* Downcasts rx queue 'rx' to its containing struct netdev_rxq_linux, after
 * asserting its parent netdev is of a Linux class. */
485 static struct netdev_rxq_linux *
486 netdev_rxq_linux_cast(const struct netdev_rxq *rx)
488 ovs_assert(is_netdev_linux_class(netdev_get_class(rx->netdev)));
489 return CONTAINER_OF(rx, struct netdev_rxq_linux, up);
492 static void netdev_linux_update(struct netdev_linux *netdev,
493 const struct rtnetlink_link_change *)
494 OVS_REQUIRES(netdev->mutex);
495 static void netdev_linux_changed(struct netdev_linux *netdev,
496 unsigned int ifi_flags, unsigned int mask)
497 OVS_REQUIRES(netdev->mutex);
499 /* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK changes, or NULL
500 * if no such socket could be created. */
501 static struct nl_sock *
502 netdev_linux_notify_sock(void)
/* The socket is created exactly once, on first call, and shared thereafter. */
504 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
505 static struct nl_sock *sock;
507 if (ovsthread_once_start(&once)) {
510 error = nl_sock_create(NETLINK_ROUTE, &sock);
/* Join the RTNLGRP_LINK multicast group to receive link-change events. */
512 error = nl_sock_join_mcgroup(sock, RTNLGRP_LINK);
/* On failure to join, destroy the socket; callers then see NULL.
 * NOTE(review): excerpt truncated -- error-check conditions and the
 * 'sock = NULL' reset are not visible here. */
514 nl_sock_destroy(sock);
518 ovsthread_once_done(&once);
/* Returns true if at least one netdev is using miimon polling, by reading the
 * global atomic 'miimon_cnt'.  Used to skip miimon work entirely when no
 * device needs it (see comment above 'miimon_cnt'). */
525 netdev_linux_miimon_enabled(void)
529 atomic_read(&miimon_cnt, &miimon);
/* Per-class 'run' callback for Linux netdevs.  Polls miimon status when
 * enabled, then drains pending rtnetlink link-change notifications and pushes
 * each one into the matching netdev's cached state.
 * NOTE(review): excerpt appears truncated; loop/brace structure and some
 * declarations are missing -- confirm against upstream. */
534 netdev_linux_run(void)
536 struct nl_sock *sock;
539 if (netdev_linux_miimon_enabled()) {
540 netdev_linux_miimon_run();
543 sock = netdev_linux_notify_sock();
549 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
550 uint64_t buf_stub[4096 / 8];
553 ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
554 error = nl_sock_recv(sock, &buf, false);
556 struct rtnetlink_link_change change;
558 if (rtnetlink_link_parse(&buf, &change)) {
/* A parsed link change: apply it to the named netdev if it is ours. */
559 struct netdev *netdev_ = netdev_from_name(change.ifname);
560 if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
561 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
563 ovs_mutex_lock(&netdev->mutex);
564 netdev_linux_update(netdev, &change);
565 ovs_mutex_unlock(&netdev->mutex);
567 netdev_close(netdev_);
569 } else if (error == ENOBUFS) {
/* The kernel dropped notifications (socket buffer overflow), so we cannot
 * trust cached flags on any device: refresh every netdev of this class. */
570 struct shash device_shash;
571 struct shash_node *node;
575 shash_init(&device_shash);
576 netdev_get_devices(&netdev_linux_class, &device_shash);
577 SHASH_FOR_EACH (node, &device_shash) {
578 struct netdev *netdev_ = node->data;
579 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
582 ovs_mutex_lock(&netdev->mutex);
583 get_flags(netdev_, &flags);
584 netdev_linux_changed(netdev, flags, 0);
585 ovs_mutex_unlock(&netdev->mutex);
587 netdev_close(netdev_);
589 shash_destroy(&device_shash);
590 } else if (error != EAGAIN) {
591 VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
592 ovs_strerror(error));
/* Per-class 'wait' callback: arranges for the next poll_block() to wake when
 * miimon needs polling or the rtnetlink notification socket is readable. */
599 netdev_linux_wait(void)
601 struct nl_sock *sock;
603 if (netdev_linux_miimon_enabled()) {
604 netdev_linux_miimon_wait();
606 sock = netdev_linux_notify_sock();
608 nl_sock_wait(sock, POLLIN);
/* Records that 'dev' changed: bumps its change sequence number, counts a
 * carrier reset if IFF_RUNNING toggled, stores the new 'ifi_flags', and
 * invalidates every cache_valid bit not preserved by 'mask'. */
613 netdev_linux_changed(struct netdev_linux *dev,
614 unsigned int ifi_flags, unsigned int mask)
615 OVS_REQUIRES(dev->mutex)
617 netdev_change_seq_changed(&dev->up);
/* A flip of IFF_RUNNING means carrier went down and/or came back up. */
619 if ((dev->ifi_flags ^ ifi_flags) & IFF_RUNNING) {
620 dev->carrier_resets++;
622 dev->ifi_flags = ifi_flags;
624 dev->cache_valid &= mask;
/* Applies an rtnetlink link-change message 'change' to 'dev''s cached state.
 * For RTM_NEWLINK, refreshes MTU, Ethernet address, and ifindex directly from
 * the message (marking those caches valid); otherwise just records the flag
 * change and invalidates all caches.
 * NOTE(review): excerpt truncated -- the RTM_DELLINK/else branch structure is
 * only partially visible. */
628 netdev_linux_update(struct netdev_linux *dev,
629 const struct rtnetlink_link_change *change)
630 OVS_REQUIRES(dev->mutex)
632 if (change->nlmsg_type == RTM_NEWLINK) {
/* Keep VALID_DRVINFO; everything else must be re-fetched or re-filled. */
634 netdev_linux_changed(dev, change->ifi_flags, VALID_DRVINFO);
636 /* Update netdev from rtnl-change msg. */
638 dev->mtu = change->mtu;
639 dev->cache_valid |= VALID_MTU;
640 dev->netdev_mtu_error = 0;
/* Only accept a nonzero hardware address from the message. */
643 if (!eth_addr_is_zero(change->addr)) {
644 memcpy(dev->etheraddr, change->addr, ETH_ADDR_LEN);
645 dev->cache_valid |= VALID_ETHERADDR;
646 dev->ether_addr_error = 0;
649 dev->ifindex = change->ifi_index;
650 dev->cache_valid |= VALID_IFINDEX;
651 dev->get_ifindex_error = 0;
654 netdev_linux_changed(dev, change->ifi_flags, 0);
/* 'alloc' callback: zero-allocates a struct netdev_linux and returns the
 * embedded generic netdev.
 * NOTE(review): the 'return &netdev->up;' line is not visible in this
 * excerpt -- confirm against upstream. */
658 static struct netdev *
659 netdev_linux_alloc(void)
661 struct netdev_linux *netdev = xzalloc(sizeof *netdev);
/* Construction steps shared by system, internal, and tap devices:
 * initializes the per-netdev mutex. */
666 netdev_linux_common_construct(struct netdev_linux *netdev)
668 ovs_mutex_init(&netdev->mutex);
671 /* Creates system and internal devices. */
673 netdev_linux_construct(struct netdev *netdev_)
675 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
678 netdev_linux_common_construct(netdev);
/* Prime the cached interface flags; ENODEV means the kernel device does not
 * exist (yet). */
680 error = get_flags(&netdev->up, &netdev->ifi_flags);
681 if (error == ENODEV) {
682 if (netdev->up.netdev_class != &netdev_internal_class) {
683 /* The device does not exist, so don't allow it to be opened. */
686 /* "Internal" netdevs have to be created as netdev objects before
687 * they exist in the kernel, because creating them in the kernel
688 * happens by passing a netdev object to dpif_port_add().
689 * Therefore, ignore the error. */
696 /* For most types of netdevs we open the device for each call of
697 * netdev_open(). However, this is not the case with tap devices,
698 * since it is only possible to open the device once. In this
699 * situation we share a single file descriptor, and consequently
700 * buffers, across all readers. Therefore once data is read it will
701 * be unavailable to other reads for tap devices. */
703 netdev_linux_construct_tap(struct netdev *netdev_)
705 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
706 static const char tap_dev[] = "/dev/net/tun";
707 const char *name = netdev_->name;
711 netdev_linux_common_construct(netdev);
713 /* Open tap device. */
714 netdev->tap_fd = open(tap_dev, O_RDWR);
715 if (netdev->tap_fd < 0) {
717 VLOG_WARN("opening \"%s\" failed: %s", tap_dev, ovs_strerror(error));
721 /* Create tap device. */
/* IFF_NO_PI: no packet-info header prepended to frames on this fd. */
722 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
723 ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
724 if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
725 VLOG_WARN("%s: creating tap device failed: %s", name,
726 ovs_strerror(errno));
731 /* Make non-blocking. */
732 error = set_nonblocking(netdev->tap_fd);
/* Error path: close the fd so it is not leaked.
 * NOTE(review): excerpt truncated -- the 'error:' label and return
 * statements are not visible. */
740 close(netdev->tap_fd);
/* 'destruct' callback: tears down any installed TC state, closes the shared
 * tap fd for tap devices, decrements the global miimon user count if this
 * device used miimon, and destroys the mutex. */
745 netdev_linux_destruct(struct netdev *netdev_)
747 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
749 if (netdev->tc && netdev->tc->ops->tc_destroy) {
750 netdev->tc->ops->tc_destroy(netdev->tc);
753 if (netdev_get_class(netdev_) == &netdev_tap_class
754 && netdev->tap_fd >= 0)
756 close(netdev->tap_fd);
759 if (netdev->miimon_interval > 0) {
761 atomic_sub(&miimon_cnt, 1, &junk);
764 ovs_mutex_destroy(&netdev->mutex);
/* 'dealloc' callback: frees the struct netdev_linux allocated by
 * netdev_linux_alloc().  (The free() call is not visible in this excerpt.) */
768 netdev_linux_dealloc(struct netdev *netdev_)
770 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
/* 'rxq_alloc' callback: zero-allocates a struct netdev_rxq_linux.
 * NOTE(review): the 'return &rx->up;' line is not visible in this excerpt. */
774 static struct netdev_rxq *
775 netdev_linux_rxq_alloc(void)
777 struct netdev_rxq_linux *rx = xzalloc(sizeof *rx);
/* 'rxq_construct' callback.  For tap devices, reuses the shared tap fd; for
 * all other devices, opens a raw AF_PACKET socket, enables PACKET_AUXDATA
 * (for VLAN tag recovery), makes it non-blocking, binds it to this device's
 * ifindex, and attaches a BPF filter that accepts only inbound packets.
 * NOTE(review): excerpt truncated -- several braces, declarations ('val',
 * 'ifindex', 'error'), and the error-cleanup path are not visible. */
782 netdev_linux_rxq_construct(struct netdev_rxq *rxq_)
784 struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
785 struct netdev *netdev_ = rx->up.netdev;
786 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
789 ovs_mutex_lock(&netdev->mutex);
790 rx->is_tap = is_tap_netdev(netdev_);
792 rx->fd = netdev->tap_fd;
794 struct sockaddr_ll sll;
796 /* Result of tcpdump -dd inbound */
797 static const struct sock_filter filt[] = {
798 { 0x28, 0, 0, 0xfffff004 }, /* ldh [0] */
799 { 0x15, 0, 1, 0x00000004 }, /* jeq #4 jt 2 jf 3 */
800 { 0x6, 0, 0, 0x00000000 }, /* ret #0 */
801 { 0x6, 0, 0, 0x0000ffff } /* ret #65535 */
803 static const struct sock_fprog fprog = {
804 ARRAY_SIZE(filt), (struct sock_filter *) filt
807 /* Create file descriptor. */
808 rx->fd = socket(PF_PACKET, SOCK_RAW, 0);
811 VLOG_ERR("failed to create raw socket (%s)", ovs_strerror(error));
816 if (setsockopt(rx->fd, SOL_PACKET, PACKET_AUXDATA, &val, sizeof val)) {
818 VLOG_ERR("%s: failed to mark socket for auxdata (%s)",
819 netdev_get_name(netdev_), ovs_strerror(error));
823 /* Set non-blocking mode. */
824 error = set_nonblocking(rx->fd);
829 /* Get ethernet device index. */
830 error = get_ifindex(&netdev->up, &ifindex);
835 /* Bind to specific ethernet device. */
836 memset(&sll, 0, sizeof sll);
837 sll.sll_family = AF_PACKET;
838 sll.sll_ifindex = ifindex;
839 sll.sll_protocol = htons(ETH_P_ALL);
840 if (bind(rx->fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
842 VLOG_ERR("%s: failed to bind raw socket (%s)",
843 netdev_get_name(netdev_), ovs_strerror(error));
847 /* Filter for only inbound packets. */
848 error = setsockopt(rx->fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog,
852 VLOG_ERR("%s: failed to attach filter (%s)",
853 netdev_get_name(netdev_), ovs_strerror(error));
857 ovs_mutex_unlock(&netdev->mutex);
865 ovs_mutex_unlock(&netdev->mutex);
/* 'rxq_destruct' callback.  (The close of the per-queue fd for non-tap
 * devices is not visible in this excerpt.) */
870 netdev_linux_rxq_destruct(struct netdev_rxq *rxq_)
872 struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
/* 'rxq_dealloc' callback: frees the struct netdev_rxq_linux.  (The free()
 * call is not visible in this excerpt.) */
880 netdev_linux_rxq_dealloc(struct netdev_rxq *rxq_)
882 struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
/* Returns the VLAN TPID (in network byte order) to use for the packet whose
 * auxdata is 'aux': the kernel-reported tp_vlan_tpid when the kernel marked
 * it valid (Linux >= 3.13), otherwise the classic 0x8100 Ethertype. */
888 auxdata_to_vlan_tpid(const struct tpacket_auxdata *aux)
890 if (aux->tp_status & TP_STATUS_VLAN_TPID_VALID) {
891 return htons(aux->tp_vlan_tpid);
893 return htons(ETH_TYPE_VLAN);
/* Returns true if 'aux' carries a VLAN TCI.  A nonzero tp_vlan_tci alone is
 * accepted because kernels before 3.0 lack TP_STATUS_VLAN_VALID (and so
 * cannot distinguish "no VLAN" from "VLAN with TCI 0"). */
898 auxdata_has_vlan_tci(const struct tpacket_auxdata *aux)
900 return aux->tp_vlan_tci || aux->tp_status & TP_STATUS_VLAN_VALID;
/* Receives one packet from AF_PACKET socket 'fd' into 'buffer', then walks
 * the control messages for PACKET_AUXDATA and, if the kernel stripped a VLAN
 * tag, pushes it back onto the frame (headroom was reserved for this).
 * NOTE(review): excerpt truncated -- msghdr setup lines, return statements,
 * and some braces are not visible. */
904 netdev_linux_rxq_recv_sock(int fd, struct ofpbuf *buffer)
909 struct cmsghdr *cmsg;
912 char buffer[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
916 /* Reserve headroom for a single VLAN tag */
917 ofpbuf_reserve(buffer, VLAN_HEADER_LEN);
918 size = ofpbuf_tailroom(buffer);
920 iov.iov_base = ofpbuf_data(buffer);
922 msgh.msg_name = NULL;
923 msgh.msg_namelen = 0;
926 msgh.msg_control = &cmsg_buffer;
927 msgh.msg_controllen = sizeof cmsg_buffer;
/* Retry the recvmsg() if interrupted by a signal.  MSG_TRUNC makes the
 * kernel report the full packet length even if it did not fit. */
931 retval = recvmsg(fd, &msgh, MSG_TRUNC);
932 } while (retval < 0 && errno == EINTR);
936 } else if (retval > size) {
940 ofpbuf_set_size(buffer, ofpbuf_size(buffer) + retval);
/* Scan control messages for the packet auxdata. */
942 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg; cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
943 const struct tpacket_auxdata *aux;
945 if (cmsg->cmsg_level != SOL_PACKET
946 || cmsg->cmsg_type != PACKET_AUXDATA
947 || cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata))) {
951 aux = ALIGNED_CAST(struct tpacket_auxdata *, CMSG_DATA(cmsg));
952 if (auxdata_has_vlan_tci(aux)) {
/* A frame shorter than an Ethernet header cannot take a VLAN tag. */
953 if (retval < ETH_HEADER_LEN) {
957 eth_push_vlan(buffer, auxdata_to_vlan_tpid(aux),
958 htons(aux->tp_vlan_tci));
/* Receives one packet from tap fd 'fd' into the tailroom of 'buffer',
 * retrying on EINTR and growing the buffer's recorded size on success.
 * NOTE(review): excerpt truncated -- error-return statements are missing. */
967 netdev_linux_rxq_recv_tap(int fd, struct ofpbuf *buffer)
970 size_t size = ofpbuf_tailroom(buffer);
973 retval = read(fd, ofpbuf_data(buffer), size);
974 } while (retval < 0 && errno == EINTR);
978 } else if (retval > size) {
982 ofpbuf_set_size(buffer, ofpbuf_size(buffer) + retval);
/* 'rxq_recv' callback: allocates a buffer sized for the device MTU (falling
 * back to ETH_PAYLOAD_MAX if the MTU is unknown), receives via the tap or
 * socket path, pads short frames, and hands the packet back to the caller.
 * NOTE(review): the VLOG_WARN_RL arguments at the failure branch appear
 * swapped (error string where the name belongs and vice versa) -- upstream
 * fixed this ordering; confirm before relying on the log text. */
987 netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct ofpbuf **packet, int *c)
989 struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
990 struct netdev *netdev = rx->up.netdev;
991 struct ofpbuf *buffer;
995 if (netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu)) {
996 mtu = ETH_PAYLOAD_MAX;
999 buffer = ofpbuf_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu, DP_NETDEV_HEADROOM);
1001 retval = (rx->is_tap
1002 ? netdev_linux_rxq_recv_tap(rx->fd, buffer)
1003 : netdev_linux_rxq_recv_sock(rx->fd, buffer));
1006 if (retval != EAGAIN && retval != EMSGSIZE) {
1007 VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
1008 ovs_strerror(errno), netdev_rxq_get_name(rxq_));
1010 ofpbuf_delete(buffer);
1012 dp_packet_pad(buffer);
/* 'rxq_wait' callback: wakes the next poll_block() when this queue's fd
 * becomes readable. */
1021 netdev_linux_rxq_wait(struct netdev_rxq *rxq_)
1023 struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
1024 poll_fd_wait(rx->fd, POLLIN);
/* 'rxq_drain' callback: discards all packets waiting on the queue.  Tap
 * devices drain by reading up to the device's tx queue length (SIOCGIFTXQLEN)
 * packets; sockets drain their receive buffer directly. */
1028 netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
1030 struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
1033 int error = af_inet_ifreq_ioctl(netdev_rxq_get_name(rxq_), &ifr,
1034 SIOCGIFTXQLEN, "SIOCGIFTXQLEN");
1038 drain_fd(rx->fd, ifr.ifr_qlen);
1041 return drain_rcvbuf(rx->fd);
1045 /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive
1046 * errno value. Returns EAGAIN without blocking if the packet cannot be queued
1047 * immediately. Returns EMSGSIZE if a partial packet was transmitted or if
1048 * the packet is too big or too small to transmit on the device.
1050 * The caller retains ownership of 'buffer' in all cases.
1052 * The kernel maintains a packet transmission queue, so the caller is not
1053 * expected to do additional queuing of packets. */
1055 netdev_linux_send(struct netdev *netdev_, struct ofpbuf *pkt, bool may_steal)
1057 const void *data = ofpbuf_data(pkt);
1058 size_t size = ofpbuf_size(pkt);
1063 if (!is_tap_netdev(netdev_)) {
1064 /* Use our AF_PACKET socket to send to this device. */
1065 struct sockaddr_ll sll;
1071 sock = af_packet_sock();
1076 ifindex = netdev_get_ifindex(netdev_);
1081 /* We don't bother setting most fields in sockaddr_ll because the
1082 * kernel ignores them for SOCK_RAW. */
1083 memset(&sll, 0, sizeof sll);
1084 sll.sll_family = AF_PACKET;
1085 sll.sll_ifindex = ifindex;
1087 iov.iov_base = CONST_CAST(void *, data);
1090 msg.msg_name = &sll;
1091 msg.msg_namelen = sizeof sll;
1094 msg.msg_control = NULL;
1095 msg.msg_controllen = 0;
1098 retval = sendmsg(sock, &msg, 0);
1100 /* Use the tap fd to send to this device. This is essential for
1101 * tap devices, because packets sent to a tap device with an
1102 * AF_PACKET socket will loop back to be *received* again on the
1103 * tap device. This doesn't occur on other interface types
1104 * because we attach a socket filter to the rx socket. */
1105 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1107 retval = write(netdev->tap_fd, data, size);
1115 /* The Linux AF_PACKET implementation never blocks waiting for room
1116 * for packets, instead returning ENOBUFS. Translate this into
1117 * EAGAIN for the caller. */
1118 if (errno == ENOBUFS) {
1120 } else if (errno == EINTR) {
1122 } else if (errno != EAGAIN) {
1123 VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
1124 netdev_get_name(netdev_), ovs_strerror(errno));
/* A short write counts as EMSGSIZE per the contract above.
 * NOTE(review): "%"PRIuSIZE"d" in the format string below looks like a
 * stray 'd' after the PRIuSIZE macro -- confirm against upstream. */
1127 } else if (retval != size) {
1128 VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIuSIZE"d bytes of "
1129 "%"PRIuSIZE") on %s", retval, size, netdev_get_name(netdev_));
1137 /* Registers with the poll loop to wake up from the next call to poll_block()
1138 * when the packet transmission queue has sufficient room to transmit a packet
1139 * with netdev_send().
1141 * The kernel maintains a packet transmission queue, so the client is not
1142 * expected to do additional queuing of packets. Thus, this function is
1143 * unlikely to ever be used. It is included for completeness. */
1145 netdev_linux_send_wait(struct netdev *netdev)
1147 if (is_tap_netdev(netdev)) {
1148 /* TAP device always accepts packets.*/
1149 poll_immediate_wake();
1153 /* Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful,
1154 * otherwise a positive errno value. */
1156 netdev_linux_set_etheraddr(struct netdev *netdev_,
1157 const uint8_t mac[ETH_ADDR_LEN])
1159 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1160 enum netdev_flags old_flags = 0;
1163 ovs_mutex_lock(&netdev->mutex);
/* Short-circuit: if the cached address already matches (or a cached error is
 * pending), skip the ioctl; otherwise invalidate the cache and proceed. */
1165 if (netdev->cache_valid & VALID_ETHERADDR) {
1166 error = netdev->ether_addr_error;
1167 if (error || eth_addr_equals(netdev->etheraddr, mac)) {
1170 netdev->cache_valid &= ~VALID_ETHERADDR;
1173 /* Tap devices must be brought down before setting the address. */
1174 if (is_tap_netdev(netdev_)) {
1175 update_flags(netdev, NETDEV_UP, 0, &old_flags);
1177 error = set_etheraddr(netdev_get_name(netdev_), mac);
/* Cache the result even for ENODEV so repeated calls on a vanished device
 * do not keep issuing ioctls. */
1178 if (!error || error == ENODEV) {
1179 netdev->ether_addr_error = error;
1180 netdev->cache_valid |= VALID_ETHERADDR;
1182 memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN);
/* Restore the tap device's UP flag if we cleared it above. */
1186 if (is_tap_netdev(netdev_) && old_flags & NETDEV_UP) {
1187 update_flags(netdev, 0, NETDEV_UP, &old_flags);
1191 ovs_mutex_unlock(&netdev->mutex);
1195 /* Copies 'netdev''s MAC address to 'mac' which is passed as param. */
1197 netdev_linux_get_etheraddr(const struct netdev *netdev_,
1198 uint8_t mac[ETH_ADDR_LEN])
1200 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1203 ovs_mutex_lock(&netdev->mutex);
/* Populate the cache on first use; get_etheraddr()'s result (including its
 * error code) is remembered until VALID_ETHERADDR is invalidated. */
1204 if (!(netdev->cache_valid & VALID_ETHERADDR)) {
1205 netdev->ether_addr_error = get_etheraddr(netdev_get_name(netdev_),
1207 netdev->cache_valid |= VALID_ETHERADDR;
1210 error = netdev->ether_addr_error;
1212 memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN);
1214 ovs_mutex_unlock(&netdev->mutex);
/* Internal helper: fetches the MTU via SIOCGIFMTU, caching both the value and
 * the ioctl's error code under VALID_MTU. Caller must hold netdev->mutex —
 * TODO confirm locking contract against the unsampled source. */
1220 netdev_linux_get_mtu__(struct netdev_linux *netdev, int *mtup)
1224 if (!(netdev->cache_valid & VALID_MTU)) {
1227 netdev->netdev_mtu_error = af_inet_ifreq_ioctl(
1228 netdev_get_name(&netdev->up), &ifr, SIOCGIFMTU, "SIOCGIFMTU");
1229 netdev->mtu = ifr.ifr_mtu;
1230 netdev->cache_valid |= VALID_MTU;
1233 error = netdev->netdev_mtu_error;
1235 *mtup = netdev->mtu;
1241 /* Returns the maximum size of transmitted (and received) packets on 'netdev',
1242 * in bytes, not including the hardware header; thus, this is typically 1500
1243 * bytes for Ethernet devices. */
1245 netdev_linux_get_mtu(const struct netdev *netdev_, int *mtup)
1247 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
/* Thin locked wrapper around netdev_linux_get_mtu__(). */
1250 ovs_mutex_lock(&netdev->mutex);
1251 error = netdev_linux_get_mtu__(netdev, mtup);
1252 ovs_mutex_unlock(&netdev->mutex);
1257 /* Sets the maximum size of transmitted (MTU) for given device using linux
1258 * networking ioctl interface.
1261 netdev_linux_set_mtu(const struct netdev *netdev_, int mtu)
1263 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1267 ovs_mutex_lock(&netdev->mutex);
/* Skip the ioctl when the cached MTU already matches (or a prior attempt
 * failed); otherwise invalidate the cache before re-setting. */
1268 if (netdev->cache_valid & VALID_MTU) {
1269 error = netdev->netdev_mtu_error;
1270 if (error || netdev->mtu == mtu) {
1273 netdev->cache_valid &= ~VALID_MTU;
1276 error = af_inet_ifreq_ioctl(netdev_get_name(netdev_), &ifr,
1277 SIOCSIFMTU, "SIOCSIFMTU");
/* Cache success and also ENODEV, mirroring set_etheraddr()'s policy. */
1278 if (!error || error == ENODEV) {
1279 netdev->netdev_mtu_error = error;
1280 netdev->mtu = ifr.ifr_mtu;
1281 netdev->cache_valid |= VALID_MTU;
1284 ovs_mutex_unlock(&netdev->mutex);
1288 /* Returns the ifindex of 'netdev', if successful, as a positive number.
1289 * On failure, returns a negative errno value. */
1291 netdev_linux_get_ifindex(const struct netdev *netdev_)
1293 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1296 ovs_mutex_lock(&netdev->mutex);
1297 error = get_ifindex(netdev_, &ifindex);
1298 ovs_mutex_unlock(&netdev->mutex);
/* Note the sign convention: errors are negated so callers can distinguish
 * them from valid (positive) ifindexes. */
1300 return error ? -error : ifindex;
/* Reports link state in '*carrier': from the MII monitor when miimon polling
 * is enabled, otherwise from the kernel's IFF_RUNNING interface flag. */
1304 netdev_linux_get_carrier(const struct netdev *netdev_, bool *carrier)
1306 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1308 ovs_mutex_lock(&netdev->mutex);
1309 if (netdev->miimon_interval > 0) {
1310 *carrier = netdev->miimon;
/* else-branch (keyword line lost in this extraction): IFF_RUNNING fallback. */
1312 *carrier = (netdev->ifi_flags & IFF_RUNNING) != 0;
1314 ovs_mutex_unlock(&netdev->mutex);
1319 static long long int
1320 netdev_linux_get_carrier_resets(const struct netdev *netdev_)
1322 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1323 long long int carrier_resets;
1325 ovs_mutex_lock(&netdev->mutex);
1326 carrier_resets = netdev->carrier_resets;
1327 ovs_mutex_unlock(&netdev->mutex);
1329 return carrier_resets;
/* Issues MII ioctl 'cmd' ("cmd_name" for logging) on interface 'name',
 * copying 'data' into and back out of ifr.ifr_data around the call. */
1333 netdev_linux_do_miimon(const char *name, int cmd, const char *cmd_name,
1334 struct mii_ioctl_data *data)
1339 memset(&ifr, 0, sizeof ifr);
/* mii_ioctl_data is passed by value inside ifr_data (it fits), not by
 * pointer — hence the memcpy in both directions. */
1340 memcpy(&ifr.ifr_data, data, sizeof *data);
1341 error = af_inet_ifreq_ioctl(name, &ifr, cmd, cmd_name);
1342 memcpy(data, &ifr.ifr_data, sizeof *data);
/* Determines link status for 'name' into '*miimon': first via MII registers
 * (SIOCGMIIPHY + SIOCGMIIREG/BMSR), falling back to ETHTOOL_GLINK when the
 * device has no usable MII. */
1348 netdev_linux_get_miimon(const char *name, bool *miimon)
1350 struct mii_ioctl_data data;
1355 memset(&data, 0, sizeof data);
1356 error = netdev_linux_do_miimon(name, SIOCGMIIPHY, "SIOCGMIIPHY", &data);
1358 /* data.phy_id is filled out by previous SIOCGMIIPHY miimon call. */
1359 data.reg_num = MII_BMSR;
1360 error = netdev_linux_do_miimon(name, SIOCGMIIREG, "SIOCGMIIREG",
1364 *miimon = !!(data.val_out & BMSR_LSTATUS);
1366 VLOG_WARN_RL(&rl, "%s: failed to query MII", name);
/* ethtool fallback path. */
1369 struct ethtool_cmd ecmd;
1371 VLOG_DBG_RL(&rl, "%s: failed to query MII, falling back to ethtool",
1374 COVERAGE_INC(netdev_get_ethtool);
1375 memset(&ecmd, 0, sizeof ecmd);
1376 error = netdev_linux_do_ethtool(name, &ecmd, ETHTOOL_GLINK,
/* ETHTOOL_GLINK answers in an ethtool_value overlaid on ecmd's storage. */
1379 struct ethtool_value eval;
1381 memcpy(&eval, &ecmd, sizeof eval);
1382 *miimon = !!eval.data;
1384 VLOG_WARN_RL(&rl, "%s: ethtool link status failed", name);
/* Enables (interval > 0) or disables (interval <= 0) MII-based link
 * monitoring. Positive intervals are clamped to at least 100 ms. */
1392 netdev_linux_set_miimon_interval(struct netdev *netdev_,
1393 long long int interval)
1395 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1397 ovs_mutex_lock(&netdev->mutex);
1398 interval = interval > 0 ? MAX(interval, 100) : 0;
1399 if (netdev->miimon_interval != interval) {
/* miimon_cnt counts devices with miimon enabled (atomic: read without the
 * per-device mutex elsewhere); adjust it on enable/disable transitions. */
1402 if (interval && !netdev->miimon_interval) {
1403 atomic_add(&miimon_cnt, 1, &junk);
1404 } else if (!interval && netdev->miimon_interval) {
1405 atomic_sub(&miimon_cnt, 1, &junk);
1408 netdev->miimon_interval = interval;
/* Force an immediate poll so the new setting takes effect right away. */
1409 timer_set_expired(&netdev->miimon_timer);
1411 ovs_mutex_unlock(&netdev->mutex);
/* Periodic hook: polls MII link status for every netdev-linux device whose
 * miimon timer has expired, and signals a change via netdev_linux_changed()
 * when the link state flips. */
1417 netdev_linux_miimon_run(void)
1419 struct shash device_shash;
1420 struct shash_node *node;
1422 shash_init(&device_shash);
1423 netdev_get_devices(&netdev_linux_class, &device_shash);
1424 SHASH_FOR_EACH (node, &device_shash) {
1425 struct netdev *netdev = node->data;
1426 struct netdev_linux *dev = netdev_linux_cast(netdev);
1429 ovs_mutex_lock(&dev->mutex);
1430 if (dev->miimon_interval > 0 && timer_expired(&dev->miimon_timer)) {
1431 netdev_linux_get_miimon(dev->up.name, &miimon);
1432 if (miimon != dev->miimon) {
1433 dev->miimon = miimon;
1434 netdev_linux_changed(dev, dev->ifi_flags, 0);
/* Re-arm the timer for the next poll. */
1437 timer_set_duration(&dev->miimon_timer, dev->miimon_interval);
1439 ovs_mutex_unlock(&dev->mutex);
/* netdev_get_devices() returned referenced netdevs; drop each reference. */
1440 netdev_close(netdev);
1443 shash_destroy(&device_shash);
1447 netdev_linux_miimon_wait(void)
1449 struct shash device_shash;
1450 struct shash_node *node;
1452 shash_init(&device_shash);
1453 netdev_get_devices(&netdev_linux_class, &device_shash);
1454 SHASH_FOR_EACH (node, &device_shash) {
1455 struct netdev *netdev = node->data;
1456 struct netdev_linux *dev = netdev_linux_cast(netdev);
1458 ovs_mutex_lock(&dev->mutex);
1459 if (dev->miimon_interval > 0) {
1460 timer_wait(&dev->miimon_timer);
1462 ovs_mutex_unlock(&dev->mutex);
1463 netdev_close(netdev);
1465 shash_destroy(&device_shash);
1469 swap_uint64(uint64_t *a, uint64_t *b)
1476 /* Copies 'src' into 'dst', performing format conversion in the process.
1478 * 'src' is allowed to be misaligned. */
1480 netdev_stats_from_ovs_vport_stats(struct netdev_stats *dst,
1481 const struct ovs_vport_stats *src)
/* get_unaligned_u64() makes the reads safe even when 'src' came straight out
 * of a Netlink attribute with no alignment guarantee. */
1483 dst->rx_packets = get_unaligned_u64(&src->rx_packets);
1484 dst->tx_packets = get_unaligned_u64(&src->tx_packets);
1485 dst->rx_bytes = get_unaligned_u64(&src->rx_bytes);
1486 dst->tx_bytes = get_unaligned_u64(&src->tx_bytes);
1487 dst->rx_errors = get_unaligned_u64(&src->rx_errors);
1488 dst->tx_errors = get_unaligned_u64(&src->tx_errors);
1489 dst->rx_dropped = get_unaligned_u64(&src->rx_dropped);
1490 dst->tx_dropped = get_unaligned_u64(&src->tx_dropped);
/* Fields below have no ovs_vport_stats counterpart; zero them explicitly. */
1492 dst->collisions = 0;
1493 dst->rx_length_errors = 0;
1494 dst->rx_over_errors = 0;
1495 dst->rx_crc_errors = 0;
1496 dst->rx_frame_errors = 0;
1497 dst->rx_fifo_errors = 0;
1498 dst->rx_missed_errors = 0;
1499 dst->tx_aborted_errors = 0;
1500 dst->tx_carrier_errors = 0;
1501 dst->tx_fifo_errors = 0;
1502 dst->tx_heartbeat_errors = 0;
1503 dst->tx_window_errors = 0;
/* Queries the OVS datapath (vport layer) for 'netdev''s stats and converts
 * them into '*stats'. The "} else if (!reply.stats)" branch handles a vport
 * reply that carries no stats attribute. */
1507 get_stats_via_vport__(const struct netdev *netdev, struct netdev_stats *stats)
1509 struct dpif_linux_vport reply;
1513 error = dpif_linux_vport_get(netdev_get_name(netdev), &reply, &buf);
1516 } else if (!reply.stats) {
1521 netdev_stats_from_ovs_vport_stats(stats, reply.stats);
/* Wrapper around get_stats_via_vport__() that caches the error outcome in
 * netdev->vport_stats_error (gated by VALID_VPORT_STAT_ERROR) so a device
 * with no vport is not re-queried on every stats call. */
1529 get_stats_via_vport(const struct netdev *netdev_,
1530 struct netdev_stats *stats)
1532 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1534 if (!netdev->vport_stats_error ||
1535 !(netdev->cache_valid & VALID_VPORT_STAT_ERROR)) {
1538 error = get_stats_via_vport__(netdev_, stats);
/* ENOENT just means the device is not an attached vport — not worth a
 * warning; anything else is logged (rate-limited). */
1539 if (error && error != ENOENT) {
1540 VLOG_WARN_RL(&rl, "%s: obtaining netdev stats via vport failed "
1542 netdev_get_name(netdev_), ovs_strerror(error));
1544 netdev->vport_stats_error = error;
1545 netdev->cache_valid |= VALID_VPORT_STAT_ERROR;
1549 /* Retrieves current device stats for 'netdev-linux'. */
1551 netdev_linux_get_stats(const struct netdev *netdev_,
1552 struct netdev_stats *stats)
1554 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1555 struct netdev_stats dev_stats;
1558 ovs_mutex_lock(&netdev->mutex);
/* Try vport stats first; then merge in kernel (netlink) stats. */
1559 get_stats_via_vport(netdev_, stats);
1560 error = get_stats_via_netlink(netdev_, &dev_stats);
1562 if (!netdev->vport_stats_error) {
1565 } else if (netdev->vport_stats_error) {
1566 /* stats not available from OVS then use ioctl stats. */
/* When vport stats succeeded, kernel error counters are ADDED on top of the
 * vport byte/packet counts below. */
1569 stats->rx_errors += dev_stats.rx_errors;
1570 stats->tx_errors += dev_stats.tx_errors;
1571 stats->rx_dropped += dev_stats.rx_dropped;
1572 stats->tx_dropped += dev_stats.tx_dropped;
1573 stats->multicast += dev_stats.multicast;
1574 stats->collisions += dev_stats.collisions;
1575 stats->rx_length_errors += dev_stats.rx_length_errors;
1576 stats->rx_over_errors += dev_stats.rx_over_errors;
1577 stats->rx_crc_errors += dev_stats.rx_crc_errors;
1578 stats->rx_frame_errors += dev_stats.rx_frame_errors;
1579 stats->rx_fifo_errors += dev_stats.rx_fifo_errors;
1580 stats->rx_missed_errors += dev_stats.rx_missed_errors;
1581 stats->tx_aborted_errors += dev_stats.tx_aborted_errors;
1582 stats->tx_carrier_errors += dev_stats.tx_carrier_errors;
1583 stats->tx_fifo_errors += dev_stats.tx_fifo_errors;
1584 stats->tx_heartbeat_errors += dev_stats.tx_heartbeat_errors;
1585 stats->tx_window_errors += dev_stats.tx_window_errors;
1587 ovs_mutex_unlock(&netdev->mutex);
1592 /* Retrieves current device stats for 'netdev-tap' netdev or
1593 * netdev-internal. */
1595 netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
1597 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1598 struct netdev_stats dev_stats;
1601 ovs_mutex_lock(&netdev->mutex);
1602 get_stats_via_vport(netdev_, stats);
1603 error = get_stats_via_netlink(netdev_, &dev_stats);
1605 if (!netdev->vport_stats_error) {
1608 } else if (netdev->vport_stats_error) {
1609 /* Transmit and receive stats will appear to be swapped relative to the
1610 * other ports since we are the one sending the data, not a remote
1611 * computer. For consistency, we swap them back here. This does not
1612 * apply if we are getting stats from the vport layer because it always
1613 * tracks stats from the perspective of the switch. */
1616 swap_uint64(&stats->rx_packets, &stats->tx_packets);
1617 swap_uint64(&stats->rx_bytes, &stats->tx_bytes);
1618 swap_uint64(&stats->rx_errors, &stats->tx_errors);
1619 swap_uint64(&stats->rx_dropped, &stats->tx_dropped);
/* Error-detail counters are meaningless for a local tap; clear them. */
1620 stats->rx_length_errors = 0;
1621 stats->rx_over_errors = 0;
1622 stats->rx_crc_errors = 0;
1623 stats->rx_frame_errors = 0;
1624 stats->rx_fifo_errors = 0;
1625 stats->rx_missed_errors = 0;
1626 stats->tx_aborted_errors = 0;
1627 stats->tx_carrier_errors = 0;
1628 stats->tx_fifo_errors = 0;
1629 stats->tx_heartbeat_errors = 0;
1630 stats->tx_window_errors = 0;
/* Merge path (vport stats available): note the deliberate rx<->tx crossover
 * when folding in the kernel-side counters. */
1632 stats->rx_dropped += dev_stats.tx_dropped;
1633 stats->tx_dropped += dev_stats.rx_dropped;
1635 stats->rx_errors += dev_stats.tx_errors;
1636 stats->tx_errors += dev_stats.rx_errors;
1638 stats->multicast += dev_stats.multicast;
1639 stats->collisions += dev_stats.collisions;
1641 ovs_mutex_unlock(&netdev->mutex);
/* Stats for internal devices come exclusively from the vport layer; the
 * cached vport error is the function's result. */
1647 netdev_internal_get_stats(const struct netdev *netdev_,
1648 struct netdev_stats *stats)
1650 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1653 ovs_mutex_lock(&netdev->mutex);
1654 get_stats_via_vport(netdev_, stats);
1655 error = netdev->vport_stats_error;
1656 ovs_mutex_unlock(&netdev->mutex);
/* Pushes '*stats' down into the vport layer via OVS_VPORT_CMD_SET.
 * ENODEV from the transaction is deliberately tolerated (see comment). */
1662 netdev_internal_set_stats(struct netdev *netdev,
1663 const struct netdev_stats *stats)
1665 struct ovs_vport_stats vport_stats;
1666 struct dpif_linux_vport vport;
/* Translate netdev_stats into the subset ovs_vport_stats carries. */
1669 vport_stats.rx_packets = stats->rx_packets;
1670 vport_stats.tx_packets = stats->tx_packets;
1671 vport_stats.rx_bytes = stats->rx_bytes;
1672 vport_stats.tx_bytes = stats->tx_bytes;
1673 vport_stats.rx_errors = stats->rx_errors;
1674 vport_stats.tx_errors = stats->tx_errors;
1675 vport_stats.rx_dropped = stats->rx_dropped;
1676 vport_stats.tx_dropped = stats->tx_dropped;
1678 dpif_linux_vport_init(&vport);
1679 vport.cmd = OVS_VPORT_CMD_SET;
1680 vport.name = netdev_get_name(netdev);
1681 vport.stats = &vport_stats;
1683 err = dpif_linux_vport_transact(&vport, NULL, NULL);
1685 /* If the vport layer doesn't know about the device, that doesn't mean it
1686 * doesn't exist (after all were able to open it when netdev_open() was
1687 * called), it just means that it isn't attached and we'll be getting
1688 * stats a different way. */
1689 if (err == ENODEV) {
/* Reads link features via ETHTOOL_GSET and caches them (supported,
 * advertised, current) in 'netdev' under VALID_FEATURES, along with the
 * ethtool error code in get_features_error. */
1697 netdev_linux_read_features(struct netdev_linux *netdev)
1699 struct ethtool_cmd ecmd;
1703 if (netdev->cache_valid & VALID_FEATURES) {
1707 COVERAGE_INC(netdev_get_ethtool);
1708 memset(&ecmd, 0, sizeof ecmd);
1709 error = netdev_linux_do_ethtool(netdev->up.name, &ecmd,
1710 ETHTOOL_GSET, "ETHTOOL_GSET");
1715 /* Supported features. */
1716 netdev->supported = 0;
1717 if (ecmd.supported & SUPPORTED_10baseT_Half) {
1718 netdev->supported |= NETDEV_F_10MB_HD;
1720 if (ecmd.supported & SUPPORTED_10baseT_Full) {
1721 netdev->supported |= NETDEV_F_10MB_FD;
1723 if (ecmd.supported & SUPPORTED_100baseT_Half) {
1724 netdev->supported |= NETDEV_F_100MB_HD;
1726 if (ecmd.supported & SUPPORTED_100baseT_Full) {
1727 netdev->supported |= NETDEV_F_100MB_FD;
1729 if (ecmd.supported & SUPPORTED_1000baseT_Half) {
1730 netdev->supported |= NETDEV_F_1GB_HD;
1732 if (ecmd.supported & SUPPORTED_1000baseT_Full) {
1733 netdev->supported |= NETDEV_F_1GB_FD;
1735 if (ecmd.supported & SUPPORTED_10000baseT_Full) {
1736 netdev->supported |= NETDEV_F_10GB_FD;
1738 if (ecmd.supported & SUPPORTED_TP) {
1739 netdev->supported |= NETDEV_F_COPPER;
1741 if (ecmd.supported & SUPPORTED_FIBRE) {
1742 netdev->supported |= NETDEV_F_FIBER;
1744 if (ecmd.supported & SUPPORTED_Autoneg) {
1745 netdev->supported |= NETDEV_F_AUTONEG;
1747 if (ecmd.supported & SUPPORTED_Pause) {
1748 netdev->supported |= NETDEV_F_PAUSE;
1750 if (ecmd.supported & SUPPORTED_Asym_Pause) {
1751 netdev->supported |= NETDEV_F_PAUSE_ASYM;
1754 /* Advertised features. */
1755 netdev->advertised = 0;
1756 if (ecmd.advertising & ADVERTISED_10baseT_Half) {
1757 netdev->advertised |= NETDEV_F_10MB_HD;
1759 if (ecmd.advertising & ADVERTISED_10baseT_Full) {
1760 netdev->advertised |= NETDEV_F_10MB_FD;
1762 if (ecmd.advertising & ADVERTISED_100baseT_Half) {
1763 netdev->advertised |= NETDEV_F_100MB_HD;
1765 if (ecmd.advertising & ADVERTISED_100baseT_Full) {
1766 netdev->advertised |= NETDEV_F_100MB_FD;
1768 if (ecmd.advertising & ADVERTISED_1000baseT_Half) {
1769 netdev->advertised |= NETDEV_F_1GB_HD;
1771 if (ecmd.advertising & ADVERTISED_1000baseT_Full) {
1772 netdev->advertised |= NETDEV_F_1GB_FD;
1774 if (ecmd.advertising & ADVERTISED_10000baseT_Full) {
1775 netdev->advertised |= NETDEV_F_10GB_FD;
1777 if (ecmd.advertising & ADVERTISED_TP) {
1778 netdev->advertised |= NETDEV_F_COPPER;
1780 if (ecmd.advertising & ADVERTISED_FIBRE) {
1781 netdev->advertised |= NETDEV_F_FIBER;
1783 if (ecmd.advertising & ADVERTISED_Autoneg) {
1784 netdev->advertised |= NETDEV_F_AUTONEG;
1786 if (ecmd.advertising & ADVERTISED_Pause) {
1787 netdev->advertised |= NETDEV_F_PAUSE;
1789 if (ecmd.advertising & ADVERTISED_Asym_Pause) {
1790 netdev->advertised |= NETDEV_F_PAUSE_ASYM;
1793 /* Current settings. */
/* NOTE(review): 40000/100000/1000000 below are raw Mb/s values used where
 * SPEED_10..SPEED_10000 macros are used above — presumably because SPEED_40000
 * and up were not available in the minimum supported kernel headers; confirm
 * before replacing with macros. */
1795 if (speed == SPEED_10) {
1796 netdev->current = ecmd.duplex ? NETDEV_F_10MB_FD : NETDEV_F_10MB_HD;
1797 } else if (speed == SPEED_100) {
1798 netdev->current = ecmd.duplex ? NETDEV_F_100MB_FD : NETDEV_F_100MB_HD;
1799 } else if (speed == SPEED_1000) {
1800 netdev->current = ecmd.duplex ? NETDEV_F_1GB_FD : NETDEV_F_1GB_HD;
1801 } else if (speed == SPEED_10000) {
1802 netdev->current = NETDEV_F_10GB_FD;
1803 } else if (speed == 40000) {
1804 netdev->current = NETDEV_F_40GB_FD;
1805 } else if (speed == 100000) {
1806 netdev->current = NETDEV_F_100GB_FD;
1807 } else if (speed == 1000000) {
1808 netdev->current = NETDEV_F_1TB_FD;
1810 netdev->current = 0;
1813 if (ecmd.port == PORT_TP) {
1814 netdev->current |= NETDEV_F_COPPER;
1815 } else if (ecmd.port == PORT_FIBRE) {
1816 netdev->current |= NETDEV_F_FIBER;
1820 netdev->current |= NETDEV_F_AUTONEG;
/* Cache both the feature bits and the ethtool error for later calls. */
1824 netdev->cache_valid |= VALID_FEATURES;
1825 netdev->get_features_error = error;
1828 /* Stores the features supported by 'netdev' into of '*current', '*advertised',
1829 * '*supported', and '*peer'. Each value is a bitmap of NETDEV_* bits.
1830 * Returns 0 if successful, otherwise a positive errno value. */
1832 netdev_linux_get_features(const struct netdev *netdev_,
1833 enum netdev_features *current,
1834 enum netdev_features *advertised,
1835 enum netdev_features *supported,
1836 enum netdev_features *peer)
1838 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1841 ovs_mutex_lock(&netdev->mutex);
/* Populate (or reuse) the feature cache, then copy it out. */
1842 netdev_linux_read_features(netdev);
1843 if (!netdev->get_features_error) {
1844 *current = netdev->current;
1845 *advertised = netdev->advertised;
1846 *supported = netdev->supported;
1847 *peer = 0; /* XXX */
1849 error = netdev->get_features_error;
1850 ovs_mutex_unlock(&netdev->mutex);
1855 /* Set the features advertised by 'netdev' to 'advertise'. */
1857 netdev_linux_set_advertisements(struct netdev *netdev_,
1858 enum netdev_features advertise)
1860 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1861 struct ethtool_cmd ecmd;
1864 ovs_mutex_lock(&netdev->mutex);
/* Read-modify-write: fetch current settings with ETHTOOL_GSET, rewrite only
 * the advertising mask, then push back with ETHTOOL_SSET. */
1866 COVERAGE_INC(netdev_get_ethtool);
1867 memset(&ecmd, 0, sizeof ecmd);
1868 error = netdev_linux_do_ethtool(netdev_get_name(netdev_), &ecmd,
1869 ETHTOOL_GSET, "ETHTOOL_GSET");
1874 ecmd.advertising = 0;
1875 if (advertise & NETDEV_F_10MB_HD) {
1876 ecmd.advertising |= ADVERTISED_10baseT_Half;
1878 if (advertise & NETDEV_F_10MB_FD) {
1879 ecmd.advertising |= ADVERTISED_10baseT_Full;
1881 if (advertise & NETDEV_F_100MB_HD) {
1882 ecmd.advertising |= ADVERTISED_100baseT_Half;
1884 if (advertise & NETDEV_F_100MB_FD) {
1885 ecmd.advertising |= ADVERTISED_100baseT_Full;
1887 if (advertise & NETDEV_F_1GB_HD) {
1888 ecmd.advertising |= ADVERTISED_1000baseT_Half;
1890 if (advertise & NETDEV_F_1GB_FD) {
1891 ecmd.advertising |= ADVERTISED_1000baseT_Full;
1893 if (advertise & NETDEV_F_10GB_FD) {
1894 ecmd.advertising |= ADVERTISED_10000baseT_Full;
1896 if (advertise & NETDEV_F_COPPER) {
1897 ecmd.advertising |= ADVERTISED_TP;
1899 if (advertise & NETDEV_F_FIBER) {
1900 ecmd.advertising |= ADVERTISED_FIBRE;
1902 if (advertise & NETDEV_F_AUTONEG) {
1903 ecmd.advertising |= ADVERTISED_Autoneg;
1905 if (advertise & NETDEV_F_PAUSE) {
1906 ecmd.advertising |= ADVERTISED_Pause;
1908 if (advertise & NETDEV_F_PAUSE_ASYM) {
1909 ecmd.advertising |= ADVERTISED_Asym_Pause;
1911 COVERAGE_INC(netdev_set_ethtool);
1912 error = netdev_linux_do_ethtool(netdev_get_name(netdev_), &ecmd,
1913 ETHTOOL_SSET, "ETHTOOL_SSET");
1916 ovs_mutex_unlock(&netdev->mutex);
1920 /* Attempts to set input rate limiting (policing) policy. Returns 0 if
1921 * successful, otherwise a positive errno value. */
1923 netdev_linux_set_policing(struct netdev *netdev_,
1924 uint32_t kbits_rate, uint32_t kbits_burst)
1926 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
1927 const char *netdev_name = netdev_get_name(netdev_);
1930 kbits_burst = (!kbits_rate ? 0 /* Force to 0 if no rate specified. */
1931 : !kbits_burst ? 1000 /* Default to 1000 kbits if 0. */
1932 : kbits_burst); /* Stick with user-specified value. */
1934 ovs_mutex_lock(&netdev->mutex);
/* Skip the tc round-trips if the cached rate/burst already match. */
1935 if (netdev->cache_valid & VALID_POLICING) {
1936 error = netdev->netdev_policing_error;
1937 if (error || (netdev->kbits_rate == kbits_rate &&
1938 netdev->kbits_burst == kbits_burst)) {
1939 /* Assume that settings haven't changed since we last set them. */
1942 netdev->cache_valid &= ~VALID_POLICING;
1945 COVERAGE_INC(netdev_set_policing);
1946 /* Remove any existing ingress qdisc. */
1947 error = tc_add_del_ingress_qdisc(netdev_, false);
1949 VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
1950 netdev_name, ovs_strerror(error));
/* A nonzero rate means: re-add the ingress qdisc, then attach the policer. */
1955 error = tc_add_del_ingress_qdisc(netdev_, true);
1957 VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
1958 netdev_name, ovs_strerror(error));
1962 error = tc_add_policer(netdev_, kbits_rate, kbits_burst);
1964 VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s",
1965 netdev_name, ovs_strerror(error));
1970 netdev->kbits_rate = kbits_rate;
1971 netdev->kbits_burst = kbits_burst;
/* Cache outcome for success and ENODEV, as elsewhere in this file. */
1974 if (!error || error == ENODEV) {
1975 netdev->netdev_policing_error = error;
1976 netdev->cache_valid |= VALID_POLICING;
1978 ovs_mutex_unlock(&netdev->mutex);
1983 netdev_linux_get_qos_types(const struct netdev *netdev OVS_UNUSED,
1986 const struct tc_ops *const *opsp;
1988 for (opsp = tcs; *opsp != NULL; opsp++) {
1989 const struct tc_ops *ops = *opsp;
1990 if (ops->tc_install && ops->ovs_name[0] != '\0') {
1991 sset_add(types, ops->ovs_name);
1997 static const struct tc_ops *
1998 tc_lookup_ovs_name(const char *name)
2000 const struct tc_ops *const *opsp;
2002 for (opsp = tcs; *opsp != NULL; opsp++) {
2003 const struct tc_ops *ops = *opsp;
2004 if (!strcmp(name, ops->ovs_name)) {
2011 static const struct tc_ops *
2012 tc_lookup_linux_name(const char *name)
2014 const struct tc_ops *const *opsp;
2016 for (opsp = tcs; *opsp != NULL; opsp++) {
2017 const struct tc_ops *ops = *opsp;
2018 if (ops->linux_name && !strcmp(name, ops->linux_name)) {
2025 static struct tc_queue *
2026 tc_find_queue__(const struct netdev *netdev_, unsigned int queue_id,
2029 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2030 struct tc_queue *queue;
2032 HMAP_FOR_EACH_IN_BUCKET (queue, hmap_node, hash, &netdev->tc->queues) {
2033 if (queue->queue_id == queue_id) {
/* Convenience wrapper: looks up queue 'queue_id', hashing it here. */
static struct tc_queue *
tc_find_queue(const struct netdev *netdev, unsigned int queue_id)
{
    return tc_find_queue__(netdev, queue_id, hash_int(queue_id, 0));
}
/* Reports the capabilities (queue count) of QoS 'type'; looks the type up by
 * OVS name. The missing lines in this extraction presumably handle the
 * unknown-type case — TODO confirm against the unsampled source. */
2047 netdev_linux_get_qos_capabilities(const struct netdev *netdev OVS_UNUSED,
2049 struct netdev_qos_capabilities *caps)
2051 const struct tc_ops *ops = tc_lookup_ovs_name(type);
2055 caps->n_queues = ops->n_queues;
/* Reports the currently installed QoS type ('*typep') and its configuration
 * ('details') after syncing our view of the kernel qdisc. */
2060 netdev_linux_get_qos(const struct netdev *netdev_,
2061 const char **typep, struct smap *details)
2063 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2066 ovs_mutex_lock(&netdev->mutex);
2067 error = tc_query_qdisc(netdev_);
2069 *typep = netdev->tc->ops->ovs_name;
/* qdisc_get is optional; a discipline without it has no parameters. */
2070 error = (netdev->tc->ops->qdisc_get
2071 ? netdev->tc->ops->qdisc_get(netdev_, details)
2074 ovs_mutex_unlock(&netdev->mutex);
/* Installs QoS 'type' with 'details' on 'netdev_'. Reconfigures in place if
 * the same discipline is already installed; otherwise deletes the old qdisc
 * and installs the new one. */
2080 netdev_linux_set_qos(struct netdev *netdev_,
2081 const char *type, const struct smap *details)
2083 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2084 const struct tc_ops *new_ops;
2087 new_ops = tc_lookup_ovs_name(type);
2088 if (!new_ops || !new_ops->tc_install) {
2092 ovs_mutex_lock(&netdev->mutex);
2093 error = tc_query_qdisc(netdev_);
2098 if (new_ops == netdev->tc->ops) {
2099 error = new_ops->qdisc_set ? new_ops->qdisc_set(netdev_, details) : 0;
2101 /* Delete existing qdisc. */
2102 error = tc_del_qdisc(netdev_);
2106 ovs_assert(netdev->tc == NULL);
2108 /* Install new qdisc. */
2109 error = new_ops->tc_install(netdev_, details);
/* Invariant: tc state is non-NULL exactly when installation succeeded. */
2110 ovs_assert((error == 0) == (netdev->tc != NULL));
2114 ovs_mutex_unlock(&netdev->mutex);
/* Retrieves configuration 'details' for queue 'queue_id' via the installed
 * discipline's class_get hook. */
2119 netdev_linux_get_queue(const struct netdev *netdev_,
2120 unsigned int queue_id, struct smap *details)
2122 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2125 ovs_mutex_lock(&netdev->mutex);
2126 error = tc_query_qdisc(netdev_);
2128 struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
2130 ? netdev->tc->ops->class_get(netdev_, queue, details)
2133 ovs_mutex_unlock(&netdev->mutex);
/* Configures queue 'queue_id' with 'details' via class_set, after validating
 * that the id is within the discipline's queue range and that the discipline
 * supports per-class configuration. */
2139 netdev_linux_set_queue(struct netdev *netdev_,
2140 unsigned int queue_id, const struct smap *details)
2142 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2145 ovs_mutex_lock(&netdev->mutex);
2146 error = tc_query_qdisc(netdev_);
2148 error = (queue_id < netdev->tc->ops->n_queues
2149 && netdev->tc->ops->class_set
2150 ? netdev->tc->ops->class_set(netdev_, queue_id, details)
2153 ovs_mutex_unlock(&netdev->mutex);
/* Deletes queue 'queue_id' via the discipline's class_delete hook, if the
 * discipline supports deletion and the queue exists. */
2159 netdev_linux_delete_queue(struct netdev *netdev_, unsigned int queue_id)
2161 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2164 ovs_mutex_lock(&netdev->mutex);
2165 error = tc_query_qdisc(netdev_);
2167 if (netdev->tc->ops->class_delete) {
2168 struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
2170 ? netdev->tc->ops->class_delete(netdev_, queue)
2176 ovs_mutex_unlock(&netdev->mutex);
/* Fills '*stats' for queue 'queue_id' using class_get_stats; 'created' is
 * copied from our queue record, the rest comes from the discipline. */
2182 netdev_linux_get_queue_stats(const struct netdev *netdev_,
2183 unsigned int queue_id,
2184 struct netdev_queue_stats *stats)
2186 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2189 ovs_mutex_lock(&netdev->mutex);
2190 error = tc_query_qdisc(netdev_);
2192 if (netdev->tc->ops->class_get_stats) {
2193 const struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
2195 stats->created = queue->created;
2196 error = netdev->tc->ops->class_get_stats(netdev_, queue,
2205 ovs_mutex_unlock(&netdev->mutex);
/* State carried across a netlink RTM_GETTCLASS dump of queue classes.
 * (A buffer member follows in the unsampled source — lines missing here.) */
2210 struct queue_dump_state {
2211 struct nl_dump dump;
/* Begins a netlink dump of 'netdev''s traffic classes into '*state'.
 * Builds an RTM_GETTCLASS request, starts the dump, and sets up the reply
 * buffer; the request ofpbuf is released once the dump owns it. */
2216 start_queue_dump(const struct netdev *netdev, struct queue_dump_state *state)
2218 struct ofpbuf request;
2219 struct tcmsg *tcmsg;
2221 tcmsg = tc_make_request(netdev, RTM_GETTCLASS, 0, &request);
2225 tcmsg->tcm_parent = 0;
2226 nl_dump_start(&state->dump, NETLINK_ROUTE, &request);
2227 ofpbuf_uninit(&request);
2229 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
/* Releases the dump buffer and completes the netlink dump, returning the
 * dump's final status. */
2234 finish_queue_dump(struct queue_dump_state *state)
2236 ofpbuf_uninit(&state->buf);
2237 return nl_dump_done(&state->dump);
/* Iterator state for netdev_linux_queue_dump_{start,next,done}: a snapshot
 * of queue ids (count/cursor fields are on lines missing from this
 * extraction). */
2240 struct netdev_linux_queue_state {
2241 unsigned int *queues;
/* Starts a queue dump: snapshots all queue ids from the tc hmap into a
 * freshly allocated state object stored in '*statep'. Snapshotting ids (not
 * pointers) keeps the iteration safe if queues change between calls. */
2247 netdev_linux_queue_dump_start(const struct netdev *netdev_, void **statep)
2249 const struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2252 ovs_mutex_lock(&netdev->mutex);
2253 error = tc_query_qdisc(netdev_);
2255 if (netdev->tc->ops->class_get) {
2256 struct netdev_linux_queue_state *state;
2257 struct tc_queue *queue;
2260 *statep = state = xmalloc(sizeof *state);
2261 state->n_queues = hmap_count(&netdev->tc->queues);
2262 state->cur_queue = 0;
2263 state->queues = xmalloc(state->n_queues * sizeof *state->queues);
2266 HMAP_FOR_EACH (queue, hmap_node, &netdev->tc->queues) {
2267 state->queues[i++] = queue->queue_id;
2273 ovs_mutex_unlock(&netdev->mutex);
/* Produces the next queue in the dump: advances through the snapshotted ids,
 * skipping any queue that has disappeared since dump_start. */
2279 netdev_linux_queue_dump_next(const struct netdev *netdev_, void *state_,
2280 unsigned int *queue_idp, struct smap *details)
2282 const struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2283 struct netdev_linux_queue_state *state = state_;
2286 ovs_mutex_lock(&netdev->mutex);
2287 while (state->cur_queue < state->n_queues) {
2288 unsigned int queue_id = state->queues[state->cur_queue++];
2289 struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
2292 *queue_idp = queue_id;
2293 error = netdev->tc->ops->class_get(netdev_, queue, details);
2297 ovs_mutex_unlock(&netdev->mutex);
/* Frees the dump state allocated by dump_start (the state object itself is
 * freed on a line missing from this extraction). */
2303 netdev_linux_queue_dump_done(const struct netdev *netdev OVS_UNUSED,
2306 struct netdev_linux_queue_state *state = state_;
2308 free(state->queues);
/* Invokes 'cb' with stats for each of 'netdev_''s queues, by running a
 * netlink class dump and letting the discipline's class_dump_stats parse
 * each message. */
2314 netdev_linux_dump_queue_stats(const struct netdev *netdev_,
2315 netdev_dump_queue_stats_cb *cb, void *aux)
2317 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2320 ovs_mutex_lock(&netdev->mutex);
2321 error = tc_query_qdisc(netdev_);
2323 struct queue_dump_state state;
2325 if (!netdev->tc->ops->class_dump_stats) {
2327 } else if (!start_queue_dump(netdev_, &state)) {
2333 while (nl_dump_next(&state.dump, &msg, &state.buf)) {
2334 retval = netdev->tc->ops->class_dump_stats(netdev_, &msg,
/* The dump's own completion status can also yield the error. */
2341 retval = finish_queue_dump(&state);
2347 ovs_mutex_unlock(&netdev->mutex);
/* Retrieves the device's IPv4 address and netmask (SIOCGIFADDR /
 * SIOCGIFNETMASK), caching them under VALID_IN4. Reports EADDRNOTAVAIL when
 * no address is assigned. */
2353 netdev_linux_get_in4(const struct netdev *netdev_,
2354 struct in_addr *address, struct in_addr *netmask)
2356 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2359 ovs_mutex_lock(&netdev->mutex);
2360 if (!(netdev->cache_valid & VALID_IN4)) {
2361 error = netdev_linux_get_ipv4(netdev_, &netdev->address,
2362 SIOCGIFADDR, "SIOCGIFADDR");
2364 error = netdev_linux_get_ipv4(netdev_, &netdev->netmask,
2365 SIOCGIFNETMASK, "SIOCGIFNETMASK");
2367 netdev->cache_valid |= VALID_IN4;
2375 if (netdev->address.s_addr != INADDR_ANY) {
2376 *address = netdev->address;
2377 *netmask = netdev->netmask;
2379 error = EADDRNOTAVAIL;
2382 ovs_mutex_unlock(&netdev->mutex);
/* Assigns IPv4 'address'/'netmask' to the device via SIOCSIFADDR and (when
 * the address is not INADDR_ANY) SIOCSIFNETMASK, updating the IN4 cache on
 * success. */
2388 netdev_linux_set_in4(struct netdev *netdev_, struct in_addr address,
2389 struct in_addr netmask)
2391 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2394 ovs_mutex_lock(&netdev->mutex);
2395 error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", address);
2397 netdev->cache_valid |= VALID_IN4;
2398 netdev->address = address;
2399 netdev->netmask = netmask;
/* Setting a netmask makes no sense without an address. */
2400 if (address.s_addr != INADDR_ANY) {
2401 error = do_set_addr(netdev_, SIOCSIFNETMASK,
2402 "SIOCSIFNETMASK", netmask);
2405 ovs_mutex_unlock(&netdev->mutex);
/* Parses one line of /proc/net/if_inet6 into the 16 address bytes of '*in6'
 * and the interface name in 'ifname' (up to 16 chars + NUL); returns the
 * ovs_scan() success/failure result. */
2411 parse_if_inet6_line(const char *line,
2412 struct in6_addr *in6, char ifname[16 + 1])
2414 uint8_t *s6 = in6->s6_addr;
2415 #define X8 "%2"SCNx8
2416 return ovs_scan(line,
2417 " "X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8
2418 "%*x %*x %*x %*x %16s\n",
2419 &s6[0], &s6[1], &s6[2], &s6[3],
2420 &s6[4], &s6[5], &s6[6], &s6[7],
2421 &s6[8], &s6[9], &s6[10], &s6[11],
2422 &s6[12], &s6[13], &s6[14], &s6[15],
2426 /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address (if
2427 * 'in6' is non-null) and returns true. Otherwise, returns false. */
2429 netdev_linux_get_in6(const struct netdev *netdev_, struct in6_addr *in6)
2431 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2433 ovs_mutex_lock(&netdev->mutex);
2434 if (!(netdev->cache_valid & VALID_IN6)) {
/* Populate the cache by scanning /proc/net/if_inet6 for our ifname;
 * default to in6addr_any when the file is unreadable or has no entry. */
2438 netdev->in6 = in6addr_any;
2440 file = fopen("/proc/net/if_inet6", "r");
2442 const char *name = netdev_get_name(netdev_);
2443 while (fgets(line, sizeof line, file)) {
2444 struct in6_addr in6_tmp;
2445 char ifname[16 + 1];
2446 if (parse_if_inet6_line(line, &in6_tmp, ifname)
2447 && !strcmp(name, ifname))
2449 netdev->in6 = in6_tmp;
2455 netdev->cache_valid |= VALID_IN6;
2458 ovs_mutex_unlock(&netdev->mutex);
/* Writes into '*sa' an AF_INET sockaddr carrying 'addr' with port 0.
 * The destination is zeroed first so trailing bytes are deterministic. */
static void
make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
{
    struct sockaddr_in sin;

    memset(&sin, 0, sizeof sin);
    sin.sin_family = AF_INET;
    sin.sin_addr = addr;
    sin.sin_port = 0;

    memset(sa, 0, sizeof *sa);
    memcpy(sa, &sin, sizeof sin);
}
/* Issues address-setting ioctl 'ioctl_nr' ("ioctl_name" for logging) on
 * 'netdev' with 'addr' packed into ifr.ifr_addr as an AF_INET sockaddr. */
2477 do_set_addr(struct netdev *netdev,
2478 int ioctl_nr, const char *ioctl_name, struct in_addr addr)
2482 make_in4_sockaddr(&ifr.ifr_addr, addr);
2483 return af_inet_ifreq_ioctl(netdev_get_name(netdev), &ifr, ioctl_nr,
2487 /* Adds 'router' as a default IP gateway. */
2489 netdev_linux_add_router(struct netdev *netdev OVS_UNUSED, struct in_addr router)
2491 struct in_addr any = { INADDR_ANY };
/* Default route: destination 0.0.0.0/0 via 'router' (SIOCADDRT). */
2495 memset(&rt, 0, sizeof rt);
2496 make_in4_sockaddr(&rt.rt_dst, any);
2497 make_in4_sockaddr(&rt.rt_gateway, router);
2498 make_in4_sockaddr(&rt.rt_genmask, any);
2499 rt.rt_flags = RTF_UP | RTF_GATEWAY;
2500 error = af_inet_ioctl(SIOCADDRT, &rt);
2502 VLOG_WARN("ioctl(SIOCADDRT): %s", ovs_strerror(error));
/* Determines the next hop toward '*host' by scanning /proc/net/route:
 * finds a matching UP route, yielding the gateway in '*next_hop' (0 for a
 * directly reachable host) and the xstrdup'd interface in '*netdev_name'. */
2508 netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop,
2511 static const char fn[] = "/proc/net/route";
2516 *netdev_name = NULL;
2517 stream = fopen(fn, "r");
2518 if (stream == NULL) {
2519 VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, ovs_strerror(errno));
2524 while (fgets(line, sizeof line, stream)) {
2527 ovs_be32 dest, gateway, mask;
2528 int refcnt, metric, mtu;
2529 unsigned int flags, use, window, irtt;
2532 "%16s %"SCNx32" %"SCNx32" %04X %d %u %d %"SCNx32
2534 iface, &dest, &gateway, &flags, &refcnt,
2535 &use, &metric, &mask, &mtu, &window, &irtt)) {
2536 VLOG_WARN_RL(&rl, "%s: could not parse line %d: %s",
2540 if (!(flags & RTF_UP)) {
2541 /* Skip routes that aren't up. */
2545 /* The output of 'dest', 'mask', and 'gateway' were given in
2546 * network byte order, so we don't need need any endian
2547 * conversions here. */
2548 if ((dest & mask) == (host->s_addr & mask)) {
2550 /* The host is directly reachable. */
2551 next_hop->s_addr = 0;
2553 /* To reach the host, we must go through a gateway. */
2554 next_hop->s_addr = gateway;
2556 *netdev_name = xstrdup(iface);
2568 netdev_linux_get_status(const struct netdev *netdev_, struct smap *smap)
2570 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2573 ovs_mutex_lock(&netdev->mutex);
2574 if (!(netdev->cache_valid & VALID_DRVINFO)) {
2575 struct ethtool_cmd *cmd = (struct ethtool_cmd *) &netdev->drvinfo;
2577 COVERAGE_INC(netdev_get_ethtool);
2578 memset(&netdev->drvinfo, 0, sizeof netdev->drvinfo);
2579 error = netdev_linux_do_ethtool(netdev->up.name,
2582 "ETHTOOL_GDRVINFO");
2584 netdev->cache_valid |= VALID_DRVINFO;
2589 smap_add(smap, "driver_name", netdev->drvinfo.driver);
2590 smap_add(smap, "driver_version", netdev->drvinfo.version);
2591 smap_add(smap, "firmware_version", netdev->drvinfo.fw_version);
2593 ovs_mutex_unlock(&netdev->mutex);
2599 netdev_internal_get_status(const struct netdev *netdev OVS_UNUSED,
2602 smap_add(smap, "driver_name", "openvswitch");
2606 /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be
2607 * successfully retrieved, it stores the corresponding MAC address in 'mac' and
2608 * returns 0. Otherwise, it returns a positive errno value; in particular,
2609 * ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. */
2611 netdev_linux_arp_lookup(const struct netdev *netdev,
2612 ovs_be32 ip, uint8_t mac[ETH_ADDR_LEN])
2615 struct sockaddr_in sin;
2618 memset(&r, 0, sizeof r);
2619 memset(&sin, 0, sizeof sin);
2620 sin.sin_family = AF_INET;
2621 sin.sin_addr.s_addr = ip;
2623 memcpy(&r.arp_pa, &sin, sizeof sin);
2624 r.arp_ha.sa_family = ARPHRD_ETHER;
2626 ovs_strzcpy(r.arp_dev, netdev_get_name(netdev), sizeof r.arp_dev);
2627 COVERAGE_INC(netdev_arp_lookup);
2628 retval = af_inet_ioctl(SIOCGARP, &r);
2630 memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN);
2631 } else if (retval != ENXIO) {
2632 VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s",
2633 netdev_get_name(netdev), IP_ARGS(ip),
2634 ovs_strerror(retval));
2640 nd_to_iff_flags(enum netdev_flags nd)
2643 if (nd & NETDEV_UP) {
2646 if (nd & NETDEV_PROMISC) {
2649 if (nd & NETDEV_LOOPBACK) {
2650 iff |= IFF_LOOPBACK;
2656 iff_to_nd_flags(int iff)
2658 enum netdev_flags nd = 0;
2662 if (iff & IFF_PROMISC) {
2663 nd |= NETDEV_PROMISC;
2665 if (iff & IFF_LOOPBACK) {
2666 nd |= NETDEV_LOOPBACK;
2672 update_flags(struct netdev_linux *netdev, enum netdev_flags off,
2673 enum netdev_flags on, enum netdev_flags *old_flagsp)
2674 OVS_REQUIRES(netdev->mutex)
2676 int old_flags, new_flags;
2679 old_flags = netdev->ifi_flags;
2680 *old_flagsp = iff_to_nd_flags(old_flags);
2681 new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
2682 if (new_flags != old_flags) {
2683 error = set_flags(netdev_get_name(&netdev->up), new_flags);
2684 get_flags(&netdev->up, &netdev->ifi_flags);
2691 netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off,
2692 enum netdev_flags on, enum netdev_flags *old_flagsp)
2694 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2697 ovs_mutex_lock(&netdev->mutex);
2698 error = update_flags(netdev, off, on, old_flagsp);
2699 ovs_mutex_unlock(&netdev->mutex);
2704 #define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, SET_STATS, \
2705 GET_FEATURES, GET_STATUS) \
2711 netdev_linux_wait, \
2713 netdev_linux_alloc, \
2715 netdev_linux_destruct, \
2716 netdev_linux_dealloc, \
2717 NULL, /* get_config */ \
2718 NULL, /* set_config */ \
2719 NULL, /* get_tunnel_config */ \
2721 netdev_linux_send, \
2722 netdev_linux_send_wait, \
2724 netdev_linux_set_etheraddr, \
2725 netdev_linux_get_etheraddr, \
2726 netdev_linux_get_mtu, \
2727 netdev_linux_set_mtu, \
2728 netdev_linux_get_ifindex, \
2729 netdev_linux_get_carrier, \
2730 netdev_linux_get_carrier_resets, \
2731 netdev_linux_set_miimon_interval, \
2736 netdev_linux_set_advertisements, \
2738 netdev_linux_set_policing, \
2739 netdev_linux_get_qos_types, \
2740 netdev_linux_get_qos_capabilities, \
2741 netdev_linux_get_qos, \
2742 netdev_linux_set_qos, \
2743 netdev_linux_get_queue, \
2744 netdev_linux_set_queue, \
2745 netdev_linux_delete_queue, \
2746 netdev_linux_get_queue_stats, \
2747 netdev_linux_queue_dump_start, \
2748 netdev_linux_queue_dump_next, \
2749 netdev_linux_queue_dump_done, \
2750 netdev_linux_dump_queue_stats, \
2752 netdev_linux_get_in4, \
2753 netdev_linux_set_in4, \
2754 netdev_linux_get_in6, \
2755 netdev_linux_add_router, \
2756 netdev_linux_get_next_hop, \
2758 netdev_linux_arp_lookup, \
2760 netdev_linux_update_flags, \
2762 netdev_linux_rxq_alloc, \
2763 netdev_linux_rxq_construct, \
2764 netdev_linux_rxq_destruct, \
2765 netdev_linux_rxq_dealloc, \
2766 netdev_linux_rxq_recv, \
2767 netdev_linux_rxq_wait, \
2768 netdev_linux_rxq_drain, \
2771 const struct netdev_class netdev_linux_class =
2774 netdev_linux_construct,
2775 netdev_linux_get_stats,
2776 NULL, /* set_stats */
2777 netdev_linux_get_features,
2778 netdev_linux_get_status);
2780 const struct netdev_class netdev_tap_class =
2783 netdev_linux_construct_tap,
2784 netdev_tap_get_stats,
2785 NULL, /* set_stats */
2786 netdev_linux_get_features,
2787 netdev_linux_get_status);
2789 const struct netdev_class netdev_internal_class =
2792 netdev_linux_construct,
2793 netdev_internal_get_stats,
2794 netdev_internal_set_stats,
2795 NULL, /* get_features */
2796 netdev_internal_get_status);
2798 /* HTB traffic control class. */
2800 #define HTB_N_QUEUES 0xf000
2804 unsigned int max_rate; /* In bytes/s. */
2808 struct tc_queue tc_queue;
2809 unsigned int min_rate; /* In bytes/s. */
2810 unsigned int max_rate; /* In bytes/s. */
2811 unsigned int burst; /* In bytes. */
2812 unsigned int priority; /* Lower values are higher priorities. */
2816 htb_get__(const struct netdev *netdev_)
2818 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2819 return CONTAINER_OF(netdev->tc, struct htb, tc);
2823 htb_install__(struct netdev *netdev_, uint64_t max_rate)
2825 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2828 htb = xmalloc(sizeof *htb);
2829 tc_init(&htb->tc, &tc_ops_htb);
2830 htb->max_rate = max_rate;
2832 netdev->tc = &htb->tc;
2835 /* Create an HTB qdisc.
2837 * Equivalent to "tc qdisc add dev <dev> root handle 1: htb default 1". */
2839 htb_setup_qdisc__(struct netdev *netdev)
2842 struct tc_htb_glob opt;
2843 struct ofpbuf request;
2844 struct tcmsg *tcmsg;
2846 tc_del_qdisc(netdev);
2848 tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
2849 NLM_F_EXCL | NLM_F_CREATE, &request);
2853 tcmsg->tcm_handle = tc_make_handle(1, 0);
2854 tcmsg->tcm_parent = TC_H_ROOT;
2856 nl_msg_put_string(&request, TCA_KIND, "htb");
2858 memset(&opt, 0, sizeof opt);
2859 opt.rate2quantum = 10;
2863 opt_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
2864 nl_msg_put_unspec(&request, TCA_HTB_INIT, &opt, sizeof opt);
2865 nl_msg_end_nested(&request, opt_offset);
2867 return tc_transact(&request, NULL);
2870 /* Equivalent to "tc class replace <dev> classid <handle> parent <parent> htb
2871 * rate <min_rate>bps ceil <max_rate>bps burst <burst>b prio <priority>". */
2873 htb_setup_class__(struct netdev *netdev, unsigned int handle,
2874 unsigned int parent, struct htb_class *class)
2877 struct tc_htb_opt opt;
2878 struct ofpbuf request;
2879 struct tcmsg *tcmsg;
2883 error = netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu);
2885 VLOG_WARN_RL(&rl, "cannot set up HTB on device %s that lacks MTU",
2886 netdev_get_name(netdev));
2890 memset(&opt, 0, sizeof opt);
2891 tc_fill_rate(&opt.rate, class->min_rate, mtu);
2892 tc_fill_rate(&opt.ceil, class->max_rate, mtu);
2893 opt.buffer = tc_calc_buffer(opt.rate.rate, mtu, class->burst);
2894 opt.cbuffer = tc_calc_buffer(opt.ceil.rate, mtu, class->burst);
2895 opt.prio = class->priority;
2897 tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
2901 tcmsg->tcm_handle = handle;
2902 tcmsg->tcm_parent = parent;
2904 nl_msg_put_string(&request, TCA_KIND, "htb");
2905 opt_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
2906 nl_msg_put_unspec(&request, TCA_HTB_PARMS, &opt, sizeof opt);
2907 tc_put_rtab(&request, TCA_HTB_RTAB, &opt.rate);
2908 tc_put_rtab(&request, TCA_HTB_CTAB, &opt.ceil);
2909 nl_msg_end_nested(&request, opt_offset);
2911 error = tc_transact(&request, NULL);
2913 VLOG_WARN_RL(&rl, "failed to replace %s class %u:%u, parent %u:%u, "
2914 "min_rate=%u max_rate=%u burst=%u prio=%u (%s)",
2915 netdev_get_name(netdev),
2916 tc_get_major(handle), tc_get_minor(handle),
2917 tc_get_major(parent), tc_get_minor(parent),
2918 class->min_rate, class->max_rate,
2919 class->burst, class->priority, ovs_strerror(error));
2924 /* Parses Netlink attributes in 'options' for HTB parameters and stores a
2925 * description of them into 'details'. The description complies with the
2926 * specification given in the vswitch database documentation for linux-htb
2929 htb_parse_tca_options__(struct nlattr *nl_options, struct htb_class *class)
2931 static const struct nl_policy tca_htb_policy[] = {
2932 [TCA_HTB_PARMS] = { .type = NL_A_UNSPEC, .optional = false,
2933 .min_len = sizeof(struct tc_htb_opt) },
2936 struct nlattr *attrs[ARRAY_SIZE(tca_htb_policy)];
2937 const struct tc_htb_opt *htb;
2939 if (!nl_parse_nested(nl_options, tca_htb_policy,
2940 attrs, ARRAY_SIZE(tca_htb_policy))) {
2941 VLOG_WARN_RL(&rl, "failed to parse HTB class options");
2945 htb = nl_attr_get(attrs[TCA_HTB_PARMS]);
2946 class->min_rate = htb->rate.rate;
2947 class->max_rate = htb->ceil.rate;
2948 class->burst = tc_ticks_to_bytes(htb->rate.rate, htb->buffer);
2949 class->priority = htb->prio;
2954 htb_parse_tcmsg__(struct ofpbuf *tcmsg, unsigned int *queue_id,
2955 struct htb_class *options,
2956 struct netdev_queue_stats *stats)
2958 struct nlattr *nl_options;
2959 unsigned int handle;
2962 error = tc_parse_class(tcmsg, &handle, &nl_options, stats);
2963 if (!error && queue_id) {
2964 unsigned int major = tc_get_major(handle);
2965 unsigned int minor = tc_get_minor(handle);
2966 if (major == 1 && minor > 0 && minor <= HTB_N_QUEUES) {
2967 *queue_id = minor - 1;
2972 if (!error && options) {
2973 error = htb_parse_tca_options__(nl_options, options);
2979 htb_parse_qdisc_details__(struct netdev *netdev_,
2980 const struct smap *details, struct htb_class *hc)
2982 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
2983 const char *max_rate_s;
2985 max_rate_s = smap_get(details, "max-rate");
2986 hc->max_rate = max_rate_s ? strtoull(max_rate_s, NULL, 10) / 8 : 0;
2987 if (!hc->max_rate) {
2988 enum netdev_features current;
2990 netdev_linux_read_features(netdev);
2991 current = !netdev->get_features_error ? netdev->current : 0;
2992 hc->max_rate = netdev_features_to_bps(current, 100 * 1000 * 1000) / 8;
2994 hc->min_rate = hc->max_rate;
3000 htb_parse_class_details__(struct netdev *netdev,
3001 const struct smap *details, struct htb_class *hc)
3003 const struct htb *htb = htb_get__(netdev);
3004 const char *min_rate_s = smap_get(details, "min-rate");
3005 const char *max_rate_s = smap_get(details, "max-rate");
3006 const char *burst_s = smap_get(details, "burst");
3007 const char *priority_s = smap_get(details, "priority");
3010 error = netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu);
3012 VLOG_WARN_RL(&rl, "cannot parse HTB class on device %s that lacks MTU",
3013 netdev_get_name(netdev));
3017 /* HTB requires at least an mtu sized min-rate to send any traffic even
3018 * on uncongested links. */
3019 hc->min_rate = min_rate_s ? strtoull(min_rate_s, NULL, 10) / 8 : 0;
3020 hc->min_rate = MAX(hc->min_rate, mtu);
3021 hc->min_rate = MIN(hc->min_rate, htb->max_rate);
3024 hc->max_rate = (max_rate_s
3025 ? strtoull(max_rate_s, NULL, 10) / 8
3027 hc->max_rate = MAX(hc->max_rate, hc->min_rate);
3028 hc->max_rate = MIN(hc->max_rate, htb->max_rate);
3032 * According to hints in the documentation that I've read, it is important
3033 * that 'burst' be at least as big as the largest frame that might be
3034 * transmitted. Also, making 'burst' a bit bigger than necessary is OK,
3035 * but having it a bit too small is a problem. Since netdev_get_mtu()
3036 * doesn't include the Ethernet header, we need to add at least 14 (18?) to
3037 * the MTU. We actually add 64, instead of 14, as a guard against
3038 * additional headers get tacked on somewhere that we're not aware of. */
3039 hc->burst = burst_s ? strtoull(burst_s, NULL, 10) / 8 : 0;
3040 hc->burst = MAX(hc->burst, mtu + 64);
3043 hc->priority = priority_s ? strtoul(priority_s, NULL, 10) : 0;
3049 htb_query_class__(const struct netdev *netdev, unsigned int handle,
3050 unsigned int parent, struct htb_class *options,
3051 struct netdev_queue_stats *stats)
3053 struct ofpbuf *reply;
3056 error = tc_query_class(netdev, handle, parent, &reply);
3058 error = htb_parse_tcmsg__(reply, NULL, options, stats);
3059 ofpbuf_delete(reply);
3065 htb_tc_install(struct netdev *netdev, const struct smap *details)
3069 error = htb_setup_qdisc__(netdev);
3071 struct htb_class hc;
3073 htb_parse_qdisc_details__(netdev, details, &hc);
3074 error = htb_setup_class__(netdev, tc_make_handle(1, 0xfffe),
3075 tc_make_handle(1, 0), &hc);
3077 htb_install__(netdev, hc.max_rate);
3083 static struct htb_class *
3084 htb_class_cast__(const struct tc_queue *queue)
3086 return CONTAINER_OF(queue, struct htb_class, tc_queue);
3090 htb_update_queue__(struct netdev *netdev, unsigned int queue_id,
3091 const struct htb_class *hc)
3093 struct htb *htb = htb_get__(netdev);
3094 size_t hash = hash_int(queue_id, 0);
3095 struct tc_queue *queue;
3096 struct htb_class *hcp;
3098 queue = tc_find_queue__(netdev, queue_id, hash);
3100 hcp = htb_class_cast__(queue);
3102 hcp = xmalloc(sizeof *hcp);
3103 queue = &hcp->tc_queue;
3104 queue->queue_id = queue_id;
3105 queue->created = time_msec();
3106 hmap_insert(&htb->tc.queues, &queue->hmap_node, hash);
3109 hcp->min_rate = hc->min_rate;
3110 hcp->max_rate = hc->max_rate;
3111 hcp->burst = hc->burst;
3112 hcp->priority = hc->priority;
3116 htb_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
3119 struct queue_dump_state state;
3120 struct htb_class hc;
3122 /* Get qdisc options. */
3124 htb_query_class__(netdev, tc_make_handle(1, 0xfffe), 0, &hc, NULL);
3125 htb_install__(netdev, hc.max_rate);
3128 if (!start_queue_dump(netdev, &state)) {
3131 while (nl_dump_next(&state.dump, &msg, &state.buf)) {
3132 unsigned int queue_id;
3134 if (!htb_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) {
3135 htb_update_queue__(netdev, queue_id, &hc);
3138 finish_queue_dump(&state);
3144 htb_tc_destroy(struct tc *tc)
3146 struct htb *htb = CONTAINER_OF(tc, struct htb, tc);
3147 struct htb_class *hc, *next;
3149 HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &htb->tc.queues) {
3150 hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
3158 htb_qdisc_get(const struct netdev *netdev, struct smap *details)
3160 const struct htb *htb = htb_get__(netdev);
3161 smap_add_format(details, "max-rate", "%llu", 8ULL * htb->max_rate);
3166 htb_qdisc_set(struct netdev *netdev, const struct smap *details)
3168 struct htb_class hc;
3171 htb_parse_qdisc_details__(netdev, details, &hc);
3172 error = htb_setup_class__(netdev, tc_make_handle(1, 0xfffe),
3173 tc_make_handle(1, 0), &hc);
3175 htb_get__(netdev)->max_rate = hc.max_rate;
3181 htb_class_get(const struct netdev *netdev OVS_UNUSED,
3182 const struct tc_queue *queue, struct smap *details)
3184 const struct htb_class *hc = htb_class_cast__(queue);
3186 smap_add_format(details, "min-rate", "%llu", 8ULL * hc->min_rate);
3187 if (hc->min_rate != hc->max_rate) {
3188 smap_add_format(details, "max-rate", "%llu", 8ULL * hc->max_rate);
3190 smap_add_format(details, "burst", "%llu", 8ULL * hc->burst);
3192 smap_add_format(details, "priority", "%u", hc->priority);
3198 htb_class_set(struct netdev *netdev, unsigned int queue_id,
3199 const struct smap *details)
3201 struct htb_class hc;
3204 error = htb_parse_class_details__(netdev, details, &hc);
3209 error = htb_setup_class__(netdev, tc_make_handle(1, queue_id + 1),
3210 tc_make_handle(1, 0xfffe), &hc);
3215 htb_update_queue__(netdev, queue_id, &hc);
3220 htb_class_delete(struct netdev *netdev, struct tc_queue *queue)
3222 struct htb_class *hc = htb_class_cast__(queue);
3223 struct htb *htb = htb_get__(netdev);
3226 error = tc_delete_class(netdev, tc_make_handle(1, queue->queue_id + 1));
3228 hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
3235 htb_class_get_stats(const struct netdev *netdev, const struct tc_queue *queue,
3236 struct netdev_queue_stats *stats)
3238 return htb_query_class__(netdev, tc_make_handle(1, queue->queue_id + 1),
3239 tc_make_handle(1, 0xfffe), NULL, stats);
3243 htb_class_dump_stats(const struct netdev *netdev OVS_UNUSED,
3244 const struct ofpbuf *nlmsg,
3245 netdev_dump_queue_stats_cb *cb, void *aux)
3247 struct netdev_queue_stats stats;
3248 unsigned int handle, major, minor;
3251 error = tc_parse_class(nlmsg, &handle, NULL, &stats);
3256 major = tc_get_major(handle);
3257 minor = tc_get_minor(handle);
3258 if (major == 1 && minor > 0 && minor <= HTB_N_QUEUES) {
3259 (*cb)(minor - 1, &stats, aux);
3264 static const struct tc_ops tc_ops_htb = {
3265 "htb", /* linux_name */
3266 "linux-htb", /* ovs_name */
3267 HTB_N_QUEUES, /* n_queues */
3276 htb_class_get_stats,
3277 htb_class_dump_stats
3280 /* "linux-hfsc" traffic control class. */
3282 #define HFSC_N_QUEUES 0xf000
3290 struct tc_queue tc_queue;
3295 static struct hfsc *
3296 hfsc_get__(const struct netdev *netdev_)
3298 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
3299 return CONTAINER_OF(netdev->tc, struct hfsc, tc);
3302 static struct hfsc_class *
3303 hfsc_class_cast__(const struct tc_queue *queue)
3305 return CONTAINER_OF(queue, struct hfsc_class, tc_queue);
3309 hfsc_install__(struct netdev *netdev_, uint32_t max_rate)
3311 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
3314 hfsc = xmalloc(sizeof *hfsc);
3315 tc_init(&hfsc->tc, &tc_ops_hfsc);
3316 hfsc->max_rate = max_rate;
3317 netdev->tc = &hfsc->tc;
3321 hfsc_update_queue__(struct netdev *netdev, unsigned int queue_id,
3322 const struct hfsc_class *hc)
3326 struct hfsc_class *hcp;
3327 struct tc_queue *queue;
3329 hfsc = hfsc_get__(netdev);
3330 hash = hash_int(queue_id, 0);
3332 queue = tc_find_queue__(netdev, queue_id, hash);
3334 hcp = hfsc_class_cast__(queue);
3336 hcp = xmalloc(sizeof *hcp);
3337 queue = &hcp->tc_queue;
3338 queue->queue_id = queue_id;
3339 queue->created = time_msec();
3340 hmap_insert(&hfsc->tc.queues, &queue->hmap_node, hash);
3343 hcp->min_rate = hc->min_rate;
3344 hcp->max_rate = hc->max_rate;
3348 hfsc_parse_tca_options__(struct nlattr *nl_options, struct hfsc_class *class)
3350 const struct tc_service_curve *rsc, *fsc, *usc;
3351 static const struct nl_policy tca_hfsc_policy[] = {
3353 .type = NL_A_UNSPEC,
3355 .min_len = sizeof(struct tc_service_curve),
3358 .type = NL_A_UNSPEC,
3360 .min_len = sizeof(struct tc_service_curve),
3363 .type = NL_A_UNSPEC,
3365 .min_len = sizeof(struct tc_service_curve),
3368 struct nlattr *attrs[ARRAY_SIZE(tca_hfsc_policy)];
3370 if (!nl_parse_nested(nl_options, tca_hfsc_policy,
3371 attrs, ARRAY_SIZE(tca_hfsc_policy))) {
3372 VLOG_WARN_RL(&rl, "failed to parse HFSC class options");
3376 rsc = nl_attr_get(attrs[TCA_HFSC_RSC]);
3377 fsc = nl_attr_get(attrs[TCA_HFSC_FSC]);
3378 usc = nl_attr_get(attrs[TCA_HFSC_USC]);
3380 if (rsc->m1 != 0 || rsc->d != 0 ||
3381 fsc->m1 != 0 || fsc->d != 0 ||
3382 usc->m1 != 0 || usc->d != 0) {
3383 VLOG_WARN_RL(&rl, "failed to parse HFSC class options. "
3384 "Non-linear service curves are not supported.");
3388 if (rsc->m2 != fsc->m2) {
3389 VLOG_WARN_RL(&rl, "failed to parse HFSC class options. "
3390 "Real-time service curves are not supported ");
3394 if (rsc->m2 > usc->m2) {
3395 VLOG_WARN_RL(&rl, "failed to parse HFSC class options. "
3396 "Min-rate service curve is greater than "
3397 "the max-rate service curve.");
3401 class->min_rate = fsc->m2;
3402 class->max_rate = usc->m2;
3407 hfsc_parse_tcmsg__(struct ofpbuf *tcmsg, unsigned int *queue_id,
3408 struct hfsc_class *options,
3409 struct netdev_queue_stats *stats)
3412 unsigned int handle;
3413 struct nlattr *nl_options;
3415 error = tc_parse_class(tcmsg, &handle, &nl_options, stats);
3421 unsigned int major, minor;
3423 major = tc_get_major(handle);
3424 minor = tc_get_minor(handle);
3425 if (major == 1 && minor > 0 && minor <= HFSC_N_QUEUES) {
3426 *queue_id = minor - 1;
3433 error = hfsc_parse_tca_options__(nl_options, options);
3440 hfsc_query_class__(const struct netdev *netdev, unsigned int handle,
3441 unsigned int parent, struct hfsc_class *options,
3442 struct netdev_queue_stats *stats)
3445 struct ofpbuf *reply;
3447 error = tc_query_class(netdev, handle, parent, &reply);
3452 error = hfsc_parse_tcmsg__(reply, NULL, options, stats);
3453 ofpbuf_delete(reply);
3458 hfsc_parse_qdisc_details__(struct netdev *netdev_, const struct smap *details,
3459 struct hfsc_class *class)
3461 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
3463 const char *max_rate_s;
3465 max_rate_s = smap_get(details, "max-rate");
3466 max_rate = max_rate_s ? strtoull(max_rate_s, NULL, 10) / 8 : 0;
3469 enum netdev_features current;
3471 netdev_linux_read_features(netdev);
3472 current = !netdev->get_features_error ? netdev->current : 0;
3473 max_rate = netdev_features_to_bps(current, 100 * 1000 * 1000) / 8;
3476 class->min_rate = max_rate;
3477 class->max_rate = max_rate;
3481 hfsc_parse_class_details__(struct netdev *netdev,
3482 const struct smap *details,
3483 struct hfsc_class * class)
3485 const struct hfsc *hfsc;
3486 uint32_t min_rate, max_rate;
3487 const char *min_rate_s, *max_rate_s;
3489 hfsc = hfsc_get__(netdev);
3490 min_rate_s = smap_get(details, "min-rate");
3491 max_rate_s = smap_get(details, "max-rate");
3493 min_rate = min_rate_s ? strtoull(min_rate_s, NULL, 10) / 8 : 0;
3494 min_rate = MAX(min_rate, 1);
3495 min_rate = MIN(min_rate, hfsc->max_rate);
3497 max_rate = (max_rate_s
3498 ? strtoull(max_rate_s, NULL, 10) / 8
3500 max_rate = MAX(max_rate, min_rate);
3501 max_rate = MIN(max_rate, hfsc->max_rate);
3503 class->min_rate = min_rate;
3504 class->max_rate = max_rate;
3509 /* Create an HFSC qdisc.
3511 * Equivalent to "tc qdisc add dev <dev> root handle 1: hfsc default 1". */
3513 hfsc_setup_qdisc__(struct netdev * netdev)
3515 struct tcmsg *tcmsg;
3516 struct ofpbuf request;
3517 struct tc_hfsc_qopt opt;
3519 tc_del_qdisc(netdev);
3521 tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
3522 NLM_F_EXCL | NLM_F_CREATE, &request);
3528 tcmsg->tcm_handle = tc_make_handle(1, 0);
3529 tcmsg->tcm_parent = TC_H_ROOT;
3531 memset(&opt, 0, sizeof opt);
3534 nl_msg_put_string(&request, TCA_KIND, "hfsc");
3535 nl_msg_put_unspec(&request, TCA_OPTIONS, &opt, sizeof opt);
3537 return tc_transact(&request, NULL);
3540 /* Create an HFSC class.
3542 * Equivalent to "tc class add <dev> parent <parent> classid <handle> hfsc
3543 * sc rate <min_rate> ul rate <max_rate>" */
3545 hfsc_setup_class__(struct netdev *netdev, unsigned int handle,
3546 unsigned int parent, struct hfsc_class *class)
3550 struct tcmsg *tcmsg;
3551 struct ofpbuf request;
3552 struct tc_service_curve min, max;
3554 tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
3560 tcmsg->tcm_handle = handle;
3561 tcmsg->tcm_parent = parent;
3565 min.m2 = class->min_rate;
3569 max.m2 = class->max_rate;
3571 nl_msg_put_string(&request, TCA_KIND, "hfsc");
3572 opt_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
3573 nl_msg_put_unspec(&request, TCA_HFSC_RSC, &min, sizeof min);
3574 nl_msg_put_unspec(&request, TCA_HFSC_FSC, &min, sizeof min);
3575 nl_msg_put_unspec(&request, TCA_HFSC_USC, &max, sizeof max);
3576 nl_msg_end_nested(&request, opt_offset);
3578 error = tc_transact(&request, NULL);
3580 VLOG_WARN_RL(&rl, "failed to replace %s class %u:%u, parent %u:%u, "
3581 "min-rate %ubps, max-rate %ubps (%s)",
3582 netdev_get_name(netdev),
3583 tc_get_major(handle), tc_get_minor(handle),
3584 tc_get_major(parent), tc_get_minor(parent),
3585 class->min_rate, class->max_rate, ovs_strerror(error));
3592 hfsc_tc_install(struct netdev *netdev, const struct smap *details)
3595 struct hfsc_class class;
3597 error = hfsc_setup_qdisc__(netdev);
3603 hfsc_parse_qdisc_details__(netdev, details, &class);
3604 error = hfsc_setup_class__(netdev, tc_make_handle(1, 0xfffe),
3605 tc_make_handle(1, 0), &class);
3611 hfsc_install__(netdev, class.max_rate);
3616 hfsc_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
3619 struct queue_dump_state state;
3620 struct hfsc_class hc;
3623 hfsc_query_class__(netdev, tc_make_handle(1, 0xfffe), 0, &hc, NULL);
3624 hfsc_install__(netdev, hc.max_rate);
3626 if (!start_queue_dump(netdev, &state)) {
3630 while (nl_dump_next(&state.dump, &msg, &state.buf)) {
3631 unsigned int queue_id;
3633 if (!hfsc_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) {
3634 hfsc_update_queue__(netdev, queue_id, &hc);
3638 finish_queue_dump(&state);
3643 hfsc_tc_destroy(struct tc *tc)
3646 struct hfsc_class *hc, *next;
3648 hfsc = CONTAINER_OF(tc, struct hfsc, tc);
3650 HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &hfsc->tc.queues) {
3651 hmap_remove(&hfsc->tc.queues, &hc->tc_queue.hmap_node);
3660 hfsc_qdisc_get(const struct netdev *netdev, struct smap *details)
3662 const struct hfsc *hfsc;
3663 hfsc = hfsc_get__(netdev);
3664 smap_add_format(details, "max-rate", "%llu", 8ULL * hfsc->max_rate);
3669 hfsc_qdisc_set(struct netdev *netdev, const struct smap *details)
3672 struct hfsc_class class;
3674 hfsc_parse_qdisc_details__(netdev, details, &class);
3675 error = hfsc_setup_class__(netdev, tc_make_handle(1, 0xfffe),
3676 tc_make_handle(1, 0), &class);
3679 hfsc_get__(netdev)->max_rate = class.max_rate;
3686 hfsc_class_get(const struct netdev *netdev OVS_UNUSED,
3687 const struct tc_queue *queue, struct smap *details)
3689 const struct hfsc_class *hc;
3691 hc = hfsc_class_cast__(queue);
3692 smap_add_format(details, "min-rate", "%llu", 8ULL * hc->min_rate);
3693 if (hc->min_rate != hc->max_rate) {
3694 smap_add_format(details, "max-rate", "%llu", 8ULL * hc->max_rate);
3700 hfsc_class_set(struct netdev *netdev, unsigned int queue_id,
3701 const struct smap *details)
3704 struct hfsc_class class;
3706 error = hfsc_parse_class_details__(netdev, details, &class);
3711 error = hfsc_setup_class__(netdev, tc_make_handle(1, queue_id + 1),
3712 tc_make_handle(1, 0xfffe), &class);
3717 hfsc_update_queue__(netdev, queue_id, &class);
3722 hfsc_class_delete(struct netdev *netdev, struct tc_queue *queue)
3726 struct hfsc_class *hc;
3728 hc = hfsc_class_cast__(queue);
3729 hfsc = hfsc_get__(netdev);
3731 error = tc_delete_class(netdev, tc_make_handle(1, queue->queue_id + 1));
3733 hmap_remove(&hfsc->tc.queues, &hc->tc_queue.hmap_node);
3740 hfsc_class_get_stats(const struct netdev *netdev, const struct tc_queue *queue,
3741 struct netdev_queue_stats *stats)
3743 return hfsc_query_class__(netdev, tc_make_handle(1, queue->queue_id + 1),
3744 tc_make_handle(1, 0xfffe), NULL, stats);
3748 hfsc_class_dump_stats(const struct netdev *netdev OVS_UNUSED,
3749 const struct ofpbuf *nlmsg,
3750 netdev_dump_queue_stats_cb *cb, void *aux)
3752 struct netdev_queue_stats stats;
3753 unsigned int handle, major, minor;
3756 error = tc_parse_class(nlmsg, &handle, NULL, &stats);
3761 major = tc_get_major(handle);
3762 minor = tc_get_minor(handle);
3763 if (major == 1 && minor > 0 && minor <= HFSC_N_QUEUES) {
3764 (*cb)(minor - 1, &stats, aux);
3769 static const struct tc_ops tc_ops_hfsc = {
3770 "hfsc", /* linux_name */
3771 "linux-hfsc", /* ovs_name */
3772 HFSC_N_QUEUES, /* n_queues */
3773 hfsc_tc_install, /* tc_install */
3774 hfsc_tc_load, /* tc_load */
3775 hfsc_tc_destroy, /* tc_destroy */
3776 hfsc_qdisc_get, /* qdisc_get */
3777 hfsc_qdisc_set, /* qdisc_set */
3778 hfsc_class_get, /* class_get */
3779 hfsc_class_set, /* class_set */
3780 hfsc_class_delete, /* class_delete */
3781 hfsc_class_get_stats, /* class_get_stats */
3782 hfsc_class_dump_stats /* class_dump_stats */
3785 /* "linux-default" traffic control class.
3787 * This class represents the default, unnamed Linux qdisc. It corresponds to
3788 * the "" (empty string) QoS type in the OVS database. */
3791 default_install__(struct netdev *netdev_)
3793 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
3794 static const struct tc tc = TC_INITIALIZER(&tc, &tc_ops_default);
3796 /* Nothing but a tc class implementation is allowed to write to a tc. This
3797 * class never does that, so we can legitimately use a const tc object. */
3798 netdev->tc = CONST_CAST(struct tc *, &tc);
3802 default_tc_install(struct netdev *netdev,
3803 const struct smap *details OVS_UNUSED)
3805 default_install__(netdev);
3810 default_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
3812 default_install__(netdev);
3816 static const struct tc_ops tc_ops_default = {
3817 NULL, /* linux_name */
3822 NULL, /* tc_destroy */
3823 NULL, /* qdisc_get */
3824 NULL, /* qdisc_set */
3825 NULL, /* class_get */
3826 NULL, /* class_set */
3827 NULL, /* class_delete */
3828 NULL, /* class_get_stats */
3829 NULL /* class_dump_stats */
3832 /* "linux-other" traffic control class.
3837 other_tc_load(struct netdev *netdev_, struct ofpbuf *nlmsg OVS_UNUSED)
3839 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
3840 static const struct tc tc = TC_INITIALIZER(&tc, &tc_ops_other);
3842 /* Nothing but a tc class implementation is allowed to write to a tc. This
3843 * class never does that, so we can legitimately use a const tc object. */
3844 netdev->tc = CONST_CAST(struct tc *, &tc);
3848 static const struct tc_ops tc_ops_other = {
3849 NULL, /* linux_name */
3850 "linux-other", /* ovs_name */
3852 NULL, /* tc_install */
3854 NULL, /* tc_destroy */
3855 NULL, /* qdisc_get */
3856 NULL, /* qdisc_set */
3857 NULL, /* class_get */
3858 NULL, /* class_set */
3859 NULL, /* class_delete */
3860 NULL, /* class_get_stats */
3861 NULL /* class_dump_stats */
3864 /* Traffic control. */
3866 /* Number of kernel "tc" ticks per second. */
3867 static double ticks_per_s;
3869 /* Number of kernel "jiffies" per second. This is used for the purpose of
3870 * computing buffer sizes. Generally kernel qdiscs need to be able to buffer
3871 * one jiffy's worth of data.
3873 * There are two possibilities here:
3875 * - 'buffer_hz' is the kernel's real timer tick rate, a small number in the
3876 * approximate range of 100 to 1024. That means that we really need to
3877 * make sure that the qdisc can buffer that much data.
3879 * - 'buffer_hz' is an absurdly large number. That means that the kernel
3880 * has finely granular timers and there's no need to fudge additional room
3881 * for buffers. (There's no extra effort needed to implement that: the
3882 * large 'buffer_hz' is used as a divisor, so practically any number will
3883 * come out as 0 in the division. Small integer results in the case of
3884 * really high dividends won't have any real effect anyhow.)
3886 static unsigned int buffer_hz;
/* Returns tc handle 'major':'minor', i.e. major in the upper 16 bits and
 * minor in the lower 16 bits. */
static unsigned int
tc_make_handle(unsigned int major, unsigned int minor)
{
    return TC_H_MAKE(major << 16, minor);
}
/* Returns the major number (upper 16 bits) from tc handle 'handle'. */
static unsigned int
tc_get_major(unsigned int handle)
{
    return TC_H_MAJ(handle) >> 16;
}
/* Returns the minor number (lower 16 bits) from tc handle 'handle'. */
static unsigned int
tc_get_minor(unsigned int handle)
{
    return TC_H_MIN(handle);
}
/* Initializes 'request' as an rtnetlink tc request of the given 'type'
 * (e.g. RTM_NEWQDISC) and 'flags' for 'netdev', and returns a pointer to the
 * struct tcmsg within it for the caller to complete (tcm_handle, tcm_parent).
 * NOTE(review): several lines are missing from this extraction — the local
 * declarations of 'error'/'ifindex', the error-handling branch after
 * get_ifindex() (presumably returning NULL on failure), and the final
 * "return tcmsg;" — confirm against the full file. */
3909 static struct tcmsg *
3910 tc_make_request(const struct netdev *netdev, int type, unsigned int flags,
3911 struct ofpbuf *request)
3913 struct tcmsg *tcmsg;
3917 error = get_ifindex(netdev, &ifindex);
3922 ofpbuf_init(request, 512);
3923 nl_msg_put_nlmsghdr(request, sizeof *tcmsg, type, NLM_F_REQUEST | flags);
3924 tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg);
3925 tcmsg->tcm_family = AF_UNSPEC;
3926 tcmsg->tcm_ifindex = ifindex;
3927 /* Caller should fill in tcmsg->tcm_handle. */
3928 /* Caller should fill in tcmsg->tcm_parent. */
/* Sends 'request' on the NETLINK_ROUTE socket and stores the reply (if
 * 'replyp' is nonnull) in '*replyp'; always uninitializes 'request'.
 * NOTE(review): return-type line and the "return error;" line are missing
 * from this extraction. */
3934 tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
3936 int error = nl_transact(NETLINK_ROUTE, request, replyp);
3937 ofpbuf_uninit(request);
3941 /* Adds or deletes a root ingress qdisc on 'netdev'. We use this for
3942 * policing configuration.
3944 * This function is equivalent to running the following when 'add' is true:
3945 * /sbin/tc qdisc add dev <devname> handle ffff: ingress
3947 * This function is equivalent to running the following when 'add' is false:
3948 * /sbin/tc qdisc del dev <devname> handle ffff: ingress
3950 * The configuration and stats may be seen with the following command:
3951 * /sbin/tc -s qdisc show dev <devname>
3953 * Returns 0 if successful, otherwise a positive errno value.
/* NOTE(review): function return-type line, 'error' declaration, the NULL
 * check on tc_make_request()'s result, and the closing return/brace are
 * missing from this extraction. */
3956 tc_add_del_ingress_qdisc(struct netdev *netdev, bool add)
3958 struct ofpbuf request;
3959 struct tcmsg *tcmsg;
3961 int type = add ? RTM_NEWQDISC : RTM_DELQDISC;
3962 int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
3964 tcmsg = tc_make_request(netdev, type, flags, &request);
3968 tcmsg->tcm_handle = tc_make_handle(0xffff, 0);
3969 tcmsg->tcm_parent = TC_H_INGRESS;
3970 nl_msg_put_string(&request, TCA_KIND, "ingress");
3971 nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
3973 error = tc_transact(&request, NULL);
3975 /* If we're deleting the qdisc, don't worry about some of the
3976 * error conditions. */
/* ENOENT/EINVAL on deletion means there was no ingress qdisc to remove,
 * which is the desired end state anyway. */
3977 if (!add && (error == ENOENT || error == EINVAL)) {
3986 /* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst size
3989 * This function is equivalent to running:
3990 * /sbin/tc filter add dev <devname> parent ffff: protocol all prio 49
3991 * basic police rate <kbits_rate>kbit burst <kbits_burst>k
3994 * The configuration and stats may be seen with the following command:
3995 * /sbin/tc -s filter show <devname> eth0 parent ffff:
3997 * Returns 0 if successful, otherwise a positive errno value.
/* NOTE(review): the return-type line, the declarations of 'error' and 'mtu'
 * (and whatever initializes 'mtu'), the NULL check after tc_make_request(),
 * and the closing return/brace are missing from this extraction. */
4000 tc_add_policer(struct netdev *netdev, int kbits_rate, int kbits_burst)
4002 struct tc_police tc_police;
4003 struct ofpbuf request;
4004 struct tcmsg *tcmsg;
4005 size_t basic_offset;
4006 size_t police_offset;
4010 memset(&tc_police, 0, sizeof tc_police);
4011 tc_police.action = TC_POLICE_SHOT;
4012 tc_police.mtu = mtu;
/* kbits_rate is in kilobits/s; the kernel wants bytes/s, hence *1000/8. */
4013 tc_fill_rate(&tc_police.rate, (kbits_rate * 1000)/8, mtu);
/* Burst is specified in kilobytes ("burst <n>k"), hence *1024. */
4014 tc_police.burst = tc_bytes_to_ticks(tc_police.rate.rate,
4015 kbits_burst * 1024);
4017 tcmsg = tc_make_request(netdev, RTM_NEWTFILTER,
4018 NLM_F_EXCL | NLM_F_CREATE, &request);
4022 tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
/* tcm_info encodes filter priority (49) in the major part and the
 * protocol (ETH_P_ALL, network byte order) in the minor part. */
4023 tcmsg->tcm_info = tc_make_handle(49,
4024 (OVS_FORCE uint16_t) htons(ETH_P_ALL));
4026 nl_msg_put_string(&request, TCA_KIND, "basic");
4027 basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
4028 police_offset = nl_msg_start_nested(&request, TCA_BASIC_POLICE);
4029 nl_msg_put_unspec(&request, TCA_POLICE_TBF, &tc_police, sizeof tc_police);
4030 tc_put_rtab(&request, TCA_POLICE_RATE, &tc_police.rate);
4031 nl_msg_end_nested(&request, police_offset);
4032 nl_msg_end_nested(&request, basic_offset);
4034 error = tc_transact(&request, NULL);
/* Reads /proc/net/psched once (guarded by 'once') and derives 'ticks_per_s'
 * (and, presumably, 'buffer_hz') from its four hex fields.
 * NOTE(review): the function header itself (in upstream OVS this is
 * "static void read_psched(void)") and many interior lines — the FILE*
 * declaration, return/goto paths after the warnings, the buffer_hz
 * assignments, fclose(), and closing braces — are missing from this
 * extraction; confirm against the full file. */
4045 /* The values in psched are not individually very meaningful, but they are
4046 * important. The tables below show some values seen in the wild.
4050 * - "c" has always been a constant 1000000 since at least Linux 2.4.14.
4051 * (Before that, there are hints that it was 1000000000.)
4053 * - "d" can be unrealistically large, see the comment on 'buffer_hz'
4057 * -----------------------------------
4058 * [1] 000c8000 000f4240 000f4240 00000064
4059 * [2] 000003e8 00000400 000f4240 3b9aca00
4060 * [3] 000003e8 00000400 000f4240 3b9aca00
4061 * [4] 000003e8 00000400 000f4240 00000064
4062 * [5] 000003e8 00000040 000f4240 3b9aca00
4063 * [6] 000003e8 00000040 000f4240 000000f9
4065 * a b c d ticks_per_s buffer_hz
4066 * ------- --------- ---------- ------------- ----------- -------------
4067 * [1] 819,200 1,000,000 1,000,000 100 819,200 100
4068 * [2] 1,000 1,024 1,000,000 1,000,000,000 976,562 1,000,000,000
4069 * [3] 1,000 1,024 1,000,000 1,000,000,000 976,562 1,000,000,000
4070 * [4] 1,000 1,024 1,000,000 100 976,562 100
4071 * [5] 1,000 64 1,000,000 1,000,000,000 15,625,000 1,000,000,000
4072 * [6] 1,000 64 1,000,000 249 15,625,000 249
4074 * [1] 2.6.18-128.1.6.el5.xs5.5.0.505.1024xen from XenServer 5.5.0-24648p
4075 * [2] 2.6.26-1-686-bigmem from Debian lenny
4076 * [3] 2.6.26-2-sparc64 from Debian lenny
4077 * [4] 2.6.27.42-0.1.1.xs5.6.810.44.111163xen from XenServer 5.6.810-31078p
4078 * [5] 2.6.32.21.22 (approx.) from Ubuntu 10.04 on VMware Fusion
4079 * [6] 2.6.34 from kernel.org on KVM
4081 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4082 static const char fn[] = "/proc/net/psched";
4083 unsigned int a, b, c, d;
/* Only the first caller performs the read; later calls return early. */
4086 if (!ovsthread_once_start(&once)) {
4093 stream = fopen(fn, "r");
4095 VLOG_WARN("%s: open failed: %s", fn, ovs_strerror(errno));
4099 if (fscanf(stream, "%x %x %x %x", &a, &b, &c, &d) != 4) {
4100 VLOG_WARN("%s: read failed", fn);
4104 VLOG_DBG("%s: psched parameters are: %u %u %u %u", fn, a, b, c, d);
4108 VLOG_WARN("%s: invalid scheduler parameters", fn);
/* ticks_per_s = a * c / b, per the table above. */
4112 ticks_per_s = (double) a * c / b;
4116 VLOG_WARN("%s: unexpected psched parameters: %u %u %u %u",
4119 VLOG_DBG("%s: ticks_per_s=%f buffer_hz=%u", fn, ticks_per_s, buffer_hz);
4122 ovsthread_once_done(&once);
4125 /* Returns the number of bytes that can be transmitted in 'ticks' ticks at a
4126 * rate of 'rate' bytes per second. */
/* NOTE(review): return-type line, braces, and a presumable call that ensures
 * ticks_per_s is initialized are missing from this extraction. */
4128 tc_ticks_to_bytes(unsigned int rate, unsigned int ticks)
4131 return (rate * ticks) / ticks_per_s;
4134 /* Returns the number of ticks that it would take to transmit 'size' bytes at a
4135 * rate of 'rate' bytes per second. */
/* Guard against division by zero when 'rate' is 0. */
4137 tc_bytes_to_ticks(unsigned int rate, unsigned int size)
4140 return rate ? ((unsigned long long int) ticks_per_s * size) / rate : 0;
4143 /* Returns the number of bytes that need to be reserved for qdisc buffering at
4144 * a transmission rate of 'rate' bytes per second. */
/* A huge buffer_hz (finely granular kernel timers) makes this ~0, which is
 * intended — see the comment on 'buffer_hz' above. */
4146 tc_buffer_per_jiffy(unsigned int rate)
4149 return rate / buffer_hz;
4152 /* Given Netlink 'msg' that describes a qdisc, extracts the name of the qdisc,
4153 * e.g. "htb", into '*kind' (if it is nonnull). If 'options' is nonnull,
4154 * extracts 'msg''s TCA_OPTIONS attributes into '*options' if it is present or
4155 * stores NULL into it if it is absent.
4157 * '*kind' and '*options' point into 'msg', so they are owned by whoever owns
4160 * Returns 0 if successful, otherwise a positive errno value. */
/* NOTE(review): the return-type line, the error return after the parse
 * failure, the null checks on 'kind'/'options', and the "return 0;" are
 * missing from this extraction. */
4162 tc_parse_qdisc(const struct ofpbuf *msg, const char **kind,
4163 struct nlattr **options)
4165 static const struct nl_policy tca_policy[] = {
4166 [TCA_KIND] = { .type = NL_A_STRING, .optional = false },
4167 [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
4169 struct nlattr *ta[ARRAY_SIZE(tca_policy)];
/* Attributes start after the netlink header plus the struct tcmsg. */
4171 if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg),
4172 tca_policy, ta, ARRAY_SIZE(ta))) {
4173 VLOG_WARN_RL(&rl, "failed to parse qdisc message");
4178 *kind = nl_attr_get_string(ta[TCA_KIND]);
4182 *options = ta[TCA_OPTIONS];
4197 /* Given Netlink 'msg' that describes a class, extracts the queue ID (e.g. the
4198 * minor number of its class ID) into '*queue_id', its TCA_OPTIONS attribute
4199 * into '*options', and its queue statistics into '*stats'. Any of the output
4200 * arguments may be null.
4202 * Returns 0 if successful, otherwise a positive errno value. */
/* NOTE(review): the return-type line, several null-pointer checks on the
 * output arguments, error returns, and closing braces are missing from this
 * extraction. */
4204 tc_parse_class(const struct ofpbuf *msg, unsigned int *handlep,
4205 struct nlattr **options, struct netdev_queue_stats *stats)
4207 static const struct nl_policy tca_policy[] = {
4208 [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false },
4209 [TCA_STATS2] = { .type = NL_A_NESTED, .optional = false },
4211 struct nlattr *ta[ARRAY_SIZE(tca_policy)];
4213 if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg),
4214 tca_policy, ta, ARRAY_SIZE(ta))) {
4215 VLOG_WARN_RL(&rl, "failed to parse class message");
/* The class handle lives in the fixed struct tcmsg, not an attribute. */
4220 struct tcmsg *tc = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tc);
4221 *handlep = tc->tcm_handle;
4225 *options = ta[TCA_OPTIONS];
4229 const struct gnet_stats_queue *gsq;
4230 struct gnet_stats_basic gsb;
4232 static const struct nl_policy stats_policy[] = {
4233 [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC, .optional = false,
4234 .min_len = sizeof gsb },
4235 [TCA_STATS_QUEUE] = { .type = NL_A_UNSPEC, .optional = false,
4236 .min_len = sizeof *gsq },
4238 struct nlattr *sa[ARRAY_SIZE(stats_policy)];
4240 if (!nl_parse_nested(ta[TCA_STATS2], stats_policy,
4241 sa, ARRAY_SIZE(sa))) {
4242 VLOG_WARN_RL(&rl, "failed to parse class stats");
4246 /* Alignment issues screw up the length of struct gnet_stats_basic on
4247 * some arch/bitsize combinations. Newer versions of Linux have a
4248 * struct gnet_stats_basic_packed, but we can't depend on that. The
4249 * easiest thing to do is just to make a copy. */
4250 memset(&gsb, 0, sizeof gsb);
4251 memcpy(&gsb, nl_attr_get(sa[TCA_STATS_BASIC]),
4252 MIN(nl_attr_get_size(sa[TCA_STATS_BASIC]), sizeof gsb));
4253 stats->tx_bytes = gsb.bytes;
4254 stats->tx_packets = gsb.packets;
4256 gsq = nl_attr_get(sa[TCA_STATS_QUEUE]);
4257 stats->tx_errors = gsq->drops;
/* On a parse error, zero the caller's stats rather than leaving garbage. */
4267 memset(stats, 0, sizeof *stats);
4272 /* Queries the kernel for class with identifier 'handle' and parent 'parent'
/* The reply (requested via NLM_F_ECHO) is stored in '*replyp' on success.
 * NOTE(review): the return-type line, 'error' declaration, the NULL check on
 * tc_make_request()'s result, and the final "return error;" are missing from
 * this extraction. */
4275 tc_query_class(const struct netdev *netdev,
4276 unsigned int handle, unsigned int parent,
4277 struct ofpbuf **replyp)
4279 struct ofpbuf request;
4280 struct tcmsg *tcmsg;
4283 tcmsg = tc_make_request(netdev, RTM_GETTCLASS, NLM_F_ECHO, &request);
4287 tcmsg->tcm_handle = handle;
4288 tcmsg->tcm_parent = parent;
4290 error = tc_transact(&request, replyp);
4292 VLOG_WARN_RL(&rl, "query %s class %u:%u (parent %u:%u) failed (%s)",
4293 netdev_get_name(netdev),
4294 tc_get_major(handle), tc_get_minor(handle),
4295 tc_get_major(parent), tc_get_minor(parent),
4296 ovs_strerror(error));
4301 /* Equivalent to "tc class del dev <name> handle <handle>". */
/* NOTE(review): the return-type line, 'error' declaration, NULL check on
 * tc_make_request(), and the trailing "return error;" are missing from this
 * extraction. */
4303 tc_delete_class(const struct netdev *netdev, unsigned int handle)
4305 struct ofpbuf request;
4306 struct tcmsg *tcmsg;
4309 tcmsg = tc_make_request(netdev, RTM_DELTCLASS, 0, &request);
4313 tcmsg->tcm_handle = handle;
4314 tcmsg->tcm_parent = 0;
4316 error = tc_transact(&request, NULL);
4318 VLOG_WARN_RL(&rl, "delete %s class %u:%u failed (%s)",
4319 netdev_get_name(netdev),
4320 tc_get_major(handle), tc_get_minor(handle),
4321 ovs_strerror(error));
4326 /* Equivalent to "tc qdisc del dev <name> root". */
/* Also tears down the in-memory tc state for 'netdev_' on success.
 * NOTE(review): the return-type line, 'error' declaration, the NULL check on
 * tc_make_request(), and the function's tail (presumably clearing netdev->tc
 * and returning 'error') are missing from this extraction. */
4328 tc_del_qdisc(struct netdev *netdev_)
4330 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
4331 struct ofpbuf request;
4332 struct tcmsg *tcmsg;
4335 tcmsg = tc_make_request(netdev_, RTM_DELQDISC, 0, &request);
/* Handle 1:0 matches the convention used when OVS installs a root qdisc. */
4339 tcmsg->tcm_handle = tc_make_handle(1, 0);
4340 tcmsg->tcm_parent = TC_H_ROOT;
4342 error = tc_transact(&request, NULL);
4343 if (error == EINVAL) {
4344 /* EINVAL probably means that the default qdisc was in use, in which
4345 * case we've accomplished our purpose. */
4348 if (!error && netdev->tc) {
4349 if (netdev->tc->ops->tc_destroy) {
4350 netdev->tc->ops->tc_destroy(netdev->tc);
4357 /* If 'netdev''s qdisc type and parameters are not yet known, queries the
4358 * kernel to determine what they are. Returns 0 if successful, otherwise a
4359 * positive errno value. */
/* NOTE(review): the return-type line, declarations of 'error'/'load_error'/
 * 'kind', the early return when netdev->tc is already populated, the NULL
 * check on tc_make_request(), and closing braces are missing from this
 * extraction. */
4361 tc_query_qdisc(const struct netdev *netdev_)
4363 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
4364 struct ofpbuf request, *qdisc;
4365 const struct tc_ops *ops;
4366 struct tcmsg *tcmsg;
4374 /* This RTM_GETQDISC is crafted to avoid OOPSing kernels that do not have
4375 * commit 53b0f08 "net_sched: Fix qdisc_notify()", which is anything before
4376 * 2.6.35 without that fix backported to it.
4378 * To avoid the OOPS, we must not make a request that would attempt to dump
4379 * a "built-in" qdisc, that is, the default pfifo_fast qdisc or one of a
4380 * few others. There are a few ways that I can see to do this, but most of
4381 * them seem to be racy (and if you lose the race the kernel OOPSes). The
4382 * technique chosen here is to assume that any non-default qdisc that we
4383 * create will have a class with handle 1:0. The built-in qdiscs only have
4384 * a class with handle 0:0.
4386 * We could check for Linux 2.6.35+ and use a more straightforward method
4388 tcmsg = tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO, &request);
4392 tcmsg->tcm_handle = tc_make_handle(1, 0);
4393 tcmsg->tcm_parent = 0;
4395 /* Figure out what tc class to instantiate. */
4396 error = tc_transact(&request, &qdisc);
4400 error = tc_parse_qdisc(qdisc, &kind, NULL);
/* Parse failure: treat the qdisc as an unknown ("other") kind. */
4402 ops = &tc_ops_other;
4404 ops = tc_lookup_linux_name(kind);
4406 static struct vlog_rate_limit rl2 = VLOG_RATE_LIMIT_INIT(1, 1);
4407 VLOG_INFO_RL(&rl2, "unknown qdisc \"%s\"", kind);
4409 ops = &tc_ops_other;
4412 } else if (error == ENOENT) {
4413 /* Either it's a built-in qdisc, or it's a qdisc set up by some
4414 * other entity that doesn't have a handle 1:0. We will assume
4415 * that it's the system default qdisc. */
4416 ops = &tc_ops_default;
4419 /* Who knows? Maybe the device got deleted. */
4420 VLOG_WARN_RL(&rl, "query %s qdisc failed (%s)",
4421 netdev_get_name(netdev_), ovs_strerror(error));
4422 ops = &tc_ops_other;
4425 /* Instantiate it. */
4426 load_error = ops->tc_load(CONST_CAST(struct netdev *, netdev_), qdisc);
/* tc_load succeeds exactly when it populates netdev->tc. */
4427 ovs_assert((load_error == 0) == (netdev->tc != NULL));
4428 ofpbuf_delete(qdisc);
4430 return error ? error : load_error;
4433 /* Linux traffic control uses tables with 256 entries ("rtab" tables) to
4434 approximate the time to transmit packets of various lengths. For an MTU of
4435 256 or less, each entry is exact; for an MTU of 257 through 512, each entry
4436 represents two possible packet lengths; for a MTU of 513 through 1024, four
4437 possible lengths; and so on.
4439 Returns, for the specified 'mtu', the number of bits that packet lengths
4440 need to be shifted right to fit within such a 256-entry table. */
/* NOTE(review): the return-type line, 'cell_log' declaration, the guard that
 * defaults a non-positive 'mtu' to ETH_PAYLOAD_MAX, the loop body ("mtu >>=
 * 1;"), and the "return cell_log;" are missing from this extraction. */
4442 tc_calc_cell_log(unsigned int mtu)
4447 mtu = ETH_PAYLOAD_MAX;
/* Account for L2 framing on top of the payload MTU. */
4449 mtu += ETH_HEADER_LEN + VLAN_HEADER_LEN;
4451 for (cell_log = 0; mtu >= 256; cell_log++) {
4458 /* Initializes 'rate' properly for a rate of 'Bps' bytes per second with an MTU
/* NOTE(review): return-type line, braces, and the assignment of rate->rate
 * (presumably "rate->rate = Bps;") are missing from this extraction. */
4461 tc_fill_rate(struct tc_ratespec *rate, uint64_t Bps, int mtu)
4463 memset(rate, 0, sizeof *rate);
4464 rate->cell_log = tc_calc_cell_log(mtu);
4465 /* rate->overhead = 0; */ /* New in 2.6.24, not yet in some */
4466 /* rate->cell_align = 0; */ /* distro headers. */
/* Minimum packet unit: no packet is billed below one minimum Ethernet frame. */
4467 rate->mpu = ETH_TOTAL_MIN;
4471 /* Appends to 'msg' an "rtab" table for the specified 'rate' as a Netlink
4472 * attribute of the specified "type".
4474 * See tc_calc_cell_log() above for a description of "rtab"s. */
/* NOTE(review): the return-type line, declarations of 'rtab' and 'i', and
 * closing braces are missing from this extraction. */
4476 tc_put_rtab(struct ofpbuf *msg, uint16_t type, const struct tc_ratespec *rate)
4481 rtab = nl_msg_put_unspec_uninit(msg, type, TC_RTAB_SIZE);
4482 for (i = 0; i < TC_RTAB_SIZE / sizeof *rtab; i++) {
/* Entry i covers packets up to (i + 1) << cell_log bytes. */
4483 unsigned packet_size = (i + 1) << rate->cell_log;
4484 if (packet_size < rate->mpu) {
4485 packet_size = rate->mpu;
4487 rtab[i] = tc_bytes_to_ticks(rate->rate, packet_size);
4491 /* Calculates the proper value of 'buffer' or 'cbuffer' in HTB options given a
4492 * rate of 'Bps' bytes per second, the specified 'mtu', and a user-requested
4493 * burst size of 'burst_bytes'. (If no value was requested, a 'burst_bytes' of
/* NOTE(review): the return-type line and braces are missing from this
 * extraction. */
4496 tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes)
/* Never let the burst drop below one jiffy of data plus one MTU. */
4498 unsigned int min_burst = tc_buffer_per_jiffy(Bps) + mtu;
4499 return tc_bytes_to_ticks(Bps, MAX(burst_bytes, min_burst));
4502 /* Linux-only functions declared in netdev-linux.h */
4504 /* Modifies the 'flag' bit in ethtool's flags field for 'netdev'. If
4505 * 'enable' is true, the bit is set. Otherwise, it is cleared. */
/* Sequence: read current flags, write the modified value, then read back and
 * warn if the device silently refused the change.
 * NOTE(review): the return-type line, declarations of 'error'/'new_flags',
 * the error-return branches between the three ethtool calls, and the closing
 * return/braces are missing from this extraction. */
4507 netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag,
4508 const char *flag_name, bool enable)
4510 const char *netdev_name = netdev_get_name(netdev);
4511 struct ethtool_value evalue;
4515 COVERAGE_INC(netdev_get_ethtool);
4516 memset(&evalue, 0, sizeof evalue);
4517 error = netdev_linux_do_ethtool(netdev_name,
4518 (struct ethtool_cmd *)&evalue,
4519 ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS");
4524 COVERAGE_INC(netdev_set_ethtool);
4525 evalue.data = new_flags = (evalue.data & ~flag) | (enable ? flag : 0);
4526 error = netdev_linux_do_ethtool(netdev_name,
4527 (struct ethtool_cmd *)&evalue,
4528 ETHTOOL_SFLAGS, "ETHTOOL_SFLAGS");
/* Re-read to verify the driver actually applied the new flags. */
4533 COVERAGE_INC(netdev_get_ethtool);
4534 memset(&evalue, 0, sizeof evalue);
4535 error = netdev_linux_do_ethtool(netdev_name,
4536 (struct ethtool_cmd *)&evalue,
4537 ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS");
4542 if (new_flags != evalue.data) {
4543 VLOG_WARN_RL(&rl, "attempt to %s ethtool %s flag on network "
4544 "device %s failed", enable ? "enable" : "disable",
4545 flag_name, netdev_name);
4552 /* Utility functions. */
4554 /* Copies 'src' into 'dst', performing format conversion in the process. */
/* Field-by-field widening copy from the kernel's struct rtnl_link_stats into
 * OVS's struct netdev_stats.  NOTE(review): the return-type line ("static
 * void" presumably) and braces are missing from this extraction. */
4556 netdev_stats_from_rtnl_link_stats(struct netdev_stats *dst,
4557 const struct rtnl_link_stats *src)
4559 dst->rx_packets = src->rx_packets;
4560 dst->tx_packets = src->tx_packets;
4561 dst->rx_bytes = src->rx_bytes;
4562 dst->tx_bytes = src->tx_bytes;
4563 dst->rx_errors = src->rx_errors;
4564 dst->tx_errors = src->tx_errors;
4565 dst->rx_dropped = src->rx_dropped;
4566 dst->tx_dropped = src->tx_dropped;
4567 dst->multicast = src->multicast;
4568 dst->collisions = src->collisions;
4569 dst->rx_length_errors = src->rx_length_errors;
4570 dst->rx_over_errors = src->rx_over_errors;
4571 dst->rx_crc_errors = src->rx_crc_errors;
4572 dst->rx_frame_errors = src->rx_frame_errors;
4573 dst->rx_fifo_errors = src->rx_fifo_errors;
4574 dst->rx_missed_errors = src->rx_missed_errors;
4575 dst->tx_aborted_errors = src->tx_aborted_errors;
4576 dst->tx_carrier_errors = src->tx_carrier_errors;
4577 dst->tx_fifo_errors = src->tx_fifo_errors;
4578 dst->tx_heartbeat_errors = src->tx_heartbeat_errors;
4579 dst->tx_window_errors = src->tx_window_errors;
/* Fetches interface statistics for 'netdev_' by sending an RTM_GETLINK
 * request over rtnetlink and extracting the IFLA_STATS attribute from the
 * reply into '*stats'.
 * NOTE(review): the return-type line, 'error' declaration, the early return
 * when nl_transact() fails, the error assignments (presumably EPROTO) in the
 * warning branches, and the trailing "return error;" are missing from this
 * extraction. */
4583 get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
4585 struct ofpbuf request;
4586 struct ofpbuf *reply;
4589 ofpbuf_init(&request, 0);
4590 nl_msg_put_nlmsghdr(&request,
4591 sizeof(struct ifinfomsg) + NL_ATTR_SIZE(IFNAMSIZ),
4592 RTM_GETLINK, NLM_F_REQUEST);
4593 ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
4594 nl_msg_put_string(&request, IFLA_IFNAME, netdev_get_name(netdev_));
4595 error = nl_transact(NETLINK_ROUTE, &request, &reply);
4596 ofpbuf_uninit(&request);
/* Skip the netlink and ifinfomsg headers to reach the attributes. */
4601 if (ofpbuf_try_pull(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg))) {
4602 const struct nlattr *a = nl_attr_find(reply, 0, IFLA_STATS);
4603 if (a && nl_attr_get_size(a) >= sizeof(struct rtnl_link_stats)) {
4604 netdev_stats_from_rtnl_link_stats(stats, nl_attr_get(a));
4607 VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats");
4611 VLOG_WARN_RL(&rl, "short RTM_GETLINK reply");
4616 ofpbuf_delete(reply);
/* Retrieves the interface flags (IFF_*) of 'dev' via SIOCGIFFLAGS into
 * '*flags'.  NOTE(review): the return-type line, 'ifr'/'error' declarations,
 * and "return error;" are missing from this extraction. */
4621 get_flags(const struct netdev *dev, unsigned int *flags)
4627 error = af_inet_ifreq_ioctl(dev->name, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS");
4629 *flags = ifr.ifr_flags;
/* Sets the interface flags of the device named 'name' via SIOCSIFFLAGS.
 * NOTE(review): the return-type line and the 'ifr' declaration are missing
 * from this extraction. */
4635 set_flags(const char *name, unsigned int flags)
4639 ifr.ifr_flags = flags;
4640 return af_inet_ifreq_ioctl(name, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS");
/* Looks up the kernel ifindex for 'netdev_name' via SIOCGIFINDEX.  Returns
 * the ifindex, or (judging by the warning branch) a negative errno value on
 * failure.  NOTE(review): the return-type line, 'ifr'/'error' declarations,
 * and the "return -error;" line are missing from this extraction. */
4644 do_get_ifindex(const char *netdev_name)
4649 ovs_strzcpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
4650 COVERAGE_INC(netdev_get_ifindex);
4652 error = af_inet_ioctl(SIOCGIFINDEX, &ifr);
4654 VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s",
4655 netdev_name, ovs_strerror(error));
4658 return ifr.ifr_ifindex;
/* Returns 'netdev_''s ifindex in '*ifindexp', caching both the value and any
 * lookup error in the netdev so the ioctl runs only once per device.
 * Returns 0 on success or the cached positive errno on failure.
 * NOTE(review): the return-type line and the "if (ifindex < 0)" test around
 * the error branch are missing from this extraction. */
4662 get_ifindex(const struct netdev *netdev_, int *ifindexp)
4664 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
4666 if (!(netdev->cache_valid & VALID_IFINDEX)) {
4667 int ifindex = do_get_ifindex(netdev_get_name(netdev_));
/* do_get_ifindex() returns a negative errno on failure; store it positive. */
4670 netdev->get_ifindex_error = -ifindex;
4671 netdev->ifindex = 0;
4673 netdev->get_ifindex_error = 0;
4674 netdev->ifindex = ifindex;
4676 netdev->cache_valid |= VALID_IFINDEX;
4679 *ifindexp = netdev->ifindex;
4680 return netdev->get_ifindex_error;
/* Reads the Ethernet hardware address of 'netdev_name' into 'ea' via
 * SIOCGIFHWADDR.  NOTE(review): the return-type line, declarations of
 * 'ifr'/'error'/'hwaddr_family', the error return after the VLOG, the
 * EINVAL-style return in the unknown-family branch, and the final "return
 * 0;" are missing from this extraction. */
4684 get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN])
4690 memset(&ifr, 0, sizeof ifr);
4691 ovs_strzcpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
4692 COVERAGE_INC(netdev_get_hwaddr);
4693 error = af_inet_ioctl(SIOCGIFHWADDR, &ifr);
4695 /* ENODEV probably means that a vif disappeared asynchronously and
4696 * hasn't been removed from the database yet, so reduce the log level
4697 * to INFO for that case. */
4698 VLOG(error == ENODEV ? VLL_INFO : VLL_ERR,
4699 "ioctl(SIOCGIFHWADDR) on %s device failed: %s",
4700 netdev_name, ovs_strerror(error));
4703 hwaddr_family = ifr.ifr_hwaddr.sa_family;
/* Only Ethernet (or unspecified) hardware address families are supported. */
4704 if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
4705 VLOG_WARN("%s device has unknown hardware address family %d",
4706 netdev_name, hwaddr_family);
4708 memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN);
/* Sets the Ethernet hardware address of 'netdev_name' to 'mac' via
 * SIOCSIFHWADDR.  NOTE(review): the return-type line, 'ifr'/'error'
 * declarations, and "return error;" are missing from this extraction. */
4713 set_etheraddr(const char *netdev_name,
4714 const uint8_t mac[ETH_ADDR_LEN])
4719 memset(&ifr, 0, sizeof ifr);
4720 ovs_strzcpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
4721 ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
4722 memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ADDR_LEN);
4723 COVERAGE_INC(netdev_set_hwaddr);
4724 error = af_inet_ioctl(SIOCSIFHWADDR, &ifr);
4726 VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s",
4727 netdev_name, ovs_strerror(error));
/* Issues ethtool command 'cmd' (named 'cmd_name' for logging) on device
 * 'name', using 'ecmd' as the in/out command buffer, via SIOCETHTOOL.
 * EOPNOTSUPP is deliberately not logged since many devices lack ethtool
 * support.  NOTE(review): the return-type line, 'ifr'/'error' declarations,
 * the line setting ecmd->cmd = cmd, and "return error;" are missing from
 * this extraction. */
4733 netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *ecmd,
4734 int cmd, const char *cmd_name)
4739 memset(&ifr, 0, sizeof ifr);
4740 ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
/* ethtool passes its command struct through ifr_data. */
4741 ifr.ifr_data = (caddr_t) ecmd;
4744 error = af_inet_ioctl(SIOCETHTOOL, &ifr);
4746 if (error != EOPNOTSUPP) {
4747 VLOG_WARN_RL(&rl, "ethtool command %s on network device %s "
4748 "failed: %s", cmd_name, name, ovs_strerror(error));
4750 /* The device doesn't support this operation. That's pretty
4751 * common, so there's no point in logging anything. */
/* Queries an IPv4 address of 'netdev' using ioctl 'cmd' (named 'cmd_name'
 * for logging, e.g. SIOCGIFADDR) and stores it in '*ip' on success.
 * NOTE(review): the return-type line, 'ifr'/'error' declarations, the second
 * argument to ALIGNED_CAST (presumably &ifr.ifr_addr), and "return error;"
 * are missing from this extraction. */
4758 netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip,
4759 int cmd, const char *cmd_name)
4764 ifr.ifr_addr.sa_family = AF_INET;
4765 error = af_inet_ifreq_ioctl(netdev_get_name(netdev), &ifr, cmd, cmd_name);
/* ALIGNED_CAST avoids the misaligned-pointer UB of a plain cast. */
4767 const struct sockaddr_in *sin = ALIGNED_CAST(struct sockaddr_in *,
4769 *ip = sin->sin_addr;
4774 /* Returns an AF_PACKET raw socket or a negative errno value. */
4776 af_packet_sock(void)
4778 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4781 if (ovsthread_once_start(&once)) {
4782 sock = socket(AF_PACKET, SOCK_RAW, 0);
4784 int error = set_nonblocking(sock);
4791 VLOG_ERR("failed to create packet socket: %s",
4792 ovs_strerror(errno));
4794 ovsthread_once_done(&once);