Add ability to monitor both ends of secchan connections from dpctl.

[sliver-openvswitch.git] / lib / netlink.c
diff --git a/lib/netlink.c b/lib/netlink.c

index eb199e5..1316547 100644 (file)
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -31,6 +31,7 @@
   * derivatives without specific, written prior permission.
   */
  
+#include <config.h>
  #include "netlink.h"
  #include <assert.h>
  #include <errno.h>
@@ -41,6 +42,9 @@
  #include <time.h>
  #include <unistd.h>
  #include "buffer.h"
+#include "netlink-protocol.h"
+#include "dynamic-string.h"
+#include "timeval.h"
  #include "util.h"
  
  #include "vlog.h"
@@ -50,6 +54,14 @@
  #ifndef SOL_NETLINK
  #define SOL_NETLINK 270
  #endif
+
+/* A single (bad) Netlink message can in theory dump out many, many log
+ * messages, so the burst size is set quite high here to avoid missing useful
+ * information.  Also, at high logging levels we log *all* Netlink messages. */
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600);
+
+static void log_nlmsg(const char *function, int error,
+                      const void *message, size_t size);
  \f
  /* Netlink sockets. */
  
@@ -93,7 +105,7 @@ nl_sock_create(int protocol, int multicast_group,
  
      if (next_seq == 0) {
          /* Pick initial sequence number. */
-        next_seq = getpid() ^ time(0);
+        next_seq = getpid() ^ time_now();
      }
  
      *sockp = NULL;
@@ -150,6 +162,13 @@ nl_sock_create(int protocol, int multicast_group,
          goto error_free_pid;
      }
  
+    /* Older kernel headers failed to define this macro.  We want our programs
+     * to support the newer kernel features even if compiled with older
+     * headers, so define it ourselves in such a case. */
+#ifndef NETLINK_ADD_MEMBERSHIP
+#define NETLINK_ADD_MEMBERSHIP 1
+#endif
+
      /* This method of joining multicast groups is only supported by newish
       * kernels, but it allows for an arbitrary number of multicast groups. */
      if (multicast_group > 32
@@ -200,13 +219,16 @@ nl_sock_destroy(struct nl_sock *sock)
  int
  nl_sock_send(struct nl_sock *sock, const struct buffer *msg, bool wait) 
  {
-    int retval;
+    int error;
  
      nl_msg_nlmsghdr(msg)->nlmsg_len = msg->size;
      do {
+        int retval;
          retval = send(sock->fd, msg->data, msg->size, wait ? 0 : MSG_DONTWAIT);
-    } while (retval < 0 && errno == EINTR);
-    return retval < 0 ? errno : 0;
+        error = retval < 0 ? errno : 0;
+    } while (error == EINTR);
+    log_nlmsg(__func__, error, msg->data, msg->size);
+    return error;
  }
  
  /* Tries to send the 'n_iov' chunks of data in 'iov' to the kernel on 'sock' as
@@ -221,15 +243,20 @@ nl_sock_sendv(struct nl_sock *sock, const struct iovec iov[], size_t n_iov,
                bool wait) 
  {
      struct msghdr msg;
-    int retval;
+    int error;
  
      memset(&msg, 0, sizeof msg);
      msg.msg_iov = (struct iovec *) iov;
      msg.msg_iovlen = n_iov;
      do {
-        retval = sendmsg(sock->fd, &msg, MSG_DONTWAIT);
-    } while (retval < 0 && errno == EINTR);
-    return retval < 0 ? errno : 0;
+        int retval;
+        retval = sendmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
+        error = retval < 0 ? errno : 0;
+    } while (error == EINTR);
+    if (error != EAGAIN) {
+        log_nlmsg(__func__, error, iov[0].iov_base, iov[0].iov_len);
+    }
+    return error;
  }
  
  /* Tries to receive a netlink message from the kernel on 'sock'.  If
@@ -289,18 +316,30 @@ try_again:
      iov.iov_len = 1;
      do {
          nbytes2 = recvmsg(sock->fd, &msg, MSG_DONTWAIT);
-        if (nbytes2 < 0) {
-            VLOG_ERR("failed to remove nlmsg from socket: %d\n", errno);
-        }
      } while (nbytes2 < 0 && errno == EINTR);
-
-    if (!NLMSG_OK(nlmsghdr, nbytes)) {
-        VLOG_ERR("received invalid nlmsg (%zd bytes < %d)",
-                 bufsize, NLMSG_HDRLEN);
+    if (nbytes2 < 0) {
+        if (errno == ENOBUFS) {
+            /* The kernel is notifying us that a message it tried to send to us
+             * was dropped.  We have to pass this along to the caller in case
+             * it wants to retry a request.  So kill the buffer, which we can
+             * re-read next time. */
+            buffer_delete(buf);
+            return ENOBUFS;
+        } else {
+            VLOG_ERR_RL(&rl, "failed to remove nlmsg from socket: %s\n",
+                        strerror(errno));
+        }
+    }
+    if (nbytes < sizeof *nlmsghdr
+        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
+        || nlmsghdr->nlmsg_len > nbytes) {
+        VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %d)",
+                    bufsize, NLMSG_HDRLEN);
          buffer_delete(buf);
          return EPROTO;
      }
      *bufp = buf;
+    log_nlmsg(__func__, 0, buf->data, buf->size);
      return 0;
  }
  
@@ -360,7 +399,7 @@ recv:
      retval = nl_sock_recv(sock, &reply, true);
      if (retval) {
          if (retval == ENOBUFS) {
-            VLOG_DBG("receive buffer overflow, resending request");
+            VLOG_DBG_RL(&rl, "receive buffer overflow, resending request");
              goto send;
          } else {
              return retval;
@@ -368,14 +407,15 @@ recv:
      }
      nlmsghdr = nl_msg_nlmsghdr(reply);
      if (seq != nlmsghdr->nlmsg_seq) {
-        VLOG_DBG("ignoring seq %"PRIu32" != expected %"PRIu32,
-                 nl_msg_nlmsghdr(reply)->nlmsg_seq, seq);
+        VLOG_DBG_RL(&rl, "ignoring seq %"PRIu32" != expected %"PRIu32,
+                    nl_msg_nlmsghdr(reply)->nlmsg_seq, seq);
          buffer_delete(reply);
          goto recv;
      }
      if (nl_msg_nlmsgerr(reply, &retval)) {
          if (retval) {
-            VLOG_DBG("received NAK error=%d (%s)", retval, strerror(retval));
+            VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
+                        retval, strerror(retval));
          }
          return retval != EAGAIN ? retval : EPROTO;
      }
@@ -424,8 +464,8 @@ nl_msg_nlmsgerr(const struct buffer *msg, int *errorp)
          struct nlmsgerr *err = buffer_at(msg, NLMSG_HDRLEN, sizeof *err);
          int code = EPROTO;
          if (!err) {
-            VLOG_ERR("received invalid nlmsgerr (%zd bytes < %zd)",
-                     msg->size, NLMSG_HDRLEN + sizeof *err);
+            VLOG_ERR_RL(&rl, "received invalid nlmsgerr (%zd bytes < %zd)",
+                        msg->size, NLMSG_HDRLEN + sizeof *err);
          } else if (err->error <= 0 && err->error > INT_MIN) {
              code = -err->error;
          }
@@ -438,12 +478,12 @@ nl_msg_nlmsgerr(const struct buffer *msg, int *errorp)
      }
  }
  
-/* Ensures that 'b' has room for at least 'size' bytes plus netlink pading at
+/* Ensures that 'b' has room for at least 'size' bytes plus netlink padding at
   * its tail end, reallocating and copying its data if necessary. */
  void
  nl_msg_reserve(struct buffer *msg, size_t size) 
  {
-    buffer_reserve_tailroom(msg, NLMSG_ALIGN(size));
+    buffer_prealloc_tailroom(msg, NLMSG_ALIGN(size));
  }
  
  /* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty.
@@ -744,7 +784,7 @@ nl_policy_parse(const struct buffer *msg, const struct nl_policy policy[],
  
      p = buffer_at(msg, NLMSG_HDRLEN + GENL_HDRLEN, 0);
      if (p == NULL) {
-        VLOG_DBG("missing headers in nl_policy_parse");
+        VLOG_DBG_RL(&rl, "missing headers in nl_policy_parse");
          return false;
      }
      tail = buffer_tail(msg);
@@ -757,16 +797,16 @@ nl_policy_parse(const struct buffer *msg, const struct nl_policy policy[],
  
          /* Make sure its claimed length is plausible. */
          if (nla->nla_len < NLA_HDRLEN) {
-            VLOG_DBG("%zu: attr shorter than NLA_HDRLEN (%"PRIu16")",
-                     offset, nla->nla_len);
+            VLOG_DBG_RL(&rl, "%zu: attr shorter than NLA_HDRLEN (%"PRIu16")",
+                        offset, nla->nla_len);
              return false;
          }
          len = nla->nla_len - NLA_HDRLEN;
          aligned_len = NLA_ALIGN(len);
          if (aligned_len > tail - p) {
-            VLOG_DBG("%zu: attr %"PRIu16" aligned data len (%zu) "
-                     "> bytes left (%tu)",
-                     offset, nla->nla_type, aligned_len, tail - p);
+            VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" aligned data len (%zu) "
+                        "> bytes left (%tu)",
+                        offset, nla->nla_type, aligned_len, tail - p);
              return false;
          }
  
@@ -779,19 +819,20 @@ nl_policy_parse(const struct buffer *msg, const struct nl_policy policy[],
              min_len = p->min_len ? p->min_len : attr_len_range[p->type][0];
              max_len = p->max_len ? p->max_len : attr_len_range[p->type][1];
              if (len < min_len || len > max_len) {
-                VLOG_DBG("%zu: attr %"PRIu16" length %zu not in allowed range "
-                         "%zu...%zu", offset, type, len, min_len, max_len);
+                VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" length %zu not in "
+                            "allowed range %zu...%zu",
+                            offset, type, len, min_len, max_len);
                  return false;
              }
              if (p->type == NL_A_STRING) {
                  if (((char *) nla)[nla->nla_len - 1]) {
-                    VLOG_DBG("%zu: attr %"PRIu16" lacks null terminator",
-                             offset, type);
+                    VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" lacks null at end",
+                                offset, type);
                      return false;
                  }
                  if (memchr(nla + 1, '\0', len - 1) != NULL) {
-                    VLOG_DBG("%zu: attr %"PRIu16" lies about string length",
-                             offset, type);
+                    VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" has bad length",
+                                offset, type);
                      return false;
                  }
              }
@@ -806,7 +847,7 @@ nl_policy_parse(const struct buffer *msg, const struct nl_policy policy[],
          p += NLA_ALIGN(nla->nla_len);
      }
      if (n_required) {
-        VLOG_DBG("%zu required attrs missing", n_required);
+        VLOG_DBG_RL(&rl, "%zu required attrs missing", n_required);
          return false;
      }
      return true;
@@ -918,3 +959,114 @@ free_pid(uint32_t pid)
      assert(avail_sockets[sock / 32] & (1u << (sock % 32)));
      avail_sockets[sock / 32] &= ~(1u << (sock % 32));
  }
+\f
+static void
+nlmsghdr_to_string(const struct nlmsghdr *h, struct ds *ds)
+{
+    struct nlmsg_flag {
+        unsigned int bits;
+        const char *name;
+    };
+    static const struct nlmsg_flag flags[] = { 
+        { NLM_F_REQUEST, "REQUEST" },
+        { NLM_F_MULTI, "MULTI" },
+        { NLM_F_ACK, "ACK" },
+        { NLM_F_ECHO, "ECHO" },
+        { NLM_F_DUMP, "DUMP" },
+        { NLM_F_ROOT, "ROOT" },
+        { NLM_F_MATCH, "MATCH" },
+        { NLM_F_ATOMIC, "ATOMIC" },
+    };
+    const struct nlmsg_flag *flag;
+    uint16_t flags_left;
+
+    ds_put_format(ds, "nl(len:%"PRIu32", type=%"PRIu16,
+                  h->nlmsg_len, h->nlmsg_type);
+    if (h->nlmsg_type == NLMSG_NOOP) {
+        ds_put_cstr(ds, "(no-op)");
+    } else if (h->nlmsg_type == NLMSG_ERROR) {
+        ds_put_cstr(ds, "(error)");
+    } else if (h->nlmsg_type == NLMSG_DONE) {
+        ds_put_cstr(ds, "(done)");
+    } else if (h->nlmsg_type == NLMSG_OVERRUN) {
+        ds_put_cstr(ds, "(overrun)");
+    } else if (h->nlmsg_type < NLMSG_MIN_TYPE) {
+        ds_put_cstr(ds, "(reserved)");
+    } else {
+        ds_put_cstr(ds, "(family-defined)");
+    }
+    ds_put_format(ds, ", flags=%"PRIx16, h->nlmsg_flags);
+    flags_left = h->nlmsg_flags;
+    for (flag = flags; flag < &flags[ARRAY_SIZE(flags)]; flag++) {
+        if ((flags_left & flag->bits) == flag->bits) {
+            ds_put_format(ds, "[%s]", flag->name);
+            flags_left &= ~flag->bits;
+        }
+    }
+    if (flags_left) {
+        ds_put_format(ds, "[OTHER:%"PRIx16"]", flags_left);
+    }
+    ds_put_format(ds, ", seq=%"PRIx32", pid=%"PRIu32"(%d:%d))",
+                  h->nlmsg_seq, h->nlmsg_pid,
+                  (int) (h->nlmsg_pid & PROCESS_MASK),
+                  (int) (h->nlmsg_pid >> PROCESS_BITS));
+}
+
+static char *
+nlmsg_to_string(const struct buffer *buffer)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    const struct nlmsghdr *h = buffer_at(buffer, 0, NLMSG_HDRLEN);
+    if (h) {
+        nlmsghdr_to_string(h, &ds);
+        if (h->nlmsg_type == NLMSG_ERROR) {
+            const struct nlmsgerr *e;
+            e = buffer_at(buffer, NLMSG_HDRLEN,
+                          NLMSG_ALIGN(sizeof(struct nlmsgerr)));
+            if (e) {
+                ds_put_format(&ds, " error(%d", e->error);
+                if (e->error < 0) {
+                    ds_put_format(&ds, "(%s)", strerror(-e->error));
+                }
+                ds_put_cstr(&ds, ", in-reply-to(");
+                nlmsghdr_to_string(&e->msg, &ds);
+                ds_put_cstr(&ds, "))");
+            } else {
+                ds_put_cstr(&ds, " error(truncated)");
+            }
+        } else if (h->nlmsg_type == NLMSG_DONE) {
+            int *error = buffer_at(buffer, NLMSG_HDRLEN, sizeof *error);
+            if (error) {
+                ds_put_format(&ds, " done(%d", *error);
+                if (*error < 0) {
+                    ds_put_format(&ds, "(%s)", strerror(-*error));
+                }
+                ds_put_cstr(&ds, ")");
+            } else {
+                ds_put_cstr(&ds, " done(truncated)");
+            }
+        }
+    } else {
+        ds_put_cstr(&ds, "nl(truncated)");
+    }
+    return ds.string;
+}
+
+static void
+log_nlmsg(const char *function, int error,
+          const void *message, size_t size)
+{
+    struct buffer buffer;
+    char *nlmsg;
+
+    if (!VLOG_IS_DBG_ENABLED()) {
+        return;
+    }
+
+    buffer.data = (void *) message;
+    buffer.size = size;
+    nlmsg = nlmsg_to_string(&buffer);
+    VLOG_DBG_RL(&rl, "%s (%s): %s", function, strerror(error), nlmsg);
+    free(nlmsg);
+}
+