* derivatives without specific, written prior permission.
*/
+#include <config.h>
#include "netlink.h"
#include <assert.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
#include "buffer.h"
+#include "netlink-protocol.h"
+#include "dynamic-string.h"
+#include "timeval.h"
#include "util.h"
#include "vlog.h"
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
+
+/* A single (bad) Netlink message can in theory dump out many, many log
+ * messages, so the burst size is set quite high here to avoid missing useful
+ * information. Also, at high logging levels we log *all* Netlink messages. */
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600);
+
+static void log_nlmsg(const char *function, int error,
+ const void *message, size_t size);
\f
/* Netlink sockets. */
if (next_seq == 0) {
/* Pick initial sequence number. */
- next_seq = getpid() ^ time(0);
+ next_seq = getpid() ^ time_now();
}
*sockp = NULL;
int
nl_sock_send(struct nl_sock *sock, const struct buffer *msg, bool wait)
{
- int retval;
+ int error;
nl_msg_nlmsghdr(msg)->nlmsg_len = msg->size;
do {
+ int retval;
retval = send(sock->fd, msg->data, msg->size, wait ? 0 : MSG_DONTWAIT);
- } while (retval < 0 && errno == EINTR);
- return retval < 0 ? errno : 0;
+ error = retval < 0 ? errno : 0;
+ } while (error == EINTR);
+ log_nlmsg(__func__, error, msg->data, msg->size);
+ return error;
}
/* Tries to send the 'n_iov' chunks of data in 'iov' to the kernel on 'sock' as
bool wait)
{
struct msghdr msg;
- int retval;
+ int error;
memset(&msg, 0, sizeof msg);
msg.msg_iov = (struct iovec *) iov;
msg.msg_iovlen = n_iov;
do {
- retval = sendmsg(sock->fd, &msg, MSG_DONTWAIT);
- } while (retval < 0 && errno == EINTR);
- return retval < 0 ? errno : 0;
+ int retval;
+ retval = sendmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
+ error = retval < 0 ? errno : 0;
+ } while (error == EINTR);
+ if (error != EAGAIN) {
+ log_nlmsg(__func__, error, iov[0].iov_base, iov[0].iov_len);
+ }
+ return error;
}
/* Tries to receive a netlink message from the kernel on 'sock'. If
iov.iov_len = 1;
do {
nbytes2 = recvmsg(sock->fd, &msg, MSG_DONTWAIT);
- if (nbytes2 < 0) {
- VLOG_ERR("failed to remove nlmsg from socket: %d\n", errno);
- }
} while (nbytes2 < 0 && errno == EINTR);
-
- if (!NLMSG_OK(nlmsghdr, nbytes)) {
- VLOG_ERR("received invalid nlmsg (%zd bytes < %d)",
- bufsize, NLMSG_HDRLEN);
+ if (nbytes2 < 0) {
+ if (errno == ENOBUFS) {
+ /* The kernel is notifying us that a message it tried to send to us
+ * was dropped. We have to pass this along to the caller in case
+ * it wants to retry a request. So kill the buffer, which we can
+ * re-read next time. */
+ buffer_delete(buf);
+ return ENOBUFS;
+ } else {
+ VLOG_ERR_RL(&rl, "failed to remove nlmsg from socket: %s\n",
+ strerror(errno));
+ }
+ }
+ if (nbytes < sizeof *nlmsghdr
+ || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
+ || nlmsghdr->nlmsg_len > nbytes) {
+ VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %d)",
+ bufsize, NLMSG_HDRLEN);
buffer_delete(buf);
return EPROTO;
}
*bufp = buf;
+ log_nlmsg(__func__, 0, buf->data, buf->size);
return 0;
}
retval = nl_sock_recv(sock, &reply, true);
if (retval) {
if (retval == ENOBUFS) {
- VLOG_DBG("receive buffer overflow, resending request");
+ VLOG_DBG_RL(&rl, "receive buffer overflow, resending request");
goto send;
} else {
return retval;
}
nlmsghdr = nl_msg_nlmsghdr(reply);
if (seq != nlmsghdr->nlmsg_seq) {
- VLOG_DBG("ignoring seq %"PRIu32" != expected %"PRIu32,
- nl_msg_nlmsghdr(reply)->nlmsg_seq, seq);
+ VLOG_DBG_RL(&rl, "ignoring seq %"PRIu32" != expected %"PRIu32,
+ nl_msg_nlmsghdr(reply)->nlmsg_seq, seq);
buffer_delete(reply);
goto recv;
}
if (nl_msg_nlmsgerr(reply, &retval)) {
if (retval) {
- VLOG_DBG("received NAK error=%d (%s)", retval, strerror(retval));
+ VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
+ retval, strerror(retval));
}
return retval != EAGAIN ? retval : EPROTO;
}
struct nlmsgerr *err = buffer_at(msg, NLMSG_HDRLEN, sizeof *err);
int code = EPROTO;
if (!err) {
- VLOG_ERR("received invalid nlmsgerr (%zd bytes < %zd)",
- msg->size, NLMSG_HDRLEN + sizeof *err);
+ VLOG_ERR_RL(&rl, "received invalid nlmsgerr (%zd bytes < %zd)",
+ msg->size, NLMSG_HDRLEN + sizeof *err);
} else if (err->error <= 0 && err->error > INT_MIN) {
code = -err->error;
}
}
}
-/* Ensures that 'b' has room for at least 'size' bytes plus netlink pading at
+/* Ensures that 'b' has room for at least 'size' bytes plus netlink padding at
* its tail end, reallocating and copying its data if necessary. */
void
nl_msg_reserve(struct buffer *msg, size_t size)
{
- buffer_reserve_tailroom(msg, NLMSG_ALIGN(size));
+ buffer_prealloc_tailroom(msg, NLMSG_ALIGN(size));
}
/* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty.
p = buffer_at(msg, NLMSG_HDRLEN + GENL_HDRLEN, 0);
if (p == NULL) {
- VLOG_DBG("missing headers in nl_policy_parse");
+ VLOG_DBG_RL(&rl, "missing headers in nl_policy_parse");
return false;
}
tail = buffer_tail(msg);
/* Make sure its claimed length is plausible. */
if (nla->nla_len < NLA_HDRLEN) {
- VLOG_DBG("%zu: attr shorter than NLA_HDRLEN (%"PRIu16")",
- offset, nla->nla_len);
+ VLOG_DBG_RL(&rl, "%zu: attr shorter than NLA_HDRLEN (%"PRIu16")",
+ offset, nla->nla_len);
return false;
}
len = nla->nla_len - NLA_HDRLEN;
aligned_len = NLA_ALIGN(len);
if (aligned_len > tail - p) {
- VLOG_DBG("%zu: attr %"PRIu16" aligned data len (%zu) "
- "> bytes left (%tu)",
- offset, nla->nla_type, aligned_len, tail - p);
+ VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" aligned data len (%zu) "
+ "> bytes left (%tu)",
+ offset, nla->nla_type, aligned_len, tail - p);
return false;
}
min_len = p->min_len ? p->min_len : attr_len_range[p->type][0];
max_len = p->max_len ? p->max_len : attr_len_range[p->type][1];
if (len < min_len || len > max_len) {
- VLOG_DBG("%zu: attr %"PRIu16" length %zu not in allowed range "
- "%zu...%zu", offset, type, len, min_len, max_len);
+ VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" length %zu not in "
+ "allowed range %zu...%zu",
+ offset, type, len, min_len, max_len);
return false;
}
if (p->type == NL_A_STRING) {
if (((char *) nla)[nla->nla_len - 1]) {
- VLOG_DBG("%zu: attr %"PRIu16" lacks null terminator",
- offset, type);
+ VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" lacks null at end",
+ offset, type);
return false;
}
if (memchr(nla + 1, '\0', len - 1) != NULL) {
- VLOG_DBG("%zu: attr %"PRIu16" lies about string length",
- offset, type);
+ VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" has bad length",
+ offset, type);
return false;
}
}
p += NLA_ALIGN(nla->nla_len);
}
if (n_required) {
- VLOG_DBG("%zu required attrs missing", n_required);
+ VLOG_DBG_RL(&rl, "%zu required attrs missing", n_required);
return false;
}
return true;
assert(avail_sockets[sock / 32] & (1u << (sock % 32)));
avail_sockets[sock / 32] &= ~(1u << (sock % 32));
}
+\f
+static void
+nlmsghdr_to_string(const struct nlmsghdr *h, struct ds *ds)
+{
+ struct nlmsg_flag {
+ unsigned int bits;
+ const char *name;
+ };
+ static const struct nlmsg_flag flags[] = {
+ { NLM_F_REQUEST, "REQUEST" },
+ { NLM_F_MULTI, "MULTI" },
+ { NLM_F_ACK, "ACK" },
+ { NLM_F_ECHO, "ECHO" },
+ { NLM_F_DUMP, "DUMP" },
+ { NLM_F_ROOT, "ROOT" },
+ { NLM_F_MATCH, "MATCH" },
+ { NLM_F_ATOMIC, "ATOMIC" },
+ };
+ const struct nlmsg_flag *flag;
+ uint16_t flags_left;
+
+ ds_put_format(ds, "nl(len:%"PRIu32", type=%"PRIu16,
+ h->nlmsg_len, h->nlmsg_type);
+ if (h->nlmsg_type == NLMSG_NOOP) {
+ ds_put_cstr(ds, "(no-op)");
+ } else if (h->nlmsg_type == NLMSG_ERROR) {
+ ds_put_cstr(ds, "(error)");
+ } else if (h->nlmsg_type == NLMSG_DONE) {
+ ds_put_cstr(ds, "(done)");
+ } else if (h->nlmsg_type == NLMSG_OVERRUN) {
+ ds_put_cstr(ds, "(overrun)");
+ } else if (h->nlmsg_type < NLMSG_MIN_TYPE) {
+ ds_put_cstr(ds, "(reserved)");
+ } else {
+ ds_put_cstr(ds, "(family-defined)");
+ }
+ ds_put_format(ds, ", flags=%"PRIx16, h->nlmsg_flags);
+ flags_left = h->nlmsg_flags;
+ for (flag = flags; flag < &flags[ARRAY_SIZE(flags)]; flag++) {
+ if ((flags_left & flag->bits) == flag->bits) {
+ ds_put_format(ds, "[%s]", flag->name);
+ flags_left &= ~flag->bits;
+ }
+ }
+ if (flags_left) {
+ ds_put_format(ds, "[OTHER:%"PRIx16"]", flags_left);
+ }
+ ds_put_format(ds, ", seq=%"PRIx32", pid=%"PRIu32"(%d:%d))",
+ h->nlmsg_seq, h->nlmsg_pid,
+ (int) (h->nlmsg_pid & PROCESS_MASK),
+ (int) (h->nlmsg_pid >> PROCESS_BITS));
+}
+
+static char *
+nlmsg_to_string(const struct buffer *buffer)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ const struct nlmsghdr *h = buffer_at(buffer, 0, NLMSG_HDRLEN);
+ if (h) {
+ nlmsghdr_to_string(h, &ds);
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *e;
+ e = buffer_at(buffer, NLMSG_HDRLEN,
+ NLMSG_ALIGN(sizeof(struct nlmsgerr)));
+ if (e) {
+ ds_put_format(&ds, " error(%d", e->error);
+ if (e->error < 0) {
+ ds_put_format(&ds, "(%s)", strerror(-e->error));
+ }
+ ds_put_cstr(&ds, ", in-reply-to(");
+ nlmsghdr_to_string(&e->msg, &ds);
+ ds_put_cstr(&ds, "))");
+ } else {
+ ds_put_cstr(&ds, " error(truncated)");
+ }
+ } else if (h->nlmsg_type == NLMSG_DONE) {
+ int *error = buffer_at(buffer, NLMSG_HDRLEN, sizeof *error);
+ if (error) {
+ ds_put_format(&ds, " done(%d", *error);
+ if (*error < 0) {
+ ds_put_format(&ds, "(%s)", strerror(-*error));
+ }
+ ds_put_cstr(&ds, ")");
+ } else {
+ ds_put_cstr(&ds, " done(truncated)");
+ }
+ }
+ } else {
+ ds_put_cstr(&ds, "nl(truncated)");
+ }
+ return ds.string;
+}
+
+static void
+log_nlmsg(const char *function, int error,
+ const void *message, size_t size)
+{
+ struct buffer buffer;
+ char *nlmsg;
+
+ if (!VLOG_IS_DBG_ENABLED()) {
+ return;
+ }
+
+ buffer.data = (void *) message;
+ buffer.size = size;
+ nlmsg = nlmsg_to_string(&buffer);
+ VLOG_DBG_RL(&rl, "%s (%s): %s", function, strerror(error), nlmsg);
+ free(nlmsg);
+}
+