2 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
22 #include <sys/types.h>
25 #include "netlink-protocol.h"
28 #include "unaligned.h"
/* Declares the logging module for this file as "netlink" (presumably the
 * module that the VLOG_*_RL() calls below log under -- confirm in vlog.h). */
31 VLOG_DEFINE_THIS_MODULE(netlink);
33 /* A single (bad) Netlink message can in theory dump out many, many log
 * messages, so the burst size is set quite high here to avoid missing useful
 * information.  Also, at high logging levels we log *all* Netlink messages.
 *
 * NOTE(review): the two VLOG_RATE_LIMIT_INIT() arguments are presumably the
 * sustained rate and the burst size, in that order -- confirm in vlog.h. */
36 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600);
38 /* Returns the nlmsghdr at the head of 'msg'.
40 * 'msg' must be at least as large as a nlmsghdr. */
42 nl_msg_nlmsghdr(const struct ofpbuf *msg)
44 return ofpbuf_at_assert(msg, 0, NLMSG_HDRLEN);
47 /* Returns the genlmsghdr just past 'msg''s nlmsghdr.
49 * Returns a null pointer if 'msg' is not large enough to contain an nlmsghdr
50 * and a genlmsghdr. */
52 nl_msg_genlmsghdr(const struct ofpbuf *msg)
54 return ofpbuf_at(msg, NLMSG_HDRLEN, GENL_HDRLEN);
57 /* If 'msg' is an NLMSG_ERROR message, stores 0 in '*errorp' if it is an ACK
 * message, otherwise a positive errno value, and returns true.  If 'msg' is
 * not an NLMSG_ERROR message, returns false.
 *
 * 'msg' must be at least as large as a nlmsghdr. */
63 nl_msg_nlmsgerr(const struct ofpbuf *msg, int *errorp)
/* Only messages whose header type is NLMSG_ERROR carry a struct nlmsgerr. */
65 if (nl_msg_nlmsghdr(msg)->nlmsg_type == NLMSG_ERROR) {
/* The nlmsgerr payload is looked up directly after the Netlink header. */
66 struct nlmsgerr *err = ofpbuf_at(msg, NLMSG_HDRLEN, sizeof *err);
/* Logged when 'msg' is too short to contain a full struct nlmsgerr.
 *
 * NOTE(review): "%zd" is the signed conversion, but the second argument
 * (NLMSG_HDRLEN + sizeof *err) is a size_t; "%zu" would be the matching
 * specifier.  Presumably 'msg->size' is a size_t as well -- confirm. */
69 VLOG_ERR_RL(&rl, "received invalid nlmsgerr (%zd bytes < %zd)",
70 msg->size, NLMSG_HDRLEN + sizeof *err);
/* Accept only zero or negative values of 'err->error'.  INT_MIN is excluded,
 * presumably because it cannot be negated into a positive errno without
 * overflowing. */
71 } else if (err->error <= 0 && err->error > INT_MIN) {
/* Makes sure that the tail end of 'msg' has room for at least 'size' bytes
 * rounded up to the Netlink alignment, reallocating and copying the data in
 * 'msg' if necessary. */
void
nl_msg_reserve(struct ofpbuf *msg, size_t size)
{
    size_t padded_size = NLMSG_ALIGN(size);

    ofpbuf_prealloc_tailroom(msg, padded_size);
}
94 /* Next nlmsghdr sequence number.
 *
 * This implementation uses sequence numbers that are unique process-wide,
 * to avoid a hypothetical race: send request, close socket, open new
 * socket that reuses the old socket's PID value, send request on new
 * socket, receive reply from kernel to old socket but with same PID and
 * sequence number.  (This race could be avoided other ways, e.g. by
 * preventing PIDs from being quickly reused). */
102 static uint32_t next_seq;
105 /* Pick initial sequence number. */
/* The seed mixes the process ID with the wall-clock time, presumably so that
 * separate processes and separate runs start from different values.
 *
 * NOTE(review): confirm the condition that guards this assignment and how
 * 'next_seq' is advanced; nl_msg_put_nlmsghdr() obtains its sequence numbers
 * through get_nlmsg_seq(). */
106 next_seq = getpid() ^ time_wall();
111 /* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty.
112 * Uses the given 'type' and 'flags'. 'expected_payload' should be
113 * an estimate of the number of payload bytes to be supplied; if the size of
114 * the payload is unknown a value of 0 is acceptable.
116 * 'type' is ordinarily an enumerated value specific to the Netlink protocol
117 * (e.g. RTM_NEWLINK, for NETLINK_ROUTE protocol). For Generic Netlink, 'type'
118 * is the family number obtained via nl_lookup_genl_family().
120 * 'flags' is a bit-mask that indicates what kind of request is being made. It
121 * is often NLM_F_REQUEST indicating that a request is being made, commonly
122 * or'd with NLM_F_ACK to request an acknowledgement.
124 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
125 * fill it in just before sending the message.
127 * nl_msg_put_genlmsghdr() is more convenient for composing a Generic Netlink
130 nl_msg_put_nlmsghdr(struct ofpbuf *msg,
131 size_t expected_payload, uint32_t type, uint32_t flags)
133 struct nlmsghdr *nlmsghdr;
135 assert(msg->size == 0);
137 nl_msg_reserve(msg, NLMSG_HDRLEN + expected_payload);
138 nlmsghdr = nl_msg_put_uninit(msg, NLMSG_HDRLEN);
139 nlmsghdr->nlmsg_len = 0;
140 nlmsghdr->nlmsg_type = type;
141 nlmsghdr->nlmsg_flags = flags;
142 nlmsghdr->nlmsg_seq = get_nlmsg_seq();
143 nlmsghdr->nlmsg_pid = 0;
146 /* Puts a nlmsghdr and genlmsghdr at the beginning of 'msg', which must be
147 * initially empty. 'expected_payload' should be an estimate of the number of
148 * payload bytes to be supplied; if the size of the payload is unknown a value
149 * of 0 is acceptable.
151 * 'family' is the family number obtained via nl_lookup_genl_family().
153 * 'flags' is a bit-mask that indicates what kind of request is being made. It
154 * is often NLM_F_REQUEST indicating that a request is being made, commonly
155 * or'd with NLM_F_ACK to request an acknowledgement.
157 * 'cmd' is an enumerated value specific to the Generic Netlink family
158 * (e.g. CTRL_CMD_NEWFAMILY for the GENL_ID_CTRL family).
160 * 'version' is a version number specific to the family and command (often 1).
162 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
163 * fill it in just before sending the message.
165 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
166 * not Generic Netlink messages. */
168 nl_msg_put_genlmsghdr(struct ofpbuf *msg, size_t expected_payload,
169 int family, uint32_t flags, uint8_t cmd, uint8_t version)
171 struct genlmsghdr *genlmsghdr;
173 nl_msg_put_nlmsghdr(msg, GENL_HDRLEN + expected_payload, family, flags);
174 assert(msg->size == NLMSG_HDRLEN);
175 genlmsghdr = nl_msg_put_uninit(msg, GENL_HDRLEN);
176 genlmsghdr->cmd = cmd;
177 genlmsghdr->version = version;
178 genlmsghdr->reserved = 0;
/* Copies the 'size' bytes at 'data' onto the tail end of 'msg', followed by
 * Netlink padding if needed.  Data in 'msg' is reallocated and copied if
 * necessary. */
void
nl_msg_put(struct ofpbuf *msg, const void *data, size_t size)
{
    void *dst = nl_msg_put_uninit(msg, size);

    memcpy(dst, data, size);
}
/* Extends the tail end of 'msg' by 'size' bytes plus whatever Netlink padding
 * is needed, reallocating and copying its data if necessary.
 *
 * Returns a pointer to the first of the 'size' new bytes, which are left
 * uninitialized.  The padding bytes, if any, are zeroed. */
void *
nl_msg_put_uninit(struct ofpbuf *msg, size_t size)
{
    size_t padded_size = NLMSG_ALIGN(size);
    char *start = ofpbuf_put_uninit(msg, padded_size);

    if (padded_size != size) {
        memset(start + size, 0, padded_size - size);
    }
    return start;
}
204 /* Appends a Netlink attribute of the given 'type' and room for 'size' bytes of
205 * data as its payload, plus Netlink padding if needed, to the tail end of
206 * 'msg', reallocating and copying its data if necessary. Returns a pointer to
207 * the first byte of data in the attribute, which is left uninitialized. */
209 nl_msg_put_unspec_uninit(struct ofpbuf *msg, uint16_t type, size_t size)
211 size_t total_size = NLA_HDRLEN + size;
212 struct nlattr* nla = nl_msg_put_uninit(msg, total_size);
213 assert(NLA_ALIGN(total_size) <= UINT16_MAX);
214 nla->nla_len = total_size;
215 nla->nla_type = type;
/* Appends a Netlink attribute of the given 'type' with the 'size' bytes of
 * 'data' as its payload, plus Netlink padding if needed, to the tail end of
 * 'msg', reallocating and copying its data if necessary.
 *
 * 'data' may be a null pointer when 'size' is 0; in that case only the
 * attribute header is appended. */
void
nl_msg_put_unspec(struct ofpbuf *msg, uint16_t type,
                  const void *data, size_t size)
{
    void *payload = nl_msg_put_unspec_uninit(msg, type, size);

    /* memcpy() requires valid pointers even for a zero-length copy, and
     * header-only attributes are added with a null 'data', so skip the copy
     * when there is nothing to copy. */
    if (size) {
        memcpy(payload, data, size);
    }
}
/* Appends a Netlink attribute of the given 'type' and no payload to 'msg'.
 * (Some Netlink protocols use the presence or absence of an attribute as a
 * Boolean flag.) */
void
nl_msg_put_flag(struct ofpbuf *msg, uint16_t type)
{
    /* A flag consists of the attribute header alone.  Append it directly
     * rather than asking nl_msg_put_unspec() to copy 0 bytes from a null
     * pointer, which memcpy() does not permit. */
    nl_msg_put_unspec_uninit(msg, type, 0);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the 8-bit 'value'. */
void
nl_msg_put_u8(struct ofpbuf *msg, uint16_t type, uint8_t value)
{
    void *payload = nl_msg_put_unspec_uninit(msg, type, sizeof value);

    memcpy(payload, &value, sizeof value);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the 16-bit 'value', stored in host byte order. */
void
nl_msg_put_u16(struct ofpbuf *msg, uint16_t type, uint16_t value)
{
    void *payload = nl_msg_put_unspec_uninit(msg, type, sizeof value);

    memcpy(payload, &value, sizeof value);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the 32-bit 'value', stored in host byte order. */
void
nl_msg_put_u32(struct ofpbuf *msg, uint16_t type, uint32_t value)
{
    void *payload = nl_msg_put_unspec_uninit(msg, type, sizeof value);

    memcpy(payload, &value, sizeof value);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the 64-bit 'value', stored in host byte order. */
void
nl_msg_put_u64(struct ofpbuf *msg, uint16_t type, uint64_t value)
{
    void *payload = nl_msg_put_unspec_uninit(msg, type, sizeof value);

    memcpy(payload, &value, sizeof value);
}
271 /* Appends a Netlink attribute of the given 'type' and the given 16-bit network
272 * byte order 'value' to 'msg'. */
274 nl_msg_put_be16(struct ofpbuf *msg, uint16_t type, ovs_be16 value)
276 nl_msg_put_unspec(msg, type, &value, sizeof value);
279 /* Appends a Netlink attribute of the given 'type' and the given 32-bit network
280 * byte order 'value' to 'msg'. */
282 nl_msg_put_be32(struct ofpbuf *msg, uint16_t type, ovs_be32 value)
284 nl_msg_put_unspec(msg, type, &value, sizeof value);
287 /* Appends a Netlink attribute of the given 'type' and the given 64-bit network
288 * byte order 'value' to 'msg'. */
290 nl_msg_put_be64(struct ofpbuf *msg, uint16_t type, ovs_be64 value)
292 nl_msg_put_unspec(msg, type, &value, sizeof value);
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the null-terminated string 'value', terminator included. */
void
nl_msg_put_string(struct ofpbuf *msg, uint16_t type, const char *value)
{
    /* +1 so that the terminating null byte becomes part of the payload. */
    size_t payload_len = strlen(value) + 1;

    nl_msg_put_unspec(msg, type, value, payload_len);
}
303 /* Adds the header for nested Netlink attributes to 'msg', with the specified
304 * 'type', and returns the header's offset within 'msg'. The caller should add
305 * the content for the nested Netlink attribute to 'msg' (e.g. using the other
306 * nl_msg_*() functions), and then pass the returned offset to
307 * nl_msg_end_nested() to finish up the nested attributes. */
309 nl_msg_start_nested(struct ofpbuf *msg, uint16_t type)
311 size_t offset = msg->size;
312 nl_msg_put_unspec(msg, type, NULL, 0);
316 /* Finalizes a nested Netlink attribute in 'msg'. 'offset' should be the value
317 * returned by nl_msg_start_nested(). */
319 nl_msg_end_nested(struct ofpbuf *msg, size_t offset)
321 struct nlattr *attr = ofpbuf_at_assert(msg, offset, sizeof *attr);
322 attr->nla_len = msg->size - offset;
/* Appends to 'msg' a nested Netlink attribute of the given 'type' whose
 * content is the 'size' bytes starting at 'data'. */
void
nl_msg_put_nested(struct ofpbuf *msg,
                  uint16_t type, const void *data, size_t size)
{
    size_t header_offset = nl_msg_start_nested(msg, type);

    nl_msg_put(msg, data, size);
    nl_msg_end_nested(msg, header_offset);
}
336 /* If 'buffer' begins with a valid "struct nlmsghdr", pulls the header and its
 * payload off 'buffer', stores header and payload in 'msg->data' and
 * 'msg->size', and returns a pointer to the header.
 *
 * If 'buffer' does not begin with a "struct nlmsghdr" or begins with one that
 * is invalid, returns NULL without modifying 'buffer'. */
343 nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg)
/* A header can only be examined if 'buffer' holds at least one complete
 * struct nlmsghdr. */
345 if (buffer->size >= sizeof(struct nlmsghdr)) {
346 struct nlmsghdr *nlmsghdr = nl_msg_nlmsghdr(buffer);
347 size_t len = nlmsghdr->nlmsg_len;
/* The length claimed by the header must cover at least the header itself
 * and must not exceed the bytes actually present in 'buffer'. */
348 if (len >= sizeof *nlmsghdr && len <= buffer->size) {
/* Point 'msg' at the 'len' bytes of the message (presumably in place,
 * without copying -- confirm in ofpbuf.h), then advance 'buffer' past them.
 * 'buffer' is advanced by exactly 'len'; no alignment padding is skipped
 * here. */
349 ofpbuf_use_const(msg, nlmsghdr, len);
350 ofpbuf_pull(buffer, len);
362 /* Returns the bits of 'nla->nla_type' that are significant for determining its
365 nl_attr_type(const struct nlattr *nla)
367 return nla->nla_type & NLA_TYPE_MASK;
370 /* Returns the first byte in the payload of attribute 'nla'. */
372 nl_attr_get(const struct nlattr *nla)
374 assert(nla->nla_len >= NLA_HDRLEN);
378 /* Returns the number of bytes in the payload of attribute 'nla'. */
380 nl_attr_get_size(const struct nlattr *nla)
382 assert(nla->nla_len >= NLA_HDRLEN);
383 return nla->nla_len - NLA_HDRLEN;
386 /* Asserts that 'nla''s payload is at least 'size' bytes long, and returns the
387 * first byte of the payload. */
389 nl_attr_get_unspec(const struct nlattr *nla, size_t size)
391 assert(nla->nla_len >= NLA_HDRLEN + size);
/* Reports whether a flag attribute is present: true if 'nla' is nonnull,
 * false if it is a null pointer.  (Some Netlink protocols use the presence or
 * absence of an attribute as a Boolean flag.) */
bool
nl_attr_get_flag(const struct nlattr *nla)
{
    return nla ? true : false;
}
/* Reads the beginning of attribute NLA's payload as a value of type TYPE.
 *
 * nl_attr_get_unspec() asserts that the payload is at least sizeof(TYPE) bytes
 * long.  The payload is dereferenced directly as a TYPE, so this is only for
 * types that may be read at the payload's alignment (the 64-bit getters go
 * through the 32-bit-aligned accessors instead).
 *
 * The expansion refers to its NLA argument, not to whatever variable happens
 * to be called 'nla' at the point of use. */
#define NL_ATTR_GET_AS(NLA, TYPE) \
        (*(const TYPE *) nl_attr_get_unspec((NLA), sizeof(TYPE)))
/* Reads 'nla''s payload as an 8-bit value.
 *
 * Asserts that 'nla''s payload is at least 1 byte long. */
uint8_t
nl_attr_get_u8(const struct nlattr *nla)
{
    const uint8_t *payload = nl_attr_get_unspec(nla, sizeof *payload);

    return *payload;
}
/* Reads 'nla''s payload as a 16-bit value in host byte order.
 *
 * Asserts that 'nla''s payload is at least 2 bytes long. */
uint16_t
nl_attr_get_u16(const struct nlattr *nla)
{
    const uint16_t *payload = nl_attr_get_unspec(nla, sizeof *payload);

    return *payload;
}
/* Reads 'nla''s payload as a 32-bit value in host byte order.
 *
 * Asserts that 'nla''s payload is at least 4 bytes long. */
uint32_t
nl_attr_get_u32(const struct nlattr *nla)
{
    const uint32_t *payload = nl_attr_get_unspec(nla, sizeof *payload);

    return *payload;
}
433 /* Returns the 64-bit host byte order value in 'nla''s payload.
435 * Asserts that 'nla''s payload is at least 8 bytes long. */
437 nl_attr_get_u64(const struct nlattr *nla)
439 const ovs_32aligned_u64 *x = nl_attr_get_unspec(nla, sizeof *x);
440 return get_32aligned_u64(x);
443 /* Returns the 16-bit network byte order value in 'nla''s payload.
445 * Asserts that 'nla''s payload is at least 2 bytes long. */
447 nl_attr_get_be16(const struct nlattr *nla)
449 return NL_ATTR_GET_AS(nla, ovs_be16);
452 /* Returns the 32-bit network byte order value in 'nla''s payload.
454 * Asserts that 'nla''s payload is at least 4 bytes long. */
456 nl_attr_get_be32(const struct nlattr *nla)
458 return NL_ATTR_GET_AS(nla, ovs_be32);
461 /* Returns the 64-bit network byte order value in 'nla''s payload.
463 * Asserts that 'nla''s payload is at least 8 bytes long. */
465 nl_attr_get_be64(const struct nlattr *nla)
467 const ovs_32aligned_be64 *x = nl_attr_get_unspec(nla, sizeof *x);
468 return get_32aligned_be64(x);
471 /* Returns the null-terminated string value in 'nla''s payload.
473 * Asserts that 'nla''s payload contains a null-terminated string. */
475 nl_attr_get_string(const struct nlattr *nla)
477 assert(nla->nla_len > NLA_HDRLEN);
478 assert(memchr(nl_attr_get(nla), '\0', nla->nla_len - NLA_HDRLEN) != NULL);
479 return nl_attr_get(nla);
/* Sets up 'nested' to refer to the payload of 'nla', so that the attributes
 * nested inside 'nla' can be handled as a buffer of their own. */
void
nl_attr_get_nested(const struct nlattr *nla, struct ofpbuf *nested)
{
    const void *payload = nl_attr_get(nla);
    size_t payload_len = nl_attr_get_size(nla);

    ofpbuf_use_const(nested, payload, payload_len);
}
489 /* Default minimum and maximum payload sizes for each type of attribute. */
490 static const size_t attr_len_range[][2] = {
491 [0 ... N_NL_ATTR_TYPES - 1] = { 0, SIZE_MAX },
492 [NL_A_U8] = { 1, 1 },
493 [NL_A_U16] = { 2, 2 },
494 [NL_A_U32] = { 4, 4 },
495 [NL_A_U64] = { 8, 8 },
496 [NL_A_STRING] = { 1, SIZE_MAX },
497 [NL_A_FLAG] = { 0, SIZE_MAX },
498 [NL_A_NESTED] = { 0, SIZE_MAX },
501 /* Parses the 'msg' starting at the given 'nla_offset' as a sequence of Netlink
 * attributes.  'policy[i]', for 0 <= i < n_attrs, specifies how the attribute
 * with nla_type == i is parsed; a pointer to attribute i is stored in
 * attrs[i].  Returns true if successful, false on failure.
 *
 * If the Netlink attributes in 'msg' follow a Netlink header and a Generic
 * Netlink header, then 'nla_offset' should be NLMSG_HDRLEN + GENL_HDRLEN. */
509 nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
510 const struct nl_policy policy[],
511 struct nlattr *attrs[], size_t n_attrs)
/* Pass over the policy: sanity-check each entry's type and single out the
 * entries that are neither NL_A_NO_ATTR, nor NL_A_FLAG, nor optional
 * (presumably these are tallied in 'n_required', which is checked below). */
518 for (i = 0; i < n_attrs; i++) {
521 assert(policy[i].type < N_NL_ATTR_TYPES);
522 if (policy[i].type != NL_A_NO_ATTR
523 && policy[i].type != NL_A_FLAG
524 && !policy[i].optional) {
/* Find where the attributes start.  The lookup asks for 0 bytes, so
 * presumably it only fails when 'nla_offset' lies beyond the end of 'msg'. */
529 p = ofpbuf_at(msg, nla_offset, 0);
531 VLOG_DBG_RL(&rl, "missing headers in nl_policy_parse");
/* 'tail' is the end of 'msg'; it bounds the attribute walk below. */
534 tail = ofpbuf_tail(msg);
/* Byte offset of the current attribute within 'msg'; used in log messages. */
537 size_t offset = (char*)p - (char*)msg->data;
538 struct nlattr *nla = p;
539 size_t len, aligned_len;
542 /* Make sure its claimed length is plausible. */
543 if (nla->nla_len < NLA_HDRLEN) {
544 VLOG_DBG_RL(&rl, "%zu: attr shorter than NLA_HDRLEN (%"PRIu16")",
545 offset, nla->nla_len);
/* 'len' is the payload length only (header excluded); 'aligned_len' adds the
 * padding that follows the payload. */
548 len = nla->nla_len - NLA_HDRLEN;
549 aligned_len = NLA_ALIGN(len);
/* NOTE(review): 'aligned_len' excludes the NLA_HDRLEN-byte header, but it is
 * compared against the bytes remaining from the start of the header ('p').
 * An attribute whose padded payload extends up to NLA_HDRLEN bytes past
 * 'tail' therefore passes this check.  Comparing NLA_HDRLEN + aligned_len
 * (or NLA_ALIGN(nla->nla_len)) against the bytes left looks like the intended
 * test -- confirm. */
550 if (aligned_len > (char*)tail - (char*)p) {
551 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" aligned data len (%zu) "
552 "> bytes left (%tu)",
553 offset, nl_attr_type(nla), aligned_len,
554 (char*)tail - (char*)p);
/* Only attribute types that fall inside the policy array and are not marked
 * NL_A_NO_ATTR are validated; all others take the "skip" path below. */
558 type = nl_attr_type(nla);
559 if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) {
560 const struct nl_policy *e = &policy[type];
561 size_t min_len, max_len;
563 /* Validate length and content. */
/* A 'min_len' or 'max_len' of 0 in the policy entry selects the default for
 * the entry's type from attr_len_range[]. */
564 min_len = e->min_len ? e->min_len : attr_len_range[e->type][0];
565 max_len = e->max_len ? e->max_len : attr_len_range[e->type][1];
566 if (len < min_len || len > max_len) {
567 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" length %zu not in "
568 "allowed range %zu...%zu",
569 offset, type, len, min_len, max_len);
/* A string payload must end in a null byte and must not contain one any
 * earlier.  'len - 1' cannot wrap: the default minimum length for NL_A_STRING
 * is 1 and a policy 'min_len' of 0 falls back on that default, so 'len' is at
 * least 1 once the range check above has passed. */
572 if (e->type == NL_A_STRING) {
573 if (((char *) nla)[nla->nla_len - 1]) {
574 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" lacks null at end",
578 if (memchr(nla + 1, '\0', len - 1) != NULL) {
579 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" has bad length",
/* First occurrence of a required attribute. */
584 if (!e->optional && attrs[type] == NULL) {
585 assert(n_required > 0);
/* NOTE(review): confirm whether a duplicate attribute is only logged or also
 * makes parsing fail. */
589 VLOG_DBG_RL(&rl, "%zu: duplicate attr %"PRIu16, offset, type);
593 /* Skip attribute type that we don't care about. */
/* Step to the next attribute: the full attribute length, header included,
 * rounded up to the attribute alignment. */
595 p = (char*)p + NLA_ALIGN(nla->nla_len);
/* Logged when attributes that the policy requires were not all found. */
598 VLOG_DBG_RL(&rl, "%zu required attrs missing", n_required);
604 /* Parses the Netlink attributes within 'nla'. 'policy[i]', for 0 <= i <
605 * n_attrs, specifies how the attribute with nla_type == i is parsed; a pointer
606 * to attribute i is stored in attrs[i]. Returns true if successful, false on
609 nl_parse_nested(const struct nlattr *nla, const struct nl_policy policy[],
610 struct nlattr *attrs[], size_t n_attrs)
614 nl_attr_get_nested(nla, &buf);
615 return nl_policy_parse(&buf, 0, policy, attrs, n_attrs);
/* Scans the 'size' bytes of Netlink attributes that start at 'attrs' for one
 * whose type, as reported by nl_attr_type(), equals 'type'.
 *
 * NOTE(review): the public wrappers document the result as the first
 * attribute with the specified type; confirm what is returned when no
 * attribute matches (presumably a null pointer). */
618 static const struct nlattr *
619 nl_attr_find__(const struct nlattr *attrs, size_t size, uint16_t type)
621 const struct nlattr *nla;
/* Walk the attributes one at a time; 'left' is the iteration macro's
 * bookkeeping variable (presumably the bytes not yet visited -- confirm
 * against the NL_ATTR_FOR_EACH definition). */
624 NL_ATTR_FOR_EACH (nla, left, attrs, size) {
/* Compare on the masked type so that flag bits in 'nla_type' are ignored. */
625 if (nl_attr_type (nla) == type) {
632 /* Returns the first Netlink attribute within 'buf' with the specified 'type',
633 * skipping a header of 'hdr_len' bytes at the beginning of 'buf'.
635 * This function does not validate the attribute's length. */
636 const struct nlattr *
637 nl_attr_find(const struct ofpbuf *buf, size_t hdr_len, uint16_t type)
639 const uint8_t *start = (const uint8_t *) buf->data + hdr_len;
640 return nl_attr_find__((const struct nlattr *) start, buf->size - hdr_len,
/* Searches the attributes nested inside 'nla' and returns the first one with
 * the specified 'type'.
 *
 * This function does not validate the attribute's length. */
const struct nlattr *
nl_attr_find_nested(const struct nlattr *nla, uint16_t type)
{
    const struct nlattr *first = nl_attr_get(nla);
    size_t nested_len = nl_attr_get_size(nla);

    return nl_attr_find__(first, nested_len, type);
}