From: Ben Pfaff Date: Mon, 29 Apr 2013 20:57:50 +0000 (-0700) Subject: netlink-socket: Simplify use of transactions and dumps. X-Git-Tag: sliver-openvswitch-2.0.90-1~36^2~7 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=a88b4e04128310d0eb0c3d811782f8619bceb95c;p=sliver-openvswitch.git netlink-socket: Simplify use of transactions and dumps. This disentangles "struct nl_dump" from "struct nl_sock", clearing the way to make the use of either one thread-safe in an obviously correct manner. Signed-off-by: Ben Pfaff --- diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 804a90ffd..958873cb7 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -162,7 +162,6 @@ static int ovs_flow_family; static int ovs_packet_family; /* Generic Netlink socket. */ -static struct nl_sock *genl_sock; static struct nln *nln = NULL; static int dpif_linux_init(void); @@ -692,7 +691,7 @@ dpif_linux_port_dump_start(const struct dpif *dpif_, void **statep) buf = ofpbuf_new(1024); dpif_linux_vport_to_ofpbuf(&request, buf); - nl_dump_start(&state->dump, genl_sock, buf); + nl_dump_start(&state->dump, NETLINK_GENERIC, buf); ofpbuf_delete(buf); return 0; @@ -898,7 +897,7 @@ dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep) buf = ofpbuf_new(1024); dpif_linux_flow_to_ofpbuf(&request, buf); - nl_dump_start(&state->dump, genl_sock, buf); + nl_dump_start(&state->dump, NETLINK_GENERIC, buf); ofpbuf_delete(buf); state->buf = NULL; @@ -1005,7 +1004,7 @@ dpif_linux_execute__(int dp_ifindex, const struct dpif_execute *execute) ofpbuf_use_stub(&request, request_stub, sizeof request_stub); dpif_linux_encode_execute(dp_ifindex, execute, &request); - error = nl_sock_transact(genl_sock, &request, NULL); + error = nl_transact(NETLINK_GENERIC, &request, NULL); ofpbuf_uninit(&request); return error; @@ -1090,7 +1089,7 @@ dpif_linux_operate__(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) for (i = 0; i < n_ops; i++) { txnsp[i] = &auxes[i].txn; } - nl_sock_transact_multiple(genl_sock, txnsp, n_ops); + nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops); for (i = 0; i < n_ops; i++) { struct op_auxdata *aux = &auxes[i]; @@ -1463,9 +1462,6 @@ dpif_linux_init(void) error = nl_lookup_genl_family(OVS_PACKET_FAMILY, &ovs_packet_family); } - if (!error) { - error = nl_sock_create(NETLINK_GENERIC, &genl_sock); - } if (!error) { error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP, &ovs_vport_mcgroup, @@ -1659,7 +1655,7 @@ dpif_linux_vport_transact(const struct dpif_linux_vport *request, request_buf = ofpbuf_new(1024); dpif_linux_vport_to_ofpbuf(request, request_buf); - error = nl_sock_transact(genl_sock, request_buf, bufp); + error = nl_transact(NETLINK_GENERIC, request_buf, bufp); ofpbuf_delete(request_buf); if (reply) { @@ -1780,7 +1776,7 @@ dpif_linux_dp_dump_start(struct nl_dump *dump) buf = ofpbuf_new(1024); dpif_linux_dp_to_ofpbuf(&request, buf); - nl_dump_start(dump, genl_sock, buf); + nl_dump_start(dump, NETLINK_GENERIC, buf); ofpbuf_delete(buf); } @@ -1801,7 +1797,7 @@ dpif_linux_dp_transact(const struct dpif_linux_dp *request, request_buf = ofpbuf_new(1024); dpif_linux_dp_to_ofpbuf(request, request_buf); - error = nl_sock_transact(genl_sock, request_buf, bufp); + error = nl_transact(NETLINK_GENERIC, request_buf, bufp); ofpbuf_delete(request_buf); if (reply) { @@ -1965,7 +1961,7 @@ dpif_linux_flow_transact(struct dpif_linux_flow *request, request_buf = ofpbuf_new(1024); dpif_linux_flow_to_ofpbuf(request, request_buf); - error = nl_sock_transact(genl_sock, request_buf, bufp); + error = nl_transact(NETLINK_GENERIC, request_buf, bufp); ofpbuf_delete(request_buf); if (reply) { diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 1f7388649..05877c1a4 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -409,9 +409,6 @@ static const struct netdev_rx_class netdev_rx_linux_class; /* Sockets used for ioctl operations. */ static int af_inet_sock = -1; /* AF_INET, SOCK_DGRAM. */ -/* A Netlink routing socket that is not subscribed to any multicast groups. */ -static struct nl_sock *rtnl_sock; - /* This is set pretty low because we probably won't learn anything from the * additional log messages. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); @@ -477,15 +474,6 @@ netdev_linux_init(void) if (status) { VLOG_ERR("failed to create inet socket: %s", ovs_strerror(status)); } - - /* Create rtnetlink socket. */ - if (!status) { - status = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); - if (status) { - VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s", - ovs_strerror(status)); - } - } } return status; } @@ -2025,7 +2013,7 @@ start_queue_dump(const struct netdev *netdev, struct nl_dump *dump) return false; } tcmsg->tcm_parent = 0; - nl_dump_start(dump, rtnl_sock, &request); + nl_dump_start(dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); return true; } @@ -3644,7 +3632,7 @@ tc_make_request(const struct netdev *netdev, int type, unsigned int flags, static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp) { - int error = nl_sock_transact(rtnl_sock, request, replyp); + int error = nl_transact(NETLINK_ROUTE, request, replyp); ofpbuf_uninit(request); return error; } @@ -4320,7 +4308,7 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats) ifi = ofpbuf_put_zeros(&request, sizeof *ifi); ifi->ifi_family = PF_UNSPEC; ifi->ifi_index = ifindex; - error = nl_sock_transact(rtnl_sock, &request, &reply); + error = nl_transact(NETLINK_ROUTE, &request, &reply); ofpbuf_uninit(&request); if (error) { return error; diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index aa7fca2fa..8e0884105 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -63,7 +63,6 @@ struct nl_sock { uint32_t next_seq; uint32_t pid; int protocol; - struct nl_dump *dump; unsigned int rcvbuf; /* Receive buffer size (SO_RCVBUF). */ }; @@ -77,11 +76,12 @@ struct nl_sock { * Initialized by nl_sock_create(). */ static int max_iovs; -static int nl_sock_cow__(struct nl_sock *); +static int nl_pool_alloc(int protocol, struct nl_sock **sockp); +static void nl_pool_release(struct nl_sock *); /* Creates a new netlink socket for the given netlink 'protocol' * (NETLINK_ROUTE, NETLINK_GENERIC, ...). Returns 0 and sets '*sockp' to the - * new socket if successful, otherwise returns a positive errno value. */ + * new socket if successful, otherwise returns a positive errno value. */ int nl_sock_create(int protocol, struct nl_sock **sockp) { @@ -117,7 +117,6 @@ nl_sock_create(int protocol, struct nl_sock **sockp) goto error; } sock->protocol = protocol; - sock->dump = NULL; sock->next_seq = 1; rcvbuf = 1024 * 1024; @@ -191,12 +190,8 @@ void nl_sock_destroy(struct nl_sock *sock) { if (sock) { - if (sock->dump) { - sock->dump = NULL; - } else { - close(sock->fd); - free(sock); - } + close(sock->fd); + free(sock); } } @@ -214,10 +209,6 @@ nl_sock_destroy(struct nl_sock *sock) int nl_sock_join_mcgroup(struct nl_sock *sock, unsigned int multicast_group) { - int error = nl_sock_cow__(sock); - if (error) { - return error; - } if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &multicast_group, sizeof multicast_group) < 0) { VLOG_WARN("could not join multicast group %u (%s)", @@ -240,7 +231,6 @@ nl_sock_join_mcgroup(struct nl_sock *sock, unsigned int multicast_group) int nl_sock_leave_mcgroup(struct nl_sock *sock, unsigned int multicast_group) { - ovs_assert(!sock->dump); if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &multicast_group, sizeof multicast_group) < 0) { VLOG_WARN("could not leave multicast group %u (%s)", @@ -301,10 +291,6 @@ int nl_sock_send_seq(struct nl_sock *sock, const struct ofpbuf *msg, uint32_t nlmsg_seq, bool wait) { - int error = nl_sock_cow__(sock); - if (error) { - return error; - } return nl_sock_send__(sock, msg, nlmsg_seq, wait); } @@ -395,10 +381,6 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) int nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait) { - int error = nl_sock_cow__(sock); - if (error) { - return error; - } return nl_sock_recv__(sock, buf, wait); } @@ -571,12 +553,6 @@ nl_sock_transact_multiple(struct nl_sock *sock, return; } - error = nl_sock_cow__(sock); - if (error) { - nl_sock_record_errors__(transactions, n, error); - return; - } - /* In theory, every request could have a 64 kB reply. But the default and * maximum socket rcvbuf size with typical Dom0 memory sizes both tend to * be a bit below 128 kB, so that would only allow a single message in a @@ -690,93 +666,36 @@ nl_sock_transact(struct nl_sock *sock, const struct ofpbuf *request, int nl_sock_drain(struct nl_sock *sock) { - int error = nl_sock_cow__(sock); - if (error) { - return error; - } return drain_rcvbuf(sock->fd); } -/* The client is attempting some operation on 'sock'. If 'sock' has an ongoing - * dump operation, then replace 'sock''s fd with a new socket and hand 'sock''s - * old fd over to the dump. */ -static int -nl_sock_cow__(struct nl_sock *sock) -{ - struct nl_sock *copy; - uint32_t tmp_pid; - int tmp_fd; - int error; - - if (!sock->dump) { - return 0; - } - - error = nl_sock_clone(sock, ©); - if (error) { - return error; - } - - tmp_fd = sock->fd; - sock->fd = copy->fd; - copy->fd = tmp_fd; - - tmp_pid = sock->pid; - sock->pid = copy->pid; - copy->pid = tmp_pid; - - sock->dump->sock = copy; - sock->dump = NULL; - - return 0; -} - -/* Starts a Netlink "dump" operation, by sending 'request' to the kernel via - * 'sock', and initializes 'dump' to reflect the state of the operation. +/* Starts a Netlink "dump" operation, by sending 'request' to the kernel on a + * Netlink socket created with the given 'protocol', and initializes 'dump' to + * reflect the state of the operation. * * nlmsg_len in 'msg' will be finalized to match msg->size, and nlmsg_pid will - * be set to 'sock''s pid, before the message is sent. NLM_F_DUMP and - * NLM_F_ACK will be set in nlmsg_flags. - * - * This Netlink socket library is designed to ensure that the dump is reliable - * and that it will not interfere with other operations on 'sock', including - * destroying or sending and receiving messages on 'sock'. One corner case is - * not handled: + * be set to the Netlink socket's pid, before the message is sent. NLM_F_DUMP + * and NLM_F_ACK will be set in nlmsg_flags. * - * - If 'sock' has been used to send a request (e.g. with nl_sock_send()) - * whose response has not yet been received (e.g. with nl_sock_recv()). - * This is unusual: usually nl_sock_transact() is used to send a message - * and receive its reply all in one go. + * The design of this Netlink socket library ensures that the dump is reliable. * * This function provides no status indication. An error status for the entire * dump operation is provided when it is completed by calling nl_dump_done(). * * The caller is responsible for destroying 'request'. - * - * The new 'dump' is independent of 'sock'. 'sock' and 'dump' may be destroyed - * in either order. */ void -nl_dump_start(struct nl_dump *dump, - struct nl_sock *sock, const struct ofpbuf *request) +nl_dump_start(struct nl_dump *dump, int protocol, const struct ofpbuf *request) { ofpbuf_init(&dump->buffer, 4096); - if (sock->dump) { - /* 'sock' already has an ongoing dump. Clone the socket because - * Netlink only allows one dump at a time. */ - dump->status = nl_sock_clone(sock, &dump->sock); - if (dump->status) { - return; - } - } else { - sock->dump = dump; - dump->sock = sock; - dump->status = 0; + dump->status = nl_pool_alloc(protocol, &dump->sock); + if (dump->status) { + return; } nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK; - dump->status = nl_sock_send__(sock, request, nl_sock_allocate_seq(sock, 1), - true); + dump->status = nl_sock_send__(dump->sock, request, + nl_sock_allocate_seq(dump->sock, 1), true); dump->seq = nl_msg_nlmsghdr(request)->nlmsg_seq; } @@ -862,21 +781,16 @@ int nl_dump_done(struct nl_dump *dump) { /* Drain any remaining messages that the client didn't read. Otherwise the - * kernel will continue to queue them up and waste buffer space. */ + * kernel will continue to queue them up and waste buffer space. + * + * XXX We could just destroy and discard the socket in this case. */ while (!dump->status) { struct ofpbuf reply; if (!nl_dump_next(dump, &reply)) { ovs_assert(dump->status); } } - - if (dump->sock) { - if (dump->sock->dump) { - dump->sock->dump = NULL; - } else { - nl_sock_destroy(dump->sock); - } - } + nl_pool_release(dump->sock); ofpbuf_uninit(&dump->buffer); return dump->status == EOF ? 0 : dump->status; } @@ -1089,6 +1003,79 @@ nl_lookup_genl_family(const char *name, int *number) } return *number > 0 ? 0 : -*number; } + +struct nl_pool { + struct nl_sock *socks[16]; + int n; +}; + +static struct nl_pool pools[MAX_LINKS]; + +static int +nl_pool_alloc(int protocol, struct nl_sock **sockp) +{ + struct nl_pool *pool; + + ovs_assert(protocol >= 0 && protocol < ARRAY_SIZE(pools)); + + pool = &pools[protocol]; + if (pool->n > 0) { + *sockp = pool->socks[--pool->n]; + return 0; + } else { + return nl_sock_create(protocol, sockp); + } +} + +static void +nl_pool_release(struct nl_sock *sock) +{ + if (sock) { + struct nl_pool *pool = &pools[sock->protocol]; + + if (pool->n < ARRAY_SIZE(pool->socks)) { + pool->socks[pool->n++] = sock; + } else { + nl_sock_destroy(sock); + } + } +} + +int +nl_transact(int protocol, const struct ofpbuf *request, + struct ofpbuf **replyp) +{ + struct nl_sock *sock; + int error; + + error = nl_pool_alloc(protocol, &sock); + if (error) { + *replyp = NULL; + return error; + } + + error = nl_sock_transact(sock, request, replyp); + + nl_pool_release(sock); + return error; +} + +void +nl_transact_multiple(int protocol, + struct nl_transaction **transactions, size_t n) +{ + struct nl_sock *sock; + int error; + + error = nl_pool_alloc(protocol, &sock); + if (!error) { + nl_sock_transact_multiple(sock, transactions, n); + nl_pool_release(sock); + } else { + nl_sock_record_errors__(transactions, n, error); + } +} + static uint32_t nl_sock_allocate_seq(struct nl_sock *sock, unsigned int n) diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h index 78dd7b23a..c77050ee9 100644 --- a/lib/netlink-socket.h +++ b/lib/netlink-socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,6 +84,11 @@ struct nl_transaction { void nl_sock_transact_multiple(struct nl_sock *, struct nl_transaction **, size_t n); +/* Transactions without an allocated socket. */ +int nl_transact(int protocol, const struct ofpbuf *request, + struct ofpbuf **replyp); +void nl_transact_multiple(int protocol, struct nl_transaction **, size_t n); + /* Table dumping. */ struct nl_dump { struct nl_sock *sock; /* Socket being dumped. */ @@ -92,7 +97,7 @@ struct nl_dump { int status; /* 0=OK, EOF=done, or positive errno value. */ }; -void nl_dump_start(struct nl_dump *, struct nl_sock *, +void nl_dump_start(struct nl_dump *, int protocol, const struct ofpbuf *request); bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply); int nl_dump_done(struct nl_dump *); diff --git a/lib/route-table.c b/lib/route-table.c index 5891ae8e3..d572e8cea 100644 --- a/lib/route-table.c +++ b/lib/route-table.c @@ -221,22 +221,13 @@ route_table_wait(void) static int route_table_reset(void) { - int error; struct nl_dump dump; struct rtgenmsg *rtmsg; struct ofpbuf request, reply; - struct nl_sock *rtnl_sock; route_map_clear(); route_table_valid = true; - error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); - if (error) { - VLOG_WARN_RL(&rl, "failed to reset routing table, " - "cannot create RTNETLINK_ROUTE socket"); - return error; - } - ofpbuf_init(&request, 0); nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETROUTE, NLM_F_REQUEST); @@ -244,7 +235,7 @@ route_table_reset(void) rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); rtmsg->rtgen_family = AF_INET; - nl_dump_start(&dump, rtnl_sock, &request); + nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); while (nl_dump_next(&dump, &reply)) { @@ -255,10 +246,7 @@ route_table_reset(void) } } - error = nl_dump_done(&dump); - nl_sock_destroy(rtnl_sock); - - return error; + return nl_dump_done(&dump); } @@ -417,26 +405,19 @@ name_table_uninit(void) static int name_table_reset(void) { - int error; struct nl_dump dump; struct rtgenmsg *rtmsg; struct ofpbuf request, reply; - struct nl_sock *rtnl_sock; name_table_valid = true; name_map_clear(); - error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); - if (error) { - VLOG_WARN_RL(&rl, "failed to create NETLINK_ROUTE socket"); - return error; - } ofpbuf_init(&request, 0); nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETLINK, NLM_F_REQUEST); rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); rtmsg->rtgen_family = AF_INET; - nl_dump_start(&dump, rtnl_sock, &request); + nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); while (nl_dump_next(&dump, &reply)) { @@ -453,7 +434,6 @@ name_table_reset(void) hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0)); } } - nl_sock_destroy(rtnl_sock); return nl_dump_done(&dump); }