datapath: Move Netlink PID for userspace actions from flows to actions.
[sliver-openvswitch.git] / datapath / datapath.c
index 4d40ac3..551b384 100644 (file)
@@ -84,7 +84,7 @@ EXPORT_SYMBOL(dp_ioctl_hook);
 static LIST_HEAD(dps);
 
 static struct vport *new_vport(const struct vport_parms *);
-static int queue_userspace_packets(struct datapath *, u32 pid, struct sk_buff *,
+static int queue_userspace_packets(struct datapath *, struct sk_buff *,
                                 const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
@@ -280,9 +280,10 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
-       int stats_counter_off;
+       u64 *stats_counter;
        int error;
 
+       stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
        OVS_CB(skb)->vport = p;
 
        if (!OVS_CB(skb)->flow) {
@@ -299,7 +300,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
                if (is_frag && dp->drop_frags) {
                        consume_skb(skb);
-                       stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
+                       stats_counter = &stats->n_frags;
                        goto out;
                }
 
@@ -310,32 +311,27 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
                        upcall.cmd = OVS_PACKET_CMD_MISS;
                        upcall.key = &key;
-                       upcall.userdata = 0;
-                       upcall.sample_pool = 0;
-                       upcall.actions = NULL;
-                       upcall.actions_len = 0;
+                       upcall.userdata = NULL;
+                       upcall.pid = p->upcall_pid;
                        dp_upcall(dp, skb, &upcall);
-                       stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+                       kfree_skb(skb);
+                       stats_counter = &stats->n_missed;
                        goto out;
                }
 
                OVS_CB(skb)->flow = flow;
        }
 
-       stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
+       stats_counter = &stats->n_hit;
        flow_used(OVS_CB(skb)->flow, skb);
        execute_actions(dp, skb);
 
 out:
        /* Update datapath statistics. */
-       local_bh_disable();
-       stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
 
        write_seqcount_begin(&stats->seqlock);
-       (*(u64 *)((u8 *)stats + stats_counter_off))++;
+       (*stats_counter)++;
        write_seqcount_end(&stats->seqlock);
-
-       local_bh_enable();
 }
 
 static void copy_and_csum_skb(struct sk_buff *skb, void *to)
@@ -361,20 +357,15 @@ static struct genl_family dp_packet_genl_family = {
        .maxattr = OVS_PACKET_ATTR_MAX
 };
 
-int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
+int dp_upcall(struct datapath *dp, struct sk_buff *skb,
+             const struct dp_upcall_info *upcall_info)
 {
+       struct sk_buff *segs = NULL;
        struct dp_stats_percpu *stats;
-       u32 pid;
        int err;
 
-       if (OVS_CB(skb)->flow)
-               pid = OVS_CB(skb)->flow->upcall_pid;
-       else
-               pid = OVS_CB(skb)->vport->upcall_pid;
-
-       if (pid == 0) {
+       if (upcall_info->pid == 0) {
                err = -ENOTCONN;
-               kfree_skb(skb);
                goto err;
        }
 
@@ -383,33 +374,37 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_i
        /* Break apart GSO packets into their component pieces.  Otherwise
         * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
        if (skb_is_gso(skb)) {
-               struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+               segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
                
-               if (IS_ERR(nskb)) {
-                       kfree_skb(skb);
-                       err = PTR_ERR(nskb);
+               if (IS_ERR(segs)) {
+                       err = PTR_ERR(segs);
                        goto err;
                }
-               consume_skb(skb);
-               skb = nskb;
+               skb = segs;
+       }
+
+       err = queue_userspace_packets(dp, skb, upcall_info);
+       if (segs) {
+               struct sk_buff *next;
+               /* Free GSO-segments */
+               do {
+                       next = segs->next;
+                       kfree_skb(segs);
+               } while ((segs = next) != NULL);
        }
 
-       err = queue_userspace_packets(dp, pid, skb, upcall_info);
        if (err)
                goto err;
 
        return 0;
 
 err:
-       local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
 
        write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
        write_seqcount_end(&stats->seqlock);
 
-       local_bh_enable();
-
        return err;
 }
 
@@ -417,56 +412,41 @@ err:
  * 'upcall_info'.  There will be only one packet unless we broke up a GSO
  * packet.
  */
-static int queue_userspace_packets(struct datapath *dp, u32 pid,
-                                  struct sk_buff *skb,
+static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
                                   const struct dp_upcall_info *upcall_info)
 {
        int dp_ifindex;
-       struct sk_buff *nskb;
-       int err;
 
        dp_ifindex = get_dpifindex(dp);
-       if (!dp_ifindex) {
-               err = -ENODEV;
-               nskb = skb->next;
-               goto err_kfree_skbs;
-       }
+       if (!dp_ifindex)
+               return -ENODEV;
 
        do {
                struct ovs_header *upcall;
                struct sk_buff *user_skb; /* to be queued to userspace */
                struct nlattr *nla;
                unsigned int len;
-
-               nskb = skb->next;
-               skb->next = NULL;
+               int err;
 
                err = vlan_deaccel_tag(skb);
                if (unlikely(err))
-                       goto err_kfree_skbs;
+                       return err;
 
-               if (nla_attr_size(skb->len) > USHRT_MAX) {
-                       err = -EFBIG;
-                       goto err_kfree_skbs;
-               }
+               if (nla_attr_size(skb->len) > USHRT_MAX)
+                       return -EFBIG;
 
                len = sizeof(struct ovs_header);
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
-               if (upcall_info->userdata)
+               if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
                        len += nla_total_size(8);
-               if (upcall_info->sample_pool)
-                       len += nla_total_size(4);
-               if (upcall_info->actions_len)
-                       len += nla_total_size(upcall_info->actions_len);
 
                user_skb = genlmsg_new(len, GFP_ATOMIC);
-               if (!user_skb) {
-                       err = -ENOMEM;
-                       goto err_kfree_skbs;
-               }
+               if (!user_skb)
+                       return -ENOMEM;
 
-               upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
+               upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+                                        0, upcall_info->cmd);
                upcall->dp_ifindex = dp_ifindex;
 
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
@@ -474,17 +454,8 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid,
                nla_nest_end(user_skb, nla);
 
                if (upcall_info->userdata)
-                       nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, upcall_info->userdata);
-               if (upcall_info->sample_pool)
-                       nla_put_u32(user_skb, OVS_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
-               if (upcall_info->actions_len) {
-                       const struct nlattr *actions = upcall_info->actions;
-                       u32 actions_len = upcall_info->actions_len;
-
-                       nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
-                       memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
-                       nla_nest_end(user_skb, nla);
-               }
+                       nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
+                                   nla_get_u64(upcall_info->userdata));
 
                nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
                if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -492,22 +463,13 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid,
                else
                        skb_copy_bits(skb, 0, nla_data(nla), skb->len);
 
-               err = genlmsg_unicast(&init_net, user_skb, pid);
+               err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
                if (err)
-                       goto err_kfree_skbs;
+                       return err;
 
-               consume_skb(skb);
-               skb = nskb;
-       } while (skb);
-       return 0;
+       } while ((skb = skb->next));
 
-err_kfree_skbs:
-       kfree_skb(skb);
-       while ((skb = nskb) != NULL) {
-               nskb = skb->next;
-               kfree_skb(skb);
-       }
-       return err;
+       return 0;
 }
 
 /* Called with genl_mutex. */
@@ -532,15 +494,63 @@ static int flush_flows(int dp_ifindex)
        return 0;
 }
 
-static int validate_actions(const struct nlattr *attr)
+static int validate_actions(const struct nlattr *attr, int depth);
+
+static int validate_sample(const struct nlattr *attr, int depth) /* Validate one OVS_ACTION_ATTR_SAMPLE action; returns 0 or a negative errno. */
+{ /* 'attr' is the nested sample attribute; 'depth' is the nesting level of the action list that contains it. */
+       static const struct nla_policy sample_policy[OVS_SAMPLE_ATTR_MAX + 1] =
+       {
+               [OVS_SAMPLE_ATTR_PROBABILITY] = {.type = NLA_U32 },
+               [OVS_SAMPLE_ATTR_ACTIONS] = {.type = NLA_UNSPEC }, /* Content is checked below by validate_actions(), not by the policy. */
+       };
+       struct nlattr *a[OVS_SAMPLE_ATTR_MAX + 1]; /* Parsed attributes, indexed by OVS_SAMPLE_ATTR_* type. */
+       int error;
+
+       error = nla_parse_nested(a, OVS_SAMPLE_ATTR_MAX, attr, sample_policy);
+       if (error)
+               return error; /* Propagate the parse failure unchanged. */
+
+       if (!a[OVS_SAMPLE_ATTR_PROBABILITY]) /* Both sub-attributes are mandatory. */
+               return -EINVAL;
+       if (!a[OVS_SAMPLE_ATTR_ACTIONS])
+               return -EINVAL;
+
+       return validate_actions(a[OVS_SAMPLE_ATTR_ACTIONS], (depth + 1)); /* Recurse one level deeper; validate_actions() returns -EOVERFLOW once depth reaches SAMPLE_ACTION_DEPTH. */
+}
+
+static int validate_userspace(const struct nlattr *attr) /* Validate one OVS_ACTION_ATTR_USERSPACE action; returns 0 or a negative errno. */
+{ /* 'attr' is the nested userspace attribute taken from an action list. */
+       static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =
+       {
+               [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+               [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, /* Type-checked by the policy only; its presence is not required here. */
+       };
+       struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; /* Parsed attributes, indexed by OVS_USERSPACE_ATTR_* type. */
+       int error;
+
+       error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, userspace_policy);
+       if (error)
+               return error; /* Propagate the parse failure unchanged. */
+
+       if (!a[OVS_USERSPACE_ATTR_PID] || !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) /* PID must be present and nonzero; dp_upcall() fails a zero pid with -ENOTCONN. */
+               return -EINVAL;
+
+       return 0;
+}
+
+static int validate_actions(const struct nlattr *attr, int depth)
 {
        const struct nlattr *a;
-       int rem;
+       int rem, err;
+
+       if (depth >= SAMPLE_ACTION_DEPTH)
+               return -EOVERFLOW;
 
        nla_for_each_nested(a, attr, rem) {
+               /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = 4,
-                       [OVS_ACTION_ATTR_USERSPACE] = 8,
+                       [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = 2,
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
@@ -553,17 +563,19 @@ static int validate_actions(const struct nlattr *attr)
                        [OVS_ACTION_ATTR_SET_TUNNEL] = 8,
                        [OVS_ACTION_ATTR_SET_PRIORITY] = 4,
                        [OVS_ACTION_ATTR_POP_PRIORITY] = 0,
+                       [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                int type = nla_type(a);
 
-               if (type > OVS_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
+               if (type > OVS_ACTION_ATTR_MAX ||
+                   (action_lens[type] != nla_len(a) &&
+                    action_lens[type] != (u32)-1))
                        return -EINVAL;
 
                switch (type) {
                case OVS_ACTION_ATTR_UNSPEC:
                        return -EINVAL;
 
-               case OVS_ACTION_ATTR_USERSPACE:
                case OVS_ACTION_ATTR_POP_VLAN:
                case OVS_ACTION_ATTR_SET_DL_SRC:
                case OVS_ACTION_ATTR_SET_DL_DST:
@@ -577,6 +589,12 @@ static int validate_actions(const struct nlattr *attr)
                        /* No validation needed. */
                        break;
 
+               case OVS_ACTION_ATTR_USERSPACE:
+                       err = validate_userspace(a);
+                       if (err)
+                               return err;
+                       break;
+
                case OVS_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
@@ -592,6 +610,12 @@ static int validate_actions(const struct nlattr *attr)
                                return -EINVAL;
                        break;
 
+               case OVS_ACTION_ATTR_SAMPLE:
+                       err = validate_sample(a, depth);
+                       if (err)
+                               return err;
+                       break;
+
                default:
                        return -EOPNOTSUPP;
                }
@@ -630,7 +654,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
            nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto err;
 
-       err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS]);
+       err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], 0);
        if (err)
                goto err;
 
@@ -672,11 +696,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
        flow->hash = flow_hash(&flow->key, key_len);
 
-       if (a[OVS_PACKET_ATTR_UPCALL_PID])
-               flow->upcall_pid = nla_get_u32(a[OVS_PACKET_ATTR_UPCALL_PID]);
-       else
-               flow->upcall_pid = NETLINK_CB(skb).pid;
-
        acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
@@ -695,7 +714,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
                OVS_CB(packet)->vport = get_vport_protected(dp,
                                                        flow->key.eth.in_port);
 
+       local_bh_disable();
        err = execute_actions(dp, packet);
+       local_bh_enable();
        rcu_read_unlock();
 
        flow_put(flow);
@@ -715,7 +736,6 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
-       [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 },
 };
 
 static struct genl_ops dp_packet_genl_ops[] = {
@@ -755,7 +775,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 
 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
-       [OVS_FLOW_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
 };
@@ -802,8 +821,6 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                goto error;
        nla_nest_end(skb, nla);
 
-       NLA_PUT_U32(skb, OVS_FLOW_ATTR_UPCALL_PID, flow->upcall_pid);
-
        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
@@ -897,7 +914,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 
        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
-               error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS]);
+               error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], 0);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
@@ -941,11 +958,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                flow->key = key;
                clear_stats(flow);
 
-               if (a[OVS_FLOW_ATTR_UPCALL_PID])
-                       flow->upcall_pid = nla_get_u32(a[OVS_FLOW_ATTR_UPCALL_PID]);
-               else
-                       flow->upcall_pid = NETLINK_CB(skb).pid;
-
                /* Obtain actions. */
                acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
@@ -995,9 +1007,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, OVS_FLOW_CMD_NEW);
 
-               if (a[OVS_FLOW_ATTR_UPCALL_PID])
-                       flow->upcall_pid = nla_get_u32(a[OVS_FLOW_ATTR_UPCALL_PID]);
-
                /* Clear stats. */
                if (a[OVS_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
@@ -1158,7 +1167,6 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
 #endif
        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
-       [OVS_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
 };
 
 static struct genl_family dp_datapath_genl_family = {
@@ -1201,9 +1209,6 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
        NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS,
                    dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO);
 
-       if (dp->sflow_probability)
-               NLA_PUT_U32(skb, OVS_DP_ATTR_SAMPLING, dp->sflow_probability);
-
        return genlmsg_end(skb, ovs_header);
 
 nla_put_failure:
@@ -1265,8 +1270,6 @@ static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MA
 {
        if (a[OVS_DP_ATTR_IPV4_FRAGS])
                dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP;
-       if (a[OVS_DP_ATTR_SAMPLING])
-               dp->sflow_probability = nla_get_u32(a[OVS_DP_ATTR_SAMPLING]);
 }
 
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1546,7 +1549,6 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 {
        struct ovs_header *ovs_header;
        struct nlattr *nla;
-       int ifindex;
        int err;
 
        ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
@@ -1573,10 +1575,6 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
        if (err == -EMSGSIZE)
                goto error;
 
-       ifindex = vport_get_ifindex(vport);
-       if (ifindex > 0)
-               NLA_PUT_U32(skb, OVS_VPORT_ATTR_IFINDEX, ifindex);
-
        return genlmsg_end(skb, ovs_header);
 
 nla_put_failure: