Properly set in_port in skb for Flow Mod messages.
[sliver-openvswitch.git] / datapath / datapath.c
index 428f25e..d84240b 100644 (file)
@@ -88,12 +88,10 @@ DEFINE_MUTEX(dp_mutex);
 EXPORT_SYMBOL(dp_mutex);
 
 static int dp_maint_func(void *data);
-static int update_port_status(struct net_bridge_port *p);
-static int send_port_status(struct net_bridge_port *p, uint8_t status);
+static void init_port_status(struct net_bridge_port *p);
 static int dp_genl_openflow_done(struct netlink_callback *);
 static struct net_bridge_port *new_nbp(struct datapath *,
                                       struct net_device *, int port_no);
-static int del_switch_port(struct net_bridge_port *);
 
 /* nla_shrink - reduce amount of space reserved by nla_reserve
  * @skb: socket buffer from which to recover room
@@ -309,7 +307,7 @@ static int new_dp(int dp_idx)
        return 0;
 
 err_destroy_local_port:
-       del_switch_port(dp->local_port);
+       dp_del_switch_port(dp->local_port);
 err_destroy_chain:
        chain_destroy(dp->chain);
 err_destroy_dp_dev:
@@ -378,16 +376,16 @@ int add_switch_port(struct datapath *dp, struct net_device *dev)
        if (IS_ERR(p))
                return PTR_ERR(p);
 
-       update_port_status(p);
+       init_port_status(p);
 
        /* Notify the ctlpath that this port has been added */
-       send_port_status(p, OFPPR_ADD);
+       dp_send_port_status(p, OFPPR_ADD);
 
        return 0;
 }
 
 /* Delete 'p' from switch. */
-static int del_switch_port(struct net_bridge_port *p)
+int dp_del_switch_port(struct net_bridge_port *p)
 {
        /* First drop references to device. */
        cancel_work_sync(&p->port_task);
@@ -403,7 +401,7 @@ static int del_switch_port(struct net_bridge_port *p)
        synchronize_rcu();
 
        /* Notify the ctlpath that this port no longer exists */
-       send_port_status(p, OFPPR_DELETE);
+       dp_send_port_status(p, OFPPR_DELETE);
 
        dev_put(p->dev);
        kfree(p);
@@ -419,7 +417,7 @@ static void del_dp(struct datapath *dp)
 
        /* Drop references to DP. */
        list_for_each_entry_safe (p, n, &dp->port_list, node)
-               del_switch_port(p);
+               dp_del_switch_port(p);
        rcu_assign_pointer(dps[dp->dp_idx], NULL);
 
        /* Kill off local_port dev references from buffered packets that have
@@ -443,15 +441,6 @@ static int dp_maint_func(void *data)
        struct datapath *dp = (struct datapath *) data;
 
        while (!kthread_should_stop()) {
-               struct net_bridge_port *p;
-
-               /* Check if port status has changed */
-               rcu_read_lock();
-               list_for_each_entry_rcu (p, &dp->port_list, node) 
-                       if (update_port_status(p)) 
-                               send_port_status(p, OFPPR_MOD);
-               rcu_read_unlock();
-
                /* Timeout old entries */
                chain_timeout(dp->chain);
                msleep_interruptible(MAINT_SLEEP_MSECS);
@@ -517,12 +506,12 @@ static inline unsigned packet_length(const struct sk_buff *skb)
 static int
 output_all(struct datapath *dp, struct sk_buff *skb, int flood)
 {
-       u32 disable = flood ? OFPPFL_NO_FLOOD : 0;
+       u32 disable = flood ? OFPPC_NO_FLOOD : 0;
        struct net_bridge_port *p;
        int prev_port = -1;
 
        list_for_each_entry_rcu (p, &dp->port_list, node) {
-               if (skb->dev == p->dev || p->flags & disable)
+               if (skb->dev == p->dev || p->config & disable)
                        continue;
                if (prev_port != -1) {
                        struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
@@ -624,7 +613,7 @@ int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port,
                                printk("can't directly forward to input port\n");
                        return -EINVAL;
                }
-               if (p->flags & OFPPFL_NO_FWD && !ignore_no_fwd) {
+               if (p->config & OFPPC_NO_FWD && !ignore_no_fwd) {
                        kfree_skb(skb);
                        return 0;
                }
@@ -692,19 +681,14 @@ static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
        strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN);
        desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0';
        memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN);
-       desc->flags = 0;
-       desc->features = 0;
-       desc->speed = 0;
-
-       if (p->port_no < 255) {
-               /* FIXME: this is a layering violation and should really be
-                * done in the secchan, as with OFPC_STP in
-                * OFP_SUPPORTED_CAPABILITIES. */
-               desc->features |= OFPPF_STP;
-       }
+       desc->curr = 0;
+       desc->supported = 0;
+       desc->advertised = 0;
+       desc->peer = 0;
 
        spin_lock_irqsave(&p->lock, flags);
-       desc->flags = htonl(p->flags | p->status);
+       desc->config = htonl(p->config);
+       desc->state = htonl(p->state);
        spin_unlock_irqrestore(&p->lock, flags);
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24)
@@ -712,27 +696,86 @@ static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
                struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 
                if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) {
+                       /* Set the supported features */
                        if (ecmd.supported & SUPPORTED_10baseT_Half) 
-                               desc->features |= OFPPF_10MB_HD;
+                               desc->supported |= OFPPF_10MB_HD;
                        if (ecmd.supported & SUPPORTED_10baseT_Full)
-                               desc->features |= OFPPF_10MB_FD;
+                               desc->supported |= OFPPF_10MB_FD;
                        if (ecmd.supported & SUPPORTED_100baseT_Half) 
-                               desc->features |= OFPPF_100MB_HD;
+                               desc->supported |= OFPPF_100MB_HD;
                        if (ecmd.supported & SUPPORTED_100baseT_Full)
-                               desc->features |= OFPPF_100MB_FD;
+                               desc->supported |= OFPPF_100MB_FD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Half)
-                               desc->features |= OFPPF_1GB_HD;
+                               desc->supported |= OFPPF_1GB_HD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Full)
-                               desc->features |= OFPPF_1GB_FD;
-                       /* 10Gbps half-duplex doesn't exist... */
+                               desc->supported |= OFPPF_1GB_FD;
                        if (ecmd.supported & SUPPORTED_10000baseT_Full)
-                               desc->features |= OFPPF_10GB_FD;
-
-                       desc->speed = htonl(ecmd.speed);
+                               desc->supported |= OFPPF_10GB_FD;
+                       if (ecmd.supported & SUPPORTED_TP)
+                               desc->supported |= OFPPF_COPPER;
+                       if (ecmd.supported & SUPPORTED_FIBRE)
+                               desc->supported |= OFPPF_FIBER;
+                       if (ecmd.supported & SUPPORTED_Autoneg)
+                               desc->supported |= OFPPF_AUTONEG;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
+                       if (ecmd.supported & SUPPORTED_Pause)
+                               desc->supported |= OFPPF_PAUSE;
+                       if (ecmd.supported & SUPPORTED_Asym_Pause)
+                               desc->supported |= OFPPF_PAUSE_ASYM;
+#endif /* kernel >= 2.6.14 */
+
+                       /* Set the advertised features */
+                       if (ecmd.advertising & ADVERTISED_10baseT_Half) 
+                               desc->advertised |= OFPPF_10MB_HD;
+                       if (ecmd.advertising & ADVERTISED_10baseT_Full)
+                               desc->advertised |= OFPPF_10MB_FD;
+                       if (ecmd.advertising & ADVERTISED_100baseT_Half) 
+                               desc->advertised |= OFPPF_100MB_HD;
+                       if (ecmd.advertising & ADVERTISED_100baseT_Full)
+                               desc->advertised |= OFPPF_100MB_FD;
+                       if (ecmd.advertising & ADVERTISED_1000baseT_Half)
+                               desc->advertised |= OFPPF_1GB_HD;
+                       if (ecmd.advertising & ADVERTISED_1000baseT_Full)
+                               desc->advertised |= OFPPF_1GB_FD;
+                       if (ecmd.advertising & ADVERTISED_10000baseT_Full)
+                               desc->advertised |= OFPPF_10GB_FD;
+                       if (ecmd.advertising & ADVERTISED_TP)
+                               desc->advertised |= OFPPF_COPPER;
+                       if (ecmd.advertising & ADVERTISED_FIBRE)
+                               desc->advertised |= OFPPF_FIBER;
+                       if (ecmd.advertising & ADVERTISED_Autoneg)
+                               desc->advertised |= OFPPF_AUTONEG;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
+                       if (ecmd.advertising & ADVERTISED_Pause)
+                               desc->advertised |= OFPPF_PAUSE;
+                       if (ecmd.advertising & ADVERTISED_Asym_Pause)
+                               desc->advertised |= OFPPF_PAUSE_ASYM;
+#endif /* kernel >= 2.6.14 */
+
+                       /* Set the current features */
+                       if (ecmd.speed == SPEED_10)
+                               desc->curr = (ecmd.duplex) ? OFPPF_10MB_FD : OFPPF_10MB_HD;
+                       else if (ecmd.speed == SPEED_100)
+                               desc->curr = (ecmd.duplex) ? OFPPF_100MB_FD : OFPPF_100MB_HD;
+                       else if (ecmd.speed == SPEED_1000)
+                               desc->curr = (ecmd.duplex) ? OFPPF_1GB_FD : OFPPF_1GB_HD;
+                       else if (ecmd.speed == SPEED_10000)
+                               desc->curr = OFPPF_10GB_FD;
+
+                       if (ecmd.port == PORT_TP) 
+                               desc->curr |= OFPPF_COPPER;
+                       else if (ecmd.port == PORT_FIBRE) 
+                               desc->curr |= OFPPF_FIBER;
+
+                       if (ecmd.autoneg)
+                               desc->curr |= OFPPF_AUTONEG;
                }
        }
 #endif
-       desc->features = htonl(desc->features);
+       desc->curr = htonl(desc->curr);
+       desc->supported = htonl(desc->supported);
+       desc->advertised = htonl(desc->advertised);
+       desc->peer = htonl(desc->peer);
 }
 
 static int 
@@ -798,6 +841,29 @@ dp_send_config_reply(struct datapath *dp, const struct sender *sender)
        return send_openflow_skb(skb, sender);
 }
 
+/* Answers a peer's OFPT_HELLO 'request'.  Returns -EINVAL (after sending an
+ * OFPET_HELLO_FAILED error) if its version is too old, -ENOMEM on no reply. */
+int
+dp_send_hello(struct datapath *dp, const struct sender *sender,
+             const struct ofp_header *request)
+{
+       struct ofp_header *reply;
+       struct sk_buff *skb;
+
+       if (request->version < OFP_VERSION) {
+               char err[64];
+               snprintf(err, sizeof err, "Only version 0x%02x supported",
+                        OFP_VERSION);
+               dp_send_error_msg(dp, sender, OFPET_HELLO_FAILED,
+                                 OFPHFC_INCOMPATIBLE, err, strlen(err));
+               return -EINVAL;
+       }
+       reply = alloc_openflow_skb(dp, sizeof *reply, OFPT_HELLO, sender, &skb);
+       if (!reply)
+               return -ENOMEM;
+       return send_openflow_skb(skb, sender);
+}
+
 /* Callback function for a workqueue to disable an interface */
 static void
 down_port_cb(struct work_struct *work)
@@ -810,7 +876,7 @@ down_port_cb(struct work_struct *work)
                if (net_ratelimit())
                        printk("problem bringing up port %s\n", p->dev->name);
        rtnl_unlock();
-       p->status |= OFPPFL_PORT_DOWN;
+       p->config |= OFPPC_PORT_DOWN;
 }
 
 /* Callback function for a workqueue to enable an interface */
@@ -825,42 +891,40 @@ up_port_cb(struct work_struct *work)
                if (net_ratelimit())
                        printk("problem bringing down port %s\n", p->dev->name);
        rtnl_unlock();
-       p->status &= ~OFPPFL_PORT_DOWN;
+       p->config &= ~OFPPC_PORT_DOWN;
 }
 
 int
 dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm)
 {
        unsigned long int flags;
-       const struct ofp_phy_port *opp = &opm->desc;
-       int port_no = ntohs(opp->port_no);
+       int port_no = ntohs(opm->port_no);
        struct net_bridge_port *p = (port_no < OFPP_MAX ? dp->ports[port_no]
                                     : port_no == OFPP_LOCAL ? dp->local_port
                                     : NULL);
-       uint32_t flag_mask;
 
        /* Make sure the port id hasn't changed since this was sent */
-       if (!p || memcmp(opp->hw_addr, p->dev->dev_addr, ETH_ALEN))
+       if (!p || memcmp(opm->hw_addr, p->dev->dev_addr, ETH_ALEN))
                return -1;
 
        spin_lock_irqsave(&p->lock, flags);
-       flag_mask = ntohl(opm->mask) & PORT_FLAG_BITS;
-       if (flag_mask) {
-               p->flags &= ~flag_mask;
-               p->flags |= ntohl(opp->flags) & flag_mask;
+       if (opm->mask) {
+               uint32_t config_mask = ntohl(opm->mask);
+               p->config &= ~config_mask;
+               p->config |= ntohl(opm->config) & config_mask;
        }
 
        /* Modifying the status of an interface requires taking a lock
         * that cannot be done from here.  For this reason, we use a shared 
         * workqueue, which will cause it to be executed from a safer 
         * context. */
-       if (opm->mask & htonl(OFPPFL_PORT_DOWN)) {
-               if ((opp->flags & htonl(OFPPFL_PORT_DOWN))
-                   && (p->status & OFPPFL_PORT_DOWN) == 0) {
+       if (opm->mask & htonl(OFPPC_PORT_DOWN)) {
+               if ((opm->config & htonl(OFPPC_PORT_DOWN))
+                   && (p->config & OFPPC_PORT_DOWN) == 0) {
                        PREPARE_WORK(&p->port_task, down_port_cb);
                        schedule_work(&p->port_task);
-               } else if ((opp->flags & htonl(OFPPFL_PORT_DOWN)) == 0
-                          && (p->status & OFPPFL_PORT_DOWN)) {
+               } else if ((opm->config & htonl(OFPPC_PORT_DOWN)) == 0
+                          && (p->config & OFPPC_PORT_DOWN)) {
                        PREPARE_WORK(&p->port_task, up_port_cb);
                        schedule_work(&p->port_task);
                }
@@ -870,37 +934,29 @@ dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm)
        return 0;
 }
 
-/* Update the port status field of the bridge port.  A non-zero return
- * value indicates some field has changed. 
- *
- * NB: Callers of this function may hold the RCU read lock, so any
- * additional checks must not sleep.
- */
-static int
-update_port_status(struct net_bridge_port *p)
+/* Initialize the port status field of the bridge port. */
+static void
+init_port_status(struct net_bridge_port *p)
 {
        unsigned long int flags;
-       uint32_t orig_status;
 
        spin_lock_irqsave(&p->lock, flags);
-       orig_status = p->status;
 
        if (p->dev->flags & IFF_UP) 
-               p->status &= ~OFPPFL_PORT_DOWN;
+               p->config &= ~OFPPC_PORT_DOWN;
        else
-               p->status |= OFPPFL_PORT_DOWN;
+               p->config |= OFPPC_PORT_DOWN;
 
        if (netif_carrier_ok(p->dev))
-               p->status &= ~OFPPFL_LINK_DOWN;
+               p->state &= ~OFPPS_LINK_DOWN;
        else
-               p->status |= OFPPFL_LINK_DOWN;
+               p->state |= OFPPS_LINK_DOWN;
 
        spin_unlock_irqrestore(&p->lock, flags);
-       return (orig_status != p->status);
 }
 
-static int
-send_port_status(struct net_bridge_port *p, uint8_t status)
+int
+dp_send_port_status(struct net_bridge_port *p, uint8_t status)
 {
        struct sk_buff *skb;
        struct ofp_port_status *ops;
@@ -953,7 +1009,7 @@ dp_send_error_msg(struct datapath *dp, const struct sender *sender,
        struct ofp_error_msg *oem;
 
 
-       oem = alloc_openflow_skb(dp, sizeof(*oem)+len, OFPT_ERROR_MSG
+       oem = alloc_openflow_skb(dp, sizeof(*oem)+len, OFPT_ERROR, 
                        sender, &skb);
        if (!oem)
                return -ENOMEM;
@@ -1146,7 +1202,7 @@ static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
                        err = -ENOENT;
                        goto out_put;
                }
-               err = del_switch_port(port->br_port);
+               err = dp_del_switch_port(port->br_port);
        }
 
 out_put:
@@ -1249,13 +1305,12 @@ static int flow_stats_init(struct datapath *dp, const void *body, int body_len,
 
 static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
 {
+       struct sw_flow_actions *sf_acts = rcu_dereference(flow->sf_acts);
        struct flow_stats_state *s = private;
        struct ofp_flow_stats *ofs;
-       int actions_length;
        int length;
 
-       actions_length = sizeof *ofs->actions * flow->n_actions;
-       length = sizeof *ofs + sizeof *ofs->actions * flow->n_actions;
+       length = sizeof *ofs + sf_acts->actions_len;
        if (length + s->bytes_used > s->bytes_allocated)
                return 1;
 
@@ -1282,7 +1337,7 @@ static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
        memset(ofs->pad2, 0, sizeof ofs->pad2);
        ofs->packet_count    = cpu_to_be64(flow->packet_count);
        ofs->byte_count      = cpu_to_be64(flow->byte_count);
-       memcpy(ofs->actions, flow->actions, actions_length);
+       memcpy(ofs->actions, sf_acts->actions, sf_acts->actions_len);
 
        s->bytes_used += length;
        return 0;
@@ -1403,6 +1458,7 @@ static int table_stats_dump(struct datapath *dp, void *state,
                memset(ots->pad, 0, sizeof ots->pad);
                ots->max_entries = htonl(stats.max_flows);
                ots->active_count = htonl(stats.n_flows);
+               ots->lookup_count = cpu_to_be64(stats.n_lookup);
                ots->matched_count = cpu_to_be64(stats.n_matched);
        }
        return 0;
@@ -1722,10 +1778,14 @@ static int __init dp_init(void)
        if (err)
                goto error;
 
-       err = dp_init_netlink();
+       err = register_netdevice_notifier(&dp_device_notifier);
        if (err)
                goto error_flow_exit;
 
+       err = dp_init_netlink();
+       if (err)
+               goto error_unreg_notifier;
+
        /* Hook into callback used by the bridge to intercept packets.
         * Parasites we are. */
        if (br_handle_frame_hook)
@@ -1734,6 +1794,8 @@ static int __init dp_init(void)
 
        return 0;
 
+error_unreg_notifier:
+       unregister_netdevice_notifier(&dp_device_notifier);
 error_flow_exit:
        flow_exit();
 error:
@@ -1745,6 +1807,7 @@ static void dp_cleanup(void)
 {
        fwd_exit();
        dp_uninit_netlink();
+       unregister_netdevice_notifier(&dp_device_notifier);
        flow_exit();
        br_handle_frame_hook = NULL;
 }