3 * Distributed under the terms of the GNU GPL version 2.
4 * Copyright (c) 2008 Nicira Networks
7 #include <linux/netdevice.h>
8 #include <linux/netfilter.h>
9 #include <linux/netfilter_ipv4.h>
12 #include <linux/icmp.h>
13 #include <linux/if_ether.h>
15 #include <net/route.h>
19 #include "nx_act_snat.h"
22 /* We need these fake structures to make netfilter happy --
23 * lots of places assume that skb->dst != NULL, which isn't
24 * all that unreasonable.
26 * Currently, we fill in the PMTU entry because netfilter
27 * refragmentation needs it, and the rt_flags entry because
28 * ipt_REJECT needs it. Future netfilter modules might
29 * require us to fill additional fields.
 *
 * __fake_net_device is the device that __fake_rtable's dst entry points
 * at; the only member set in the visible initializer is hard_header_len
 * (ETH_HLEN). */
30 static struct net_device __fake_net_device = {
31 .hard_header_len = ETH_HLEN
/* Placeholder route whose embedded dst entry (u.dst) is what this file
 * installs as skb->dst.  The visible initializers give it a reference
 * count of 1, point it at __fake_net_device, make it its own 'path', and
 * set the RTAX_MTU metric to 1500.
 * NOTE(review): the enclosing ".u = { .dst = { ... } }" designators and any
 * further members are on lines not visible in this listing -- confirm. */
34 static struct rtable __fake_rtable = {
37 .__refcnt = ATOMIC_INIT(1),
38 .dev = &__fake_net_device,
39 .path = &__fake_rtable.u.dst,
40 .metrics = {[RTAX_MTU - 1] = 1500},
47 /* Define ARP for IP since the Linux headers don't do it cleanly.
 *
 * The structure is packed and asserted below to be exactly 28 bytes.
 * NOTE(review): only the ar_sha and ar_tha members are visible in this
 * listing; ar_op, ar_hln, ar_pro, ar_sip and ar_tip are read by
 * handle_arp_snat() and are presumably declared on lines not shown. */
54 uint8_t ar_sha[ETH_ALEN];
56 uint8_t ar_tha[ETH_ALEN];
58 } __attribute__((packed));
59 OFP_ASSERT(sizeof(struct ip_arphdr) == 28);
62 /* Push the Ethernet header back on and transmit the packet.
 *
 * ETH_HLEN bytes are re-exposed at the front of 'skb' with skb_push()
 * before it is handed to dp_xmit_skb(), whose return value is returned
 * unchanged. */
64 dp_xmit_skb_push(struct sk_buff *skb)
66 skb_push(skb, ETH_HLEN);
67 return dp_xmit_skb(skb);
70 /* Perform maintenance related to a SNAT'd interface. Currently, this only
71 * checks whether MAC->IP bindings have expired.
 *
 * Under p->lock, every entry of sc->mappings is visited with the
 * deletion-safe list iterator; an entry counts as expired once jiffies is
 * after m->used + sc->mac_timeout * HZ.
 * NOTE(review): how 'sc' is obtained and what is done with an expired
 * entry are on lines not visible in this listing -- confirm.
 *
73 * Called with the RCU read lock */
75 snat_maint(struct net_bridge_port *p)
78 struct snat_mapping *m, *n;
80 unsigned long timeout;
82 spin_lock_irqsave(&p->lock, flags);
87 timeout = sc->mac_timeout * HZ;
89 list_for_each_entry_safe (m, n, &sc->mappings, node) {
90 if (time_after(jiffies, m->used + timeout)) {
97 spin_unlock_irqrestore(&p->lock, flags);
/* snat_local_in(): if 'skb' still carries the placeholder route
 * (__fake_rtable) as its dst, that reference is dropped with dst_release().
 * NOTE(review): the original comment below is not terminated in this
 * listing, and nothing after the dst_release() call is visible -- confirm
 * against the full file. */
100 /* When the packet is bound for a local interface, strip off the fake
103 void snat_local_in(struct sk_buff *skb)
105 if (skb->dst == (struct dst_entry *)&__fake_rtable) {
106 dst_release(skb->dst);
111 /* Check whether destination IP's address is in the IP->MAC mappings.
112 * If it is, then overwrite the destination MAC with the value from the
 * matching mapping entry (m->hw_addr).
 *
 * Packets whose skb->protocol is not ETH_P_IP take a separate early path
 * (its body is not visible in this listing).  Before the MAC is rewritten
 * the packet is passed to make_writable(); a failure there is logged.
 * NOTE(review): 'eh' and 'iph' are read before make_writable(&skb) is
 * called; if make_writable() can replace the skb or its data they may need
 * to be re-read before the memcpy() -- confirm.
 *
115 * Returns -1 if there is a problem, otherwise 0. */
117 dnat_mac(struct net_bridge_port *p, struct sk_buff *skb)
119 struct snat_conf *sc = p->snat;
120 struct iphdr *iph = ip_hdr(skb);
121 struct ethhdr *eh = eth_hdr(skb);
122 struct snat_mapping *m;
124 if (skb->protocol != htons(ETH_P_IP))
127 list_for_each_entry (m, &sc->mappings, node) {
128 if (m->ip_addr == iph->daddr){
130 if (!make_writable(&skb)) {
132 printk("make_writable failed\n");
136 memcpy(eh->h_dest, m->hw_addr, ETH_ALEN);
/* Test whether 'ip_addr' falls inside the SNAT range of 'sc'.
 *
 * 'ip_addr' is converted with ntohl() and compared, inclusively, against
 * sc->ip_addr_start and sc->ip_addr_end; the visible return statement
 * yields nonzero when it is inside the range.
 * NOTE(review): no check of 'sc' against NULL is visible in this listing
 * -- confirm whether one exists on the lines not shown. */
145 __snat_this_address(struct snat_conf *sc, u32 ip_addr)
148 u32 h_ip_addr = ntohl(ip_addr);
149 return (h_ip_addr >= sc->ip_addr_start &&
150 h_ip_addr <= sc->ip_addr_end);
/* Locked variant of __snat_this_address(): evaluates it for p->snat and
 * 'ip_addr' while holding p->lock (taken with spin_lock_irqsave()) and
 * stores the result in 'retval'.
 * NOTE(review): the return statement is not visible in this listing;
 * callers in this file test the result as a boolean. */
156 snat_this_address(struct net_bridge_port *p, u32 ip_addr)
158 unsigned long int flags;
161 spin_lock_irqsave(&p->lock, flags);
162 retval = __snat_this_address(p->snat, ip_addr);
163 spin_unlock_irqrestore(&p->lock, flags);
/* Continuation that snat_pre_route() hands to NF_HOOK() for
 * NF_INET_PRE_ROUTING.
 *
 * Installs the placeholder route as skb->dst, then works under p->lock: a
 * packet whose destination is still inside the SNAT range takes an
 * early-unlock path; otherwise dnat_mac() gets the chance to rewrite the
 * destination MAC before the lock is released.
 * NOTE(review): the assignment of 'sc', the declaration of 'flags', the
 * body of the early path and the return values are on lines not visible
 * in this listing -- confirm. */
169 snat_pre_route_finish(struct sk_buff *skb)
171 struct net_bridge_port *p = skb->dev->br_port;
172 struct snat_conf *sc;
173 struct iphdr *iph = ip_hdr(skb);
176 skb->dst = (struct dst_entry *)&__fake_rtable;
179 /* Don't process packets that were not translated due to NAT */
180 spin_lock_irqsave(&p->lock, flags);
182 if (__snat_this_address(sc, iph->daddr)) {
183 spin_unlock_irqrestore(&p->lock, flags);
187 /* If SNAT is configured for this input device, check the IP->MAC
188 * mappings to see if we should update the destination MAC. */
190 dnat_mac(skb->dev->br_port, skb);
192 spin_unlock_irqrestore(&p->lock, flags);
197 /* Checks whether 'skb' is an ARP request for an SNAT'd interface. If
198 * so, it will generate a response.
 *
 * The ARP header must be pullable in full.  The visible filter conditions
 * are: ar_op is ARPOP_REQUEST, ar_hln is ETH_ALEN and ar_pro is ETH_P_IP
 * (further conditions may be on lines not visible in this listing).  The
 * target IP must be an address SNAT'd on the input port.  The reply is
 * sent with arp_send() on skb->dev, addressed to the requester's IP and
 * hardware address and carrying the datapath netdev's MAC
 * (p->dp->netdev->dev_addr).
 *
200 * Returns 0 if the packet was not handled. Otherwise, -1 is returned
201 * and the caller is responsible for freeing 'skb'. */
203 handle_arp_snat(struct sk_buff *skb)
205 struct net_bridge_port *p = skb->dev->br_port;
206 struct ip_arphdr *ah;
208 if (!pskb_may_pull(skb, sizeof *ah))
211 ah = (struct ip_arphdr *)arp_hdr(skb);
212 if ((ah->ar_op != htons(ARPOP_REQUEST))
213 || ah->ar_hln != ETH_ALEN
214 || ah->ar_pro != htons(ETH_P_IP)
218 /* We're only interested in addresses we rewrite. */
219 if (!snat_this_address(p, ah->ar_tip)) {
223 arp_send(ARPOP_REPLY, ETH_P_ARP, ah->ar_sip, skb->dev, ah->ar_tip,
224 ah->ar_sha, p->dp->netdev->dev_addr, ah->ar_sha);
229 /* Checks whether 'skb' is a ping request for an SNAT'd interface. If
230 * so, it will generate a response.
232 * Returns 0 if the packet was not handled. Otherwise, -1 is returned
233 * and the caller is responsible for freeing 'skb'.
 *
 * Visible flow: the destination address must be one that is SNAT'd on the
 * input port; fragments with a nonzero offset and packets too short for
 * the first 4 bytes past the transport offset are filtered out; only
 * ICMP_ECHO is answered.  The reply is built from a copy made with
 * skb_copy(GFP_ATOMIC): Ethernet source and destination are swapped, the
 * IP destination is set to the old source and the IP checksum recomputed,
 * the ICMP type becomes ICMP_ECHOREPLY with a recomputed checksum, and the
 * copy is sent through dp_xmit_skb_push().
 * NOTE(review): the declarations and assignments of 'iph' and 'eh', the
 * remaining IP header edits and any zeroing of the checksum fields before
 * recomputation are on lines not visible in this listing, so whether the
 * header edits target the copy or the original cannot be confirmed here. */
235 handle_icmp_snat(struct sk_buff *skb)
237 struct net_bridge_port *p = skb->dev->br_port;
240 struct icmphdr *icmph;
241 uint8_t tmp_eth[ETH_ALEN];
243 struct sk_buff *nskb;
245 /* We're only interested in addresses we rewrite. */
247 if (!snat_this_address(p, iph->daddr)) {
251 /* Drop fragments and packets not long enough to hold the ICMP
253 if ((ntohs(iph->frag_off) & IP_OFFSET) != 0 ||
254 !pskb_may_pull(skb, skb_transport_offset(skb) + 4))
257 /* We only respond to echo requests to our address. Continue
258 * processing replies and other ICMP messages since they may be
259 * intended for NAT'd hosts. */
260 icmph = icmp_hdr(skb);
261 if (icmph->type != ICMP_ECHO)
264 /* Send an echo reply in response */
265 nskb = skb_copy(skb, GFP_ATOMIC);
268 printk("skb copy failed for icmp reply\n");
272 /* Update Ethernet header. */
274 memcpy(tmp_eth, eh->h_dest, ETH_ALEN);
275 memcpy(eh->h_dest, eh->h_source, ETH_ALEN);
276 memcpy(eh->h_source, tmp_eth, ETH_ALEN);
279 * This is kind of busted, at least in that it doesn't check that the
280 * echoed IP options make sense. */
287 iph->daddr = iph->saddr;
289 iph->check = ip_fast_csum(iph, iph->ihl);
291 /* Update ICMP header. */
292 icmph = icmp_hdr(nskb);
293 icmph->type = ICMP_ECHOREPLY;
295 icmph->checksum = ip_compute_csum(icmph,
296 nskb->tail - nskb->transport_header);
298 dp_xmit_skb_push(nskb);
303 /* Check if any SNAT maintenance needs to be done on 'skb' before it's
304 * checked against the datapath's tables. This includes DNAT
305 * modification based on prior SNAT action and responding to ARP and
306 * echo requests for the SNAT interface.
308 * Returns 0 if 'skb' should continue to be processed by the caller.
309 * Returns -1 if the packet was handled, and the caller should free
 * 'skb'.  NOTE(review): the end of the sentence above and the terminator
 * of this comment are not visible in this listing; the wording here
 * follows the parallel comments on handle_arp_snat() and
 * handle_icmp_snat().
 *
 * Visible flow: ARP packets are delegated to handle_arp_snat(); other
 * non-IP packets take an early path.  For IP, the fixed header must be
 * pullable, ihl must be at least 5 and version 4, and the full header
 * must be pullable; the transport header offset is then set.  ICMP
 * packets are offered to handle_icmp_snat().  After that the header
 * checksum is verified, tot_len is checked against skb->len and the
 * header length, the packet is trimmed to tot_len, and the
 * NF_INET_PRE_ROUTING hooks are run with snat_pre_route_finish() as the
 * continuation.
 * NOTE(review): the ICMP echo handling runs before the IP header checksum
 * is verified -- confirm that this ordering is intended.
313 snat_pre_route(struct sk_buff *skb)
318 WARN_ON_ONCE(skb_network_offset(skb));
319 if (skb->protocol == htons(ETH_P_ARP))
320 return handle_arp_snat(skb);
321 else if (skb->protocol != htons(ETH_P_IP))
324 if (!pskb_may_pull(skb, sizeof *iph))
328 if (iph->ihl < 5 || iph->version != 4)
331 if (!pskb_may_pull(skb, ip_hdrlen(skb)))
333 skb_set_transport_header(skb, ip_hdrlen(skb));
335 /* Check if we need to echo reply for this address */
337 if ((iph->protocol == IPPROTO_ICMP) && (handle_icmp_snat(skb)))
341 if (unlikely(ip_fast_csum(iph, iph->ihl)))
344 len = ntohs(iph->tot_len);
345 if ((skb->len < len) || len < (iph->ihl*4))
348 if (pskb_trim_rcsum(skb, len))
351 return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
352 snat_pre_route_finish);
/* Runs the NF_INET_POST_ROUTING hooks on 'skb', passing NULL and skb->dev
 * as the two device arguments.
 * NOTE(review): the continuation function passed to NF_HOOK() and the
 * return statement are on lines not visible in this listing. */
360 snat_skb_finish(struct sk_buff *skb)
362 NF_HOOK(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
/* update_mapping(): record which hardware address is using a source IP.
 *
 * Under p->lock, sc->mappings is searched for an entry whose ip_addr
 * equals the packet's source IP; a match has its hw_addr refreshed from
 * the Ethernet source address.  The visible code after the loop allocates
 * an entry with kmalloc(GFP_ATOMIC), fills in ip_addr and hw_addr and adds
 * it to sc->mappings -- presumably only when no entry matched.
 * NOTE(review): the original comment below is not terminated in this
 * listing; the assignment of 'sc', any update of the entry's 'used'
 * timestamp and the allocation-failure path are on lines not visible
 * here -- confirm. */
368 /* Update the MAC->IP mappings for the private side of the SNAT'd
371 update_mapping(struct net_bridge_port *p, const struct sk_buff *skb)
374 struct snat_conf *sc;
375 const struct iphdr *iph = ip_hdr(skb);
376 const struct ethhdr *eh = eth_hdr(skb);
377 struct snat_mapping *m;
379 spin_lock_irqsave(&p->lock, flags);
384 list_for_each_entry (m, &sc->mappings, node) {
385 if (m->ip_addr == iph->saddr){
386 memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
392 m = kmalloc(sizeof *m, GFP_ATOMIC);
395 m->ip_addr = iph->saddr;
396 memcpy(m->hw_addr, eh->h_source, ETH_ALEN);
399 list_add(&m->node, &sc->mappings);
402 spin_unlock_irqrestore(&p->lock, flags);
405 /* Perform SNAT modification on 'skb' and send out 'out_port'. If the
406 * port was not configured for SNAT, it will be sent through the interface
407 * unmodified. 'skb' is not consumed, so caller will need to free it.
 *
 * NOTE(review): the terminator of this comment is not visible in this
 * listing.
 * Visible flow: 'skb' is duplicated with skb_copy(GFP_ATOMIC); packets
 * whose protocol is not ETH_P_IP take a separate path; for IP, the copy's
 * Ethernet source is set to dp->netdev->dev_addr, update_mapping() is
 * called with the original packet, the Ethernet header is pulled off the
 * copy, and the NF_INET_FORWARD hooks are run on the copy.
 * NOTE(review): the check for an unconfigured port, the skb_copy() failure
 * path and the NF_HOOK() continuation are on lines not shown -- confirm.
410 snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port)
412 struct net_bridge_port *p = dp->ports[out_port];
413 struct sk_buff *nskb;
418 /* FIXME: Expensive. Just need to skb_clone() here?
419 * (However, the skb_copy() does linearize and ensure that the headers
420 * are accessible.) */
421 nskb = skb_copy(skb, GFP_ATOMIC);
427 /* We only SNAT IP, so just send it on its way if not */
428 if (skb->protocol != htons(ETH_P_IP)) {
433 /* Set the source MAC to the OF interface */
434 memcpy(eth_hdr(nskb)->h_source, dp->netdev->dev_addr, ETH_ALEN);
436 update_mapping(p, skb);
438 /* Take the Ethernet header back off for netfilter hooks. */
439 skb_pull(nskb, ETH_HLEN);
441 NF_HOOK(PF_INET, NF_INET_FORWARD, nskb, skb->dev, nskb->dev,
445 /* Remove SNAT configuration on port 'p'.
 *
 * The visible loop drains sc->mappings by repeatedly taking the first
 * entry of the list.  snat_del_port() treats a nonzero return value as
 * "SNAT not configured on this port".
 * NOTE(review): the unlinking/freeing of each entry and of 'sc' itself
 * are on lines not visible in this listing -- confirm.
 *
447 * NB: The caller must hold the port's spinlock. */
449 snat_free_conf(struct net_bridge_port *p)
451 struct snat_conf *sc = p->snat;
456 /* Free existing mapping entries */
457 while (!list_empty(&sc->mappings)) {
458 struct snat_mapping *m = list_entry(sc->mappings.next,
459 struct snat_mapping, node);
470 /* Remove SNAT configuration from an interface.
 *
 * 'port' indexes dp->ports[].  A missing port and a port without SNAT
 * configuration are each reported with printk(); the configuration is
 * released by snat_free_conf() while holding p->lock.
 * NOTE(review): no bounds check of 'port' against the size of dp->ports[]
 * is visible before the array is indexed, and the return values are on
 * lines not shown in this listing -- confirm. */
472 snat_del_port(struct datapath *dp, uint16_t port)
475 struct net_bridge_port *p = dp->ports[port];
479 printk("Attempt to remove snat on non-existent port: %d\n", port);
483 spin_lock_irqsave(&p->lock, flags);
484 if (snat_free_conf(p)) {
485 /* SNAT not configured on this port */
486 spin_unlock_irqrestore(&p->lock, flags);
488 printk("Attempt to remove snat on non-snat port: %d\n", port);
492 spin_unlock_irqrestore(&p->lock, flags);
497 /* Add SNAT configuration to an interface.
 *
 * 'ip_addr_start' and 'ip_addr_end' bound the SNAT address range; they are
 * stored as given and later compared against host-byte-order addresses
 * (snat_mod_config() passes them through ntohl()).  A 'mac_timeout' of 0
 * selects MAC_TIMEOUT_DEFAULT.  When the port already has the same range
 * configured only the timeout is updated.  The new configuration is
 * allocated with kzalloc(GFP_ATOMIC) while p->lock is held.
 * NOTE(review): the return values, the release of an existing
 * configuration and the point where the new one is attached to the port
 * are on lines not visible in this listing; no bounds check of 'port' is
 * visible before dp->ports[] is indexed -- confirm. */
499 snat_add_port(struct datapath *dp, uint16_t port,
500 uint32_t ip_addr_start, uint32_t ip_addr_end,
501 uint16_t mac_timeout)
504 struct net_bridge_port *p = dp->ports[port];
505 struct snat_conf *sc;
508 if (mac_timeout == 0)
509 mac_timeout = MAC_TIMEOUT_DEFAULT;
513 printk("Attempt to add snat on non-existent port: %d\n", port);
517 /* If SNAT is already configured on the port, check whether the same
518 * IP addresses are used. If so, just update the mac timeout
519 * configuration. Otherwise, drop all SNAT configuration and
521 spin_lock_irqsave(&p->lock, flags);
523 if ((p->snat->ip_addr_start == ip_addr_start)
524 && (p->snat->ip_addr_end == ip_addr_end)) {
525 p->snat->mac_timeout = mac_timeout;
526 spin_unlock_irqrestore(&p->lock, flags);
530 /* Free the existing configuration and mappings. */
534 sc = kzalloc(sizeof *sc, GFP_ATOMIC);
536 spin_unlock_irqrestore(&p->lock, flags);
540 sc->ip_addr_start = ip_addr_start;
541 sc->ip_addr_end = ip_addr_end;
542 sc->mac_timeout = mac_timeout;
543 INIT_LIST_HEAD(&sc->mappings);
546 spin_unlock_irqrestore(&p->lock, flags);
551 /* Handle a SNAT configuration message.
 *
 * The entry count is derived from the message length:
 * (length - sizeof *nac) / sizeof (struct nx_snat_config).  For each entry,
 * NXSC_ADD calls snat_add_port() with the port, address range and MAC
 * timeout converted from network byte order; the other visible call is
 * snat_del_port().
 * NOTE(review): no check that the length is at least sizeof *nac is
 * visible here; if it were smaller the unsigned subtraction would wrap --
 * confirm the caller validates the length.  The end of the loop and the
 * return statement are not visible in this listing.
 *
553 * Returns 0 if no problems are found. Otherwise, a negative errno. */
555 snat_mod_config(struct datapath *dp, const struct nx_act_config *nac)
557 int n_entries = (ntohs(nac->header.header.length) - sizeof *nac)
558 / sizeof (struct nx_snat_config);
562 for (i=0; i<n_entries; i++) {
563 const struct nx_snat_config *sc = &nac->snat[i];
564 uint16_t port = ntohs(sc->port);
567 if (sc->command == NXSC_ADD)
568 r = snat_add_port(dp, port,
569 ntohl(sc->ip_addr_start), ntohl(sc->ip_addr_end),
570 ntohs(sc->mac_timeout));
572 r = snat_del_port(dp, port);