fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / ipv4 / ipvs / ip_vs_core.c
index 168e5b3..3425752 100644 (file)
@@ -22,6 +22,7 @@
  *
  * Changes:
  *     Paul `Rusty' Russell            properly handle non-linear skbs
+ *     Harald Welte                    don't use nfcache
  *
  */
 
@@ -57,7 +58,6 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 #ifdef CONFIG_IP_VS_DEBUG
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
-EXPORT_SYMBOL(check_for_ip_vs_out);
 EXPORT_SYMBOL(ip_vs_make_skb_writable);
 
 
@@ -209,14 +209,14 @@ int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
                    const struct sk_buff *skb,
-                   __u16 ports[2])
+                   __be16 ports[2])
 {
        struct ip_vs_conn *cp = NULL;
        struct iphdr *iph = skb->nh.iph;
        struct ip_vs_dest *dest;
        struct ip_vs_conn *ct;
-       __u16  dport;    /* destination port to forward */
-       __u32  snet;     /* source network of the client, after masking */
+       __be16  dport;   /* destination port to forward */
+       __be32  snet;    /* source network of the client, after masking */
 
        /* Mask saddr with the netmask to adjust template granularity */
        snet = iph->saddr & svc->netmask;
@@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        if (ports[1] == svc->port) {
                /* Check if a template already exists */
                if (svc->port != FTPPORT)
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, ports[1]);
                else
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
@@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                                    iph->daddr,
                                                    ports[1],
                                                    dest->addr, dest->port,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        else
                                ct = ip_vs_conn_new(iph->protocol,
                                                    snet, 0,
                                                    iph->daddr, 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
                                return NULL;
@@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
                 */
                if (svc->fwmark)
-                       ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+                       ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
                                               htonl(svc->fwmark), 0);
                else
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
@@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                                    snet, 0,
                                                    htonl(svc->fwmark), 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        else
                                ct = ip_vs_conn_new(iph->protocol,
                                                    snet, 0,
                                                    iph->daddr, 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
                                return NULL;
@@ -383,7 +383,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        struct ip_vs_conn *cp = NULL;
        struct iphdr *iph = skb->nh.iph;
        struct ip_vs_dest *dest;
-       __u16 _ports[2], *pptr;
+       __be16 _ports[2], *pptr;
 
        pptr = skb_header_pointer(skb, iph->ihl*4,
                                  sizeof(_ports), _ports);
@@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                return NULL;
 
        IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
-                 "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+                 "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
                  ip_vs_fwd_tag(cp),
                  NIPQUAD(cp->caddr), ntohs(cp->cport),
                  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -446,7 +446,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                struct ip_vs_protocol *pp)
 {
-       __u16 _ports[2], *pptr;
+       __be16 _ports[2], *pptr;
        struct iphdr *iph = skb->nh.iph;
 
        pptr = skb_header_pointer(skb, iph->ihl*4,
@@ -530,24 +530,21 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
                                       const struct net_device *out,
                                       int (*okfn)(struct sk_buff *))
 {
-       if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY))
+       if (!((*pskb)->ipvs_property))
                return NF_ACCEPT;
-
        /* The packet was sent from IPVS, exit this chain */
-       (*okfn)(*pskb);
-
-       return NF_STOLEN;
+       return NF_STOP;
 }
 
-u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
 {
-       return (u16) csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+       return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
 }
 
 static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb)
+ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-       skb = ip_defrag(skb);
+       skb = ip_defrag(skb, user);
        if (skb)
                ip_send_check(skb->nh.iph);
        return skb;
@@ -579,7 +576,7 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 
        /* the TCP/UDP port */
        if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
-               __u16 *ports = (void *)ciph + ciph->ihl*4;
+               __be16 *ports = (void *)ciph + ciph->ihl*4;
 
                if (inout)
                        ports[1] = cp->vport;
@@ -621,7 +618,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 
        /* reassemble IP fragments */
        if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
-               skb = ip_vs_gather_frags(skb);
+               skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
                if (!skb)
                        return NF_STOLEN;
                *pskb = skb;
@@ -702,7 +699,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
        /* do the statistics and put it back */
        ip_vs_out_stats(cp, skb);
 
-       skb->nfcache |= NFC_IPVS_PROPERTY;
+       skb->ipvs_property = 1;
        verdict = NF_ACCEPT;
 
   out:
@@ -740,7 +737,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 
        EnterFunction(11);
 
-       if (skb->nfcache & NFC_IPVS_PROPERTY)
+       if (skb->ipvs_property)
                return NF_ACCEPT;
 
        iph = skb->nh.iph;
@@ -760,7 +757,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
        /* reassemble IP fragments */
        if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
                     !pp->dont_defrag)) {
-               skb = ip_vs_gather_frags(skb);
+               skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
                if (!skb)
                        return NF_STOLEN;
                iph = skb->nh.iph;
@@ -778,7 +775,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
                if (sysctl_ip_vs_nat_icmp_send &&
                    (pp->protocol == IPPROTO_TCP ||
                     pp->protocol == IPPROTO_UDP)) {
-                       __u16 _ports[2], *pptr;
+                       __be16 _ports[2], *pptr;
 
                        pptr = skb_header_pointer(skb, ihl,
                                                  sizeof(_ports), _ports);
@@ -816,13 +813,23 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
        skb->nh.iph->saddr = cp->vaddr;
        ip_send_check(skb->nh.iph);
 
+       /* For policy routing, packets originating from this
+        * machine itself may be routed differently to packets
+        * passing through.  We want this packet to be routed as
+        * if it came from this machine itself.  So re-compute
+        * the routing information.
+        */
+       if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+               goto drop;
+       skb = *pskb;
+
        IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
        ip_vs_out_stats(cp, skb);
        ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
        ip_vs_conn_put(cp);
 
-       skb->nfcache |= NFC_IPVS_PROPERTY;
+       skb->ipvs_property = 1;
 
        LeaveFunction(11);
        return NF_ACCEPT;
@@ -834,38 +841,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 }
 
 
-/*
- *      Check if the packet is for VS/NAT connections, then send it
- *      immediately.
- *      Called by ip_fw_compact to detect packets for VS/NAT before
- *      they are changed by ipchains masquerading code.
- */
-unsigned int
-check_for_ip_vs_out(struct sk_buff **pskb, int (*okfn)(struct sk_buff *))
-{
-       unsigned int ret;
-
-       ret = ip_vs_out(NF_IP_FORWARD, pskb, NULL, NULL, NULL);
-       if (ret != NF_ACCEPT) {
-               return ret;
-       } else {
-               /* send the packet immediately if it is already mangled
-                  by ip_vs_out */
-               if ((*pskb)->nfcache & NFC_IPVS_PROPERTY) {
-                       (*okfn)(*pskb);
-                       return NF_STOLEN;
-               }
-       }
-       return NF_ACCEPT;
-}
-
 /*
  *     Handle ICMP messages in the outside-to-inside direction (incoming).
  *     Find any that might be relevant, check against existing connections,
  *     forward to the right destination host if relevant.
  *     Currently handles error types - unreachable, quench, ttl exceeded.
  */
-static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
+static int 
+ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 {
        struct sk_buff *skb = *pskb;
        struct iphdr *iph;
@@ -879,7 +862,9 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
 
        /* reassemble IP fragments */
        if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
-               skb = ip_vs_gather_frags(skb);
+               skb = ip_vs_gather_frags(skb,
+                                        hooknum == NF_IP_LOCAL_IN ?
+                                        IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
                if (!skb)
                        return NF_STOLEN;
                *pskb = skb;
@@ -988,7 +973,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
 
        iph = skb->nh.iph;
        if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-               int related, verdict = ip_vs_in_icmp(pskb, &related);
+               int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
 
                if (related)
                        return verdict;
@@ -1026,16 +1011,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
 
        /* Check the server status */
        if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-               /* the destination server is not availabe */
+               /* the destination server is not available */
 
                if (sysctl_ip_vs_expire_nodest_conn) {
                        /* try to expire the connection immediately */
                        ip_vs_conn_expire_now(cp);
-               } else {
-                       /* don't restart its timer, and silently
-                          drop the packet. */
-                       __ip_vs_conn_put(cp);
                }
+               /* don't restart its timer, and silently
+                  drop the packet. */
+               __ip_vs_conn_put(cp);
                return NF_DROP;
        }
 
@@ -1083,7 +1067,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
        if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
                return NF_ACCEPT;
 
-       return ip_vs_in_icmp(pskb, &r);
+       return ip_vs_in_icmp(pskb, &r, hooknum);
 }