X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fipvs%2Fip_vs_core.c;h=34257520a3a6b8132eb87af036382aea06f753f2;hb=refs%2Fheads%2Fvserver;hp=9f4b94f0768dadfb3b85439da9aa9bdd3476d997;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 9f4b94f07..34257520a 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -22,6 +22,7 @@ * * Changes: * Paul `Rusty' Russell properly handle non-linear skbs + * Harald Welte don't use nfcache * */ @@ -57,7 +58,6 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -EXPORT_SYMBOL(check_for_ip_vs_out); EXPORT_SYMBOL(ip_vs_make_skb_writable); @@ -209,14 +209,14 @@ int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, const struct sk_buff *skb, - __u16 ports[2]) + __be16 ports[2]) { struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; - __u16 dport; /* destination port to forward */ - __u32 snet; /* source network of the client, after masking */ + __be16 dport; /* destination port to forward */ + __be32 snet; /* source network of the client, after masking */ /* Mask saddr with the netmask to adjust template granularity */ snet = iph->saddr & svc->netmask; @@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, ports[1]); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, iph->daddr, ports[1], dest->addr, dest->port, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; @@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * port zero template: */ if (svc->fwmark) - ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, + ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, snet, 0, htonl(svc->fwmark), 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; @@ -383,21 +383,23 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; - __u16 ports[2]; + __be16 _ports[2], *pptr; - if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, iph->ihl*4, + sizeof(_ports), _ports); + if (pptr == NULL) return NULL; /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, ports); + return ip_vs_sched_persist(svc, skb, pptr); /* * Non-persistent service */ - if (!svc->fwmark && ports[1] != svc->port) { + if (!svc->fwmark && pptr[1] != svc->port) { if (!svc->port) IP_VS_ERR("Schedule: port zero only supported " "in persistent services, " @@ -415,16 +417,16 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Create a connection entry. */ cp = ip_vs_conn_new(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1], - dest->addr, dest->port?dest->port:ports[1], + iph->saddr, pptr[0], + iph->daddr, pptr[1], + dest->addr, dest->port?dest->port:pptr[1], 0, dest); if (cp == NULL) return NULL; IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " - "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n", + "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), NIPQUAD(cp->caddr), ntohs(cp->cport), NIPQUAD(cp->vaddr), ntohs(cp->vport), @@ -444,10 +446,12 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { - __u16 ports[2]; + __be16 _ports[2], *pptr; struct iphdr *iph = skb->nh.iph; - if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0) { + pptr = skb_header_pointer(skb, iph->ihl*4, + sizeof(_ports), _ports); + if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; } @@ -465,8 +469,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* create a new connection entry */ IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); cp = ip_vs_conn_new(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1], + iph->saddr, pptr[0], + iph->daddr, pptr[1], 0, 0, IP_VS_CONN_F_BYPASS, NULL); @@ -494,7 +498,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (ports[1] != FTPPORT)) { + if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { ip_vs_service_put(svc); return NF_ACCEPT; } @@ -526,24 +530,21 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY)) + if (!((*pskb)->ipvs_property)) return NF_ACCEPT; - /* The packet was sent from IPVS, exit this chain */ - (*okfn)(*pskb); - - return NF_STOLEN; + return NF_STOP; } -u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) +__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { - return (u16) csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); + return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } static inline struct sk_buff * -ip_vs_gather_frags(struct sk_buff *skb) +ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - skb = ip_defrag(skb); + skb = ip_defrag(skb, user); if (skb) ip_send_check(skb->nh.iph); return skb; @@ -575,7 +576,7 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, /* the TCP/UDP port */ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { - __u16 *ports = (void *)ciph + ciph->ihl*4; + __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) ports[1] = cp->vport; @@ -607,8 +608,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) { struct sk_buff *skb = *pskb; struct iphdr *iph; - struct icmphdr icmph; - struct iphdr ciph; /* The ip header contained within the ICMP */ + struct icmphdr _icmph, *ic; + struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; @@ -617,7 +618,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) /* reassemble IP fragments */ if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { - skb = ip_vs_gather_frags(skb); + skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); if (!skb) return NF_STOLEN; *pskb = skb; @@ -625,11 +626,12 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) iph = skb->nh.iph; offset = ihl = iph->ihl * 4; - if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0) + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", - icmph.type, ntohs(icmp_id(&icmph)), + ic->type, ntohs(icmp_id(ic)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* @@ -639,33 +641,34 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((icmph.type != ICMP_DEST_UNREACH) && - (icmph.type != ICMP_SOURCE_QUENCH) && - (icmph.type != ICMP_TIME_EXCEEDED)) { + if ((ic->type != ICMP_DEST_UNREACH) && + (ic->type != ICMP_SOURCE_QUENCH) && + (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ - offset += sizeof(icmph); - if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0) + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(ciph.protocol); + pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? */ - if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) && + if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); - offset += ciph.ihl * 4; + offset += cih->ihl * 4; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(skb, pp, cih, offset, 1); if (!cp) return NF_ACCEPT; @@ -685,7 +688,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) goto out; } - if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol) + if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); if (!ip_vs_make_skb_writable(pskb, offset)) goto out; @@ -696,7 +699,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; verdict = NF_ACCEPT; out: @@ -707,11 +710,13 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) static inline int is_tcp_reset(const struct sk_buff *skb) { - struct tcphdr tcph; + struct tcphdr _tcph, *th; - if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) < 0) + th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) return 0; - return tcph.rst; + return th->rst; } /* @@ -732,16 +737,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, EnterFunction(11); - if (skb->nfcache & NFC_IPVS_PROPERTY) + if (skb->ipvs_property) return NF_ACCEPT; - if (skb->ip_summed == CHECKSUM_HW) { - if (skb_checksum_help(pskb, (out == NULL))) - return NF_DROP; - if (skb != *pskb) - skb = *pskb; - } - iph = skb->nh.iph; if (unlikely(iph->protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_out_icmp(pskb, &related); @@ -759,7 +757,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, /* reassemble IP fragments */ if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { - skb = ip_vs_gather_frags(skb); + skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); if (!skb) return NF_STOLEN; iph = skb->nh.iph; @@ -777,12 +775,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP)) { - __u16 ports[2]; + __be16 _ports[2], *pptr; - if (skb_copy_bits(skb, ihl, ports, sizeof(ports)) < 0) + pptr = skb_header_pointer(skb, ihl, + sizeof(_ports), _ports); + if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_lookup_real_service(iph->protocol, - iph->saddr, ports[0])) { + iph->saddr, pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST @@ -813,13 +813,23 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, skb->nh.iph->saddr = cp->vaddr; ip_send_check(skb->nh.iph); + /* For policy routing, packets originating from this + * machine itself may be routed differently to packets + * passing through. We want this packet to be routed as + * if it came from this machine itself. So re-compute + * the routing information. + */ + if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + goto drop; + skb = *pskb; + IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); ip_vs_conn_put(cp); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; LeaveFunction(11); return NF_ACCEPT; @@ -831,43 +841,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, } -/* - * Check if the packet is for VS/NAT connections, then send it - * immediately. - * Called by ip_fw_compact to detect packets for VS/NAT before - * they are changed by ipchains masquerading code. - */ -unsigned int -check_for_ip_vs_out(struct sk_buff **pskb, int (*okfn)(struct sk_buff *)) -{ - unsigned int ret; - - ret = ip_vs_out(NF_IP_FORWARD, pskb, NULL, NULL, NULL); - if (ret != NF_ACCEPT) { - return ret; - } else { - /* send the packet immediately if it is already mangled - by ip_vs_out */ - if ((*pskb)->nfcache & NFC_IPVS_PROPERTY) { - (*okfn)(*pskb); - return NF_STOLEN; - } - } - return NF_ACCEPT; -} - /* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, * forward to the right destination host if relevant. * Currently handles error types - unreachable, quench, ttl exceeded. */ -static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) +static int +ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) { struct sk_buff *skb = *pskb; struct iphdr *iph; - struct icmphdr icmph; - struct iphdr ciph; /* The ip header contained within the ICMP */ + struct icmphdr _icmph, *ic; + struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; @@ -876,7 +862,9 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) /* reassemble IP fragments */ if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { - skb = ip_vs_gather_frags(skb); + skb = ip_vs_gather_frags(skb, + hooknum == NF_IP_LOCAL_IN ? + IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); if (!skb) return NF_STOLEN; *pskb = skb; @@ -884,11 +872,12 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) iph = skb->nh.iph; offset = ihl = iph->ihl * 4; - if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0) + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", - icmph.type, ntohs(icmp_id(&icmph)), + ic->type, ntohs(icmp_id(ic)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* @@ -898,33 +887,34 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((icmph.type != ICMP_DEST_UNREACH) && - (icmph.type != ICMP_SOURCE_QUENCH) && - (icmph.type != ICMP_TIME_EXCEEDED)) { + if ((ic->type != ICMP_DEST_UNREACH) && + (ic->type != ICMP_SOURCE_QUENCH) && + (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ - offset += sizeof(icmph); - if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0) + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(ciph.protocol); + pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? */ - if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) && + if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); - offset += ciph.ihl * 4; + offset += cih->ihl * 4; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(skb, pp, cih, offset, 1); if (!cp) return NF_ACCEPT; @@ -941,7 +931,7 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); - if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol) + if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); /* do not touch skb anymore */ @@ -981,16 +971,9 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, return NF_ACCEPT; } - if (skb->ip_summed == CHECKSUM_HW) { - if (skb_checksum_help(pskb, (out == NULL))) - return NF_DROP; - if (skb != *pskb) - skb = *pskb; - } - iph = skb->nh.iph; if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_in_icmp(pskb, &related); + int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); if (related) return verdict; @@ -1028,16 +1011,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { - /* the destination server is not availabe */ + /* the destination server is not available */ if (sysctl_ip_vs_expire_nodest_conn) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); - } else { - /* don't restart its timer, and silently - drop the packet. */ - __ip_vs_conn_put(cp); } + /* don't restart its timer, and silently + drop the packet. */ + __ip_vs_conn_put(cp); return NF_DROP; } @@ -1085,7 +1067,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP) return NF_ACCEPT; - return ip_vs_in_icmp(pskb, &r); + return ip_vs_in_icmp(pskb, &r, hooknum); }