*
* Changes:
* Paul `Rusty' Russell properly handle non-linear skbs
+ * Harald Welte don't use nfcache
*
*/
#ifdef CONFIG_IP_VS_DEBUG
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
-EXPORT_SYMBOL(check_for_ip_vs_out);
EXPORT_SYMBOL(ip_vs_make_skb_writable);
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
const struct sk_buff *skb,
- __u16 ports[2])
+ __be16 ports[2])
{
struct ip_vs_conn *cp = NULL;
struct iphdr *iph = skb->nh.iph;
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
- __u16 dport; /* destination port to forward */
- __u32 snet; /* source network of the client, after masking */
+ __be16 dport; /* destination port to forward */
+ __be32 snet; /* source network of the client, after masking */
/* Mask saddr with the netmask to adjust template granularity */
snet = iph->saddr & svc->netmask;
if (ports[1] == svc->port) {
/* Check if a template already exists */
if (svc->port != FTPPORT)
- ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+ ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, ports[1]);
else
- ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+ ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
iph->daddr,
ports[1],
dest->addr, dest->port,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
else
ct = ip_vs_conn_new(iph->protocol,
snet, 0,
iph->daddr, 0,
dest->addr, 0,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
return NULL;
* port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
*/
if (svc->fwmark)
- ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+ ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
htonl(svc->fwmark), 0);
else
- ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+ ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
snet, 0,
htonl(svc->fwmark), 0,
dest->addr, 0,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
else
ct = ip_vs_conn_new(iph->protocol,
snet, 0,
iph->daddr, 0,
dest->addr, 0,
- 0,
+ IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
return NULL;
struct ip_vs_conn *cp = NULL;
struct iphdr *iph = skb->nh.iph;
struct ip_vs_dest *dest;
- __u16 ports[2];
+ __be16 _ports[2], *pptr;
- if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, iph->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
return NULL;
/*
* Persistent service
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
- return ip_vs_sched_persist(svc, skb, ports);
+ return ip_vs_sched_persist(svc, skb, pptr);
/*
* Non-persistent service
*/
- if (!svc->fwmark && ports[1] != svc->port) {
+ if (!svc->fwmark && pptr[1] != svc->port) {
if (!svc->port)
IP_VS_ERR("Schedule: port zero only supported "
"in persistent services, "
* Create a connection entry.
*/
cp = ip_vs_conn_new(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1],
- dest->addr, dest->port?dest->port:ports[1],
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1],
+ dest->addr, dest->port?dest->port:pptr[1],
0,
dest);
if (cp == NULL)
return NULL;
IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
- "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+ "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
- __u16 ports[2];
+ __be16 _ports[2], *pptr;
struct iphdr *iph = skb->nh.iph;
- if (skb_copy_bits(skb, iph->ihl*4, ports, sizeof(ports)) < 0) {
+ pptr = skb_header_pointer(skb, iph->ihl*4,
+ sizeof(_ports), _ports);
+ if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP;
}
/* create a new connection entry */
IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
cp = ip_vs_conn_new(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1],
+ iph->saddr, pptr[0],
+ iph->daddr, pptr[1],
0, 0,
IP_VS_CONN_F_BYPASS,
NULL);
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
- if ((svc->port == FTPPORT) && (ports[1] != FTPPORT)) {
+ if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
ip_vs_service_put(svc);
return NF_ACCEPT;
}
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY))
+ if (!((*pskb)->ipvs_property))
return NF_ACCEPT;
-
/* The packet was sent from IPVS, exit this chain */
- (*okfn)(*pskb);
-
- return NF_STOLEN;
+ return NF_STOP;
}
-u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) /* return type becomes __sum16, the type the (u16) cast below used to hide */
{
- return (u16) csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+ return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); /* fold the checksum taken from offset up to skb->len, starting from an initial sum of 0; no cast needed any more */
}
static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb)
+ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- skb = ip_defrag(skb);
+ skb = ip_defrag(skb, user);
if (skb)
ip_send_check(skb->nh.iph);
return skb;
/* the TCP/UDP port */
if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
- __u16 *ports = (void *)ciph + ciph->ihl*4;
+ __be16 *ports = (void *)ciph + ciph->ihl*4;
if (inout)
ports[1] = cp->vport;
{
struct sk_buff *skb = *pskb;
struct iphdr *iph;
- struct icmphdr icmph;
- struct iphdr ciph; /* The ip header contained within the ICMP */
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, ihl, verdict;
/* reassemble IP fragments */
if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb);
+ skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb)
return NF_STOLEN;
*pskb = skb;
iph = skb->nh.iph;
offset = ihl = iph->ihl * 4;
- if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0)
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
return NF_DROP;
IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
- icmph.type, ntohs(icmp_id(&icmph)),
+ ic->type, ntohs(icmp_id(ic)),
NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
/*
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
- if ((icmph.type != ICMP_DEST_UNREACH) &&
- (icmph.type != ICMP_SOURCE_QUENCH) &&
- (icmph.type != ICMP_TIME_EXCEEDED)) {
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
*related = 0;
return NF_ACCEPT;
}
/* Now find the contained IP header */
- offset += sizeof(icmph);
- if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0)
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(ciph.protocol);
+ pp = ip_vs_proto_get(cih->protocol);
if (!pp)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
- offset += ciph.ihl * 4;
+ offset += cih->ihl * 4;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(skb, pp, cih, offset, 1);
if (!cp)
return NF_ACCEPT;
goto out;
}
- if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol)
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
if (!ip_vs_make_skb_writable(pskb, offset))
goto out;
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
- skb->nfcache |= NFC_IPVS_PROPERTY;
+ skb->ipvs_property = 1;
verdict = NF_ACCEPT;
out:
static inline int is_tcp_reset(const struct sk_buff *skb)
{
- struct tcphdr tcph;
+ struct tcphdr _tcph, *th; /* _tcph: local buffer handed to skb_header_pointer(); th: the header pointer it returns */
- if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) < 0)
+ th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ sizeof(_tcph), &_tcph); /* look for the TCP header at byte offset ihl * 4 of the packet */
+ if (th == NULL) /* header could not be obtained: report "not a reset" */
return 0;
- return tcph.rst;
+ return th->rst; /* result is the rst field of the TCP header */
}
/*
EnterFunction(11);
- if (skb->nfcache & NFC_IPVS_PROPERTY)
+ if (skb->ipvs_property)
return NF_ACCEPT;
- if (skb->ip_summed == CHECKSUM_HW) {
- if (skb_checksum_help(pskb, (out == NULL)))
- return NF_DROP;
- if (skb != *pskb)
- skb = *pskb;
- }
-
iph = skb->nh.iph;
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_out_icmp(pskb, &related);
/* reassemble IP fragments */
if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
- skb = ip_vs_gather_frags(skb);
+ skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb)
return NF_STOLEN;
iph = skb->nh.iph;
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP)) {
- __u16 ports[2];
+ __be16 _ports[2], *pptr;
- if (skb_copy_bits(skb, ihl, ports, sizeof(ports)) < 0)
+ pptr = skb_header_pointer(skb, ihl,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(iph->protocol,
- iph->saddr, ports[0])) {
+ iph->saddr, pptr[0])) {
/*
* Notify the real server: there is no
* existing entry if it is not RST
skb->nh.iph->saddr = cp->vaddr;
ip_send_check(skb->nh.iph);
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+ if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+ goto drop;
+ skb = *pskb;
+
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
ip_vs_conn_put(cp);
- skb->nfcache |= NFC_IPVS_PROPERTY;
+ skb->ipvs_property = 1;
LeaveFunction(11);
return NF_ACCEPT;
}
-/*
- * Check if the packet is for VS/NAT connections, then send it
- * immediately.
- * Called by ip_fw_compact to detect packets for VS/NAT before
- * they are changed by ipchains masquerading code.
- */
-unsigned int
-check_for_ip_vs_out(struct sk_buff **pskb, int (*okfn)(struct sk_buff *))
-{
- unsigned int ret;
-
- ret = ip_vs_out(NF_IP_FORWARD, pskb, NULL, NULL, NULL);
- if (ret != NF_ACCEPT) {
- return ret;
- } else {
- /* send the packet immediately if it is already mangled
- by ip_vs_out */
- if ((*pskb)->nfcache & NFC_IPVS_PROPERTY) {
- (*okfn)(*pskb);
- return NF_STOLEN;
- }
- }
- return NF_ACCEPT;
-}
-
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections,
* forward to the right destination host if relevant.
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
-static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
+static int
+ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
{
struct sk_buff *skb = *pskb;
struct iphdr *iph;
- struct icmphdr icmph;
- struct iphdr ciph; /* The ip header contained within the ICMP */
+ struct icmphdr _icmph, *ic;
+ struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, ihl, verdict;
/* reassemble IP fragments */
if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb);
+ skb = ip_vs_gather_frags(skb,
+ hooknum == NF_IP_LOCAL_IN ?
+ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
if (!skb)
return NF_STOLEN;
*pskb = skb;
iph = skb->nh.iph;
offset = ihl = iph->ihl * 4;
- if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0)
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
return NF_DROP;
IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
- icmph.type, ntohs(icmp_id(&icmph)),
+ ic->type, ntohs(icmp_id(ic)),
NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
/*
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
- if ((icmph.type != ICMP_DEST_UNREACH) &&
- (icmph.type != ICMP_SOURCE_QUENCH) &&
- (icmph.type != ICMP_TIME_EXCEEDED)) {
+ if ((ic->type != ICMP_DEST_UNREACH) &&
+ (ic->type != ICMP_SOURCE_QUENCH) &&
+ (ic->type != ICMP_TIME_EXCEEDED)) {
*related = 0;
return NF_ACCEPT;
}
/* Now find the contained IP header */
- offset += sizeof(icmph);
- if (skb_copy_bits(skb, offset, &ciph, sizeof(ciph)) < 0)
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(ciph.protocol);
+ pp = ip_vs_proto_get(cih->protocol);
if (!pp)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(ciph.frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
- offset += ciph.ihl * 4;
+ offset += cih->ihl * 4;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(skb, pp, cih, offset, 1);
if (!cp)
return NF_ACCEPT;
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
- if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol)
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
/* do not touch skb anymore */
return NF_ACCEPT;
}
- if (skb->ip_summed == CHECKSUM_HW) {
- if (skb_checksum_help(pskb, (out == NULL)))
- return NF_DROP;
- if (skb != *pskb)
- skb = *pskb;
- }
-
iph = skb->nh.iph;
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_in_icmp(pskb, &related);
+ int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
if (related)
return verdict;
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
- /* the destination server is not availabe */
+ /* the destination server is not available */
if (sysctl_ip_vs_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
- } else {
- /* don't restart its timer, and silently
- drop the packet. */
- __ip_vs_conn_put(cp);
}
+ /* don't restart its timer, and silently
+ drop the packet. */
+ __ip_vs_conn_put(cp);
return NF_DROP;
}
if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
- return ip_vs_in_icmp(pskb, &r);
+ return ip_vs_in_icmp(pskb, &r, hooknum);
}