X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fxfrm%2Fxfrm_policy.c;h=6965931bd34efffc38a70f2c2167213d6a7e9172;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=ae62054a9fc4c22bb41678f2c8fd64ce87e7d427;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ae62054a9..6965931bd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -13,7 +13,6 @@ * */ -#include #include #include #include @@ -23,68 +22,103 @@ #include #include #include +#include #include #include +#include -DECLARE_MUTEX(xfrm_cfg_sem); -EXPORT_SYMBOL(xfrm_cfg_sem); +#include "xfrm_hash.h" + +DEFINE_MUTEX(xfrm_cfg_mutex); +EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list); +unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; -static kmem_cache_t *xfrm_dst_cache __read_mostly; +static struct kmem_cache *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; -static struct list_head xfrm_policy_gc_list = - LIST_HEAD_INIT(xfrm_policy_gc_list); +static HLIST_HEAD(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); +static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family); +static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo); + +static inline int +__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) +{ + return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && + (fl->proto == sel->proto || !sel->proto) && + (fl->oif == sel->ifindex || !sel->ifindex); +} + +static inline int +__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) +{ + return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && + (fl->proto == sel->proto || !sel->proto) && + (fl->oif == sel->ifindex || !sel->ifindex); +} + +int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, + unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_selector_match(sel, fl); + case AF_INET6: + return __xfrm6_selector_match(sel, fl); + } + return 0; +} int xfrm_register_type(struct xfrm_type *type, unsigned short family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); + struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; - write_lock(&typemap->lock); - if (likely(typemap->map[type->proto] == NULL)) - typemap->map[type->proto] = type; + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; else err = -EEXIST; - write_unlock(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); + xfrm_policy_unlock_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_type); int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); + struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; - write_lock(&typemap->lock); - if (unlikely(typemap->map[type->proto] != type)) + if (unlikely(typemap[type->proto] != type)) err = -ENOENT; else - typemap->map[type->proto] = NULL; - write_unlock(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); + typemap[type->proto] = NULL; + xfrm_policy_unlock_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -92,7 +126,7 @@ EXPORT_SYMBOL(xfrm_unregister_type); struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) { struct xfrm_policy_afinfo *afinfo; - struct xfrm_type_map *typemap; + struct xfrm_type **typemap; struct xfrm_type *type; int modload_attempted = 0; @@ -102,11 +136,9 @@ retry: return NULL; typemap = afinfo->type_map; - read_lock(&typemap->lock); - type = typemap->map[proto]; + type = typemap[proto]; if (unlikely(type && !try_module_get(type->owner))) type = NULL; - read_unlock(&typemap->lock); if (!type && !modload_attempted) { xfrm_policy_put_afinfo(afinfo); request_module("xfrm-type-%d-%d", @@ -142,6 +174,89 @@ void xfrm_put_type(struct xfrm_type *type) module_put(type->owner); } +int xfrm_register_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_policy_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -EEXIST; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == NULL)) { + modemap[mode->encap] = mode; + err = 0; + } + + xfrm_policy_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_mode); + +int xfrm_unregister_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_policy_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -ENOENT; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == mode)) { + modemap[mode->encap] = NULL; + err = 0; + } + + xfrm_policy_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_mode); + +struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct xfrm_mode *mode; + int modload_attempted = 0; + + if (unlikely(encap >= XFRM_MODE_MAX)) + return NULL; + +retry: + afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + + mode = afinfo->mode_map[encap]; + if (unlikely(mode && !try_module_get(mode->owner))) + mode = NULL; + if (!mode && !modload_attempted) { + xfrm_policy_put_afinfo(afinfo); + request_module("xfrm-mode-%d-%d", family, encap); + modload_attempted = 1; + goto retry; + } + + xfrm_policy_put_afinfo(afinfo); + return mode; +} + +void xfrm_put_mode(struct xfrm_mode *mode) +{ + module_put(mode->owner); +} + static inline unsigned long make_jiffies(long secs) { if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) @@ -203,7 +318,7 @@ static void xfrm_policy_timer(unsigned long data) } if (warn) - km_policy_expired(xp, dir, 0); + km_policy_expired(xp, dir, 0, 0); if (next != LONG_MAX && !mod_timer(&xp->timer, jiffies + make_jiffies(next))) xfrm_pol_hold(xp); @@ -216,7 +331,7 @@ out: expired: read_unlock(&xp->lock); if (!xfrm_policy_delete(xp, dir)) - km_policy_expired(xp, dir, 1); + km_policy_expired(xp, dir, 1, 0); xfrm_pol_put(xp); } @@ -229,12 +344,13 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) { struct xfrm_policy *policy; - policy = kmalloc(sizeof(struct xfrm_policy), gfp); + policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - memset(policy, 0, sizeof(struct xfrm_policy)); - atomic_set(&policy->refcnt, 1); + INIT_HLIST_NODE(&policy->bydst); + INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); + atomic_set(&policy->refcnt, 1); init_timer(&policy->timer); policy->timer.data = (unsigned long)policy; policy->timer.function = xfrm_policy_timer; @@ -277,20 +393,19 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) xfrm_pol_put(policy); } -static void xfrm_policy_gc_task(void *data) +static void xfrm_policy_gc_task(struct work_struct *work) { struct xfrm_policy *policy; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; spin_lock_bh(&xfrm_policy_gc_lock); - list_splice_init(&xfrm_policy_gc_list, &gc_list); + gc_list.first = xfrm_policy_gc_list.first; + INIT_HLIST_HEAD(&xfrm_policy_gc_list); spin_unlock_bh(&xfrm_policy_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - policy = list_entry(entry, struct xfrm_policy, list); + hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) xfrm_policy_gc_kill(policy); - } } /* Rule must be locked. Release descentant resources, announce @@ -312,70 +427,269 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) } spin_lock(&xfrm_policy_gc_lock); - list_add(&policy->list, &xfrm_policy_gc_list); + hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); spin_unlock(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } +struct xfrm_policy_hash { + struct hlist_head *table; + unsigned int hmask; +}; + +static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; +static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; +static struct hlist_head *xfrm_policy_byidx __read_mostly; +static unsigned int xfrm_idx_hmask __read_mostly; +static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; + +static inline unsigned int idx_hash(u32 index) +{ + return __idx_hash(index, xfrm_idx_hmask); +} + +static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __sel_hash(sel, family, hmask); + + return (hash == hmask + 1 ? + &xfrm_policy_inexact[dir] : + xfrm_policy_bydst[dir].table + hash); +} + +static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __addr_hash(daddr, saddr, family, hmask); + + return xfrm_policy_bydst[dir].table + hash; +} + +static void xfrm_dst_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + unsigned int h; + + h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, + pol->family, nhashmask); + hlist_add_head(&pol->bydst, ndsttable+h); + } +} + +static void xfrm_idx_hash_transfer(struct hlist_head *list, + struct hlist_head *nidxtable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + unsigned int h; + + h = __idx_hash(pol->index, nhashmask); + hlist_add_head(&pol->byidx, nidxtable+h); + } +} + +static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) +{ + return ((old_hmask + 1) << 1) - 1; +} + +static void xfrm_bydst_resize(int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *ndst = xfrm_hash_alloc(nsize); + int i; + + if (!ndst) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); + + xfrm_policy_bydst[dir].table = ndst; + xfrm_policy_bydst[dir].hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); +} + +static void xfrm_byidx_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *nidx = xfrm_hash_alloc(nsize); + int i; + + if (!nidx) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); + + xfrm_policy_byidx = nidx; + xfrm_idx_hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); +} + +static inline int xfrm_bydst_should_resize(int dir, int *total) +{ + unsigned int cnt = xfrm_policy_count[dir]; + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + + if (total) + *total += cnt; + + if ((hmask + 1) < xfrm_policy_hashmax && + cnt > hmask) + return 1; + + return 0; +} + +static inline int xfrm_byidx_should_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + + if ((hmask + 1) < xfrm_policy_hashmax && + total > hmask) + return 1; + + return 0; +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(struct work_struct *__unused) +{ + int dir, total; + + mutex_lock(&hash_resize_mutex); + + total = 0; + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + if (xfrm_bydst_should_resize(dir, &total)) + xfrm_bydst_resize(dir); + } + if (xfrm_byidx_should_resize(total)) + xfrm_byidx_resize(total); + + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); + /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(u8 type, int dir) { - u32 idx; - struct xfrm_policy *p; static u32 idx_generator; for (;;) { + struct hlist_node *entry; + struct hlist_head *list; + struct xfrm_policy *p; + u32 idx; + int found; + idx = (idx_generator | dir); idx_generator += 8; if (idx == 0) idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { - if (p->index == idx) + list = xfrm_policy_byidx + idx_hash(idx); + found = 0; + hlist_for_each_entry(p, entry, list, byidx) { + if (p->index == idx) { + found = 1; break; + } } - if (!p) + if (!found) return idx; } } +static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) +{ + u32 *p1 = (u32 *) s1; + u32 *p2 = (u32 *) s2; + int len = sizeof(struct xfrm_selector) / sizeof(u32); + int i; + + for (i = 0; i < len; i++) { + if (p1[i] != p2[i]) + return 1; + } + + return 0; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { - struct xfrm_policy *pol, **p; - struct xfrm_policy *delpol = NULL; - struct xfrm_policy **newpos = NULL; + struct xfrm_policy *pol; + struct xfrm_policy *delpol; + struct hlist_head *chain; + struct hlist_node *entry, *newpos; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && - xfrm_sec_ctx_match(pol->security, policy->security)) { + chain = policy_hash_bysel(&policy->selector, policy->family, dir); + delpol = NULL; + newpos = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (pol->type == policy->type && + !selector_cmp(&pol->selector, &policy->selector) && + xfrm_sec_ctx_match(pol->security, policy->security) && + !WARN_ON(delpol)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; } - *p = pol->next; delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - p = &pol->next; + newpos = &pol->bydst; continue; } - if (!newpos) - newpos = p; if (delpol) break; - p = &pol->next; } if (newpos) - p = newpos; + hlist_add_after(newpos, &policy->bydst); + else + hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - policy->next = *p; - *p = policy; + xfrm_policy_count[dir]++; atomic_inc(&flow_cache_genid); - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); + if (delpol) { + hlist_del(&delpol->bydst); + hlist_del(&delpol->byidx); + xfrm_policy_count[dir]--; + } + policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); + hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -384,10 +698,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) xfrm_policy_kill(delpol); + else if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; - for (policy = policy->next; policy; policy = policy->next) { + entry = &policy->bydst; + hlist_for_each_entry_continue(policy, entry, bydst) { struct dst_entry *dst; write_lock(&policy->lock); @@ -416,135 +733,292 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && - (xfrm_sec_ctx_match(ctx, pol->security))) { + chain = policy_hash_bysel(sel, sel->family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (pol->type == type && + !selector_cmp(sel, &pol->selector) && + xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if (pol->index == id) { + chain = xfrm_policy_byidx + idx_hash(id); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, byidx) { + if (pol->type == type && pol->index == id) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(void) +void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) { - struct xfrm_policy *xp; int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; + struct xfrm_policy *pol; + struct hlist_node *entry; + int i, killed; + + killed = 0; + again1: + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_kill(xp); + xfrm_audit_log(audit_info->loginuid, audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL); + + xfrm_policy_kill(pol); + killed++; write_lock_bh(&xfrm_policy_lock); + goto again1; } + + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + again2: + hlist_for_each_entry(pol, entry, + xfrm_policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + write_unlock_bh(&xfrm_policy_lock); + + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, + pol, NULL); + + xfrm_policy_kill(pol); + killed++; + + write_lock_bh(&xfrm_policy_lock); + goto again2; + } + } + + xfrm_policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *xp; - int dir; - int count = 0; - int error = 0; + struct xfrm_policy *pol, *last = NULL; + struct hlist_node *entry; + int dir, last_dir = 0, count, error; read_lock_bh(&xfrm_policy_lock); + count = 0; + for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + if (last) { + error = func(last, last_dir % XFRM_POLICY_MAX, + count, data); + if (error) + goto out; + } + last = pol; + last_dir = dir; count++; + } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type != type) + continue; + if (last) { + error = func(last, last_dir % XFRM_POLICY_MAX, + count, data); + if (error) + goto out; + } + last = pol; + last_dir = dir; + count++; + } + } } - if (count == 0) { error = -ENOENT; goto out; } - - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { - error = func(xp, dir%XFRM_POLICY_MAX, --count, data); - if (error) - goto out; - } - } - + error = func(last, last_dir % XFRM_POLICY_MAX, 0, data); out: read_unlock_bh(&xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); -/* Find policy to apply to this flow. */ - -static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +/* + * Find policy to apply to this flow. + * + * Returns 0 if policy found, else an -errno. + */ +static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, + u8 type, u16 family, int dir) { - struct xfrm_policy *pol; + struct xfrm_selector *sel = &pol->selector; + int match, ret = -ESRCH; - read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { - struct xfrm_selector *sel = &pol->selector; - int match; + if (pol->family != family || + pol->type != type) + return ret; - if (pol->family != family) - continue; + match = xfrm_selector_match(sel, fl, family); + if (match) + ret = security_xfrm_policy_lookup(pol, fl->secid, dir); - match = xfrm_selector_match(sel, fl, family); + return ret; +} - if (match) { - if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { - xfrm_pol_hold(pol); - break; +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) +{ + int err; + struct xfrm_policy *pol, *ret; + xfrm_address_t *daddr, *saddr; + struct hlist_node *entry; + struct hlist_head *chain; + u32 priority = ~0U; + + daddr = xfrm_flowi_daddr(fl, family); + saddr = xfrm_flowi_saddr(fl, family); + if (unlikely(!daddr || !saddr)) + return NULL; + + read_lock_bh(&xfrm_policy_lock); + chain = policy_hash_direct(daddr, saddr, family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; + } + } else { + ret = pol; + priority = ret->priority; + break; + } + } + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; } + } else if (pol->priority < priority) { + ret = pol; + break; } } + if (ret) + xfrm_pol_hold(ret); +fail: read_unlock_bh(&xfrm_policy_lock); + + return ret; +} + +static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) +{ + struct xfrm_policy *pol; + int err = 0; + +#ifdef CONFIG_XFRM_SUB_POLICY + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (IS_ERR(pol)) { + err = PTR_ERR(pol); + pol = NULL; + } + if (pol || err) + goto end; +#endif + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + if (IS_ERR(pol)) { + err = PTR_ERR(pol); + pol = NULL; + } +#ifdef CONFIG_XFRM_SUB_POLICY +end: +#endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; + return err; } static inline int policy_to_flow_dir(int dir) @@ -564,7 +1038,7 @@ static inline int policy_to_flow_dir(int dir) }; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) { struct xfrm_policy *pol; @@ -574,12 +1048,16 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc sk->sk_family); int err = 0; - if (match) - err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); - - if (match && !err) - xfrm_pol_hold(pol); - else + if (match) { + err = security_xfrm_policy_lookup(pol, fl->secid, + policy_to_flow_dir(dir)); + if (!err) + xfrm_pol_hold(pol); + else if (err == -ESRCH) + pol = NULL; + else + pol = ERR_PTR(err); + } else pol = NULL; } read_unlock_bh(&xfrm_policy_lock); @@ -588,24 +1066,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - pol->next = xfrm_policy_list[dir]; - xfrm_policy_list[dir] = pol; + struct hlist_head *chain = policy_hash_bysel(&pol->selector, + pol->family, dir); + + hlist_add_head(&pol->bydst, chain); + hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); + xfrm_policy_count[dir]++; xfrm_pol_hold(pol); + + if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **polp; + if (hlist_unhashed(&pol->bydst)) + return NULL; - for (polp = &xfrm_policy_list[dir]; - *polp != NULL; polp = &(*polp)->next) { - if (*polp == pol) { - *polp = pol->next; - return pol; - } - } - return NULL; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + + return pol; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) @@ -621,17 +1104,23 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) } return -ENOENT; } +EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) + return -EINVAL; +#endif + write_lock_bh(&xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -660,6 +1149,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; + newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&xfrm_policy_lock); @@ -683,17 +1173,32 @@ int __xfrm_sk_clone_policy(struct sock *sk) return 0; } +static int +xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->get_saddr(local, remote); + xfrm_policy_put_afinfo(afinfo); + return err; +} + /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) { int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + xfrm_address_t tmp; for (nx=0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; @@ -701,9 +1206,16 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode) { + if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; + family = tmpl->encap_family; + if (xfrm_addr_any(local, family)) { + error = xfrm_get_saddr(&tmp, remote, family); + if (error) + goto fail; + local = &tmp; + } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); @@ -731,6 +1243,45 @@ fail: return error; } +static int +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + struct xfrm_state *tp[XFRM_MAX_DEPTH]; + struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; + int cnx = 0; + int error; + int ret; + int i; + + for (i = 0; i < npols; i++) { + if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { + error = -ENOBUFS; + goto fail; + } + + ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); + if (ret < 0) { + error = ret; + goto fail; + } else + cnx += ret; + } + + /* found states are sorted for outbound processing */ + if (npols > 1) + xfrm_state_sort(xfrm, tpp, cnx, family); + + return cnx; + + fail: + for (cnx--; cnx>=0; cnx--) + xfrm_state_put(tpp[cnx]); + return error; + +} + /* Check that the bundle accepts the flow and its components are * still valid. */ @@ -777,6 +1328,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols; + int pol_dead; + int xfrm_nr; + int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; @@ -784,20 +1340,32 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - u32 sk_sid = security_sk_sid(sk, fl, dir); + restart: genid = atomic_read(&flow_cache_genid); policy = NULL; - if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); + for (pi = 0; pi < ARRAY_SIZE(pols); pi++) + pols[pi] = NULL; + npols = 0; + pol_dead = 0; + xfrm_nr = 0; + + if (sk && sk->sk_policy[1]) { + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + if (IS_ERR(policy)) + return PTR_ERR(policy); + } if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + if ((dst_orig->flags & DST_NOXFRM) || + !xfrm_policy_count[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, + policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); + if (IS_ERR(policy)) + return PTR_ERR(policy); } if (!policy) @@ -805,6 +1373,9 @@ restart: family = dst_orig->ops->family; policy->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = policy; + npols ++; + xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { case XFRM_POLICY_BLOCK: @@ -813,11 +1384,13 @@ restart: goto error; case XFRM_POLICY_ALLOW: +#ifndef CONFIG_XFRM_SUB_POLICY if (policy->xfrm_nr == 0) { /* Flow passes not transformed. */ xfrm_pol_put(policy); return 0; } +#endif /* Try to find matching bundle. * @@ -833,7 +1406,40 @@ restart: if (dst) break; - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (IS_ERR(pols[1])) { + err = PTR_ERR(pols[1]); + goto error; + } + if (pols[1]->action == XFRM_POLICY_BLOCK) { + err = -EPERM; + goto error; + } + npols ++; + xfrm_nr += pols[1]->xfrm_nr; + } + } + + /* + * Because neither flowi nor bundle information knows about + * transformation template size. On more than one policy usage + * we can realize whether all of them is bypass or not after + * they are searched. See above not-transformed bypass + * is surrounded by non-sub policy configuration, too. + */ + if (xfrm_nr == 0) { + /* Flow passes not transformed. */ + xfrm_pols_put(pols, npols); + return 0; + } + +#endif + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (unlikely(nx<0)) { err = nx; @@ -846,7 +1452,7 @@ restart: set_current_state(TASK_RUNNING); remove_wait_queue(&km_waitq, &wait); - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { err = -ERESTART; @@ -854,7 +1460,7 @@ restart: } if (nx == -EAGAIN || genid != atomic_read(&flow_cache_genid)) { - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); goto restart; } err = nx; @@ -864,7 +1470,7 @@ restart: } if (nx == 0) { /* Flow passes not transformed. */ - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; } @@ -878,8 +1484,14 @@ restart: goto error; } + for (pi = 0; pi < npols; pi++) { + read_lock_bh(&pols[pi]->lock); + pol_dead |= pols[pi]->dead; + read_unlock_bh(&pols[pi]->lock); + } + write_lock_bh(&policy->lock); - if (unlikely(policy->dead || stale_bundle(dst))) { + if (unlikely(pol_dead || stale_bundle(dst))) { /* Wow! While we worked on resolving, this * policy has gone. Retry. It is not paranoia, * we just cannot enlist new bundle to dead object. @@ -899,17 +1511,34 @@ restart: } *dst_p = dst; dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; error: dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); *dst_p = NULL; return err; } EXPORT_SYMBOL(xfrm_lookup); +static inline int +xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +{ + struct xfrm_state *x; + int err; + + if (!skb->sp || idx < 0 || idx >= skb->sp->len) + return 0; + x = skb->sp->xvec[idx]; + if (!x->type->reject) + return 0; + xfrm_state_hold(x); + err = x->type->reject(x, skb, fl); + xfrm_state_put(x); + return err; +} + /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must @@ -926,10 +1555,19 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - (tmpl->aalgos & (1<props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); + ((tmpl->aalgos & (1<props.aalgo)) || + !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && + !(x->props.mode != XFRM_MODE_TRANSPORT && + xfrm_state_addr_cmp(tmpl, x, family)); } +/* + * 0 or more than 0 is returned when validation is succeeded (either bypass + * because of optional transport mode, or next index of the mathced secpath + * state with the template. + * -1 is returned when no matching template is found. + * Otherwise "-2 - errored_index" is returned. + */ static inline int xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, unsigned short family) @@ -937,15 +1575,18 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, int idx = start; if (tmpl->optional) { - if (!tmpl->mode) + if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { - if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family)) + if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->x[idx].xvec->props.mode) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (start == -1) + start = -2-idx; break; + } } return start; } @@ -954,21 +1595,25 @@ int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); + err = security_xfrm_decode_session(skb, &fl->secid); xfrm_policy_put_afinfo(afinfo); - return 0; + return err; } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_tunnel(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { - if (sp->x[k].xvec->props.mode) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { + *idxp = k; return 1; + } } return 0; @@ -978,69 +1623,123 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols = 0; + int xfrm_nr; + int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); - u32 sk_sid; + int xerr_idx = -1; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); - sk_sid = security_sk_sid(sk, &fl, fl_dir); - /* First, check used SA against their selectors. */ if (skb->sp) { int i; for (i=skb->sp->len-1; i>=0; i--) { - struct sec_decap_state *xvec = &(skb->sp->x[i]); - if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) + struct xfrm_state *x = skb->sp->xvec[i]; + if (!xfrm_selector_match(&x->sel, &fl, family)) return 0; } } pol = NULL; - if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); + if (sk && sk->sk_policy[dir]) { + pol = xfrm_sk_policy_lookup(sk, dir, &fl); + if (IS_ERR(pol)) + return 0; + } if (!pol) - pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, + pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); - if (!pol) - return !skb->sp || !secpath_has_tunnel(skb->sp, 0); + if (IS_ERR(pol)) + return 0; + + if (!pol) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { + xfrm_secpath_reject(xerr_idx, skb, &fl); + return 0; + } + return 1; + } pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = pol; + npols ++; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + &fl, family, + XFRM_POLICY_IN); + if (pols[1]) { + if (IS_ERR(pols[1])) + return 0; + pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; + npols ++; + } + } +#endif + if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; + struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl **tpp = tp; + int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; + for (pi = 0; pi < npols; pi++) { + if (pols[pi] != pol && + pols[pi]->action != XFRM_POLICY_ALLOW) + goto reject; + if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) + goto reject_error; + for (i = 0; i < pols[pi]->xfrm_nr; i++) + tpp[ti++] = &pols[pi]->xfrm_vec[i]; + } + xfrm_nr = ti; + if (npols > 1) { + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + tpp = stp; + } + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); - if (k < 0) + for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(tpp[i], sp, k, family); + if (k < 0) { + if (k < -1) + /* "-2 - errored_index" returned */ + xerr_idx = -(2+k); goto reject; + } } - if (secpath_has_tunnel(sp, k)) + if (secpath_has_nontransport(sp, k, &xerr_idx)) goto reject; - xfrm_pol_put(pol); + xfrm_pols_put(pols, npols); return 1; } reject: - xfrm_pol_put(pol); + xfrm_secpath_reject(xerr_idx, skb, &fl); +reject_error: + xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); @@ -1056,18 +1755,39 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } EXPORT_SYMBOL(__xfrm_route_forward); +/* Optimize later using cookies and generation ids. */ + static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { - /* If it is marked obsolete, which is how we even get here, - * then we have purged it from the policy bundle list and we - * did that for a good reason. + /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete + * to "-1" to force all XFRM destinations to get validated by + * dst_ops->check on every use. We do this because when a + * normal route referenced by an XFRM dst is obsoleted we do + * not go looking around for all parent referencing XFRM dsts + * so that we can invalidate them. It is just too much work. + * Instead we make the checks here on every use. For example: + * + * XFRM dst A --> IPv4 dst X + * + * X is the "xdst->route" of A (X is also the "dst->path" of A + * in this example). If X is marked obsolete, "A" will not + * notice. That's what we are validating here via the + * stale_bundle() check. + * + * When a policy's bundle is pruned, we dst_free() the XFRM + * dst which causes it's ->obsolete field to be set to a + * positive non-zero integer. If an XFRM dst has been pruned + * like this, we want to force a new route lookup. */ + if (dst->obsolete < 0 && !stale_bundle(dst)) + return dst; + return NULL; } static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1097,33 +1817,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } +static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +{ + struct dst_entry *dst, **dstp; + + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = *gc_list_p; + *gc_list_p = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); +} + static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) { - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; + struct dst_entry *gc_list = NULL; + int dir; read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy *pol; + struct hlist_node *entry; + struct hlist_head *table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) + prune_one_bundle(pol, func, &gc_list); + + table = xfrm_policy_bydst[dir].table; + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) + prune_one_bundle(pol, func, &gc_list); } } read_unlock_bh(&xfrm_policy_lock); while (gc_list) { - dst = gc_list; + struct dst_entry *dst = gc_list; gc_list = dst->next; dst_free(dst); } @@ -1139,22 +1876,12 @@ static void __xfrm_garbage_collect(void) xfrm_prune_bundles(unused_bundle); } -int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(void) { xfrm_prune_bundles(stale_bundle); return 0; } -static int always_true(struct dst_entry *dst) -{ - return 1; -} - -void xfrm_flush_all_bundles(void) -{ - xfrm_prune_bundles(always_true); -} - void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -1182,7 +1909,8 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. */ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) +int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, + struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1199,8 +1927,17 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; + if (fl && pol && + !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl)) + return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (xdst->genid != dst->xfrm->genid) + return 0; + + if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) + return 0; mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { @@ -1243,6 +1980,122 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) EXPORT_SYMBOL(xfrm_bundle_ok); +#ifdef CONFIG_AUDITSYSCALL +/* Audit addition and deletion of SAs and ipsec policy */ + +void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, + struct xfrm_policy *xp, struct xfrm_state *x) +{ + + char *secctx; + u32 secctx_len; + struct xfrm_sec_ctx *sctx = NULL; + struct audit_buffer *audit_buf; + int family; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + + BUG_ON((type == AUDIT_MAC_IPSEC_ADDSA || + type == AUDIT_MAC_IPSEC_DELSA) && !x); + BUG_ON((type == AUDIT_MAC_IPSEC_ADDSPD || + type == AUDIT_MAC_IPSEC_DELSPD) && !xp); + + audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); + if (audit_buf == NULL) + return; + + switch(type) { + case AUDIT_MAC_IPSEC_ADDSA: + audit_log_format(audit_buf, "SAD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSA: + audit_log_format(audit_buf, "SAD delete: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_ADDSPD: + audit_log_format(audit_buf, "SPD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSPD: + audit_log_format(audit_buf, "SPD delete: auid=%u", auid); + break; + default: + return; + } + + if (sid != 0 && + security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) + audit_log_format(audit_buf, " subj=%s", secctx); + else + audit_log_task_context(audit_buf); + + if (xp) { + family = xp->selector.family; + if (xp->security) + sctx = xp->security; + } else { + family = x->props.family; + if (x->security) + sctx = x->security; + } + + if (sctx) + audit_log_format(audit_buf, + " sec_alg=%u sec_doi=%u sec_obj=%s", + sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str); + + switch(family) { + case AF_INET: + { + struct in_addr saddr, daddr; + if (xp) { + saddr.s_addr = xp->selector.saddr.a4; + daddr.s_addr = xp->selector.daddr.a4; + } else { + saddr.s_addr = x->props.saddr.a4; + daddr.s_addr = x->id.daddr.a4; + } + audit_log_format(audit_buf, + " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(saddr), NIPQUAD(daddr)); + } + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + if (xp) { + memcpy(&saddr6, xp->selector.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, xp->selector.daddr.a6, + sizeof(struct in6_addr)); + } else { + memcpy(&saddr6, x->props.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, x->id.daddr.a6, + sizeof(struct in6_addr)); + } + audit_log_format(audit_buf, + " src=" NIP6_FMT " dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } + + if (x) + audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s", + (unsigned long)ntohl(x->id.spi), + (unsigned long)ntohl(x->id.spi), + x->id.proto == IPPROTO_AH ? "AH" : + (x->id.proto == IPPROTO_ESP ? + "ESP" : "IPCOMP")); + + audit_log_format(audit_buf, " res=%u", result); + audit_log_end(audit_buf); +} + +EXPORT_SYMBOL(xfrm_audit_log); +#endif /* CONFIG_AUDITSYSCALL */ + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -1250,7 +2103,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock(&xfrm_policy_afinfo_lock); + write_lock_bh(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { @@ -1267,7 +2120,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) afinfo->garbage_collect = __xfrm_garbage_collect; xfrm_policy_afinfo[afinfo->family] = afinfo; } - write_unlock(&xfrm_policy_afinfo_lock); + write_unlock_bh(&xfrm_policy_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -1279,7 +2132,7 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock(&xfrm_policy_afinfo_lock); + write_lock_bh(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) err = -EINVAL; @@ -1293,7 +2146,7 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) afinfo->garbage_collect = NULL; } } - write_unlock(&xfrm_policy_afinfo_lock); + write_unlock_bh(&xfrm_policy_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); @@ -1305,17 +2158,31 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) return NULL; read_lock(&xfrm_policy_afinfo_lock); afinfo = xfrm_policy_afinfo[family]; - if (likely(afinfo != NULL)) - read_lock(&afinfo->lock); - read_unlock(&xfrm_policy_afinfo_lock); + if (unlikely(!afinfo)) + read_unlock(&xfrm_policy_afinfo_lock); return afinfo; } static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) { - if (unlikely(afinfo == NULL)) - return; - read_unlock(&afinfo->lock); + read_unlock(&xfrm_policy_afinfo_lock); +} + +static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family) +{ + struct xfrm_policy_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + write_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[family]; + if (unlikely(!afinfo)) + write_unlock_bh(&xfrm_policy_afinfo_lock); + return afinfo; +} + +static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + write_unlock_bh(&xfrm_policy_afinfo_lock); } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -1335,14 +2202,35 @@ static struct notifier_block xfrm_dev_notifier = { static void __init xfrm_policy_init(void) { + unsigned int hmask, sz; + int dir; + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!xfrm_dst_cache) - panic("XFRM: failed to allocate xfrm_dst_cache\n"); - INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); + hmask = 8 - 1; + sz = (hmask+1) * sizeof(struct hlist_head); + + xfrm_policy_byidx = xfrm_hash_alloc(sz); + xfrm_idx_hmask = hmask; + if (!xfrm_policy_byidx) + panic("XFRM: failed to allocate byidx hash\n"); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + + htab = &xfrm_policy_bydst[dir]; + htab->table = xfrm_hash_alloc(sz); + htab->hmask = hmask; + if (!htab->table) + panic("XFRM: failed to allocate bydst hash\n"); + } + + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); }