From 43bd81b87d6440efd70ac30c1e957aba54a7fd8e Mon Sep 17 00:00:00 2001 From: Andy Bavier Date: Thu, 8 Sep 2005 03:10:24 +0000 Subject: [PATCH] Changes to support Andy's new CPU scheduler --- configs/kernel-2.6.12-i686-planetlab.config | 3 +- include/linux/vs_sched.h | 14 +++ include/linux/vserver/context.h | 3 +- include/linux/vserver/sched_def.h | 17 +++ kernel/sched.c | 49 +++++++- kernel/vserver/Kconfig | 7 ++ kernel/vserver/sched.c | 124 ++++++++++++++++++++ kernel/vserver/sched_init.h | 8 ++ 8 files changed, 221 insertions(+), 4 deletions(-) diff --git a/configs/kernel-2.6.12-i686-planetlab.config b/configs/kernel-2.6.12-i686-planetlab.config index 0e3ae6456..241cc9b18 100644 --- a/configs/kernel-2.6.12-i686-planetlab.config +++ b/configs/kernel-2.6.12-i686-planetlab.config @@ -1408,7 +1408,8 @@ CONFIG_VSERVER_LEGACY=y # CONFIG_VSERVER_NGNET is not set # CONFIG_VSERVER_PROC_SECURE is not set CONFIG_VSERVER_HARDCPU=y -# CONFIG_VSERVER_HARDCPU_IDLE is not set +CONFIG_VSERVER_HARDCPU_IDLE=y +CONFIG_VSERVER_ACB_SCHED=y # CONFIG_INOXID_NONE is not set # CONFIG_INOXID_UID16 is not set # CONFIG_INOXID_GID16 is not set diff --git a/include/linux/vs_sched.h b/include/linux/vs_sched.h index 42fca7de7..aa0ff7103 100644 --- a/include/linux/vs_sched.h +++ b/include/linux/vs_sched.h @@ -13,6 +13,18 @@ #define MAX_PRIO_BIAS 20 #define MIN_PRIO_BIAS -20 +#ifdef CONFIG_VSERVER_ACB_SCHED + +#define VX_INVALID_TICKS -1000000 +#define IS_BEST_EFFORT(vxi) (vx_info_flags(vxi, VXF_SCHED_SHARE, 0)) + +int vx_tokens_avail(struct vx_info *vxi); +void vx_consume_token(struct vx_info *vxi); +void vx_scheduler_tick(void); +void vx_advance_best_effort_ticks(int ticks); +void vx_advance_guaranteed_ticks(int ticks); + +#else static inline int vx_tokens_avail(struct vx_info *vxi) { @@ -24,6 +36,8 @@ static inline void vx_consume_token(struct vx_info *vxi) atomic_dec(&vxi->sched.tokens); } +#endif /* CONFIG_VSERVER_ACB_SCHED */ + static inline int vx_need_resched(struct task_struct *p) { #ifdef CONFIG_VSERVER_HARDCPU diff --git a/include/linux/vserver/context.h b/include/linux/vserver/context.h index 79f90537f..48ed9ca53 100644 --- a/include/linux/vserver/context.h +++ b/include/linux/vserver/context.h @@ -24,7 +24,8 @@ #define VXF_SCHED_HARD 0x00000100 #define VXF_SCHED_PRIO 0x00000200 #define VXF_SCHED_PAUSE 0x00000400 - +#define VXF_SCHED_SHARE 0x00000800 + #define VXF_VIRT_MEM 0x00010000 #define VXF_VIRT_UPTIME 0x00020000 #define VXF_VIRT_CPU 0x00040000 diff --git a/include/linux/vserver/sched_def.h b/include/linux/vserver/sched_def.h index 3a577616f..4d915a2e4 100644 --- a/include/linux/vserver/sched_def.h +++ b/include/linux/vserver/sched_def.h @@ -15,9 +15,26 @@ struct _vx_ticks { uint64_t unused[5]; /* cacheline ? */ }; +#ifdef CONFIG_VSERVER_ACB_SCHED +enum { +// Different scheduling classes + SCH_GUARANTEE = 0, + SCH_BEST_EFFORT = 1, + SCH_NUM_CLASSES = 2, +// States + SCH_UNINITIALIZED, + SCH_INITIALIZED, +}; +#endif + /* context sub struct */ struct _vx_sched { +#ifdef CONFIG_VSERVER_ACB_SCHED + uint64_t ticks[SCH_NUM_CLASSES]; + uint64_t last_ticks[SCH_NUM_CLASSES]; + int state[SCH_NUM_CLASSES]; +#endif atomic_t tokens; /* number of CPU tokens */ spinlock_t tokens_lock; /* lock for token bucket */ diff --git a/kernel/sched.c b/kernel/sched.c index daf0319ea..4e10dc63a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2453,6 +2453,10 @@ void scheduler_tick(void) rq->timestamp_last_tick = now; +#if defined(CONFIG_VSERVER_HARDCPU) && defined(CONFIG_VSERVER_ACB_SCHED) + vx_scheduler_tick(); +#endif + if (p == rq->idle) { if (wake_priority_sleeper(rq)) goto out; @@ -2711,6 +2715,10 @@ asmlinkage void __sched schedule(void) struct vx_info *vxi; #ifdef CONFIG_VSERVER_HARDCPU int maxidle = -HZ; +# ifdef CONFIG_VSERVER_ACB_SCHED + int min_guarantee_ticks = VX_INVALID_TICKS; + int min_best_effort_ticks = VX_INVALID_TICKS; +# endif #endif int cpu, idx; @@ -2781,6 +2789,9 @@ need_resched_nonpreemptible: } #ifdef CONFIG_VSERVER_HARDCPU +# ifdef CONFIG_VSERVER_ACB_SCHED +drain_hold_queue: +# endif if (!list_empty(&rq->hold_queue)) { struct list_head *l, *n; int ret; @@ -2800,6 +2811,17 @@ need_resched_nonpreemptible: } if ((ret < 0) && (maxidle < ret)) maxidle = ret; +# ifdef CONFIG_VSERVER_ACB_SCHED + if (ret < 0) { + if (IS_BEST_EFFORT(vxi)) { + if (min_best_effort_ticks < ret) + min_best_effort_ticks = ret; + } else { + if (min_guarantee_ticks < ret) + min_guarantee_ticks = ret; + } + } +# endif } } rq->idle_tokens = -maxidle; @@ -2860,8 +2882,19 @@ go_idle: int ret = vx_tokens_recalc(vxi); if (unlikely(ret <= 0)) { - if (ret && (rq->idle_tokens > -ret)) - rq->idle_tokens = -ret; + if (ret) { + if ((rq->idle_tokens > -ret)) + rq->idle_tokens = -ret; +# ifdef CONFIG_VSERVER_ACB_SCHED + if (IS_BEST_EFFORT(vxi)) { + if (min_best_effort_ticks < ret) + min_best_effort_ticks = ret; + } else { + if (min_guarantee_ticks < ret) + min_guarantee_ticks = ret; + } +# endif + } vx_hold_task(vxi, next, rq); goto pick_next; } @@ -2885,6 +2918,18 @@ go_idle: } next->activated = 0; switch_tasks: +#if defined(CONFIG_VSERVER_HARDCPU) && defined(CONFIG_VSERVER_ACB_SCHED) + if (next == rq->idle && !list_empty(&rq->hold_queue)) { + if (min_best_effort_ticks != VX_INVALID_TICKS) { + vx_advance_best_effort_ticks(-min_best_effort_ticks); + goto drain_hold_queue; + } + if (min_guarantee_ticks != VX_INVALID_TICKS) { + vx_advance_guaranteed_ticks(-min_guarantee_ticks); + goto drain_hold_queue; + } + } +#endif if (next == rq->idle) schedstat_inc(rq, sched_goidle); prefetch(next); diff --git a/kernel/vserver/Kconfig b/kernel/vserver/Kconfig index f8207296e..b4ee9b6c3 100644 --- a/kernel/vserver/Kconfig +++ b/kernel/vserver/Kconfig @@ -101,6 +101,13 @@ config VSERVER_HARDCPU_IDLE This might improve interactivity and latency, but will also marginally increase scheduling overhead. +config VSERVER_ACB_SCHED + bool "Guaranteed/fair share scheduler" + depends on VSERVER_HARDCPU + default n + help + Andy Bavier's experimental scheduler + choice prompt "Persistent Inode Context Tagging" default INOXID_UGID24 diff --git a/kernel/vserver/sched.c b/kernel/vserver/sched.c index 60f3c6ac5..e978e7a6b 100644 --- a/kernel/vserver/sched.c +++ b/kernel/vserver/sched.c @@ -19,6 +19,120 @@ #include #include +#ifdef CONFIG_VSERVER_ACB_SCHED + +#define TICK_SCALE 1000 +#define TICKS_PER_TOKEN(vxi) \ + ((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate) +#define CLASS(vxi) \ + (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE) +#define GLOBAL_TICKS(vxi) \ + (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks) + +uint64_t vx_guaranteed_ticks = 0; +uint64_t vx_best_effort_ticks = 0; + +void vx_tokens_set(struct vx_info *vxi, int tokens) { + int class = CLASS(vxi); + + vxi->sched.ticks[class] = GLOBAL_TICKS(vxi); + vxi->sched.ticks[class] -= tokens * TICKS_PER_TOKEN(vxi); +} + +void vx_scheduler_tick(void) { + vx_guaranteed_ticks += TICK_SCALE; + vx_best_effort_ticks += TICK_SCALE; +} + +void vx_advance_best_effort_ticks(int ticks) { + vx_best_effort_ticks += TICK_SCALE * ticks; +} + +void vx_advance_guaranteed_ticks(int ticks) { + vx_guaranteed_ticks += TICK_SCALE * ticks; +} + +int vx_tokens_avail(struct vx_info *vxi) +{ + uint64_t diff; + int tokens; + long rem; + int class = CLASS(vxi); + + if (vxi->sched.state[class] == SCH_UNINITIALIZED) { + /* Set the "real" token count */ + tokens = atomic_read(&vxi->sched.tokens); + vx_tokens_set(vxi, tokens); + vxi->sched.state[class] = SCH_INITIALIZED; + goto out; + } + + if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) { + tokens = atomic_read(&vxi->sched.tokens); + goto out; + } + + /* Use of fixed-point arithmetic in these calculations leads to + * some limitations. These should be made explicit. + */ + diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class]; + tokens = div_long_long_rem(diff, TICKS_PER_TOKEN(vxi), &rem); + + if (tokens > vxi->sched.tokens_max) { + vx_tokens_set(vxi, vxi->sched.tokens_max); + tokens = vxi->sched.tokens_max; + } + + atomic_set(&vxi->sched.tokens, tokens); + +out: + vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi); + return tokens; +} + +void vx_consume_token(struct vx_info *vxi) +{ + int class = CLASS(vxi); + + vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi); +} + +/* + * recalculate the context's scheduling tokens + * + * ret > 0 : number of tokens available + * ret = 0 : context is paused + * ret < 0 : number of jiffies until new tokens arrive + * + */ +int vx_tokens_recalc(struct vx_info *vxi) +{ + long delta, tokens; + + if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0)) + /* we are paused */ + return 0; + + tokens = vx_tokens_avail(vxi); + if (tokens <= 0) + vxi->vx_state |= VXS_ONHOLD; + if (tokens < vxi->sched.tokens_min) { + delta = tokens - vxi->sched.tokens_min; + /* enough tokens will be available in */ + return (delta * vxi->sched.interval) / vxi->sched.fill_rate; + } + + /* we have some tokens left */ + if (vx_info_state(vxi, VXS_ONHOLD) && + (tokens >= vxi->sched.tokens_min)) + vxi->vx_state &= ~VXS_ONHOLD; + if (vx_info_state(vxi, VXS_ONHOLD)) + tokens -= vxi->sched.tokens_min; + + return tokens; +} + +#else /* * recalculate the context's scheduling tokens @@ -81,6 +195,8 @@ int vx_tokens_recalc(struct vx_info *vxi) return tokens; } +#endif /* CONFIG_VSERVER_ACB_SCHED */ + /* * effective_prio - return the priority that is based on the static * priority but is modified by bonuses/penalties. @@ -159,6 +275,10 @@ int vc_set_sched_v2(uint32_t xid, void __user *data) if (vxi->sched.tokens_min > vxi->sched.tokens_max) vxi->sched.tokens_min = vxi->sched.tokens_max; +#ifdef CONFIG_VSERVER_ACB_SCHED + vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens)); +#endif + spin_unlock(&vxi->sched.tokens_lock); put_vx_info(vxi); return 0; @@ -211,6 +331,10 @@ int vc_set_sched(uint32_t xid, void __user *data) if (vxi->sched.priority_bias < MIN_PRIO_BIAS) vxi->sched.priority_bias = MIN_PRIO_BIAS; +#ifdef CONFIG_VSERVER_ACB_SCHED + vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens)); +#endif + spin_unlock(&vxi->sched.tokens_lock); put_vx_info(vxi); return 0; diff --git a/kernel/vserver/sched_init.h b/kernel/vserver/sched_init.h index 90d13960f..6724c286f 100644 --- a/kernel/vserver/sched_init.h +++ b/kernel/vserver/sched_init.h @@ -11,6 +11,14 @@ static inline void vx_info_init_sched(struct _vx_sched *sched) sched->jiffies = jiffies; sched->tokens_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_VSERVER_ACB_SCHED + /* We can't set the "real" token count here because we don't have + * access to the vx_info struct. Do it later... */ + for (i = 0; i < SCH_NUM_CLASSES; i++) { + sched->state[i] = SCH_UNINITIALIZED; + } +#endif + atomic_set(&sched->tokens, HZ >> 2); sched->cpus_allowed = CPU_MASK_ALL; sched->priority_bias = 0; -- 2.43.0