# CONFIG_VSERVER_NGNET is not set
# CONFIG_VSERVER_PROC_SECURE is not set
CONFIG_VSERVER_HARDCPU=y
-# CONFIG_VSERVER_HARDCPU_IDLE is not set
+CONFIG_VSERVER_HARDCPU_IDLE=y
+CONFIG_VSERVER_ACB_SCHED=y
# CONFIG_INOXID_NONE is not set
# CONFIG_INOXID_UID16 is not set
# CONFIG_INOXID_GID16 is not set
#define MAX_PRIO_BIAS 20
#define MIN_PRIO_BIAS -20
+#ifdef CONFIG_VSERVER_ACB_SCHED
+
+#define VX_INVALID_TICKS (-1000000)
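+/* contexts with the VXF_SCHED_SHARE flag set are scheduled best effort (fair share) */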
+#define IS_BEST_EFFORT(vxi) (vx_info_flags(vxi, VXF_SCHED_SHARE, 0))
+
+int vx_tokens_avail(struct vx_info *vxi);
+void vx_consume_token(struct vx_info *vxi);
+void vx_scheduler_tick(void);
+void vx_advance_best_effort_ticks(int ticks);
+void vx_advance_guaranteed_ticks(int ticks);
+
+#else
static inline int vx_tokens_avail(struct vx_info *vxi)
{
	return atomic_read(&vxi->sched.tokens);
}

static inline void vx_consume_token(struct vx_info *vxi)
{
	atomic_dec(&vxi->sched.tokens);
}
+#endif /* CONFIG_VSERVER_ACB_SCHED */
+
static inline int vx_need_resched(struct task_struct *p)
{
#ifdef CONFIG_VSERVER_HARDCPU
#define VXF_SCHED_HARD 0x00000100
#define VXF_SCHED_PRIO 0x00000200
#define VXF_SCHED_PAUSE 0x00000400
-
+#define VXF_SCHED_SHARE 0x00000800
+
#define VXF_VIRT_MEM 0x00010000
#define VXF_VIRT_UPTIME 0x00020000
#define VXF_VIRT_CPU 0x00040000
uint64_t unused[5]; /* cacheline ? */
};
+#ifdef CONFIG_VSERVER_ACB_SCHED
+enum {
+	/* the different scheduling classes */
+ SCH_GUARANTEE = 0,
+ SCH_BEST_EFFORT = 1,
+ SCH_NUM_CLASSES = 2,
+	/* states */
+ SCH_UNINITIALIZED,
+ SCH_INITIALIZED,
+};
+#endif
+
/* context sub struct */
struct _vx_sched {
+#ifdef CONFIG_VSERVER_ACB_SCHED
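+	/* per-class tick marks and lazy-init state for the ACB scheduler */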
+ uint64_t ticks[SCH_NUM_CLASSES];
+ uint64_t last_ticks[SCH_NUM_CLASSES];
+ int state[SCH_NUM_CLASSES];
+#endif
atomic_t tokens; /* number of CPU tokens */
spinlock_t tokens_lock; /* lock for token bucket */
rq->timestamp_last_tick = now;
+#if defined(CONFIG_VSERVER_HARDCPU) && defined(CONFIG_VSERVER_ACB_SCHED)
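+	/* advance the global per-class tick counters once per scheduler tick */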
+ vx_scheduler_tick();
+#endif
+
if (p == rq->idle) {
if (wake_priority_sleeper(rq))
goto out;
struct vx_info *vxi;
#ifdef CONFIG_VSERVER_HARDCPU
int maxidle = -HZ;
+# ifdef CONFIG_VSERVER_ACB_SCHED
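+	/* shortest remaining wait (least negative recalc result) seen per class */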
+ int min_guarantee_ticks = VX_INVALID_TICKS;
+ int min_best_effort_ticks = VX_INVALID_TICKS;
+# endif
#endif
int cpu, idx;
}
#ifdef CONFIG_VSERVER_HARDCPU
+# ifdef CONFIG_VSERVER_ACB_SCHED
+drain_hold_queue:
+# endif
if (!list_empty(&rq->hold_queue)) {
struct list_head *l, *n;
int ret;
}
if ((ret < 0) && (maxidle < ret))
maxidle = ret;
+# ifdef CONFIG_VSERVER_ACB_SCHED
+ if (ret < 0) {
+ if (IS_BEST_EFFORT(vxi)) {
+ if (min_best_effort_ticks < ret)
+ min_best_effort_ticks = ret;
+ } else {
+ if (min_guarantee_ticks < ret)
+ min_guarantee_ticks = ret;
+ }
+ }
+# endif
}
}
rq->idle_tokens = -maxidle;
int ret = vx_tokens_recalc(vxi);
if (unlikely(ret <= 0)) {
- if (ret && (rq->idle_tokens > -ret))
- rq->idle_tokens = -ret;
+ if (ret) {
+			if (rq->idle_tokens > -ret)
+ rq->idle_tokens = -ret;
+# ifdef CONFIG_VSERVER_ACB_SCHED
+ if (IS_BEST_EFFORT(vxi)) {
+ if (min_best_effort_ticks < ret)
+ min_best_effort_ticks = ret;
+ } else {
+ if (min_guarantee_ticks < ret)
+ min_guarantee_ticks = ret;
+ }
+# endif
+ }
vx_hold_task(vxi, next, rq);
goto pick_next;
}
}
next->activated = 0;
switch_tasks:
+#if defined(CONFIG_VSERVER_HARDCPU) && defined(CONFIG_VSERVER_ACB_SCHED)
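+	/*
+	 * If only held tasks remain and we are about to run the idle task,
+	 * fast-forward the global ticks by the smallest deficit seen above
+	 * (best effort first) and retry draining the hold queue, so CPU
+	 * time that would otherwise idle goes to held contexts.
+	 */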
+ if (next == rq->idle && !list_empty(&rq->hold_queue)) {
+ if (min_best_effort_ticks != VX_INVALID_TICKS) {
+ vx_advance_best_effort_ticks(-min_best_effort_ticks);
+ goto drain_hold_queue;
+ }
+ if (min_guarantee_ticks != VX_INVALID_TICKS) {
+ vx_advance_guaranteed_ticks(-min_guarantee_ticks);
+ goto drain_hold_queue;
+ }
+ }
+#endif
if (next == rq->idle)
schedstat_inc(rq, sched_goidle);
prefetch(next);
This might improve interactivity and latency, but
will also marginally increase scheduling overhead.
+config VSERVER_ACB_SCHED
+	bool "Guaranteed/fair share scheduler"
+	depends on VSERVER_HARDCPU
+	default n
+	help
+	  Andy Bavier's experimental scheduler, which extends the hard
+	  CPU limits with two scheduling classes: contexts with a
+	  guaranteed CPU share and best-effort (fair share) contexts.
+
choice
prompt "Persistent Inode Context Tagging"
default INOXID_UGID24
#include <asm/errno.h>
#include <asm/uaccess.h>
+#ifdef CONFIG_VSERVER_ACB_SCHED
+
+#define TICK_SCALE 1000
+#define TICKS_PER_TOKEN(vxi) \
+	(((vxi)->sched.interval * TICK_SCALE) / (vxi)->sched.fill_rate)
+#define CLASS(vxi) \
+ (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
+#define GLOBAL_TICKS(vxi) \
+ (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
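+
+/*
+ * Token state is kept as a per-class tick mark relative to a global tick
+ * counter, scaled by TICK_SCALE for some fractional precision.  One token
+ * corresponds to TICKS_PER_TOKEN(vxi) scaled ticks, e.g. with fill_rate = 1
+ * and interval = 4 a context accumulates one token per 4 * TICK_SCALE units
+ * of global tick advance.
+ */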
+
+uint64_t vx_guaranteed_ticks = 0;
+uint64_t vx_best_effort_ticks = 0;
+
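+/*
+ * Record the current token count as a tick mark: the further the mark lies
+ * behind the global tick counter, the more tokens the context has available.
+ */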
+void vx_tokens_set(struct vx_info *vxi, int tokens)
+{
+ int class = CLASS(vxi);
+
+ vxi->sched.ticks[class] = GLOBAL_TICKS(vxi);
+ vxi->sched.ticks[class] -= tokens * TICKS_PER_TOKEN(vxi);
+}
+
+void vx_scheduler_tick(void)
+{
+ vx_guaranteed_ticks += TICK_SCALE;
+ vx_best_effort_ticks += TICK_SCALE;
+}
+
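+/*
+ * Fast-forward a class's global tick counter; the scheduler uses this when
+ * the CPU would otherwise go idle while contexts sit on the hold queue.
+ */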
+void vx_advance_best_effort_ticks(int ticks)
+{
+ vx_best_effort_ticks += TICK_SCALE * ticks;
+}
+
+void vx_advance_guaranteed_ticks(int ticks)
+{
+ vx_guaranteed_ticks += TICK_SCALE * ticks;
+}
+
+int vx_tokens_avail(struct vx_info *vxi)
+{
+ uint64_t diff;
+ int tokens;
+ long rem;
+ int class = CLASS(vxi);
+
+ if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
+ /* Set the "real" token count */
+ tokens = atomic_read(&vxi->sched.tokens);
+ vx_tokens_set(vxi, tokens);
+ vxi->sched.state[class] = SCH_INITIALIZED;
+ goto out;
+ }
+
+ if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
+ tokens = atomic_read(&vxi->sched.tokens);
+ goto out;
+ }
+
+	/* Use of fixed-point arithmetic in these calculations leads to
+	 * some limitations: the division below truncates and discards the
+	 * remainder ('rem'), so partial tokens are not reflected in the
+	 * token count until a full TICKS_PER_TOKEN interval has elapsed.
+	 */
+ diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];
+ tokens = div_long_long_rem(diff, TICKS_PER_TOKEN(vxi), &rem);
+
+ if (tokens > vxi->sched.tokens_max) {
+ vx_tokens_set(vxi, vxi->sched.tokens_max);
+ tokens = vxi->sched.tokens_max;
+ }
+
+ atomic_set(&vxi->sched.tokens, tokens);
+
+out:
+ vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
+ return tokens;
+}
+
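+/*
+ * Consuming a token pushes the per-class tick mark forward by one token's
+ * worth of scaled ticks; the atomic counter itself is only refreshed lazily
+ * in vx_tokens_avail().
+ */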
+void vx_consume_token(struct vx_info *vxi)
+{
+ int class = CLASS(vxi);
+
+ vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
+}
+
+/*
+ * recalculate the context's scheduling tokens
+ *
+ * ret > 0 : number of tokens available
+ * ret = 0 : context is paused
+ * ret < 0 : number of jiffies until new tokens arrive
+ *
+ */
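+/*
+ * Example: with tokens_min = 2, interval = 4 and fill_rate = 1, a context
+ * that currently has no tokens gets delta = -2 and the function returns
+ * (-2 * 4) / 1 = -8, i.e. roughly 8 jiffies until it may run again.
+ */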
+int vx_tokens_recalc(struct vx_info *vxi)
+{
+ long delta, tokens;
+
+ if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
+ /* we are paused */
+ return 0;
+
+ tokens = vx_tokens_avail(vxi);
+ if (tokens <= 0)
+ vxi->vx_state |= VXS_ONHOLD;
+ if (tokens < vxi->sched.tokens_min) {
+ delta = tokens - vxi->sched.tokens_min;
+ /* enough tokens will be available in */
+ return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
+ }
+
+ /* we have some tokens left */
+ if (vx_info_state(vxi, VXS_ONHOLD) &&
+ (tokens >= vxi->sched.tokens_min))
+ vxi->vx_state &= ~VXS_ONHOLD;
+ if (vx_info_state(vxi, VXS_ONHOLD))
+ tokens -= vxi->sched.tokens_min;
+
+ return tokens;
+}
+
+#else
/*
* recalculate the context's scheduling tokens
return tokens;
}
+#endif /* CONFIG_VSERVER_ACB_SCHED */
+
/*
* effective_prio - return the priority that is based on the static
* priority but is modified by bonuses/penalties.
if (vxi->sched.tokens_min > vxi->sched.tokens_max)
vxi->sched.tokens_min = vxi->sched.tokens_max;
+#ifdef CONFIG_VSERVER_ACB_SCHED
+ vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
+#endif
+
spin_unlock(&vxi->sched.tokens_lock);
put_vx_info(vxi);
return 0;
if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
vxi->sched.priority_bias = MIN_PRIO_BIAS;
+#ifdef CONFIG_VSERVER_ACB_SCHED
+ vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
+#endif
+
spin_unlock(&vxi->sched.tokens_lock);
put_vx_info(vxi);
return 0;
sched->jiffies = jiffies;
sched->tokens_lock = SPIN_LOCK_UNLOCKED;
+#ifdef CONFIG_VSERVER_ACB_SCHED
+ /* We can't set the "real" token count here because we don't have
+ * access to the vx_info struct. Do it later... */
+ for (i = 0; i < SCH_NUM_CLASSES; i++) {
+ sched->state[i] = SCH_UNINITIALIZED;
+ }
+#endif
+
atomic_set(&sched->tokens, HZ >> 2);
sched->cpus_allowed = CPU_MASK_ALL;
sched->priority_bias = 0;