-#ifndef _VX_VS_SCHED_H
-#define _VX_VS_SCHED_H
-
-#ifndef CONFIG_VSERVER
-#warning config options missing
-#endif
+#ifndef _VS_SCHED_H
+#define _VS_SCHED_H
+#include "vserver/base.h"
+#include "vserver/context.h"
#include "vserver/sched.h"
#define MIN_PRIO_BIAS -20
-static inline int vx_tokens_avail(struct vx_info *vxi)
+#ifdef CONFIG_VSERVER_HARDCPU
+
+/*
+ * effective_prio - return the priority that is based on the static
+ * priority but is modified by bonuses/penalties.
+ *
+ * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
+ * into a -4 ... 0 ... +4 bonus/penalty range.
+ *
+ * Additionally, we scale another amount based on the number of
+ * CPU tokens currently held by the context, if the process is
+ * part of a context (and the appropriate SCHED flag is set).
+ * This ranges from -5 ... 0 ... +15, quadratically.
+ *
+ * So, the total bonus is -9 .. 0 .. +19
+ * We use ~50% of the full 0...39 priority range so that:
+ *
+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
+ * unless that context is far exceeding its CPU allocation.
+ *
+ * Both properties are important to certain workloads.
+ */
+static inline
+int vx_effective_vavavoom(struct _vx_sched_pc *sched_pc, int max_prio)
{
- return atomic_read(&vxi->sched.tokens);
+ int vavavoom, max;
+
+ /* lots of tokens = lots of vavavoom
+ * no tokens = no vavavoom */
+ if ((vavavoom = sched_pc->tokens) >= 0) {
+ max = sched_pc->tokens_max;
+ vavavoom = max - vavavoom;
+ max = max * max;
+ vavavoom = max_prio * VAVAVOOM_RATIO / 100
+ * (vavavoom*vavavoom - (max >> 2)) / max;
+ return vavavoom;
+ }
+ return 0;
}
-static inline void vx_consume_token(struct vx_info *vxi)
-{
- atomic_dec(&vxi->sched.tokens);
-}
-static inline int vx_need_resched(struct task_struct *p)
+static inline
+int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
{
-#ifdef CONFIG_VSERVER_HARDCPU
struct vx_info *vxi = p->vx_info;
-#endif
- int slice = --p->time_slice;
+ struct _vx_sched_pc *sched_pc;
-#ifdef CONFIG_VSERVER_HARDCPU
- if (vxi) {
- int tokens;
+ if (!vxi)
+ return prio;
- if ((tokens = vx_tokens_avail(vxi)) > 0)
- vx_consume_token(vxi);
- /* for tokens > 0, one token was consumed */
- if (tokens < 2)
- return 1;
+ sched_pc = &vx_cpu(vxi, sched_pc);
+ if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) {
+ int vavavoom = vx_effective_vavavoom(sched_pc, max_user);
+
+ sched_pc->vavavoom = vavavoom;
+ prio += vavavoom;
}
-#endif
- return (slice == 0);
+ prio += sched_pc->prio_bias;
+ return prio;
}
+#else /* !CONFIG_VSERVER_HARDCPU */
-static inline void vx_onhold_inc(struct vx_info *vxi)
+static inline
+int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
{
- int onhold = atomic_read(&vxi->cvirt.nr_onhold);
+ struct vx_info *vxi = p->vx_info;
- atomic_inc(&vxi->cvirt.nr_onhold);
- if (!onhold)
- vxi->cvirt.onhold_last = jiffies;
+ if (vxi)
+ prio += vx_cpu(vxi, sched_pc).prio_bias;
+ return prio;
}
-static inline void __vx_onhold_update(struct vx_info *vxi)
-{
- int cpu = smp_processor_id();
- uint32_t now = jiffies;
- uint32_t delta = now - vxi->cvirt.onhold_last;
+#endif /* CONFIG_VSERVER_HARDCPU */
+
- vxi->cvirt.onhold_last = now;
- vxi->sched.cpu[cpu].hold_ticks += delta;
+static inline void vx_account_user(struct vx_info *vxi,
+ cputime_t cputime, int nice)
+{
+ if (!vxi)
+ return;
+ vx_cpu(vxi, sched_pc).user_ticks += cputime;
}
-static inline void vx_onhold_dec(struct vx_info *vxi)
+static inline void vx_account_system(struct vx_info *vxi,
+ cputime_t cputime, int idle)
{
- if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
- __vx_onhold_update(vxi);
+ if (!vxi)
+ return;
+ vx_cpu(vxi, sched_pc).sys_ticks += cputime;
}
#else