/*	$OpenBSD: kern_sched.c,v 1.89 2023/09/06 02:09:58 cheloha Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/clockintr.h>
#include <sys/resourcevar.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process, we keep track
 * of cpus which are currently idle and of cpus which have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

int sched_smt;

/*
 * A few notes about cpu_switchto that is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process
 * including the pcb, pmap and setting curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state
 * being consistent. Therefore no locking is necessary in cpu_switchto
 * other than blocking interrupts during the context switch.
 */

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	spc->spc_itimer = clockintr_establish(ci, itimer_update);
	if (spc->spc_itimer == NULL)
		panic("%s: clockintr_establish itimer_update", __func__);
	spc->spc_profclock = clockintr_establish(ci, profclock);
	if (spc->spc_profclock == NULL)
		panic("%s: clockintr_establish profclock", __func__);
	spc->spc_roundrobin = clockintr_establish(ci, roundrobin);
	if (spc->spc_roundrobin == NULL)
		panic("%s: clockintr_establish roundrobin", __func__);

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it as specified. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

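/*
 * Per-cpu idle thread.  Besides spinning in cpu_idle_cycle() while the
 * local run queues are empty, it finishes off procs parked on
 * spc_deadproc, runs smr_idle(), and acknowledges SPCF_SHOULDHALT when
 * the cpu is being taken offline.
 */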
void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * First time we enter here, we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have idle clean up that list and move it to the reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *idle;
	int s;

	if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_cancel(spc->spc_itimer);
	}
	if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_cancel(spc->spc_profclock);
	}

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	KERNEL_ASSERT_LOCKED();
	__mp_release_all(&kernel_lock);
#endif

	SCHED_LOCK(s);
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

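/*
 * Each cpu has SCHED_NQS run queues plus a bitmask, spc_whichqs, with one
 * bit per queue.  A priority is mapped onto a queue with "prio >> 2", so
 * four adjacent priorities share a queue, and ffs(spc_whichqs) finds the
 * lowest-numbered non-empty queue, which holds the most urgent procs.
 */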
void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1U << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);

	if (prio < spc->spc_curpriority)
		need_resched(ci);
}

void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1U << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}

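/*
 * Pick the next proc for this cpu to run: the highest-priority proc on
 * the local run queues if there is one, otherwise try to steal one from
 * another cpu, and fall back to the per-cpu idle proc.  If this cpu is
 * being halted, its queued procs are first pushed off to other cpus.
 */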
struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		KASSERT(p);
		KASSERT(p->p_wchan == NULL);
		p->p_stat = SRUN;
		return (p);
	}
#endif

again:
	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL) {
			int s;
			/*
			 * We get here if someone decides to switch during
			 * boot before forking kthreads, bleh.
			 * This is kind of like a stupid idle loop.
			 */
#ifdef MULTIPROCESSOR
			__mp_unlock(&sched_lock);
#endif
			spl0();
			delay(10);
			SCHED_LOCK(s);
			goto again;
		}
		KASSERT(p);
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

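/*
 * Choose a cpu for a newly forked proc: prefer cpus that are idle with
 * nothing queued, and among the candidates take the one with the fewest
 * runnable procs.
 */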
struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, fall back to all cpus and pick the one with
	 * the fewest queued procs.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run) {
			choice = ci;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, fall back to all cpus and pick the cheapest one.
	 * (idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet).
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set; if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET,
	    best->p_p->ps_pid, CPU_INFO_UNIT(self));

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int.  Returns 0 for 0 (yes, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

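/*
 * In short, the cost computed below is:
 *
 *	(p_usrpri - spc_curpriority) * sched_cost_priority
 *	    + sched_cost_runnable	if the destination cpu is not idle,
 *	+ spc_nrun * sched_cost_runnable
 *					if the destination cpu has procs queued,
 *	+ sched_cost_runnable		if the destination cpu is the primary,
 *	- log2(resident pages) * sched_cost_resident
 *					if the proc is already on that cpu and
 *					its p_slptime is 0.
 *
 * Lower is better; sched_choosecpu() and sched_steal_proc() take the
 * candidate with the smallest cost.
 */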
int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The lower the priority of the destination cpu and the higher
	 * the priority of the proc, the more willing we are to move it.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(spc, PZERO, "schedstate");
			sleep_finish(0,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

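/*
 * sched_barrier() synchronizes with a given cpu: it queues a task that
 * pegs itself to that cpu and then signals a condition variable, so by
 * the time cond_wait() in sched_barrier() returns, the target cpu has
 * context switched to the barrier task at least once.
 */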
struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

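/*
 * A cpuset is a bitmask with one bit per cpu, indexed by CPU_INFO_UNIT():
 * cpu "num" lives in bit (num % 32) of word cs_set[num / 32].  Updates go
 * through atomic_setbits_int()/atomic_clearbits_int() so concurrent
 * add/del of different cpus in the same word do not lose bits.
 */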
void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1U << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

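/*
 * Note the argument order: the result is the set of cpus in "b" that are
 * not in "a" (b & ~a), not the set of cpus in "a" that are not in "b".
 */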
void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

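/*
 * Count the cpus in the set.  "n &= n - 1" clears the lowest set bit,
 * so the inner loop runs once per set bit.
 */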
int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
	if (err)
		return err;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif