Subversion Repositories HelenOS

Compare Revisions

Rev 4663 → Rev 4679

/branches/sparc/kernel/generic/include/cpu.h
47,7 → 47,7
*
* There is one structure like this for every processor.
*/
typedef struct {
typedef struct cpu {
SPINLOCK_DECLARE(lock);
 
tlb_shootdown_msg_t tlb_messages[TLB_MESSAGE_QUEUE_LEN];
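
Note on this hunk: giving the structure a tag (struct cpu) lets other headers refer to it through a forward declaration instead of including cpu.h; the sun4v header changed further below relies on exactly this (see its `struct cpu;` line). A minimal sketch of the pattern, with an illustrative type name not taken from the patch:

struct cpu;                        /* forward declaration; no #include needed */

typedef struct {
        struct cpu *strands[8];    /* pointers to an incomplete type are legal */
        unsigned int strand_count;
} core_info_t;                     /* illustrative name only */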
/branches/sparc/kernel/generic/src/proc/scheduler.c
61,6 → 61,7
#include <cpu.h>
#include <print.h>
#include <debug.h>
#include <arch/smp/sun4v/smp.h>
 
static void before_task_runs(void);
static void before_thread_runs(void);
220,6 → 221,8
 
atomic_dec(&CPU->nrdy);
atomic_dec(&nrdy);
if (CPU->arch.exec_unit)
atomic_dec(&(CPU->arch.exec_unit->nrdy));
r->n--;
 
/*
565,6 → 568,31
average = atomic_get(&nrdy) / config.cpu_active + 1;
count = average - atomic_get(&CPU->nrdy);
 
/* calculate the number of threads to be stolen from other exec. units */
spinlock_lock(&(CPU->arch.exec_unit->proposed_nrdy_lock));
bool eu_busy = calculate_optimal_nrdy(CPU->arch.exec_unit);
unsigned int count_other_eus = CPU->arch.proposed_nrdy
- atomic_get(&(CPU->nrdy));
spinlock_unlock(&(CPU->arch.exec_unit->proposed_nrdy_lock));
 
/*
* If the CPU's parent core is overloaded, do not do the load
* balancing, otherwise we would migrate threads which should be
* migrated to other cores and since a thread cannot be migrated
* multiple times, it would not be migrated to the other core
* in the future.
*/
if (eu_busy)
return;
 
/*
* Take the maximum - steal enough threads to satisfy both the need to
* have all virtual CPUs equally busy and the need to have all the
* cores equally busy.
*/
if (((int) count_other_eus) > count)
count = count_other_eus;
 
if (count <= 0)
goto satisfied;
 
622,6 → 650,8
atomic_dec(&cpu->nrdy);
atomic_dec(&nrdy);
if (cpu->arch.exec_unit)
atomic_dec(&(cpu->arch.exec_unit->nrdy));
 
r->n--;
list_remove(&t->rq_link);
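
The common pattern in the two scheduler.c hunks (and the thread.c hunk below) is a third level of ready-thread accounting: every site that already maintained the global nrdy and the per-CPU nrdy now also maintains the counter of the CPU's physical core. A sketch of the invariant as a single helper; the helper itself is hypothetical, not part of the patch:

/* Hypothetical helper: all three ready-thread counters move together. */
static void nrdy_adjust(cpu_t *cpu, bool inc)
{
        if (inc) {
                atomic_inc(&nrdy);                    /* system-wide */
                atomic_inc(&cpu->nrdy);               /* per virtual CPU */
                if (cpu->arch.exec_unit)              /* per physical core */
                        atomic_inc(&cpu->arch.exec_unit->nrdy);
        } else {
                atomic_dec(&nrdy);
                atomic_dec(&cpu->nrdy);
                if (cpu->arch.exec_unit)
                        atomic_dec(&cpu->arch.exec_unit->nrdy);
        }
}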
/branches/sparc/kernel/generic/src/proc/thread.c
265,6 → 265,8
atomic_inc(&nrdy);
avg = atomic_get(&nrdy) / config.cpu_active;
atomic_inc(&cpu->nrdy);
if (cpu->arch.exec_unit)
atomic_inc(&(cpu->arch.exec_unit->nrdy));
 
interrupts_restore(ipl);
}
/branches/sparc/kernel/generic/src/mm/tlb.c
152,7 → 152,7
CPU->tlb_active = 0;
spinlock_lock(&tlblock);
spinlock_unlock(&tlblock);
 
spinlock_lock(&CPU->lock);
ASSERT(CPU->tlb_messages_count <= TLB_MESSAGE_QUEUE_LEN);
 
/branches/sparc/kernel/arch/sparc64/include/sun4v/cpu.h
43,20 → 43,28
 
#ifndef __ASM__
 
struct cpu;
 
typedef struct {
uint64_t exec_unit_id;
uint8_t strand_count;
uint64_t cpuids[MAX_CORE_STRANDS];
struct cpu *cpus[MAX_CORE_STRANDS];
atomic_t nrdy;
SPINLOCK_DECLARE(proposed_nrdy_lock);
} exec_unit_t;
 
typedef struct cpu_arch {
uint64_t id; /**< virtual processor ID */
uint32_t clock_frequency; /**< Processor frequency in Hz. */
uint64_t next_tick_cmpr; /**< Next clock interrupt should be
generated when the TICK register
matches this value. */
exec_unit_t *exec_unit; /**< Physical core. */
unsigned long proposed_nrdy; /**< Proposed No. of ready threads
so that cores are equally balanced. */
} cpu_arch_t;
 
typedef struct {
uint64_t exec_unit_id;
uint8_t strand_count;
uint64_t cpuids[MAX_CORE_STRANDS];
} exec_unit_t;
 
#endif
 
#ifdef __ASM__
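
With these structures, each strand points back to its physical core (cpu_arch_t.exec_unit) and each core lists its strands (exec_unit_t.cpus), so sibling strands can be enumerated in either direction. A small sketch assuming only the definitions above; the function is illustrative, not part of the patch:

/* Illustrative only: sum the ready threads of all strands that share
 * a physical core with the given CPU. The kernel itself keeps this
 * sum incrementally in exec_unit->nrdy rather than recomputing it. */
static long core_nrdy_sum(cpu_t *cpu)
{
        exec_unit_t *eu = cpu->arch.exec_unit;
        long sum = 0;
        unsigned int k;

        for (k = 0; k < eu->strand_count; k++)
                sum += atomic_get(&eu->cpus[k]->nrdy);

        return sum;
}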
/branches/sparc/kernel/arch/sparc64/src/smp/sun4v/smp.c
51,6 → 51,7
#include <arch/sun4v/md.h>
#include <arch/sun4v/ipi.h>
#include <time/delay.h>
#include <arch/smp/sun4v/smp.h>
 
/** hypervisor code of the "running" state of the CPU */
#define CPU_STATE_RUNNING 2
138,7 → 139,55
}
#endif
 
 
/**
* Proposes the optimal number of ready threads for each virtual processor
* in the given processor core so that the processor core is as busy as the
* average processor core. The proposed number of ready threads will be
* stored in the proposed_nrdy variable of each strand's cpu_arch_t
* structure.
*
* @return true if the core is already busier than the average core (so no
* threads should be stolen for it), false otherwise.
*/
bool calculate_optimal_nrdy(exec_unit_t *exec_unit) {
 
/* calculate the number of threads the core will steal */
int avg = atomic_get(&nrdy) / exec_unit_count;
int to_steal = avg - atomic_get(&(exec_unit->nrdy));
if (to_steal < 0) {
return true;
} else if (to_steal == 0) {
return false;
}
 
/* initialize the proposals with the real numbers of ready threads */
unsigned int k;
for (k = 0; k < exec_unit->strand_count; k++) {
exec_unit->cpus[k]->arch.proposed_nrdy =
atomic_get(&(exec_unit->cpus[k]->nrdy));
}
 
/* distribute the threads to be stolen to the core's CPUs */
int j;
for (j = to_steal; j > 0; j--) {
unsigned int k;
unsigned int least_busy = 0;
unsigned int least_busy_nrdy =
exec_unit->cpus[0]->arch.proposed_nrdy;
 
/* for each stolen thread, give it to the least busy CPU */
for (k = 0; k < exec_unit->strand_count; k++) {
if (exec_unit->cpus[k]->arch.proposed_nrdy
< least_busy_nrdy) {
least_busy = k;
least_busy_nrdy =
exec_unit->cpus[k]->arch.proposed_nrdy;
}
}
exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
}
 
return false;
}
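
To illustrate the distribution loop with made-up numbers: suppose atomic_get(&nrdy) is 16 and exec_unit_count is 2, so avg = 8; if this core currently has nrdy = 5, then to_steal = 3. With initial per-strand proposals of 2, 2, 1, 0, the first stolen thread goes to the fourth strand (0 becomes 1), the second to the third strand (the comparison is strict, so on a tie the earlier strand keeps the slot), and the last again to the fourth, leaving the proposals balanced at 2, 2, 2, 2.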
 
/**
* Finds out which execution units belong to particular CPUs. By execution unit
* we mean the physical core the logical processor is backed by. Since each
* Niagara physical core has just one integer execution unit and we will
238,6 → 287,8
if (i == exec_unit_count) {
exec_units[i].exec_unit_id = exec_unit_id;
exec_units[i].strand_count = 0;
atomic_set(&(exec_units[i].nrdy), 0);
spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock");
exec_unit_count++;
}
 
275,6 → 326,8
exec_unit_count = 1;
exec_units[0].strand_count = cpu_count;
exec_units[0].exec_unit_id = 1;
spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock");
atomic_set(&(exec_units[0].nrdy), 0);
max_core_strands = cpu_count;
 
/* browse CPUs again, assign them the fictional exec. unit */
344,7 → 397,11
if (cur_core_strand > exec_units[cur_core].strand_count)
continue;
 
cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]);
atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy)));
cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]);
cur_cpu++;
}
}
}