51,6 → 51,7 |
#include <arch/sun4v/md.h> |
#include <arch/sun4v/ipi.h> |
#include <time/delay.h> |
#include <arch/smp/sun4v/smp.h> |
|
/** hypervisor code of the "running" state of the CPU */ |
#define CPU_STATE_RUNNING 2 |
138,7 → 139,55 |
} |
#endif |
|
|
/** |
* Proposes the optimal number of ready threads for each virtual processor |
* in the given processor core so that the processor core is as busy as the |
* average processor core. The proposed number of ready threads will be |
* stored to the proposed_nrdy variable of the cpu_arch_t struture. |
*/ |
bool calculate_optimal_nrdy(exec_unit_t *exec_unit) { |
|
/* calculate the number of threads the core will steal */ |
int avg = atomic_get(&nrdy) / exec_unit_count; |
int to_steal = avg - atomic_get(&(exec_units->nrdy)); |
if (to_steal < 0) { |
return true; |
} else if (to_steal == 0) { |
return false; |
} |
|
/* initialize the proposals with the real numbers of ready threads */ |
unsigned int k; |
for (k = 0; k < exec_unit->strand_count; k++) { |
exec_units->cpus[k]->arch.proposed_nrdy = |
atomic_get(&(exec_unit->cpus[k]->nrdy)); |
} |
|
/* distribute the threads to be stolen to the core's CPUs */ |
int j; |
for (j = to_steal; j > 0; j--) { |
unsigned int k; |
unsigned int least_busy = 0; |
unsigned int least_busy_nrdy = |
exec_unit->cpus[0]->arch.proposed_nrdy; |
|
/* for each stolen thread, give it to the least busy CPU */ |
for (k = 0; k < exec_unit->strand_count; k++) { |
if (exec_unit->cpus[k]->arch.proposed_nrdy |
< least_busy_nrdy) { |
least_busy = k; |
least_busy_nrdy = |
exec_unit->cpus[k]->arch.proposed_nrdy; |
} |
} |
exec_unit->cpus[least_busy]->arch.proposed_nrdy++; |
} |
|
return false; |
} |
|
/** |
* Finds out which execution units belong to particular CPUs. By execution unit |
* we mean the physical core the logical processor is backed by. Since each |
* Niagara physical core has just one integer execution unit and we will |
238,6 → 287,8 |
if (i == exec_unit_count) { |
exec_units[i].exec_unit_id = exec_unit_id; |
exec_units[i].strand_count = 0; |
atomic_set(&(exec_units[i].nrdy), 0); |
spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock"); |
exec_unit_count++; |
} |
|
275,6 → 326,8 |
exec_unit_count = 1; |
exec_units[0].strand_count = cpu_count; |
exec_units[0].exec_unit_id = 1; |
spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock"); |
atomic_set(&(exec_units[0].nrdy), 0); |
max_core_strands = cpu_count; |
|
/* browse CPUs again, assign them the fictional exec. unit */ |
344,7 → 397,11 |
if (cur_core_strand > exec_units[cur_core].strand_count) |
continue; |
|
cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]); |
atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy))); |
cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]); |
cur_cpu++; |
} |
} |
} |