/branches/sparc/kernel/arch/sparc64/include/sun4v/cpu.h |
---|
43,28 → 43,20 |
#ifndef __ASM__ |
struct cpu; |
/** Description of a physical processor core (execution unit) and its strands. */
typedef struct {
	uint64_t exec_unit_id;			/**< ID of the physical core */
	uint8_t strand_count;			/**< number of hardware strands
						     (virtual CPUs) on the core */
	uint64_t cpuids[MAX_CORE_STRANDS];	/**< virtual processor IDs of
						     the core's strands */
	struct cpu *cpus[MAX_CORE_STRANDS];	/**< cpu structures of the
						     core's strands */
	atomic_t nrdy;				/**< ready threads aggregated over
						     the core's strands — presumably
						     kept in sync by the scheduler;
						     verify against callers */
	SPINLOCK_DECLARE(proposed_nrdy_lock);	/**< serializes computation of the
						     strands' proposed_nrdy values */
} exec_unit_t;
/** Architecture-specific (sun4v) per-CPU data. */
typedef struct cpu_arch {
	uint64_t id;			/**< virtual processor ID */
	uint32_t clock_frequency;	/**< Processor frequency in Hz. */
	uint64_t next_tick_cmpr;	/**< Next clock interrupt should be
					     generated when the TICK register
					     matches this value. */
	exec_unit_t *exec_unit;		/**< Physical core. */
	unsigned long proposed_nrdy;	/**< Proposed No. of ready threads
					     so that cores are equally balanced. */
} cpu_arch_t;
typedef struct { |
uint64_t exec_unit_id; |
uint8_t strand_count; |
uint64_t cpuids[MAX_CORE_STRANDS]; |
} exec_unit_t; |
#endif |
#ifdef __ASM__ |
/branches/sparc/kernel/arch/sparc64/src/smp/sun4v/smp.c |
---|
51,7 → 51,6 |
#include <arch/sun4v/md.h> |
#include <arch/sun4v/ipi.h> |
#include <time/delay.h> |
#include <arch/smp/sun4v/smp.h> |
/** hypervisor code of the "running" state of the CPU */ |
#define CPU_STATE_RUNNING 2 |
139,55 → 138,7 |
} |
#endif |
/** |
* Proposes the optimal number of ready threads for each virtual processor |
* in the given processor core so that the processor core is as busy as the |
* average processor core. The proposed number of ready threads will be |
* stored to the proposed_nrdy variable of the cpu_arch_t struture. |
*/ |
/**
 * Proposes the optimal number of ready threads for each virtual processor
 * in the given processor core so that the processor core is as busy as the
 * average processor core. The proposed number of ready threads will be
 * stored to the proposed_nrdy variable of the cpu_arch_t structure.
 *
 * Caller is expected to hold the core's proposed_nrdy_lock.
 *
 * @param exec_unit	physical core whose strands' proposed_nrdy
 *			values are to be recomputed
 *
 * @return		true if the core is busier than average (nothing
 *			should be stolen from other cores), false otherwise
 */
bool calculate_optimal_nrdy(exec_unit_t *exec_unit) {
	/* calculate the number of threads the core will steal */
	int avg = atomic_get(&nrdy) / exec_unit_count;
	/*
	 * Fix: read the passed core's counter; the original read
	 * exec_units->nrdy, i.e. always element 0 of the global array.
	 */
	int to_steal = avg - atomic_get(&(exec_unit->nrdy));
	if (to_steal < 0) {
		return true;
	} else if (to_steal == 0) {
		return false;
	}

	/* initialize the proposals with the real numbers of ready threads */
	unsigned int k;
	for (k = 0; k < exec_unit->strand_count; k++) {
		/*
		 * Fix: update this core's strands; the original wrote to
		 * exec_units->cpus[k], i.e. the first core's strands.
		 */
		exec_unit->cpus[k]->arch.proposed_nrdy =
		    atomic_get(&(exec_unit->cpus[k]->nrdy));
	}

	/* distribute the threads to be stolen to the core's CPUs */
	int j;
	for (j = to_steal; j > 0; j--) {
		unsigned int least_busy = 0;
		unsigned int least_busy_nrdy =
		    exec_unit->cpus[0]->arch.proposed_nrdy;

		/* for each stolen thread, give it to the least busy CPU */
		for (k = 0; k < exec_unit->strand_count; k++) {
			if (exec_unit->cpus[k]->arch.proposed_nrdy
			    < least_busy_nrdy) {
				least_busy = k;
				least_busy_nrdy =
				    exec_unit->cpus[k]->arch.proposed_nrdy;
			}
		}
		exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
	}

	return false;
}
/** |
* Finds out which execution units belong to particular CPUs. By execution unit |
* we mean the physical core the logical processor is backed by. Since each |
* Niagara physical core has just one integer execution unit and we will |
287,8 → 238,6 |
if (i == exec_unit_count) { |
exec_units[i].exec_unit_id = exec_unit_id; |
exec_units[i].strand_count = 0; |
atomic_set(&(exec_units[i].nrdy), 0); |
spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock"); |
exec_unit_count++; |
} |
326,8 → 275,6 |
exec_unit_count = 1; |
exec_units[0].strand_count = cpu_count; |
exec_units[0].exec_unit_id = 1; |
spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock"); |
atomic_set(&(exec_units[0].nrdy), 0); |
max_core_strands = cpu_count; |
/* browse CPUs again, assign them the fictional exec. unit */ |
397,11 → 344,7 |
if (cur_core_strand > exec_units[cur_core].strand_count) |
continue; |
cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]); |
atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy))); |
cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]); |
cur_cpu++; |
cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
} |
} |
} |
/branches/sparc/kernel/generic/src/mm/tlb.c |
---|
152,7 → 152,7 |
CPU->tlb_active = 0; |
spinlock_lock(&tlblock); |
spinlock_unlock(&tlblock); |
spinlock_lock(&CPU->lock); |
ASSERT(CPU->tlb_messages_count <= TLB_MESSAGE_QUEUE_LEN); |
/branches/sparc/kernel/generic/src/proc/scheduler.c |
---|
61,7 → 61,6 |
#include <cpu.h> |
#include <print.h> |
#include <debug.h> |
#include <arch/smp/sun4v/smp.h> |
static void before_task_runs(void); |
static void before_thread_runs(void); |
221,8 → 220,6 |
atomic_dec(&CPU->nrdy); |
atomic_dec(&nrdy); |
if (CPU->arch.exec_unit) |
atomic_dec(&(CPU->arch.exec_unit->nrdy)); |
r->n--; |
/* |
568,31 → 565,6 |
average = atomic_get(&nrdy) / config.cpu_active + 1; |
count = average - atomic_get(&CPU->nrdy); |
/* calculate number of threads to be stolen from other exec. units */ |
spinlock_lock(&(CPU->arch.exec_unit->proposed_nrdy_lock)); |
bool eu_busy = calculate_optimal_nrdy(CPU->arch.exec_unit); |
unsigned int count_other_eus = CPU->arch.proposed_nrdy |
- atomic_get(&(CPU->nrdy)); |
spinlock_unlock(&(CPU->arch.exec_unit->proposed_nrdy_lock)); |
/* |
* If the CPU's parent core is overloaded, do not do the load |
* balancing, otherwise we would migrate threads which should be |
* migrated to other cores and since a thread cannot be migrated |
* multiple times, it would not be migrated to the other core |
* in the future. |
*/ |
if (eu_busy) |
return; |
/* |
* get the maximum - stole enough threads to satisfy both the need to |
* have all virtual CPUs equally busy and the need to have all the |
* cores equally busy |
*/ |
if (((int) count_other_eus) > count) |
count = count_other_eus; |
if (count <= 0) |
goto satisfied; |
650,8 → 622,6 |
atomic_dec(&cpu->nrdy); |
atomic_dec(&nrdy); |
if (cpu->arch.exec_unit) |
atomic_dec(&(cpu->arch.exec_unit->nrdy)); |
r->n--; |
list_remove(&t->rq_link); |
/branches/sparc/kernel/generic/src/proc/thread.c |
---|
265,8 → 265,6 |
atomic_inc(&nrdy); |
avg = atomic_get(&nrdy) / config.cpu_active; |
atomic_inc(&cpu->nrdy); |
if (cpu->arch.exec_unit) |
atomic_inc(&(cpu->arch.exec_unit->nrdy)); |
interrupts_restore(ipl); |
} |
/branches/sparc/kernel/generic/include/cpu.h |
---|
47,7 → 47,7 |
* |
* There is one structure like this for every processor. |
*/ |
typedef struct cpu { |
typedef struct { |
SPINLOCK_DECLARE(lock); |
tlb_shootdown_msg_t tlb_messages[TLB_MESSAGE_QUEUE_LEN]; |