Rev 4663 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 4663 | Rev 4679 | ||
---|---|---|---|
Line 49... | Line 49... | ||
49 | #include <print.h> |
49 | #include <print.h> |
50 | #include <arch/sun4v/hypercall.h> |
50 | #include <arch/sun4v/hypercall.h> |
51 | #include <arch/sun4v/md.h> |
51 | #include <arch/sun4v/md.h> |
52 | #include <arch/sun4v/ipi.h> |
52 | #include <arch/sun4v/ipi.h> |
53 | #include <time/delay.h> |
53 | #include <time/delay.h> |
- | 54 | #include <arch/smp/sun4v/smp.h> |
|
54 | 55 | ||
55 | /** hypervisor code of the "running" state of the CPU */ |
56 | /** hypervisor code of the "running" state of the CPU */ |
56 | #define CPU_STATE_RUNNING 2 |
57 | #define CPU_STATE_RUNNING 2 |
57 | 58 | ||
58 | /** maximum possible number of processor cores */ |
59 | /** maximum possible number of processor cores */ |
Line 136... | Line 137... | ||
136 | 137 | ||
137 | ); |
138 | ); |
138 | } |
139 | } |
139 | #endif |
140 | #endif |
140 | 141 | ||
- | 142 | ||
- | 143 | /** |
|
- | 144 | * Proposes the optimal number of ready threads for each virtual processor |
|
- | 145 | * in the given processor core so that the processor core is as busy as the |
|
- | 146 | * average processor core. The proposed number of ready threads will be |
|
- | 147 | * stored to the proposed_nrdy variable of the cpu_arch_t structure. |
|
- | 148 | */ |
|
- | 149 | bool calculate_optimal_nrdy(exec_unit_t *exec_unit) { |
|
- | 150 | ||
- | 151 | /* calculate the number of threads the core will steal */ |
|
- | 152 | int avg = atomic_get(&nrdy) / exec_unit_count; |
|
- | 153 | int to_steal = avg - atomic_get(&(exec_units->nrdy)); |
|
- | 154 | if (to_steal < 0) { |
|
- | 155 | return true; |
|
- | 156 | } else if (to_steal == 0) { |
|
- | 157 | return false; |
|
- | 158 | } |
|
- | 159 | ||
- | 160 | /* initialize the proposals with the real numbers of ready threads */ |
|
- | 161 | unsigned int k; |
|
- | 162 | for (k = 0; k < exec_unit->strand_count; k++) { |
|
- | 163 | exec_units->cpus[k]->arch.proposed_nrdy = |
|
- | 164 | atomic_get(&(exec_unit->cpus[k]->nrdy)); |
|
- | 165 | } |
|
- | 166 | ||
- | 167 | /* distribute the threads to be stolen to the core's CPUs */ |
|
- | 168 | int j; |
|
- | 169 | for (j = to_steal; j > 0; j--) { |
|
- | 170 | unsigned int k; |
|
- | 171 | unsigned int least_busy = 0; |
|
- | 172 | unsigned int least_busy_nrdy = |
|
- | 173 | exec_unit->cpus[0]->arch.proposed_nrdy; |
|
- | 174 | ||
- | 175 | /* for each stolen thread, give it to the least busy CPU */ |
|
- | 176 | for (k = 0; k < exec_unit->strand_count; k++) { |
|
- | 177 | if (exec_unit->cpus[k]->arch.proposed_nrdy |
|
- | 178 | < least_busy_nrdy) { |
|
- | 179 | least_busy = k; |
|
- | 180 | least_busy_nrdy = |
|
- | 181 | exec_unit->cpus[k]->arch.proposed_nrdy; |
|
- | 182 | } |
|
- | 183 | } |
|
- | 184 | exec_unit->cpus[least_busy]->arch.proposed_nrdy++; |
|
- | 185 | } |
|
- | 186 | ||
- | 187 | return false; |
|
- | 188 | } |
|
- | 189 | ||
141 | /** |
190 | /** |
142 | * Finds out which execution units belong to particular CPUs. By execution unit |
191 | * Finds out which execution units belong to particular CPUs. By execution unit |
143 | * we mean the physical core the logical processor is backed by. Since each |
192 | * we mean the physical core the logical processor is backed by. Since each |
144 | * Niagara physical core has just one integer execution unit and we will |
193 | * Niagara physical core has just one integer execution unit and we will |
145 | * ignore other execution units than the integer ones, we will use the terms |
194 | * ignore other execution units than the integer ones, we will use the terms |
Line 236... | Line 285... | ||
236 | * create a new entry in array of all execution units |
285 | * create a new entry in array of all execution units |
237 | */ |
286 | */ |
238 | if (i == exec_unit_count) { |
287 | if (i == exec_unit_count) { |
239 | exec_units[i].exec_unit_id = exec_unit_id; |
288 | exec_units[i].exec_unit_id = exec_unit_id; |
240 | exec_units[i].strand_count = 0; |
289 | exec_units[i].strand_count = 0; |
- | 290 | atomic_set(&(exec_units[i].nrdy), 0); |
|
- | 291 | spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock"); |
|
241 | exec_unit_count++; |
292 | exec_unit_count++; |
242 | } |
293 | } |
243 | 294 | ||
244 | /* |
295 | /* |
245 | * remember the exec. unit and strand of the BSP |
296 | * remember the exec. unit and strand of the BSP |
Line 273... | Line 324... | ||
273 | if (exec_unit_assign_error) { |
324 | if (exec_unit_assign_error) { |
274 | bsp_exec_unit_index = 0; |
325 | bsp_exec_unit_index = 0; |
275 | exec_unit_count = 1; |
326 | exec_unit_count = 1; |
276 | exec_units[0].strand_count = cpu_count; |
327 | exec_units[0].strand_count = cpu_count; |
277 | exec_units[0].exec_unit_id = 1; |
328 | exec_units[0].exec_unit_id = 1; |
- | 329 | spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock"); |
|
- | 330 | atomic_set(&(exec_units[0].nrdy), 0); |
|
278 | max_core_strands = cpu_count; |
331 | max_core_strands = cpu_count; |
279 | 332 | ||
280 | /* browse CPUs again, assign them the fictional exec. unit */ |
333 | /* browse CPUs again, assign them the fictional exec. unit */ |
281 | node = md_get_root(); |
334 | node = md_get_root(); |
282 | unsigned int i = 0; |
335 | unsigned int i = 0; |
Line 342... | Line 395... | ||
342 | for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) { |
395 | for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) { |
343 | for (cur_core = 0; cur_core < exec_unit_count; cur_core++) { |
396 | for (cur_core = 0; cur_core < exec_unit_count; cur_core++) { |
344 | if (cur_core_strand > exec_units[cur_core].strand_count) |
397 | if (cur_core_strand > exec_units[cur_core].strand_count) |
345 | continue; |
398 | continue; |
346 | 399 | ||
- | 400 | cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]); |
|
- | 401 | atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy))); |
|
347 | cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
402 | cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
- | 403 | exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]); |
|
- | 404 | cur_cpu++; |
|
348 | } |
405 | } |
349 | } |
406 | } |
350 | } |
407 | } |
351 | 408 | ||
352 | /** |
409 | /** |