Rev 4663 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 4663 | Rev 4679 | ||
|---|---|---|---|
| Line 49... | Line 49... | ||
| 49 | #include <print.h> |
49 | #include <print.h> |
| 50 | #include <arch/sun4v/hypercall.h> |
50 | #include <arch/sun4v/hypercall.h> |
| 51 | #include <arch/sun4v/md.h> |
51 | #include <arch/sun4v/md.h> |
| 52 | #include <arch/sun4v/ipi.h> |
52 | #include <arch/sun4v/ipi.h> |
| 53 | #include <time/delay.h> |
53 | #include <time/delay.h> |
| - | 54 | #include <arch/smp/sun4v/smp.h> |
|
| 54 | 55 | ||
| 55 | /** hypervisor code of the "running" state of the CPU */ |
56 | /** hypervisor code of the "running" state of the CPU */ |
| 56 | #define CPU_STATE_RUNNING 2 |
57 | #define CPU_STATE_RUNNING 2 |
| 57 | 58 | ||
| 58 | /** maximum possible number of processor cores */ |
59 | /** maximum possible number of processor cores */ |
| Line 136... | Line 137... | ||
| 136 | 137 | ||
| 137 | ); |
138 | ); |
| 138 | } |
139 | } |
| 139 | #endif |
140 | #endif |
| 140 | 141 | ||
| - | 142 | ||
| - | 143 | /** |
|
| - | 144 | * Proposes the optimal number of ready threads for each virtual processor |
|
| - | 145 | * in the given processor core so that the processor core is as busy as the |
|
| - | 146 | * average processor core. The proposed number of ready threads will be |
|
| - | 147 | * stored to the proposed_nrdy variable of the cpu_arch_t struture. |
|
| - | 148 | */ |
|
| - | 149 | bool calculate_optimal_nrdy(exec_unit_t *exec_unit) { |
|
| - | 150 | ||
| - | 151 | /* calculate the number of threads the core will steal */ |
|
| - | 152 | int avg = atomic_get(&nrdy) / exec_unit_count; |
|
| - | 153 | int to_steal = avg - atomic_get(&(exec_units->nrdy)); |
|
| - | 154 | if (to_steal < 0) { |
|
| - | 155 | return true; |
|
| - | 156 | } else if (to_steal == 0) { |
|
| - | 157 | return false; |
|
| - | 158 | } |
|
| - | 159 | ||
| - | 160 | /* initialize the proposals with the real numbers of ready threads */ |
|
| - | 161 | unsigned int k; |
|
| - | 162 | for (k = 0; k < exec_unit->strand_count; k++) { |
|
| - | 163 | exec_units->cpus[k]->arch.proposed_nrdy = |
|
| - | 164 | atomic_get(&(exec_unit->cpus[k]->nrdy)); |
|
| - | 165 | } |
|
| - | 166 | ||
| - | 167 | /* distribute the threads to be stolen to the core's CPUs */ |
|
| - | 168 | int j; |
|
| - | 169 | for (j = to_steal; j > 0; j--) { |
|
| - | 170 | unsigned int k; |
|
| - | 171 | unsigned int least_busy = 0; |
|
| - | 172 | unsigned int least_busy_nrdy = |
|
| - | 173 | exec_unit->cpus[0]->arch.proposed_nrdy; |
|
| - | 174 | ||
| - | 175 | /* for each stolen thread, give it to the least busy CPU */ |
|
| - | 176 | for (k = 0; k < exec_unit->strand_count; k++) { |
|
| - | 177 | if (exec_unit->cpus[k]->arch.proposed_nrdy |
|
| - | 178 | < least_busy_nrdy) { |
|
| - | 179 | least_busy = k; |
|
| - | 180 | least_busy_nrdy = |
|
| - | 181 | exec_unit->cpus[k]->arch.proposed_nrdy; |
|
| - | 182 | } |
|
| - | 183 | } |
|
| - | 184 | exec_unit->cpus[least_busy]->arch.proposed_nrdy++; |
|
| - | 185 | } |
|
| - | 186 | ||
| - | 187 | return false; |
|
| - | 188 | } |
|
| - | 189 | ||
| 141 | /** |
190 | /** |
| 142 | * Finds out which execution units belong to particular CPUs. By execution unit |
191 | * Finds out which execution units belong to particular CPUs. By execution unit |
| 143 | * we mean the physical core the logical processor is backed by. Since each |
192 | * we mean the physical core the logical processor is backed by. Since each |
| 144 | * Niagara physical core has just one integer execution unit and we will |
193 | * Niagara physical core has just one integer execution unit and we will |
| 145 | * ignore other execution units than the integer ones, we will use the terms |
194 | * ignore other execution units than the integer ones, we will use the terms |
| Line 236... | Line 285... | ||
| 236 | * create a new entry in array of all execution units |
285 | * create a new entry in array of all execution units |
| 237 | */ |
286 | */ |
| 238 | if (i == exec_unit_count) { |
287 | if (i == exec_unit_count) { |
| 239 | exec_units[i].exec_unit_id = exec_unit_id; |
288 | exec_units[i].exec_unit_id = exec_unit_id; |
| 240 | exec_units[i].strand_count = 0; |
289 | exec_units[i].strand_count = 0; |
| - | 290 | atomic_set(&(exec_units[i].nrdy), 0); |
|
| - | 291 | spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock"); |
|
| 241 | exec_unit_count++; |
292 | exec_unit_count++; |
| 242 | } |
293 | } |
| 243 | 294 | ||
| 244 | /* |
295 | /* |
| 245 | * remember the exec. unit and strand of the BSP |
296 | * remember the exec. unit and strand of the BSP |
| Line 273... | Line 324... | ||
| 273 | if (exec_unit_assign_error) { |
324 | if (exec_unit_assign_error) { |
| 274 | bsp_exec_unit_index = 0; |
325 | bsp_exec_unit_index = 0; |
| 275 | exec_unit_count = 1; |
326 | exec_unit_count = 1; |
| 276 | exec_units[0].strand_count = cpu_count; |
327 | exec_units[0].strand_count = cpu_count; |
| 277 | exec_units[0].exec_unit_id = 1; |
328 | exec_units[0].exec_unit_id = 1; |
| - | 329 | spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock"); |
|
| - | 330 | atomic_set(&(exec_units[0].nrdy), 0); |
|
| 278 | max_core_strands = cpu_count; |
331 | max_core_strands = cpu_count; |
| 279 | 332 | ||
| 280 | /* browse CPUs again, assign them the fictional exec. unit */ |
333 | /* browse CPUs again, assign them the fictional exec. unit */ |
| 281 | node = md_get_root(); |
334 | node = md_get_root(); |
| 282 | unsigned int i = 0; |
335 | unsigned int i = 0; |
| Line 342... | Line 395... | ||
| 342 | for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) { |
395 | for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) { |
| 343 | for (cur_core = 0; cur_core < exec_unit_count; cur_core++) { |
396 | for (cur_core = 0; cur_core < exec_unit_count; cur_core++) { |
| 344 | if (cur_core_strand > exec_units[cur_core].strand_count) |
397 | if (cur_core_strand > exec_units[cur_core].strand_count) |
| 345 | continue; |
398 | continue; |
| 346 | 399 | ||
| - | 400 | cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]); |
|
| - | 401 | atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy))); |
|
| 347 | cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
402 | cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand]; |
| - | 403 | exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]); |
|
| - | 404 | cur_cpu++; |
|
| 348 | } |
405 | } |
| 349 | } |
406 | } |
| 350 | } |
407 | } |
| 351 | 408 | ||
| 352 | /** |
409 | /** |