Subversion Repositories HelenOS


Diff of Rev 4663 → Rev 4679
Line 49 (Rev 4663) ... Line 49 (Rev 4679)

 #include <print.h>
 #include <arch/sun4v/hypercall.h>
 #include <arch/sun4v/md.h>
 #include <arch/sun4v/ipi.h>
 #include <time/delay.h>
+#include <arch/smp/sun4v/smp.h>
 
 /** hypervisor code of the "running" state of the CPU */
 #define CPU_STATE_RUNNING   2
 
 /** maximum possible number of processor cores */
Line 136 (Rev 4663) ... Line 137 (Rev 4679)

 
         );
 }
 #endif
 
+
+/**
+ * Proposes the optimal number of ready threads for each virtual processor
+ * in the given processor core so that the processor core is as busy as the
+ * average processor core. The proposed number of ready threads is stored
+ * in the proposed_nrdy variable of the cpu_arch_t structure. Returns true
+ * iff the core already runs more ready threads than the average core (and
+ * hence has nothing to steal), false otherwise.
+ */
+bool calculate_optimal_nrdy(exec_unit_t *exec_unit) {
+
+    /* calculate the number of threads the core will steal */
+    int avg = atomic_get(&nrdy) / exec_unit_count;
+    int to_steal = avg - atomic_get(&(exec_unit->nrdy));
+    if (to_steal < 0) {
+        return true;
+    } else if (to_steal == 0) {
+        return false;
+    }
+
+    /* initialize the proposals with the real numbers of ready threads */
+    unsigned int k;
+    for (k = 0; k < exec_unit->strand_count; k++) {
+        exec_unit->cpus[k]->arch.proposed_nrdy =
+            atomic_get(&(exec_unit->cpus[k]->nrdy));
+    }
+
+    /* distribute the threads to be stolen to the core's CPUs */
+    int j;
+    for (j = to_steal; j > 0; j--) {
+        unsigned int k;
+        unsigned int least_busy = 0;
+        unsigned int least_busy_nrdy =
+            exec_unit->cpus[0]->arch.proposed_nrdy;
+
+        /* for each stolen thread, give it to the least busy CPU */
+        for (k = 0; k < exec_unit->strand_count; k++) {
+            if (exec_unit->cpus[k]->arch.proposed_nrdy
+                    < least_busy_nrdy) {
+                least_busy = k;
+                least_busy_nrdy =
+                    exec_unit->cpus[k]->arch.proposed_nrdy;
+            }
+        }
+        exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
+    }
+
+    return false;
+}
+
 /**
  * Finds out which execution units belong to particular CPUs. By execution unit
  * we mean the physical core the logical processor is backed by. Since each
  * Niagara physical core has just one integer execution unit and we will
  * ignore other execution units than the integer ones, we will use the terms
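
The distribution step in calculate_optimal_nrdy is easiest to see in isolation. The following standalone sketch reproduces the same "give each stolen thread to the least busy strand" loop with plain arrays and made-up numbers; STRAND_COUNT, proposed and to_steal are illustrative names, not the kernel's:

    #include <stdio.h>

    #define STRAND_COUNT 4

    int main(void)
    {
        /* current per-strand ready-thread counts (made-up data) */
        unsigned int proposed[STRAND_COUNT] = { 3, 0, 1, 0 };
        int to_steal = 5;  /* threads this core should take over */

        /* hand each stolen thread to the currently least busy strand */
        for (int j = to_steal; j > 0; j--) {
            unsigned int least_busy = 0;
            for (unsigned int k = 1; k < STRAND_COUNT; k++) {
                if (proposed[k] < proposed[least_busy])
                    least_busy = k;
            }
            proposed[least_busy]++;
        }

        /* prints 3 2 2 2: the load evens out across the strands */
        for (unsigned int k = 0; k < STRAND_COUNT; k++)
            printf("strand %u: %u\n", k, proposed[k]);
        return 0;
    }

Each stolen thread costs one linear scan over the strands, so distributing t threads over s strands is O(t * s); with at most a handful of strands per Niagara core this is negligible.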
Line 236 (Rev 4663) ... Line 285 (Rev 4679)

              * create a new entry in array of all execution units
              */
             if (i == exec_unit_count) {
                 exec_units[i].exec_unit_id = exec_unit_id;
                 exec_units[i].strand_count = 0;
+                atomic_set(&(exec_units[i].nrdy), 0);
+                spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock");
                 exec_unit_count++;
             }
 
             /*
              * remember the exec. unit and strand of the BSP
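
For orientation, the exec_unit_t type itself is not part of this diff. Judging purely from how its fields are used in this file, it plausibly looks like the sketch below; the stand-in types and the MAX_CORE_STRANDS bound are assumptions, and the real definition lives in <arch/smp/sun4v/smp.h>:

    #include <stdint.h>

    #define MAX_CORE_STRANDS 8  /* assumed bound, not the kernel's value */

    typedef struct { volatile long count; } atomic_t;  /* stand-in */
    typedef struct { int locked; } spinlock_t;         /* stand-in */
    typedef struct cpu cpu_t;                          /* opaque stand-in */

    /* rough shape of exec_unit_t as implied by its uses in this file */
    typedef struct {
        uint64_t exec_unit_id;              /* physical core id from the MD */
        unsigned int strand_count;          /* strands (logical CPUs) on the core */
        uint64_t cpuids[MAX_CORE_STRANDS];  /* hardware ids of the strands */
        cpu_t *cpus[MAX_CORE_STRANDS];      /* back-pointers to the strands' cpu_t */
        atomic_t nrdy;                      /* ready threads on the whole core */
        spinlock_t proposed_nrdy_lock;      /* guards the proposed_nrdy values */
    } exec_unit_t;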
Line 273 (Rev 4663) ... Line 324 (Rev 4679)

     if (exec_unit_assign_error) {
         bsp_exec_unit_index = 0;
         exec_unit_count = 1;
         exec_units[0].strand_count = cpu_count;
         exec_units[0].exec_unit_id = 1;
+        spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock");
+        atomic_set(&(exec_units[0].nrdy), 0);
         max_core_strands = cpu_count;
 
         /* browse CPUs again, assign them the fictional exec. unit */
         node = md_get_root();
         unsigned int i = 0;
Line 342 (Rev 4663) ... Line 395 (Rev 4679)

     for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) {
         for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
             if (cur_core_strand >= exec_units[cur_core].strand_count)
                 continue;
 
-            cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
+            cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]);
+            atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy)));
+            cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
+            exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]);
+            cur_cpu++;
         }
     }
 }
 
 /**
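
Taken together, these hunks set up per-core bookkeeping (the nrdy aggregate, the proposed_nrdy_lock, and the cpus back-pointers) that a balancing pass can consume. No such caller appears in this diff; a minimal hypothetical sketch, assuming HelenOS's spinlock_lock/spinlock_unlock primitives and the names introduced above:

    /* hypothetical: walk all cores and refresh their per-strand proposals;
     * only calculate_optimal_nrdy and the fields initialized in the hunks
     * above come from this diff */
    static void balance_all_cores(void)
    {
        unsigned int i;

        for (i = 0; i < exec_unit_count; i++) {
            spinlock_lock(&(exec_units[i].proposed_nrdy_lock));
            calculate_optimal_nrdy(&(exec_units[i]));
            spinlock_unlock(&(exec_units[i].proposed_nrdy_lock));
        }
    }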