/*
 * Copyright (c) 2006 Jakub Jermar
 * Copyright (c) 2009 Pavel Rimsky
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup sparc64
 * @{
 */
/** @file
 */

#include <smp/smp.h>
#include <smp/ipi.h>
#include <genarch/ofw/ofw_tree.h>
#include <cpu.h>
#include <arch/cpu.h>
#include <arch/boot/boot.h>
#include <arch.h>
#include <config.h>
#include <macros.h>
#include <func.h>
#include <arch/types.h>
#include <synch/synch.h>
#include <synch/waitq.h>
#include <print.h>
#include <arch/sun4v/hypercall.h>
#include <arch/sun4v/md.h>
#include <arch/sun4v/ipi.h>
#include <time/delay.h>
#include <arch/smp/sun4v/smp.h>

/** hypervisor code of the "running" state of the CPU */
#define CPU_STATE_RUNNING   2

/** maximum possible number of processor cores */
#define MAX_NUM_CORES       8

/** needed in the CPU_START hypercall */
extern void kernel_image_start(void);

/** needed in the CPU_START hypercall */
extern void *trap_table;

/** number of execution units detected */
uint8_t exec_unit_count = 0;

/** execution units (processor cores) */
exec_unit_t exec_units[MAX_NUM_CORES];

/** CPU structures */
extern cpu_t *cpus;

/** maximum number of strands per physical core detected */
unsigned int max_core_strands = 0;

#ifdef CONFIG_SIMICS_SMP_HACK
/**
 * Copies a piece of HelenOS code to the place where OBP had its IPI handler.
 * When the BSP sends an IPI to an AP, this code gets executed and jumps to
 * the first instruction of the kernel. This is a workaround that makes the
 * APs execute HelenOS code on Simics.
 */
static void simics_smp_hack_init(void)
{
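    /*
     * The inline assembly below copies nine 8-byte words, one at a time,
     * from temp_cpu_mondo_handler to physical address 0x80200f80, the
     * slot where OBP keeps its CPU mondo (IPI) handler. Each stxa uses
     * ASI 0x14 (ASI_REAL on sun4v, i.e. the store bypasses the MMU) and
     * is followed by a membar #Sync; the final flush helps keep the
     * instruction stream consistent with the freshly written code.
     */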
    asm volatile (
        "setx temp_cpu_mondo_handler, %g4, %g6 \n"
        "setx 0x80200f80, %g4, %g7 \n"

        "ldx [%g6], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x8], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x10], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x18], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x20], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x28], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x30], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x38], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x40], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "flush %i7"
    );
}
#endif

/**
 * Proposes the optimal number of ready threads for each virtual processor
 * in the given processor core so that the processor core is as busy as the
 * average processor core. The proposed number of ready threads is stored
 * in the proposed_nrdy variable of the cpu_arch_t structure.
 *
 * Returns true if the core is already loaded above the system average,
 * false otherwise.
 */
bool calculate_optimal_nrdy(exec_unit_t *exec_unit)
{
    /* calculate the number of threads the core will steal */
    int avg = atomic_get(&nrdy) / exec_unit_count;
    int to_steal = avg - atomic_get(&(exec_unit->nrdy));
    if (to_steal < 0) {
        return true;
    } else if (to_steal == 0) {
        return false;
    }

    /* initialize the proposals with the real numbers of ready threads */
    unsigned int k;
    for (k = 0; k < exec_unit->strand_count; k++) {
        exec_unit->cpus[k]->arch.proposed_nrdy =
            atomic_get(&(exec_unit->cpus[k]->nrdy));
    }

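    /*
     * Example (illustrative numbers only): with nrdy = 16 and
     * exec_unit_count = 2, avg = 8; if this core currently has 5 ready
     * threads, to_steal = 3, and the loop below hands out those three
     * proposals one at a time, each to the strand with the lowest
     * proposed_nrdy so far.
     */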
    /* distribute the threads to be stolen to the core's CPUs */
    int j;
    for (j = to_steal; j > 0; j--) {
        unsigned int k;
        unsigned int least_busy = 0;
        unsigned int least_busy_nrdy =
            exec_unit->cpus[0]->arch.proposed_nrdy;

        /* for each stolen thread, give it to the least busy CPU */
        for (k = 0; k < exec_unit->strand_count; k++) {
            if (exec_unit->cpus[k]->arch.proposed_nrdy
                    < least_busy_nrdy) {
                least_busy = k;
                least_busy_nrdy =
                    exec_unit->cpus[k]->arch.proposed_nrdy;
            }
        }
        exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
    }

    return false;
}

/**
 * Finds out which execution units belong to particular CPUs. By execution unit
 * we mean the physical core the logical processor is backed by. Since each
 * Niagara physical core has just one integer execution unit and we ignore
 * execution units other than the integer ones, we use the terms
 * "integer execution unit", "execution unit" and "physical core"
 * interchangeably.
 *
 * The physical cores are detected by browsing the children of the CPU node
 * in the machine description and looking for a node representing an integer
 * execution unit. Once the integer execution unit of a particular CPU is
 * known, the ID of the CPU is added to the list of cpuids of the corresponding
 * execution unit structure (exec_unit_t). If an execution unit is encountered
 * for the first time, a new execution unit structure (exec_unit_t) is created
 * first and added to the execution units array (exec_units).
 *
 * If the function fails to find an execution unit for a CPU (this may happen
 * on machines with older firmware or on Simics), it falls back to pretending
 * that there is just one execution unit and that all CPUs belong to it.
 *
 * Finally, the array of all execution units is reordered such that the element
 * which represents the physical core of the bootstrap CPU is at index 0.
 * Moreover, the array of CPU IDs within the BSP's physical core structure is
 * reordered such that the element which represents the ID of the BSP is at
 * index 0. This is done because we want the CPUs to be woken up such that the
 * 0-index CPU of the 0-index execution unit is woken up first. Since the BSP
 * is already awake, it should be at the 0-th position of the 0-th execution
 * unit structure.
 *
 * Apart from that, the code also counts the total number of CPUs and stores
 * it in the global config.cpu_count variable.
 */
static void detect_execution_units(void)
{
    /* ID of the bootstrap processor */
    uint64_t myid;

    /* total number of CPUs detected */
    count_t cpu_count = 0;

    /* will be set to true if detecting the physical cores fails */
    bool exec_unit_assign_error = false;

    /* index of the bootstrap physical core in the array of cores */
    unsigned int bsp_exec_unit_index = 0;

    /* index of the BSP ID inside the array of bootstrap core's cpuids */
    unsigned int bsp_core_strand_index = 0;
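
    /* CPU_MYID asks the hypervisor for the ID of the calling virtual CPU */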
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
    md_node_t node = md_get_root();

    /* walk through all the CPU nodes in the MD */
    while (md_next_node(&node, "cpu")) {

        uint64_t cpuid;
        md_get_integer_property(node, "id", &cpuid);
        cpu_count++;

        /*
         * if detection failed for a previous CPU, don't try
         * to detect physical cores any more
         */
        if (exec_unit_assign_error)
            continue;

        /* detect exec. unit for the CPU represented by current node */
        uint64_t exec_unit_id = 0;
        md_child_iter_t it = md_get_child_iterator(node);

        while (md_next_child(&it)) {
            md_node_t child = md_get_child_node(it);
            const char *exec_unit_type;
            md_get_string_property(child, "type", &exec_unit_type);

            /* each physical core has just 1 integer exec. unit */
            if (strcmp(exec_unit_type, "integer") == 0) {
                exec_unit_id = child;
                break;
            }
        }

        /* execution unit detected successfully */
        if (exec_unit_id != 0) {

            /* find the exec. unit in the array of existing units */
            unsigned int i = 0;
            for (i = 0; i < exec_unit_count; i++) {
                if (exec_units[i].exec_unit_id == exec_unit_id)
                    break;
            }

            /*
             * this execution unit has not been met before, so
             * create a new entry in the array of all execution units
             */
            if (i == exec_unit_count) {
                exec_units[i].exec_unit_id = exec_unit_id;
                exec_units[i].strand_count = 0;
                atomic_set(&(exec_units[i].nrdy), 0);
                spinlock_initialize(&(exec_units[i].proposed_nrdy_lock),
                    "proposed nrdy lock");
                exec_unit_count++;
            }

            /*
             * remember the exec. unit and strand of the BSP
             */
            if (cpuid == myid) {
                bsp_exec_unit_index = i;
                bsp_core_strand_index = exec_units[i].strand_count;
            }

            /* add the CPU just met to the exec. unit's list */
            exec_units[i].cpuids[exec_units[i].strand_count] = cpuid;
            exec_units[i].strand_count++;
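
            /*
             * remember the widest core seen so far; init_cpuids()
             * iterates strand indices up to this bound when
             * interleaving the wake-up order
             */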
            max_core_strands =
                exec_units[i].strand_count > max_core_strands ?
                exec_units[i].strand_count : max_core_strands;

        /* detecting execution unit failed */
        } else {
            exec_unit_assign_error = true;
        }
    }

    /* save the number of CPUs to a globally accessible variable */
    config.cpu_count = cpu_count;

    /*
     * Fallback executed if finding out which execution units belong to
     * particular CPUs fails: pretend there is just one execution unit
     * and that all CPUs belong to it.
     */
    if (exec_unit_assign_error) {
        bsp_exec_unit_index = 0;
        exec_unit_count = 1;
        exec_units[0].strand_count = cpu_count;
        exec_units[0].exec_unit_id = 1;
        spinlock_initialize(&(exec_units[0].proposed_nrdy_lock),
            "proposed nrdy lock");
        atomic_set(&(exec_units[0].nrdy), 0);
        max_core_strands = cpu_count;

        /* browse CPUs again, assign them the fictional exec. unit */
        node = md_get_root();
        unsigned int i = 0;

        while (md_next_node(&node, "cpu")) {
            uint64_t cpuid;
            md_get_integer_property(node, "id", &cpuid);
            if (cpuid == myid) {
                bsp_core_strand_index = i;
            }
            exec_units[0].cpuids[i++] = cpuid;
        }
    }

    /*
     * Reorder the execution units array elements and the cpuid array
     * elements so that the BSP will always be the very first CPU of
     * the very first execution unit.
     */
    exec_unit_t temp_exec_unit = exec_units[0];
    exec_units[0] = exec_units[bsp_exec_unit_index];
    exec_units[bsp_exec_unit_index] = temp_exec_unit;
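
    /* swap the BSP's strand ID to index 0 within the (now first) core */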
    uint64_t temp_cpuid = exec_units[0].cpuids[0];
    exec_units[0].cpuids[0] = exec_units[0].cpuids[bsp_core_strand_index];
    exec_units[0].cpuids[bsp_core_strand_index] = temp_cpuid;
}

/**
 * Determine the number of processors and detect physical cores. On Simics,
 * also copy the code which an AP will execute when the BSP sends it an IPI,
 * in order to make the AP execute HelenOS code.
 */
void smp_init(void)
{
    detect_execution_units();
#ifdef CONFIG_SIMICS_SMP_HACK
    simics_smp_hack_init();
#endif
}

/**
 * For each CPU, sets the value of cpus[i].arch.id, where i is the
 * index of the CPU in the cpus variable, to the cpuid of the i-th processor
 * to be run. The CPUs are run such that the CPU represented by cpus[0]
 * is run first, cpus[1] is run after it, and cpus[cpu_count - 1] is run
 * last.
 *
 * The CPU IDs are set such that the processor cores alternate while the
 * CPUs are being woken up: first one CPU from the first core is run, then
 * one CPU from the second core, ..., then one CPU from the last core;
 * after that another CPU from the first core, another CPU from the second
 * core, and so on.
 */
static void init_cpuids(void)
{
    unsigned int cur_core_strand;
    unsigned int cur_core;
    unsigned int cur_cpu = 0;
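
    /*
     * Illustration: with 2 cores of 2 strands each, the wake-up order
     * produced below is core 0/strand 0 (the BSP), core 1/strand 0,
     * core 0/strand 1, core 1/strand 1.
     */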
    for (cur_core_strand = 0; cur_core_strand < max_core_strands;
        cur_core_strand++) {
        for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
            if (cur_core_strand >= exec_units[cur_core].strand_count)
                continue;

            cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]);
            atomic_add(&(exec_units[cur_core].nrdy),
                atomic_get(&(cpus[cur_cpu].nrdy)));
            cpus[cur_cpu].arch.id =
                exec_units[cur_core].cpuids[cur_core_strand];
            exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]);
            cur_cpu++;
        }
    }
}

/**
 * Wakes up a single CPU.
 *
 * @param cpuid ID of the CPU to be woken up
 *
 * @return true if the CPU was started successfully, false if a hypervisor
 *     call failed
 */
static bool wake_cpu(uint64_t cpuid)
{
#ifdef CONFIG_SIMICS_SMP_HACK
    ipi_unicast_to((void (*)(void)) 1234, cpuid);
#else
    /* stop the CPU before making it execute our code */
    if (__hypercall_fast1(CPU_STOP, cpuid) != EOK)
        return false;

    /* wait for the CPU to stop */
    uint64_t state;
    __hypercall_fast_ret1(cpuid, 0, 0, 0, 0, CPU_STATE, &state);
    while (state == CPU_STATE_RUNNING) {
        __hypercall_fast_ret1(cpuid, 0, 0, 0, 0, CPU_STATE, &state);
    }
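
    /*
     * CPU_START takes (cpuid, pc, rtba, arg): the target strand starts
     * executing at the real address pc, with its real trap base address
     * set to rtba and arg handed to it in a register (here the physical
     * memory base, presumably consumed by kernel_image_start).
     */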
    /* make the CPU run again and execute HelenOS code */
    if (__hypercall_fast4(CPU_START, cpuid,
        (uint64_t) KA2PA(kernel_image_start),
        KA2PA(trap_table), bootinfo.physmem_start) != EOK)
        return false;
#endif

    if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
        ESYNCH_TIMEOUT)
        printf("%s: waiting for processor (cpuid = %" PRIu64
            ") timed out\n", __func__, cpuid);

    return true;
}

/** Wake application processors up. */
void kmp(void *arg)
{
    init_cpuids();

    unsigned int i;

    for (i = 1; i < config.cpu_count; i++) {
        wake_cpu(cpus[i].arch.id);
    }
}

/** @}
 */