Subversion Repositories HelenOS

Rev

Rev 4638 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3771 rimsky 1
/*
2
 * Copyright (c) 2006 Jakub Jermar
3
 * Copyright (c) 2009 Pavel Rimsky
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions
8
 * are met:
9
 *
10
 * - Redistributions of source code must retain the above copyright
11
 *   notice, this list of conditions and the following disclaimer.
12
 * - Redistributions in binary form must reproduce the above copyright
13
 *   notice, this list of conditions and the following disclaimer in the
14
 *   documentation and/or other materials provided with the distribution.
15
 * - The name of the author may not be used to endorse or promote products
16
 *   derived from this software without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
 
30
/** @addtogroup sparc64
31
 * @{
32
 */
33
/** @file
34
 */
35
 
36
#include <smp/smp.h>
4638 rimsky 37
#include <smp/ipi.h>
3771 rimsky 38
#include <genarch/ofw/ofw_tree.h>
39
#include <cpu.h>
40
#include <arch/cpu.h>
4614 rimsky 41
#include <arch/boot/boot.h>
3771 rimsky 42
#include <arch.h>
43
#include <config.h>
44
#include <macros.h>
4663 rimsky 45
#include <func.h>
3771 rimsky 46
#include <arch/types.h>
47
#include <synch/synch.h>
48
#include <synch/waitq.h>
49
#include <print.h>
4614 rimsky 50
#include <arch/sun4v/hypercall.h>
51
#include <arch/sun4v/md.h>
4638 rimsky 52
#include <arch/sun4v/ipi.h>
4614 rimsky 53
#include <time/delay.h>
3771 rimsky 54
 
4663 rimsky 55
/** hypervisor code of the "running" state of the CPU */
4614 rimsky 56
#define CPU_STATE_RUNNING   2
57
 
4663 rimsky 58
/** maximum possible number of processor cores */
59
#define MAX_NUM_CORES       8
60
 
61
/** needed in the CPU_START hypercall */
4614 rimsky 62
extern void kernel_image_start(void);
4663 rimsky 63
 
64
/** needed in the CPU_START hypercall */
4614 rimsky 65
extern void *trap_table;
66
 
4663 rimsky 67
/** number of execution units detected */
68
uint8_t exec_unit_count = 0;
4614 rimsky 69
 
4663 rimsky 70
/** execution units (processor cores) */
71
exec_unit_t exec_units[MAX_NUM_CORES];
4614 rimsky 72
 
4663 rimsky 73
/** CPU structures */
74
extern cpu_t *cpus;
3771 rimsky 75
 
4663 rimsky 76
/** maximum number of strands per a physical core detected */
77
unsigned int max_core_strands = 0;
3771 rimsky 78
 
4663 rimsky 79
#ifdef CONFIG_SIMICS_SMP_HACK
/**
 * Simics workaround which makes the APs execute HelenOS code.
 *
 * Nine 64-bit words of HelenOS code, starting at the symbol
 * temp_cpu_mondo_handler, are copied to the address 0x80200f80, i.e. to the
 * place where OBP had its IPI handler. Once the BSP sends an IPI to an AP,
 * the AP executes the copied code, which jumps to the first instruction of
 * the kernel.
 *
 * The global registers %g4, %g6 and %g7 are used as scratch registers and
 * are not declared as clobbered.
 */
static void simics_smp_hack_init(void)
{
    asm volatile (
        /* %g6 = address of our handler, %g7 = destination address */
        "setx temp_cpu_mondo_handler, %g4, %g6 \n"
        "setx 0x80200f80, %g4, %g7 \n"

        /*
         * Copy the words one by one: load from the handler, store to the
         * destination with stxa through ASI 0x14 and synchronize.
         */

        /* word 0 */
        "ldx [%g6], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 1 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x8], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 2 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x10], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 3 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x18], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 4 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x20], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 5 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x28], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 6 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x30], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 7 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x38], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /* word 8 */
        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x40], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        /*
         * NOTE(review): presumably makes the freshly stored code visible
         * to instruction fetch - confirm.
         */
        "flush %i7"

    );
}
#endif
4663 rimsky 140
 
141
/**
142
 * Finds out which execution units belong to particular CPUs. By execution unit
143
 * we mean the physical core the logical processor is backed by. Since each
144
 * Niagara physical core has just one integer execution unit and we will
145
 * ignore other execution units than the integer ones, we will use the terms
146
 * "integer execution unit", "execution unit" and "physical core"
147
 * interchangeably.
148
 *
149
 * The physical cores are detected by browsing the children of the CPU node
150
 * in the machine description and looking for a node representing an integer
151
 * execution unit. Once the integer execution unit of a particular CPU is
152
 * known, the ID of the CPU is added to the list of cpuids of the corresponding
153
 * execution unit structure (exec_unit_t). If an execution unit is encountered
154
 * for the first time, a new execution unit structure (exec_unit_t) must be
155
 * created first and added to the execution units array (exec_units).
156
 *
157
 * If the function fails to find an execution unit for a CPU (this may happen
158
 * on machines with older firmware or on Simics), it performs a fallback code
159
 * which pretends there exists just one execution unit and all CPUs belong to
160
 * it.
161
 *
162
 * Finally, the array of all execution units is reordered such that its element
163
 * which represents the physical core of the the bootstrap CPU is at index 0.
164
 * Moreover, the array of CPU IDs within the BSP's physical core structure is
165
 * reordered such that the element which represents the ID of the BSP is at
166
 * index 0. This is done because we would like the CPUs to be woken up
167
 * such that the 0-index CPU of the 0-index execution unit is
168
 * woken up first. And since the BSP is already woken up, we would like it to be
169
 * at 0-th position of the 0-th execution unit structure.
170
 *
171
 * Apart from that, the code also counts the total number of CPUs and stores
172
 * it to the global config.cpu_count variable.
173
 */
174
static void detect_execution_units(void)
175
{
176
    /* ID of the bootstrap processor */
177
    uint64_t myid;
178
 
179
    /* total number of CPUs detected */
180
    count_t cpu_count = 0;
181
 
182
    /* will be set to 1 if detecting the physical cores fails */
183
    bool exec_unit_assign_error = 0;
184
 
185
    /* index of the bootstrap physical core in the array of cores */
186
    unsigned int bsp_exec_unit_index = 0;
187
 
188
    /* index of the BSP ID inside the array of bootstrap core's cpuids */
189
    unsigned int bsp_core_strand_index = 0;
190
 
191
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
192
    md_node_t node = md_get_root();
193
 
194
    /* walk through all the CPU nodes in the MD*/
195
    while (md_next_node(&node, "cpu")) {
196
 
197
        uint64_t cpuid;
198
        md_get_integer_property(node, "id", &cpuid);
199
        cpu_count++;
200
 
201
        /*
202
         * if failed in previous CPUs, don't try
203
         * to detect physical cores any more
204
         */
205
        if (exec_unit_assign_error)
206
            continue;
207
 
208
        /* detect exec. unit for the CPU represented by current node */
209
        uint64_t exec_unit_id = 0;
210
        md_child_iter_t it = md_get_child_iterator(node);
211
 
212
        while (md_next_child(&it)) {
213
            md_node_t child = md_get_child_node(it);
214
            const char *exec_unit_type;
215
            md_get_string_property(child, "type", &exec_unit_type);
216
 
217
            /* each physical core has just 1 integer exec. unit */
218
            if (strcmp(exec_unit_type, "integer") == 0) {
219
                exec_unit_id = child;
220
                break;
221
            }
222
        }
223
 
224
        /* execution unit detected successfully */
225
        if (exec_unit_id != 0) {
226
 
227
            /* find the exec. unit in array of existing units */
228
            unsigned int i = 0;
229
            for (i = 0; i < exec_unit_count; i++) {
230
                if (exec_units[i].exec_unit_id == exec_unit_id)
231
                    break;
232
            }
233
 
234
            /*
235
             * execution unit just met has not been met before, so
236
             * create a new entry in array of all execution units
237
             */
238
            if (i == exec_unit_count) {
239
                exec_units[i].exec_unit_id = exec_unit_id;
240
                exec_units[i].strand_count = 0;
241
                exec_unit_count++;
242
            }
243
 
244
            /*
245
             * remember the exec. unit and strand of the BSP
246
             */
247
            if (cpuid == myid) {
248
                bsp_exec_unit_index = i;
249
                bsp_core_strand_index = exec_units[i].strand_count;
250
            }
251
 
252
            /* add the CPU just met to the exec. unit's list */
253
            exec_units[i].cpuids[exec_units[i].strand_count] = cpuid;
254
            exec_units[i].strand_count++;
255
            max_core_strands =
256
                exec_units[i].strand_count > max_core_strands ?
257
                exec_units[i].strand_count : max_core_strands;
258
 
259
        /* detecting execution unit failed */
260
        } else {
261
            exec_unit_assign_error = 1;
262
        }
263
    }      
264
 
265
    /* save the number of CPUs to a globally accessible variable */
266
    config.cpu_count = cpu_count;
267
 
268
    /*
269
     * A fallback code which will be executed if finding out which
270
     * execution units belong to particular CPUs fails. Pretend there
271
     * exists just one execution unit and all CPUs belong to it.
272
     */
273
    if (exec_unit_assign_error) {
274
        bsp_exec_unit_index = 0;
275
        exec_unit_count = 1;
276
        exec_units[0].strand_count = cpu_count;
277
        exec_units[0].exec_unit_id = 1;
278
        max_core_strands = cpu_count;
279
 
280
        /* browse CPUs again, assign them the fictional exec. unit */
281
        node = md_get_root();
282
        unsigned int i = 0;
283
 
284
        while (md_next_node(&node, "cpu")) {
285
            uint64_t cpuid;
286
            md_get_integer_property(node, "id", &cpuid);
287
            if (cpuid == myid) {
288
                bsp_core_strand_index = i;
289
            }
290
            exec_units[0].cpuids[i++] = cpuid;
291
        }
292
    }
293
 
294
    /*
295
     * Reorder the execution units array elements and the cpuid array
296
     * elements so that the BSP will always be the very first CPU of
297
     * the very first execution unit.
298
     */
299
    exec_unit_t temp_exec_unit = exec_units[0];
300
    exec_units[0] = exec_units[bsp_exec_unit_index];
301
    exec_units[bsp_exec_unit_index] = temp_exec_unit;
302
 
303
    uint64_t temp_cpuid = exec_units[0].cpuids[0];
304
    exec_units[0].cpuids[0] = exec_units[0].cpuids[bsp_exec_unit_index];
305
    exec_units[0].cpuids[bsp_core_strand_index] = temp_cpuid;
306
 
3771 rimsky 307
}
308
 
4663 rimsky 309
/**
 * SMP initialization.
 *
 * Counts the processors and detects the physical cores they belong to.
 * In builds with CONFIG_SIMICS_SMP_HACK, it additionally copies the code
 * which an AP executes when the BSP sends an IPI to it, so that the AP
 * starts executing HelenOS code on Simics.
 */
void smp_init(void)
{
    /* fills in exec_units and config.cpu_count */
    detect_execution_units();

#ifdef CONFIG_SIMICS_SMP_HACK
    /* Simics only: install the code run by the APs on an IPI */
    simics_smp_hack_init();
#endif
}
321
 
322
/**
323
 * For each CPU sets the value of cpus[i].arch.id, where i is the
324
 * index of the CPU in the cpus variable, to the cpuid of the i-th processor
325
 * to be run. The CPUs are run such that the CPU represented by cpus[0]
326
 * is run first, cpus[1] is run after it, and cpus[cpu_count - 1] is run as the
327
 * last one.
328
 *
329
 * The CPU IDs are set such that during waking the CPUs up the
330
 * processor cores will be alternated, i.e. first one CPU from the first core
331
 * will be run, after that one CPU from the second CPU core will be run,...
332
 * then one CPU from the last core will be run, after that another CPU
333
 * from the first core will be run, then another CPU from the second core
334
 * will be run,... then another CPU from the last core will be run, and so on.
335
 */
336
static void init_cpuids(void)
337
{
338
    unsigned int cur_core_strand;
339
    unsigned int cur_core;
340
    unsigned int cur_cpu = 0;
341
 
342
    for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) {
343
        for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
344
            if (cur_core_strand > exec_units[cur_core].strand_count)
345
                continue;
346
 
347
            cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
348
        }
349
    }
350
}
351
 
352
/**
353
 * Wakes up a single CPU.
354
 *
355
 * @param cpuid ID of the CPU to be woken up
356
 */
357
static bool wake_cpu(uint64_t cpuid)
358
{
359
 
360
#ifdef CONFIG_SIMICS_SMP_HACK
361
    ipi_unicast_to((void (*)(void)) 1234, cpuid);
362
#else
363
    /* stop the CPU before making it execute our code */
364
    if (__hypercall_fast1(CPU_STOP, cpuid) != EOK)
365
        return false;
366
 
367
    /* wait for the CPU to stop */
368
    uint64_t state;
369
    __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
370
        CPU_STATE, &state);
371
    while (state == CPU_STATE_RUNNING) {
372
        __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
373
            CPU_STATE, &state);
374
    }
375
 
376
    /* make the CPU run again and execute HelenOS code */
377
    if (__hypercall_fast4(
378
        CPU_START, cpuid,
379
        (uint64_t) KA2PA(kernel_image_start),
380
        KA2PA(trap_table), bootinfo.physmem_start          
381
        ) != EOK)
382
            return false;
383
#endif
384
 
385
    if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
386
            ESYNCH_TIMEOUT)
387
        printf("%s: waiting for processor (cpuid = %" PRIu32
388
        ") timed out\n", __func__, cpuid);
389
 
390
    return true;
391
}
392
 
393
/** Wake application processors up. */
394
void kmp(void *arg)
395
{
396
    init_cpuids();
397
 
398
    unsigned int i;
399
 
400
    for (i = 1; i < config.cpu_count; i++) {
401
        wake_cpu(cpus[i].arch.id);
402
    }
403
}
404
 
3771 rimsky 405
/** @}
406
 */