Subversion Repositories HelenOS

Rev

Rev 4638 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4638 Rev 4663
Line 40... Line 40...
40
#include <arch/cpu.h>
40
#include <arch/cpu.h>
41
#include <arch/boot/boot.h>
41
#include <arch/boot/boot.h>
42
#include <arch.h>
42
#include <arch.h>
43
#include <config.h>
43
#include <config.h>
44
#include <macros.h>
44
#include <macros.h>
-
 
45
#include <func.h>
45
#include <arch/types.h>
46
#include <arch/types.h>
46
#include <synch/synch.h>
47
#include <synch/synch.h>
47
#include <synch/waitq.h>
48
#include <synch/waitq.h>
48
#include <print.h>
49
#include <print.h>
49
#include <arch/sun4v/hypercall.h>
50
#include <arch/sun4v/hypercall.h>
50
#include <arch/sun4v/md.h>
51
#include <arch/sun4v/md.h>
51
#include <arch/sun4v/ipi.h>
52
#include <arch/sun4v/ipi.h>
52
#include <time/delay.h>
53
#include <time/delay.h>
53
 
54
 
-
 
55
/** hypervisor code of the "running" state of the CPU */
54
#define CPU_STATE_RUNNING   2
56
#define CPU_STATE_RUNNING   2
55
 
57
 
56
extern void kernel_image_start(void);
-
 
57
extern void *trap_table;
-
 
58
 
-
 
59
/** Determine number of processors. */
58
/** maximum possible number of processor cores */
60
void smp_init(void)
-
 
61
{
-
 
62
    md_node_t node = md_get_root();
59
#define MAX_NUM_CORES       8
63
    count_t cpu_count = 0;
-
 
64
 
-
 
65
    /* walk through the MD and count the CPU nodes */
-
 
66
    while(md_next_node(&node, "cpu")) {
-
 
67
        cpu_count++;
-
 
68
    }
-
 
69
 
-
 
70
    config.cpu_count = cpu_count;
-
 
71
}
-
 
72
 
-
 
73
 
-
 
74
/** Wake application processors up. */
-
 
75
void kmp(void *arg)
-
 
76
{
-
 
77
#if 1
-
 
78
    (void) arg;
-
 
79
 
60
 
80
    uint64_t myid;
61
/** needed in the CPU_START hypercall */
81
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
62
extern void kernel_image_start(void);
82
 
63
 
83
    /* stop the CPUs before making them execute our code */
64
/** needed in the CPU_START hypercall */
84
    uint64_t i;
65
extern void *trap_table;
85
    for (i = 0; i < config.cpu_count; i++) {
-
 
86
        if (i == myid)
-
 
87
            continue;
-
 
88
 
66
 
89
        if (__hypercall_fast1(CPU_STOP, i) != 0)
67
/** number of execution units detected */
90
            continue;
68
uint8_t exec_unit_count = 0;
91
 
69
 
92
        uint64_t state;
-
 
93
        __hypercall_fast_ret1(i, 0, 0, 0, 0, CPU_STATE, &state);
-
 
94
        while (state == CPU_STATE_RUNNING) {
70
/** execution units (processor cores) */
95
            __hypercall_fast_ret1(i, 0, 0, 0, 0, CPU_STATE, &state);
71
exec_unit_t exec_units[MAX_NUM_CORES];
96
        }
-
 
97
    }
-
 
98
 
72
 
99
    /* wake the processors up, one by one */
73
/** CPU structures */
100
    uint64_t state;
74
extern cpu_t *cpus;
101
    for (i = 1; i < config.cpu_count; i++) {
-
 
102
        __hypercall_fast_ret1(i, 0, 0, 0, 0, CPU_STATE, &state);
-
 
103
        printf("Starting CPU %d, error code = %d.\n", i, __hypercall_fast4(
-
 
104
            CPU_START,
-
 
105
            i,
-
 
106
            (uint64_t) KA2PA(kernel_image_start),
-
 
107
            KA2PA(trap_table),
-
 
108
            bootinfo.physmem_start         
-
 
109
        ));
-
 
110
 
-
 
111
        if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
-
 
112
                    ESYNCH_TIMEOUT)
-
 
113
            printf("%s: waiting for processor (cpuid = %" PRIu32
-
 
114
                ") timed out\n", __func__, i);
-
 
115
       
-
 
116
    }
-
 
117
#else
-
 
118
 
75
 
-
 
76
/** maximum number of strands detected on a single physical core */
-
 
77
unsigned int max_core_strands = 0;
-
 
78
 
-
 
79
#ifdef CONFIG_SIMICS_SMP_HACK
-
 
80
/**
-
 
81
 * Copies a piece of HelenOS code to the place where OBP had its IPI handler.
-
 
82
 * By sending an IPI by the BSP to the AP the code will be executed.
-
 
83
 * The code will jump to the first instruction of the kernel. This is
-
 
84
 * a workaround to make APs execute HelenOS code on Simics.
-
 
85
 */
-
 
86
static void simics_smp_hack_init(void) {
119
    asm volatile (
87
    asm volatile (
120
        "setx temp_cpu_mondo_handler, %g4, %g6 \n"
88
        "setx temp_cpu_mondo_handler, %g4, %g6 \n"
121
        //"setx 0x80246ad8, %g4, %g7 \n"
-
 
122
        "setx 0x80200f80, %g4, %g7 \n"
89
        "setx 0x80200f80, %g4, %g7 \n"
123
 
90
 
124
        "ldx [%g6], %g4 \n"
91
        "ldx [%g6], %g4 \n"
125
        "stxa %g4, [%g7] 0x14 \n"
92
        "stxa %g4, [%g7] 0x14 \n"
126
        "membar #Sync \n"
93
        "membar #Sync \n"
Line 166... Line 133...
166
        "membar #Sync \n"
133
        "membar #Sync \n"
167
 
134
 
168
        "flush %i7"
135
        "flush %i7"
169
 
136
 
170
        );
137
        );
-
 
138
}
-
 
139
#endif
-
 
140
 
-
 
141
/**
-
 
142
 * Finds out which execution units belong to particular CPUs. By execution unit
-
 
143
 * we mean the physical core the logical processor is backed by. Since each
-
 
144
 * Niagara physical core has just one integer execution unit and we will
-
 
145
 * ignore other execution units than the integer ones, we will use the terms
-
 
146
 * "integer execution unit", "execution unit" and "physical core"
-
 
147
 * interchangeably.
-
 
148
 *
-
 
149
 * The physical cores are detected by browsing the children of the CPU node
-
 
150
 * in the machine description and looking for a node representing an integer
-
 
151
 * execution unit. Once the integer execution unit of a particular CPU is
-
 
152
 * known, the ID of the CPU is added to the list of cpuids of the corresponding
-
 
153
 * execution unit structure (exec_unit_t). If an execution unit is encountered
-
 
154
 * for the first time, a new execution unit structure (exec_unit_t) must be
-
 
155
 * created first and added to the execution units array (exec_units).
-
 
156
 *
-
 
157
 * If the function fails to find an execution unit for a CPU (this may happen
-
 
158
 * on machines with older firmware or on Simics), it performs a fallback code
-
 
159
 * which pretends there exists just one execution unit and all CPUs belong to
-
 
160
 * it.
-
 
161
 *
-
 
162
 * Finally, the array of all execution units is reordered such that its element
-
 
163
 * which represents the physical core of the the bootstrap CPU is at index 0.
-
 
164
 * Moreover, the array of CPU IDs within the BSP's physical core structure is
-
 
165
 * reordered such that the element which represents the ID of the BSP is at
-
 
166
 * index 0. This is done because we would like the CPUs to be woken up
-
 
167
 * such that the 0-index CPU of the 0-index execution unit is
-
 
168
 * woken up first. And since the BSP is already woken up, we would like it to be
-
 
169
 * at 0-th position of the 0-th execution unit structure.
-
 
170
 *
-
 
171
 * Apart from that, the code also counts the total number of CPUs and stores
-
 
172
 * it to the global config.cpu_count variable.
-
 
173
 */
-
 
174
static void detect_execution_units(void)
-
 
175
{
-
 
176
    /* ID of the bootstrap processor */
-
 
177
    uint64_t myid;
-
 
178
 
-
 
179
    /* total number of CPUs detected */
-
 
180
    count_t cpu_count = 0;
-
 
181
 
-
 
182
    /* will be set to 1 if detecting the physical cores fails */
-
 
183
    bool exec_unit_assign_error = 0;
-
 
184
 
-
 
185
    /* index of the bootstrap physical core in the array of cores */
-
 
186
    unsigned int bsp_exec_unit_index = 0;
-
 
187
 
-
 
188
    /* index of the BSP ID inside the array of bootstrap core's cpuids */
-
 
189
    unsigned int bsp_core_strand_index = 0;
-
 
190
 
-
 
191
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
-
 
192
    md_node_t node = md_get_root();
-
 
193
 
-
 
194
    /* walk through all the CPU nodes in the MD*/
-
 
195
    while (md_next_node(&node, "cpu")) {
-
 
196
 
-
 
197
        uint64_t cpuid;
-
 
198
        md_get_integer_property(node, "id", &cpuid);
-
 
199
        cpu_count++;
-
 
200
 
-
 
201
        /*
-
 
202
         * if failed in previous CPUs, don't try
-
 
203
         * to detect physical cores any more
-
 
204
         */
-
 
205
        if (exec_unit_assign_error)
-
 
206
            continue;
-
 
207
 
-
 
208
        /* detect exec. unit for the CPU represented by current node */
-
 
209
        uint64_t exec_unit_id = 0;
-
 
210
        md_child_iter_t it = md_get_child_iterator(node);
-
 
211
 
-
 
212
        while (md_next_child(&it)) {
-
 
213
            md_node_t child = md_get_child_node(it);
-
 
214
            const char *exec_unit_type;
-
 
215
            md_get_string_property(child, "type", &exec_unit_type);
-
 
216
 
-
 
217
            /* each physical core has just 1 integer exec. unit */
-
 
218
            if (strcmp(exec_unit_type, "integer") == 0) {
-
 
219
                exec_unit_id = child;
-
 
220
                break;
-
 
221
            }
-
 
222
        }
-
 
223
 
-
 
224
        /* execution unit detected successfully */
-
 
225
        if (exec_unit_id != 0) {
-
 
226
 
-
 
227
            /* find the exec. unit in array of existing units */
-
 
228
            unsigned int i = 0;
-
 
229
            for (i = 0; i < exec_unit_count; i++) {
-
 
230
                if (exec_units[i].exec_unit_id == exec_unit_id)
-
 
231
                    break;
-
 
232
            }
-
 
233
 
-
 
234
            /*
-
 
235
             * execution unit just met has not been met before, so
-
 
236
             * create a new entry in array of all execution units
-
 
237
             */
-
 
238
            if (i == exec_unit_count) {
-
 
239
                exec_units[i].exec_unit_id = exec_unit_id;
-
 
240
                exec_units[i].strand_count = 0;
-
 
241
                exec_unit_count++;
-
 
242
            }
-
 
243
 
-
 
244
            /*
-
 
245
             * remember the exec. unit and strand of the BSP
-
 
246
             */
-
 
247
            if (cpuid == myid) {
-
 
248
                bsp_exec_unit_index = i;
-
 
249
                bsp_core_strand_index = exec_units[i].strand_count;
-
 
250
            }
-
 
251
 
-
 
252
            /* add the CPU just met to the exec. unit's list */
-
 
253
            exec_units[i].cpuids[exec_units[i].strand_count] = cpuid;
-
 
254
            exec_units[i].strand_count++;
-
 
255
            max_core_strands =
-
 
256
                exec_units[i].strand_count > max_core_strands ?
-
 
257
                exec_units[i].strand_count : max_core_strands;
-
 
258
 
-
 
259
        /* detecting execution unit failed */
-
 
260
        } else {
-
 
261
            exec_unit_assign_error = 1;
-
 
262
        }
-
 
263
    }      
-
 
264
 
-
 
265
    /* save the number of CPUs to a globally accessible variable */
-
 
266
    config.cpu_count = cpu_count;
-
 
267
 
-
 
268
    /*
-
 
269
     * A fallback code which will be executed if finding out which
-
 
270
     * execution units belong to particular CPUs fails. Pretend there
-
 
271
     * exists just one execution unit and all CPUs belong to it.
-
 
272
     */
-
 
273
    if (exec_unit_assign_error) {
-
 
274
        bsp_exec_unit_index = 0;
-
 
275
        exec_unit_count = 1;
-
 
276
        exec_units[0].strand_count = cpu_count;
-
 
277
        exec_units[0].exec_unit_id = 1;
-
 
278
        max_core_strands = cpu_count;
-
 
279
 
-
 
280
        /* browse CPUs again, assign them the fictional exec. unit */
-
 
281
        node = md_get_root();
-
 
282
        unsigned int i = 0;
-
 
283
 
-
 
284
        while (md_next_node(&node, "cpu")) {
-
 
285
            uint64_t cpuid;
-
 
286
            md_get_integer_property(node, "id", &cpuid);
-
 
287
            if (cpuid == myid) {
-
 
288
                bsp_core_strand_index = i;
-
 
289
            }
-
 
290
            exec_units[0].cpuids[i++] = cpuid;
-
 
291
        }
-
 
292
    }
-
 
293
 
-
 
294
    /*
-
 
295
     * Reorder the execution units array elements and the cpuid array
-
 
296
     * elements so that the BSP will always be the very first CPU of
-
 
297
     * the very first execution unit.
-
 
298
     */
-
 
299
    exec_unit_t temp_exec_unit = exec_units[0];
-
 
300
    exec_units[0] = exec_units[bsp_exec_unit_index];
-
 
301
    exec_units[bsp_exec_unit_index] = temp_exec_unit;
-
 
302
 
-
 
303
    uint64_t temp_cpuid = exec_units[0].cpuids[0];
-
 
304
    exec_units[0].cpuids[0] = exec_units[0].cpuids[bsp_exec_unit_index];
-
 
305
    exec_units[0].cpuids[bsp_core_strand_index] = temp_cpuid;
-
 
306
 
-
 
307
}
-
 
308
 
-
 
309
/**
 * Determine the number of processors and detect the physical cores
 * backing them.
 *
 * When the kernel is configured with CONFIG_SIMICS_SMP_HACK, additionally
 * install the code an AP will execute once the BSP sends it an IPI, so
 * that the AP starts running HelenOS code.
 */
void smp_init(void)
{
    /* fills exec_units[] and config.cpu_count */
    detect_execution_units();

#if defined(CONFIG_SIMICS_SMP_HACK)
    simics_smp_hack_init();
#endif
}
-
 
321
 
-
 
322
/**
-
 
323
 * For each CPU sets the value of cpus[i].arch.id, where i is the
-
 
324
 * index of the CPU in the cpus variable, to the cpuid of the i-th processor
-
 
325
 * to be run. The CPUs are run such that the CPU represented by cpus[0]
-
 
326
 * is run first, cpus[1] is run after it, and cpus[cpu_count - 1] is run as the
-
 
327
 * last one.
-
 
328
 *
-
 
329
 * The CPU IDs are set such that during waking the CPUs up the
-
 
330
 * processor cores will be alternated, i.e. first one CPU from the first core
-
 
331
 * will be run, after that one CPU from the second CPU core will be run,...
-
 
332
 * then one CPU from the last core will be run, after that another CPU
-
 
333
 * from the first core will be run, then another CPU from the second core
-
 
334
 * will be run,... then another CPU from the last core will be run, and so on.
-
 
335
 */
-
 
336
static void init_cpuids(void)
-
 
337
{
-
 
338
    unsigned int cur_core_strand;
-
 
339
    unsigned int cur_core;
-
 
340
    unsigned int cur_cpu = 0;
-
 
341
 
-
 
342
    for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) {
-
 
343
        for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
-
 
344
            if (cur_core_strand > exec_units[cur_core].strand_count)
171
    delay(1000);
345
                continue;
-
 
346
 
-
 
347
            cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
-
 
348
        }
-
 
349
    }
-
 
350
}
-
 
351
 
-
 
352
/**
-
 
353
 * Wakes up a single CPU.
-
 
354
 *
-
 
355
 * @param cpuid ID of the CPU to be woken up
-
 
356
 */
-
 
357
static bool wake_cpu(uint64_t cpuid)
-
 
358
{
-
 
359
 
-
 
360
#ifdef CONFIG_SIMICS_SMP_HACK
172
    printf("Result: %d\n", ipi_unicast_to((void (*)(void)) 1234, 1));
361
    ipi_unicast_to((void (*)(void)) 1234, cpuid);
-
 
362
#else
-
 
363
    /* stop the CPU before making it execute our code */
173
        if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
364
    if (__hypercall_fast1(CPU_STOP, cpuid) != EOK)
-
 
365
        return false;
-
 
366
 
-
 
367
    /* wait for the CPU to stop */
-
 
368
    uint64_t state;
-
 
369
    __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
174
                    ESYNCH_TIMEOUT)
370
        CPU_STATE, &state);
-
 
371
    while (state == CPU_STATE_RUNNING) {
175
            printf("%s: waiting for processor (cpuid = %" PRIu32
372
        __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
-
 
373
            CPU_STATE, &state);
-
 
374
    }
-
 
375
 
-
 
376
    /* make the CPU run again and execute HelenOS code */
-
 
377
    if (__hypercall_fast4(
-
 
378
        CPU_START, cpuid,
176
                ") timed out\n", __func__, 1);
379
        (uint64_t) KA2PA(kernel_image_start),
-
 
380
        KA2PA(trap_table), bootinfo.physmem_start          
-
 
381
        ) != EOK)
-
 
382
            return false;
177
#endif
383
#endif
-
 
384
 
-
 
385
    if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
-
 
386
            ESYNCH_TIMEOUT)
-
 
387
        printf("%s: waiting for processor (cpuid = %" PRIu32
-
 
388
        ") timed out\n", __func__, cpuid);
-
 
389
 
-
 
390
    return true;
-
 
391
}
-
 
392
 
-
 
393
/** Wake application processors up. */
-
 
394
void kmp(void *arg)
-
 
395
{
-
 
396
    init_cpuids();
-
 
397
 
-
 
398
    unsigned int i;
-
 
399
 
-
 
400
    for (i = 1; i < config.cpu_count; i++) {
-
 
401
        wake_cpu(cpus[i].arch.id);
-
 
402
    }
178
}
403
}
179
 
404
 
180
/** @}
405
/** @}
181
 */
406
 */