Subversion Repositories HelenOS

Rev

Rev 4663 | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4663 Rev 4679
1
/*
1
/*
2
 * Copyright (c) 2006 Jakub Jermar
2
 * Copyright (c) 2006 Jakub Jermar
3
 * Copyright (c) 2009 Pavel Rimsky
3
 * Copyright (c) 2009 Pavel Rimsky
4
 * All rights reserved.
4
 * All rights reserved.
5
 *
5
 *
6
 * Redistribution and use in source and binary forms, with or without
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions
7
 * modification, are permitted provided that the following conditions
8
 * are met:
8
 * are met:
9
 *
9
 *
10
 * - Redistributions of source code must retain the above copyright
10
 * - Redistributions of source code must retain the above copyright
11
 *   notice, this list of conditions and the following disclaimer.
11
 *   notice, this list of conditions and the following disclaimer.
12
 * - Redistributions in binary form must reproduce the above copyright
12
 * - Redistributions in binary form must reproduce the above copyright
13
 *   notice, this list of conditions and the following disclaimer in the
13
 *   notice, this list of conditions and the following disclaimer in the
14
 *   documentation and/or other materials provided with the distribution.
14
 *   documentation and/or other materials provided with the distribution.
15
 * - The name of the author may not be used to endorse or promote products
15
 * - The name of the author may not be used to endorse or promote products
16
 *   derived from this software without specific prior written permission.
16
 *   derived from this software without specific prior written permission.
17
 *
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
28
 */
29
 
29
 
30
/** @addtogroup sparc64
30
/** @addtogroup sparc64
31
 * @{
31
 * @{
32
 */
32
 */
33
/** @file
33
/** @file
34
 */
34
 */
35
 
35
 
36
#include <smp/smp.h>
36
#include <smp/smp.h>
37
#include <smp/ipi.h>
37
#include <smp/ipi.h>
38
#include <genarch/ofw/ofw_tree.h>
38
#include <genarch/ofw/ofw_tree.h>
39
#include <cpu.h>
39
#include <cpu.h>
40
#include <arch/cpu.h>
40
#include <arch/cpu.h>
41
#include <arch/boot/boot.h>
41
#include <arch/boot/boot.h>
42
#include <arch.h>
42
#include <arch.h>
43
#include <config.h>
43
#include <config.h>
44
#include <macros.h>
44
#include <macros.h>
45
#include <func.h>
45
#include <func.h>
46
#include <arch/types.h>
46
#include <arch/types.h>
47
#include <synch/synch.h>
47
#include <synch/synch.h>
48
#include <synch/waitq.h>
48
#include <synch/waitq.h>
49
#include <print.h>
49
#include <print.h>
50
#include <arch/sun4v/hypercall.h>
50
#include <arch/sun4v/hypercall.h>
51
#include <arch/sun4v/md.h>
51
#include <arch/sun4v/md.h>
52
#include <arch/sun4v/ipi.h>
52
#include <arch/sun4v/ipi.h>
53
#include <time/delay.h>
53
#include <time/delay.h>
-
 
54
#include <arch/smp/sun4v/smp.h>
54
 
55
 
55
/** hypervisor code of the "running" state of the CPU */
56
/** hypervisor code of the "running" state of the CPU */
56
#define CPU_STATE_RUNNING   2
57
#define CPU_STATE_RUNNING   2
57
 
58
 
58
/** maximum possible number of processor cores */
59
/** maximum possible number of processor cores */
59
#define MAX_NUM_CORES       8
60
#define MAX_NUM_CORES       8
60
 
61
 
61
/** needed in the CPU_START hypercall */
62
/** needed in the CPU_START hypercall */
62
extern void kernel_image_start(void);
63
extern void kernel_image_start(void);
63
 
64
 
64
/** needed in the CPU_START hypercall */
65
/** needed in the CPU_START hypercall */
65
extern void *trap_table;
66
extern void *trap_table;
66
 
67
 
67
/** number of execution units detected */
68
/** number of execution units detected */
68
uint8_t exec_unit_count = 0;
69
uint8_t exec_unit_count = 0;
69
 
70
 
70
/** execution units (processor cores) */
71
/** execution units (processor cores) */
71
exec_unit_t exec_units[MAX_NUM_CORES];
72
exec_unit_t exec_units[MAX_NUM_CORES];
72
 
73
 
73
/** CPU structures */
74
/** CPU structures */
74
extern cpu_t *cpus;
75
extern cpu_t *cpus;
75
 
76
 
76
/** maximum number of strands per a physical core detected */
77
/** maximum number of strands per a physical core detected */
77
unsigned int max_core_strands = 0;
78
unsigned int max_core_strands = 0;
78
 
79
 
79
#ifdef CONFIG_SIMICS_SMP_HACK
/**
 * Copies a piece of HelenOS code to the place where OBP had its IPI handler.
 * By sending an IPI by the BSP to the AP the code will be executed.
 * The code will jump to the first instruction of the kernel. This is
 * a workaround how to make APs execute HelenOS code on Simics.
 *
 * The code copies 9 doublewords starting at temp_cpu_mondo_handler to
 * physical address 0x80200f80 using the ASI 0x14 (real address) stores,
 * with a membar #Sync after each store, and finally flushes the
 * instruction cache.
 */
static void simics_smp_hack_init(void) {
    asm volatile (
        "setx temp_cpu_mondo_handler, %g4, %g6 \n"
        "setx 0x80200f80, %g4, %g7 \n"

        "ldx [%g6], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x8], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x10], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x18], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x20], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x28], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x30], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x38], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x40], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "flush %i7"

        );
}
#endif
140
 
141
 
-
 
142
 
-
 
143
/**
-
 
144
 * Proposes the optimal number of ready threads for each virtual processor
-
 
145
 * in the given processor core so that the processor core is as busy as the
-
 
146
 * average processor core. The proposed number of ready threads will be
-
 
147
 * stored to the proposed_nrdy variable of the cpu_arch_t struture.
-
 
148
 */
-
 
149
bool calculate_optimal_nrdy(exec_unit_t *exec_unit) {
-
 
150
 
-
 
151
    /* calculate the number of threads the core will steal */
-
 
152
    int avg = atomic_get(&nrdy) / exec_unit_count;
-
 
153
    int to_steal = avg - atomic_get(&(exec_units->nrdy));
-
 
154
    if (to_steal < 0) {
-
 
155
        return true;
-
 
156
    } else if (to_steal == 0) {
-
 
157
        return false;
-
 
158
    }
-
 
159
 
-
 
160
    /* initialize the proposals with the real numbers of ready threads */
-
 
161
    unsigned int k;
-
 
162
    for (k = 0; k < exec_unit->strand_count; k++) {
-
 
163
        exec_units->cpus[k]->arch.proposed_nrdy =
-
 
164
            atomic_get(&(exec_unit->cpus[k]->nrdy));
-
 
165
    }
-
 
166
 
-
 
167
    /* distribute the threads to be stolen to the core's CPUs */
-
 
168
    int j;
-
 
169
    for (j = to_steal; j > 0; j--) {
-
 
170
        unsigned int k;
-
 
171
        unsigned int least_busy = 0;
-
 
172
        unsigned int least_busy_nrdy =
-
 
173
            exec_unit->cpus[0]->arch.proposed_nrdy;
-
 
174
 
-
 
175
        /* for each stolen thread, give it to the least busy CPU */
-
 
176
        for (k = 0; k < exec_unit->strand_count; k++) {
-
 
177
            if (exec_unit->cpus[k]->arch.proposed_nrdy
-
 
178
                    < least_busy_nrdy) {
-
 
179
                least_busy = k;
-
 
180
                least_busy_nrdy =
-
 
181
                    exec_unit->cpus[k]->arch.proposed_nrdy;
-
 
182
            }
-
 
183
        }
-
 
184
        exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
-
 
185
    }
-
 
186
 
-
 
187
    return false;
-
 
188
}
-
 
189
 
141
/**
190
/**
142
 * Finds out which execution units belong to particular CPUs. By execution unit
191
 * Finds out which execution units belong to particular CPUs. By execution unit
143
 * we mean the physical core the logical processor is backed by. Since each
192
 * we mean the physical core the logical processor is backed by. Since each
144
 * Niagara physical core has just one integer execution unit and we will
193
 * Niagara physical core has just one integer execution unit and we will
145
 * ignore other execution units than the integer ones, we will use the terms
194
 * ignore other execution units than the integer ones, we will use the terms
146
 * "integer execution unit", "execution unit" and "physical core"
195
 * "integer execution unit", "execution unit" and "physical core"
147
 * interchangeably.
196
 * interchangeably.
148
 *
197
 *
149
 * The physical cores are detected by browsing the children of the CPU node
198
 * The physical cores are detected by browsing the children of the CPU node
150
 * in the machine description and looking for a node representing an integer
199
 * in the machine description and looking for a node representing an integer
151
 * execution unit. Once the integer execution unit of a particular CPU is
200
 * execution unit. Once the integer execution unit of a particular CPU is
152
 * known, the ID of the CPU is added to the list of cpuids of the corresponding
201
 * known, the ID of the CPU is added to the list of cpuids of the corresponding
153
 * execution unit structure (exec_unit_t). If an execution unit is encountered
202
 * execution unit structure (exec_unit_t). If an execution unit is encountered
154
 * for the first time, a new execution unit structure (exec_unit_t) must be
203
 * for the first time, a new execution unit structure (exec_unit_t) must be
155
 * created first and added to the execution units array (exec_units).
204
 * created first and added to the execution units array (exec_units).
156
 *
205
 *
157
 * If the function fails to find an execution unit for a CPU (this may happen
206
 * If the function fails to find an execution unit for a CPU (this may happen
158
 * on machines with older firmware or on Simics), it performs a fallback code
207
 * on machines with older firmware or on Simics), it performs a fallback code
159
 * which pretends there exists just one execution unit and all CPUs belong to
208
 * which pretends there exists just one execution unit and all CPUs belong to
160
 * it.
209
 * it.
161
 *
210
 *
162
 * Finally, the array of all execution units is reordered such that its element
211
 * Finally, the array of all execution units is reordered such that its element
163
 * which represents the physical core of the the bootstrap CPU is at index 0.
212
 * which represents the physical core of the the bootstrap CPU is at index 0.
164
 * Moreover, the array of CPU IDs within the BSP's physical core structure is
213
 * Moreover, the array of CPU IDs within the BSP's physical core structure is
165
 * reordered such that the element which represents the ID of the BSP is at
214
 * reordered such that the element which represents the ID of the BSP is at
166
 * index 0. This is done because we would like the CPUs to be woken up
215
 * index 0. This is done because we would like the CPUs to be woken up
167
 * such that the 0-index CPU of the 0-index execution unit is
216
 * such that the 0-index CPU of the 0-index execution unit is
168
 * woken up first. And since the BSP is already woken up, we would like it to be
217
 * woken up first. And since the BSP is already woken up, we would like it to be
169
 * at 0-th position of the 0-th execution unit structure.
218
 * at 0-th position of the 0-th execution unit structure.
170
 *
219
 *
171
 * Apart from that, the code also counts the total number of CPUs and stores
220
 * Apart from that, the code also counts the total number of CPUs and stores
172
 * it to the global config.cpu_count variable.
221
 * it to the global config.cpu_count variable.
173
 */
222
 */
174
static void detect_execution_units(void)
223
static void detect_execution_units(void)
175
{
224
{
176
    /* ID of the bootstrap processor */
225
    /* ID of the bootstrap processor */
177
    uint64_t myid;
226
    uint64_t myid;
178
 
227
 
179
    /* total number of CPUs detected */
228
    /* total number of CPUs detected */
180
    count_t cpu_count = 0;
229
    count_t cpu_count = 0;
181
 
230
 
182
    /* will be set to 1 if detecting the physical cores fails */
231
    /* will be set to 1 if detecting the physical cores fails */
183
    bool exec_unit_assign_error = 0;
232
    bool exec_unit_assign_error = 0;
184
 
233
 
185
    /* index of the bootstrap physical core in the array of cores */
234
    /* index of the bootstrap physical core in the array of cores */
186
    unsigned int bsp_exec_unit_index = 0;
235
    unsigned int bsp_exec_unit_index = 0;
187
 
236
 
188
    /* index of the BSP ID inside the array of bootstrap core's cpuids */
237
    /* index of the BSP ID inside the array of bootstrap core's cpuids */
189
    unsigned int bsp_core_strand_index = 0;
238
    unsigned int bsp_core_strand_index = 0;
190
 
239
 
191
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
240
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
192
    md_node_t node = md_get_root();
241
    md_node_t node = md_get_root();
193
 
242
 
194
    /* walk through all the CPU nodes in the MD*/
243
    /* walk through all the CPU nodes in the MD*/
195
    while (md_next_node(&node, "cpu")) {
244
    while (md_next_node(&node, "cpu")) {
196
 
245
 
197
        uint64_t cpuid;
246
        uint64_t cpuid;
198
        md_get_integer_property(node, "id", &cpuid);
247
        md_get_integer_property(node, "id", &cpuid);
199
        cpu_count++;
248
        cpu_count++;
200
 
249
 
201
        /*
250
        /*
202
         * if failed in previous CPUs, don't try
251
         * if failed in previous CPUs, don't try
203
         * to detect physical cores any more
252
         * to detect physical cores any more
204
         */
253
         */
205
        if (exec_unit_assign_error)
254
        if (exec_unit_assign_error)
206
            continue;
255
            continue;
207
 
256
 
208
        /* detect exec. unit for the CPU represented by current node */
257
        /* detect exec. unit for the CPU represented by current node */
209
        uint64_t exec_unit_id = 0;
258
        uint64_t exec_unit_id = 0;
210
        md_child_iter_t it = md_get_child_iterator(node);
259
        md_child_iter_t it = md_get_child_iterator(node);
211
 
260
 
212
        while (md_next_child(&it)) {
261
        while (md_next_child(&it)) {
213
            md_node_t child = md_get_child_node(it);
262
            md_node_t child = md_get_child_node(it);
214
            const char *exec_unit_type;
263
            const char *exec_unit_type;
215
            md_get_string_property(child, "type", &exec_unit_type);
264
            md_get_string_property(child, "type", &exec_unit_type);
216
 
265
 
217
            /* each physical core has just 1 integer exec. unit */
266
            /* each physical core has just 1 integer exec. unit */
218
            if (strcmp(exec_unit_type, "integer") == 0) {
267
            if (strcmp(exec_unit_type, "integer") == 0) {
219
                exec_unit_id = child;
268
                exec_unit_id = child;
220
                break;
269
                break;
221
            }
270
            }
222
        }
271
        }
223
 
272
 
224
        /* execution unit detected successfully */
273
        /* execution unit detected successfully */
225
        if (exec_unit_id != 0) {
274
        if (exec_unit_id != 0) {
226
 
275
 
227
            /* find the exec. unit in array of existing units */
276
            /* find the exec. unit in array of existing units */
228
            unsigned int i = 0;
277
            unsigned int i = 0;
229
            for (i = 0; i < exec_unit_count; i++) {
278
            for (i = 0; i < exec_unit_count; i++) {
230
                if (exec_units[i].exec_unit_id == exec_unit_id)
279
                if (exec_units[i].exec_unit_id == exec_unit_id)
231
                    break;
280
                    break;
232
            }
281
            }
233
 
282
 
234
            /*
283
            /*
235
             * execution unit just met has not been met before, so
284
             * execution unit just met has not been met before, so
236
             * create a new entry in array of all execution units
285
             * create a new entry in array of all execution units
237
             */
286
             */
238
            if (i == exec_unit_count) {
287
            if (i == exec_unit_count) {
239
                exec_units[i].exec_unit_id = exec_unit_id;
288
                exec_units[i].exec_unit_id = exec_unit_id;
240
                exec_units[i].strand_count = 0;
289
                exec_units[i].strand_count = 0;
-
 
290
                atomic_set(&(exec_units[i].nrdy), 0);
-
 
291
                spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock");
241
                exec_unit_count++;
292
                exec_unit_count++;
242
            }
293
            }
243
 
294
 
244
            /*
295
            /*
245
             * remember the exec. unit and strand of the BSP
296
             * remember the exec. unit and strand of the BSP
246
             */
297
             */
247
            if (cpuid == myid) {
298
            if (cpuid == myid) {
248
                bsp_exec_unit_index = i;
299
                bsp_exec_unit_index = i;
249
                bsp_core_strand_index = exec_units[i].strand_count;
300
                bsp_core_strand_index = exec_units[i].strand_count;
250
            }
301
            }
251
 
302
 
252
            /* add the CPU just met to the exec. unit's list */
303
            /* add the CPU just met to the exec. unit's list */
253
            exec_units[i].cpuids[exec_units[i].strand_count] = cpuid;
304
            exec_units[i].cpuids[exec_units[i].strand_count] = cpuid;
254
            exec_units[i].strand_count++;
305
            exec_units[i].strand_count++;
255
            max_core_strands =
306
            max_core_strands =
256
                exec_units[i].strand_count > max_core_strands ?
307
                exec_units[i].strand_count > max_core_strands ?
257
                exec_units[i].strand_count : max_core_strands;
308
                exec_units[i].strand_count : max_core_strands;
258
 
309
 
259
        /* detecting execution unit failed */
310
        /* detecting execution unit failed */
260
        } else {
311
        } else {
261
            exec_unit_assign_error = 1;
312
            exec_unit_assign_error = 1;
262
        }
313
        }
263
    }      
314
    }      
264
 
315
 
265
    /* save the number of CPUs to a globally accessible variable */
316
    /* save the number of CPUs to a globally accessible variable */
266
    config.cpu_count = cpu_count;
317
    config.cpu_count = cpu_count;
267
 
318
 
268
    /*
319
    /*
269
     * A fallback code which will be executed if finding out which
320
     * A fallback code which will be executed if finding out which
270
     * execution units belong to particular CPUs fails. Pretend there
321
     * execution units belong to particular CPUs fails. Pretend there
271
     * exists just one execution unit and all CPUs belong to it.
322
     * exists just one execution unit and all CPUs belong to it.
272
     */
323
     */
273
    if (exec_unit_assign_error) {
324
    if (exec_unit_assign_error) {
274
        bsp_exec_unit_index = 0;
325
        bsp_exec_unit_index = 0;
275
        exec_unit_count = 1;
326
        exec_unit_count = 1;
276
        exec_units[0].strand_count = cpu_count;
327
        exec_units[0].strand_count = cpu_count;
277
        exec_units[0].exec_unit_id = 1;
328
        exec_units[0].exec_unit_id = 1;
-
 
329
        spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock");
-
 
330
        atomic_set(&(exec_units[0].nrdy), 0);
278
        max_core_strands = cpu_count;
331
        max_core_strands = cpu_count;
279
 
332
 
280
        /* browse CPUs again, assign them the fictional exec. unit */
333
        /* browse CPUs again, assign them the fictional exec. unit */
281
        node = md_get_root();
334
        node = md_get_root();
282
        unsigned int i = 0;
335
        unsigned int i = 0;
283
 
336
 
284
        while (md_next_node(&node, "cpu")) {
337
        while (md_next_node(&node, "cpu")) {
285
            uint64_t cpuid;
338
            uint64_t cpuid;
286
            md_get_integer_property(node, "id", &cpuid);
339
            md_get_integer_property(node, "id", &cpuid);
287
            if (cpuid == myid) {
340
            if (cpuid == myid) {
288
                bsp_core_strand_index = i;
341
                bsp_core_strand_index = i;
289
            }
342
            }
290
            exec_units[0].cpuids[i++] = cpuid;
343
            exec_units[0].cpuids[i++] = cpuid;
291
        }
344
        }
292
    }
345
    }
293
 
346
 
294
    /*
347
    /*
295
     * Reorder the execution units array elements and the cpuid array
348
     * Reorder the execution units array elements and the cpuid array
296
     * elements so that the BSP will always be the very first CPU of
349
     * elements so that the BSP will always be the very first CPU of
297
     * the very first execution unit.
350
     * the very first execution unit.
298
     */
351
     */
299
    exec_unit_t temp_exec_unit = exec_units[0];
352
    exec_unit_t temp_exec_unit = exec_units[0];
300
    exec_units[0] = exec_units[bsp_exec_unit_index];
353
    exec_units[0] = exec_units[bsp_exec_unit_index];
301
    exec_units[bsp_exec_unit_index] = temp_exec_unit;
354
    exec_units[bsp_exec_unit_index] = temp_exec_unit;
302
 
355
 
303
    uint64_t temp_cpuid = exec_units[0].cpuids[0];
356
    uint64_t temp_cpuid = exec_units[0].cpuids[0];
304
    exec_units[0].cpuids[0] = exec_units[0].cpuids[bsp_exec_unit_index];
357
    exec_units[0].cpuids[0] = exec_units[0].cpuids[bsp_exec_unit_index];
305
    exec_units[0].cpuids[bsp_core_strand_index] = temp_cpuid;
358
    exec_units[0].cpuids[bsp_core_strand_index] = temp_cpuid;
306
 
359
 
307
}
360
}
308
 
361
 
309
/**
 * Determine number of processors and detect physical cores. On Simics
 * copy the code which will be executed by the AP when the BSP sends an
 * IPI to it in order to make it execute HelenOS code.
 */
void smp_init(void)
{
    detect_execution_units();
#ifdef CONFIG_SIMICS_SMP_HACK
    simics_smp_hack_init();
#endif
}
321
 
374
 
322
/**
375
/**
323
 * For each CPU sets the value of cpus[i].arch.id, where i is the
376
 * For each CPU sets the value of cpus[i].arch.id, where i is the
324
 * index of the CPU in the cpus variable, to the cpuid of the i-th processor
377
 * index of the CPU in the cpus variable, to the cpuid of the i-th processor
325
 * to be run. The CPUs are run such that the CPU represented by cpus[0]
378
 * to be run. The CPUs are run such that the CPU represented by cpus[0]
326
 * is run first, cpus[1] is run after it, and cpus[cpu_count - 1] is run as the
379
 * is run first, cpus[1] is run after it, and cpus[cpu_count - 1] is run as the
327
 * last one.
380
 * last one.
328
 *
381
 *
329
 * The CPU IDs are set such that during waking the CPUs up the
382
 * The CPU IDs are set such that during waking the CPUs up the
330
 * processor cores will be alternated, i.e. first one CPU from the first core
383
 * processor cores will be alternated, i.e. first one CPU from the first core
331
 * will be run, after that one CPU from the second CPU core will be run,...
384
 * will be run, after that one CPU from the second CPU core will be run,...
332
 * then one CPU from the last core will be run, after that another CPU
385
 * then one CPU from the last core will be run, after that another CPU
333
 * from the first core will be run, then another CPU from the second core
386
 * from the first core will be run, then another CPU from the second core
334
 * will be run,... then another CPU from the last core will be run, and so on.
387
 * will be run,... then another CPU from the last core will be run, and so on.
335
 */
388
 */
336
static void init_cpuids(void)
389
static void init_cpuids(void)
337
{
390
{
338
    unsigned int cur_core_strand;
391
    unsigned int cur_core_strand;
339
    unsigned int cur_core;
392
    unsigned int cur_core;
340
    unsigned int cur_cpu = 0;
393
    unsigned int cur_cpu = 0;
341
 
394
 
342
    for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) {
395
    for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) {
343
        for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
396
        for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
344
            if (cur_core_strand > exec_units[cur_core].strand_count)
397
            if (cur_core_strand > exec_units[cur_core].strand_count)
345
                continue;
398
                continue;
346
 
399
 
-
 
400
            cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]);
-
 
401
            atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy)));
347
            cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
402
            cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
-
 
403
            exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]);
-
 
404
            cur_cpu++;
348
        }
405
        }
349
    }
406
    }
350
}
407
}
351
 
408
 
352
/**
409
/**
353
 * Wakes up a single CPU.
410
 * Wakes up a single CPU.
354
 *
411
 *
355
 * @param cpuid ID of the CPU to be woken up
412
 * @param cpuid ID of the CPU to be woken up
356
 */
413
 */
357
static bool wake_cpu(uint64_t cpuid)
414
static bool wake_cpu(uint64_t cpuid)
358
{
415
{
359
 
416
 
360
#ifdef CONFIG_SIMICS_SMP_HACK
417
#ifdef CONFIG_SIMICS_SMP_HACK
361
    ipi_unicast_to((void (*)(void)) 1234, cpuid);
418
    ipi_unicast_to((void (*)(void)) 1234, cpuid);
362
#else
419
#else
363
    /* stop the CPU before making it execute our code */
420
    /* stop the CPU before making it execute our code */
364
    if (__hypercall_fast1(CPU_STOP, cpuid) != EOK)
421
    if (__hypercall_fast1(CPU_STOP, cpuid) != EOK)
365
        return false;
422
        return false;
366
 
423
 
367
    /* wait for the CPU to stop */
424
    /* wait for the CPU to stop */
368
    uint64_t state;
425
    uint64_t state;
369
    __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
426
    __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
370
        CPU_STATE, &state);
427
        CPU_STATE, &state);
371
    while (state == CPU_STATE_RUNNING) {
428
    while (state == CPU_STATE_RUNNING) {
372
        __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
429
        __hypercall_fast_ret1(cpuid, 0, 0, 0, 0,
373
            CPU_STATE, &state);
430
            CPU_STATE, &state);
374
    }
431
    }
375
 
432
 
376
    /* make the CPU run again and execute HelenOS code */
433
    /* make the CPU run again and execute HelenOS code */
377
    if (__hypercall_fast4(
434
    if (__hypercall_fast4(
378
        CPU_START, cpuid,
435
        CPU_START, cpuid,
379
        (uint64_t) KA2PA(kernel_image_start),
436
        (uint64_t) KA2PA(kernel_image_start),
380
        KA2PA(trap_table), bootinfo.physmem_start          
437
        KA2PA(trap_table), bootinfo.physmem_start          
381
        ) != EOK)
438
        ) != EOK)
382
            return false;
439
            return false;
383
#endif
440
#endif
384
 
441
 
385
    if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
442
    if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
386
            ESYNCH_TIMEOUT)
443
            ESYNCH_TIMEOUT)
387
        printf("%s: waiting for processor (cpuid = %" PRIu32
444
        printf("%s: waiting for processor (cpuid = %" PRIu32
388
        ") timed out\n", __func__, cpuid);
445
        ") timed out\n", __func__, cpuid);
389
 
446
 
390
    return true;
447
    return true;
391
}
448
}
392
 
449
 
393
/** Wake application processors up. */
450
/** Wake application processors up. */
394
void kmp(void *arg)
451
void kmp(void *arg)
395
{
452
{
396
    init_cpuids();
453
    init_cpuids();
397
 
454
 
398
    unsigned int i;
455
    unsigned int i;
399
 
456
 
400
    for (i = 1; i < config.cpu_count; i++) {
457
    for (i = 1; i < config.cpu_count; i++) {
401
        wake_cpu(cpus[i].arch.id);
458
        wake_cpu(cpus[i].arch.id);
402
    }
459
    }
403
}
460
}
404
 
461
 
405
/** @}
462
/** @}
406
 */
463
 */
407
 
464