Subversion Repositories HelenOS

Compare Revisions

Rev 4663 → Rev 4679

/branches/sparc/kernel/generic/include/cpu.h
47,7 → 47,7
*
* There is one structure like this for every processor.
*/
typedef struct {
typedef struct cpu {
SPINLOCK_DECLARE(lock);
 
tlb_shootdown_msg_t tlb_messages[TLB_MESSAGE_QUEUE_LEN];
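
Note on this hunk: giving the structure a tag (struct cpu) lets other headers refer to it through a forward declaration instead of including cpu.h; the sun4v header changed further below relies on exactly this (see its `struct cpu;` line). A minimal sketch of the pattern, with an illustrative type name not taken from the patch:

struct cpu;                        /* forward declaration; no #include needed */

typedef struct {
        struct cpu *strands[8];    /* pointers to an incomplete type are legal */
        unsigned int strand_count;
} core_info_t;                     /* illustrative name only */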
/branches/sparc/kernel/generic/src/proc/scheduler.c
61,6 → 61,7
#include <cpu.h>
#include <print.h>
#include <debug.h>
#include <arch/smp/sun4v/smp.h>
 
static void before_task_runs(void);
static void before_thread_runs(void);
220,6 → 221,8
 
atomic_dec(&CPU->nrdy);
atomic_dec(&nrdy);
if (CPU->arch.exec_unit)
atomic_dec(&(CPU->arch.exec_unit->nrdy));
r->n--;
 
/*
565,6 → 568,31
average = atomic_get(&nrdy) / config.cpu_active + 1;
count = average - atomic_get(&CPU->nrdy);
 
/* calculate the number of threads to be stolen from other exec. units */
spinlock_lock(&(CPU->arch.exec_unit->proposed_nrdy_lock));
bool eu_busy = calculate_optimal_nrdy(CPU->arch.exec_unit);
unsigned int count_other_eus = CPU->arch.proposed_nrdy
- atomic_get(&(CPU->nrdy));
spinlock_unlock(&(CPU->arch.exec_unit->proposed_nrdy_lock));
 
/*
* If the CPU's parent core is overloaded, do not do the load
* balancing, otherwise we would migrate threads which should be
* migrated to other cores and since a thread cannot be migrated
* multiple times, it would not be migrated to the other core
* in the future.
*/
if (eu_busy)
return;
 
/*
* Take the maximum - steal enough threads to satisfy both the need to
* have all virtual CPUs equally busy and the need to have all the
* cores equally busy.
*/
if (((int) count_other_eus) > count)
count = count_other_eus;
 
if (count <= 0)
goto satisfied;
 
622,6 → 650,8
atomic_dec(&cpu->nrdy);
atomic_dec(&nrdy);
if (cpu->arch.exec_unit)
atomic_dec(&(cpu->arch.exec_unit->nrdy));
 
r->n--;
list_remove(&t->rq_link);
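
The common pattern in the two scheduler.c hunks (and the thread.c hunk below) is a third level of ready-thread accounting: every site that already maintained the global nrdy and the per-CPU nrdy now also maintains the counter of the CPU's physical core. A sketch of the invariant as a single helper; the helper itself is hypothetical, not part of the patch:

/* Hypothetical helper: all three ready-thread counters move together. */
static void nrdy_adjust(cpu_t *cpu, bool inc)
{
        if (inc) {
                atomic_inc(&nrdy);                    /* system-wide */
                atomic_inc(&cpu->nrdy);               /* per virtual CPU */
                if (cpu->arch.exec_unit)              /* per physical core */
                        atomic_inc(&cpu->arch.exec_unit->nrdy);
        } else {
                atomic_dec(&nrdy);
                atomic_dec(&cpu->nrdy);
                if (cpu->arch.exec_unit)
                        atomic_dec(&cpu->arch.exec_unit->nrdy);
        }
}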
/branches/sparc/kernel/generic/src/proc/thread.c
265,6 → 265,8
atomic_inc(&nrdy);
avg = atomic_get(&nrdy) / config.cpu_active;
atomic_inc(&cpu->nrdy);
if (cpu->arch.exec_unit)
atomic_inc(&(cpu->arch.exec_unit->nrdy));
 
interrupts_restore(ipl);
}
/branches/sparc/kernel/generic/src/mm/tlb.c
152,7 → 152,7
CPU->tlb_active = 0;
spinlock_lock(&tlblock);
spinlock_unlock(&tlblock);
 
spinlock_lock(&CPU->lock);
ASSERT(CPU->tlb_messages_count <= TLB_MESSAGE_QUEUE_LEN);
 
/branches/sparc/kernel/arch/sparc64/include/sun4v/cpu.h
43,20 → 43,28
 
#ifndef __ASM__
 
struct cpu;
 
typedef struct {
uint64_t exec_unit_id;
uint8_t strand_count;
uint64_t cpuids[MAX_CORE_STRANDS];
struct cpu *cpus[MAX_CORE_STRANDS];
atomic_t nrdy;
SPINLOCK_DECLARE(proposed_nrdy_lock);
} exec_unit_t;
 
typedef struct cpu_arch {
uint64_t id; /**< virtual processor ID */
uint32_t clock_frequency; /**< Processor frequency in Hz. */
uint64_t next_tick_cmpr; /**< Next clock interrupt should be
generated when the TICK register
matches this value. */
exec_unit_t *exec_unit; /**< Physical core. */
unsigned long proposed_nrdy; /**< Proposed No. of ready threads
so that cores are equally balanced. */
} cpu_arch_t;
 
typedef struct {
uint64_t exec_unit_id;
uint8_t strand_count;
uint64_t cpuids[MAX_CORE_STRANDS];
} exec_unit_t;
 
#endif
 
#ifdef __ASM__
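
With these structures, each strand points back to its physical core (cpu_arch_t.exec_unit) and each core lists its strands (exec_unit_t.cpus), so sibling strands can be enumerated in either direction. A small sketch assuming only the definitions above; the function is illustrative, not part of the patch:

/* Illustrative only: sum the ready threads of all strands that share
 * a physical core with the given CPU. The kernel itself keeps this
 * sum incrementally in exec_unit->nrdy rather than recomputing it. */
static long core_nrdy_sum(cpu_t *cpu)
{
        exec_unit_t *eu = cpu->arch.exec_unit;
        long sum = 0;
        unsigned int k;

        for (k = 0; k < eu->strand_count; k++)
                sum += atomic_get(&eu->cpus[k]->nrdy);

        return sum;
}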
/branches/sparc/kernel/arch/sparc64/src/smp/sun4v/smp.c
51,6 → 51,7
#include <arch/sun4v/md.h>
#include <arch/sun4v/ipi.h>
#include <time/delay.h>
#include <arch/smp/sun4v/smp.h>
 
/** hypervisor code of the "running" state of the CPU */
#define CPU_STATE_RUNNING 2
138,7 → 139,55
}
#endif
 
 
/**
* Proposes the optimal number of ready threads for each virtual processor
* in the given processor core so that the processor core is as busy as the
* average processor core. The proposed number of ready threads will be
* stored in the proposed_nrdy variable of each strand's cpu_arch_t
* structure.
*
* @return true if the core is already busier than the average core (so no
* threads should be stolen for it), false otherwise.
*/
bool calculate_optimal_nrdy(exec_unit_t *exec_unit) {
 
/* calculate the number of threads the core will steal */
int avg = atomic_get(&nrdy) / exec_unit_count;
int to_steal = avg - atomic_get(&(exec_unit->nrdy));
if (to_steal < 0) {
return true;
} else if (to_steal == 0) {
return false;
}
 
/* initialize the proposals with the real numbers of ready threads */
unsigned int k;
for (k = 0; k < exec_unit->strand_count; k++) {
exec_unit->cpus[k]->arch.proposed_nrdy =
atomic_get(&(exec_unit->cpus[k]->nrdy));
}
 
/* distribute the threads to be stolen to the core's CPUs */
int j;
for (j = to_steal; j > 0; j--) {
unsigned int k;
unsigned int least_busy = 0;
unsigned int least_busy_nrdy =
exec_unit->cpus[0]->arch.proposed_nrdy;
 
/* for each stolen thread, give it to the least busy CPU */
for (k = 0; k < exec_unit->strand_count; k++) {
if (exec_unit->cpus[k]->arch.proposed_nrdy
< least_busy_nrdy) {
least_busy = k;
least_busy_nrdy =
exec_unit->cpus[k]->arch.proposed_nrdy;
}
}
exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
}
 
return false;
}
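
To illustrate the distribution loop with made-up numbers: suppose atomic_get(&nrdy) is 16 and exec_unit_count is 2, so avg = 8; if this core currently has nrdy = 5, then to_steal = 3. With initial per-strand proposals of 2, 2, 1, 0, the first stolen thread goes to the fourth strand (0 becomes 1), the second to the third strand (the comparison is strict, so on a tie the earlier strand keeps the slot), and the last again to the fourth, leaving the proposals balanced at 2, 2, 2, 2.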
 
/**
* Finds out which execution units belong to particular CPUs. By execution unit
* we mean the physical core the logical processor is backed by. Since each
* Niagara physical core has just one integer execution unit and we will
238,6 → 287,8
if (i == exec_unit_count) {
exec_units[i].exec_unit_id = exec_unit_id;
exec_units[i].strand_count = 0;
atomic_set(&(exec_units[i].nrdy), 0);
spinlock_initialize(&(exec_units[i].proposed_nrdy_lock), "proposed nrdy lock");
exec_unit_count++;
}
 
275,6 → 326,8
exec_unit_count = 1;
exec_units[0].strand_count = cpu_count;
exec_units[0].exec_unit_id = 1;
spinlock_initialize(&(exec_units[0].proposed_nrdy_lock), "proposed nrdy lock");
atomic_set(&(exec_units[0].nrdy), 0);
max_core_strands = cpu_count;
 
/* browse CPUs again, assign them the fictional exec. unit */
344,7 → 397,11
if (cur_core_strand > exec_units[cur_core].strand_count)
continue;
 
cpus[cur_cpu++].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]);
atomic_add(&(exec_units[cur_core].nrdy), atomic_get(&(cpus[cur_cpu].nrdy)));
cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]);
cur_cpu++;
}
}
}