/*
 * Copyright (c) 2006 Jakub Jermar
 * Copyright (c) 2009 Pavel Rimsky 
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup sparc64	
 * @{
 */
/** @file
 */

#include <smp/smp.h>
#include <smp/ipi.h>
#include <genarch/ofw/ofw_tree.h>
#include <cpu.h>
#include <arch/cpu.h>
#include <arch/boot/boot.h>
#include <arch.h>
#include <config.h>
#include <macros.h>
#include <arch/types.h>
#include <synch/synch.h>
#include <synch/waitq.h>
#include <print.h>
#include <arch/sun4v/hypercall.h>
#include <arch/sun4v/md.h>
#include <arch/sun4v/ipi.h>
#include <time/delay.h>

/**
 * Value of the state word returned by the CPU_STATE hypercall which kmp()
 * treats as "the CPU is still running" while waiting for it to stop.
 */
#define CPU_STATE_RUNNING	2

/** Symbol whose physical address kmp() hands to the CPU_START hypercall. */
extern void kernel_image_start(void);
/** Trap table symbol; kmp() passes KA2PA(trap_table) to the CPU_START hypercall. */
extern void *trap_table;

/** Determine number of processors.
 *
 * Starting from the root of the machine description (MD), keeps asking
 * md_next_node() for the next "cpu" node and counts how many times it
 * succeeds. The total is stored in config.cpu_count.
 */
void smp_init(void)
{
	md_node_t cursor = md_get_root();
	count_t found;

	/* one iteration per "cpu" node reported by the MD walker */
	for (found = 0; md_next_node(&cursor, "cpu"); found++)
		;

	config.cpu_count = found;
}


/** Wake application processors up. */
void kmp(void *arg)
{
#if 1
	(void) arg;

	uint64_t myid;
	__hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);

	/* stop the CPUs before making them execute our code */
	uint64_t i;
	for (i = 0; i < config.cpu_count; i++) {
		if (i == myid)
			continue;

		if (__hypercall_fast1(CPU_STOP, i) != 0)
			continue;

		uint64_t state;
		__hypercall_fast_ret1(i, 0, 0, 0, 0, CPU_STATE, &state);
		while (state == CPU_STATE_RUNNING) {
			__hypercall_fast_ret1(i, 0, 0, 0, 0, CPU_STATE, &state);
		}
	}

	/* wake the processors up, one by one */
	uint64_t state;
	for (i = 1; i < config.cpu_count; i++) {
		__hypercall_fast_ret1(i, 0, 0, 0, 0, CPU_STATE, &state);
		printf("Starting CPU %d, error code = %d.\n", i, __hypercall_fast4(
			CPU_START,
			i,
			(uint64_t) KA2PA(kernel_image_start),
			KA2PA(trap_table),
			bootinfo.physmem_start			
		));

		if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
	    			ESYNCH_TIMEOUT)
			printf("%s: waiting for processor (cpuid = %" PRIu32
		    	") timed out\n", __func__, i);
		
	}
#else

	asm volatile (
		"setx temp_cpu_mondo_handler, %g4, %g6 \n"
		//"setx 0x80246ad8, %g4, %g7 \n"
		"setx 0x80200f80, %g4, %g7 \n"

		"ldx [%g6], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x8], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x10], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x18], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x20], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x28], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x30], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x38], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"add %g7, 0x8, %g7 \n"
		"ldx [%g6 + 0x40], %g4 \n"
		"stxa %g4, [%g7] 0x14 \n"
		"membar #Sync \n"

		"flush %i7"

		);
	delay(1000);
	printf("Result: %d\n", ipi_unicast_to((void (*)(void)) 1234, 1));
		if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
	    			ESYNCH_TIMEOUT)
			printf("%s: waiting for processor (cpuid = %" PRIu32
		    	") timed out\n", __func__, 1);
#endif
}

/** @}
 */
