//kernel/trunk/arch/ia64/include/interrupt.h |
---|
48,6 → 48,39 |
#define EOI 0 /**< The actual value doesn't matter. */ |
struct istate { |
__r128 f2; |
__r128 f3; |
__r128 f4; |
__r128 f5; |
__r128 f6; |
__r128 f7; |
__r128 f8; |
__r128 f9; |
__r128 f10; |
__r128 f11; |
__r128 f12; |
__r128 f13; |
__r128 f14; |
__r128 f15; |
__r128 f16; |
__r128 f17; |
__r128 f18; |
__r128 f19; |
__r128 f20; |
__r128 f21; |
__r128 f22; |
__r128 f23; |
__r128 f24; |
__r128 f25; |
__r128 f26; |
__r128 f27; |
__r128 f28; |
__r128 f29; |
__r128 f30; |
__r128 f31; |
__address ar_bsp; |
__address ar_bspstore; |
__address ar_bspstore_new; |
//kernel/trunk/arch/ia64/include/fpu_context.h |
---|
34,8 → 34,7 |
#include <arch/types.h> |
#define FRS 128 |
#define SAVABLE_FRS_OFFSET 2 |
#define FRS 96 |
struct fpu_context { |
__r128 fr[FRS]; |
//kernel/trunk/arch/ia64/include/context.h |
---|
96,7 → 96,31 |
*/ |
__u64 pr; |
__r128 f2 __attribute__ ((aligned(16))); |
__r128 f3; |
__r128 f4; |
__r128 f5; |
__r128 f16; |
__r128 f17; |
__r128 f18; |
__r128 f19; |
__r128 f20; |
__r128 f21; |
__r128 f22; |
__r128 f23; |
__r128 f24; |
__r128 f25; |
__r128 f26; |
__r128 f27; |
__r128 f28; |
__r128 f29; |
__r128 f30; |
__r128 f31; |
ipl_t ipl; |
}; |
#endif |
//kernel/trunk/arch/ia64/Makefile.inc |
---|
41,7 → 41,7 |
INIT_ADDRESS = 0xe000000000400000 |
INIT_SIZE = 0x100000 |
CFLAGS += -mconstant-gp -fno-unwind-tables |
CFLAGS += -mconstant-gp -fno-unwind-tables -mfixed-range=f32-f127 |
LFLAGS += -EL |
AFLAGS += -mconstant-gp |
//kernel/trunk/arch/ia64/src/fpu_context.c |
---|
31,43 → 31,11 |
#include <arch/register.h> |
#include <print.h> |
void fpu_context_save(fpu_context_t *fctx){ |
asm volatile( |
"stf.spill [%2]=f2,0x80\n" |
"stf.spill [%3]=f3,0x80\n" |
"stf.spill [%4]=f4,0x80\n" |
"stf.spill [%5]=f5,0x80\n" |
"stf.spill [%6]=f6,0x80\n" |
"stf.spill [%7]=f7,0x80\n;;" |
"stf.spill [%0]=f8,0x80\n" |
"stf.spill [%1]=f9,0x80\n" |
"stf.spill [%2]=f10,0x80\n" |
"stf.spill [%3]=f11,0x80\n" |
"stf.spill [%4]=f12,0x80\n" |
"stf.spill [%5]=f13,0x80\n" |
"stf.spill [%6]=f14,0x80\n" |
"stf.spill [%7]=f15,0x80\n;;" |
"stf.spill [%0]=f16,0x80\n" |
"stf.spill [%1]=f17,0x80\n" |
"stf.spill [%2]=f18,0x80\n" |
"stf.spill [%3]=f19,0x80\n" |
"stf.spill [%4]=f20,0x80\n" |
"stf.spill [%5]=f21,0x80\n" |
"stf.spill [%6]=f22,0x80\n" |
"stf.spill [%7]=f23,0x80\n;;" |
"stf.spill [%0]=f24,0x80\n" |
"stf.spill [%1]=f25,0x80\n" |
"stf.spill [%2]=f26,0x80\n" |
"stf.spill [%3]=f27,0x80\n" |
"stf.spill [%4]=f28,0x80\n" |
"stf.spill [%5]=f29,0x80\n" |
"stf.spill [%6]=f30,0x80\n" |
"stf.spill [%7]=f31,0x80\n;;" |
"stf.spill [%0]=f32,0x80\n" |
"stf.spill [%1]=f33,0x80\n" |
"stf.spill [%2]=f34,0x80\n" |
188,42 → 156,8 |
void fpu_context_restore(fpu_context_t *fctx) |
{ |
asm volatile( |
"ldf.fill f2=[%2],0x80\n" |
"ldf.fill f3=[%3],0x80\n" |
"ldf.fill f4=[%4],0x80\n" |
"ldf.fill f5=[%5],0x80\n" |
"ldf.fill f6=[%6],0x80\n" |
"ldf.fill f7=[%7],0x80\n;;" |
"ldf.fill f8=[%0],0x80\n" |
"ldf.fill f9=[%1],0x80\n" |
"ldf.fill f10=[%2],0x80\n" |
"ldf.fill f11=[%3],0x80\n" |
"ldf.fill f12=[%4],0x80\n" |
"ldf.fill f13=[%5],0x80\n" |
"ldf.fill f14=[%6],0x80\n" |
"ldf.fill f15=[%7],0x80\n;;" |
"ldf.fill f16=[%0],0x80\n" |
"ldf.fill f17=[%1],0x80\n" |
"ldf.fill f18=[%2],0x80\n" |
"ldf.fill f19=[%3],0x80\n" |
"ldf.fill f20=[%4],0x80\n" |
"ldf.fill f21=[%5],0x80\n" |
"ldf.fill f22=[%6],0x80\n" |
"ldf.fill f23=[%7],0x80\n;;" |
"ldf.fill f24=[%0],0x80\n" |
"ldf.fill f25=[%1],0x80\n" |
"ldf.fill f26=[%2],0x80\n" |
"ldf.fill f27=[%3],0x80\n" |
"ldf.fill f28=[%4],0x80\n" |
"ldf.fill f29=[%5],0x80\n" |
"ldf.fill f30=[%6],0x80\n" |
"ldf.fill f31=[%7],0x80\n;;" |
"ldf.fill f32=[%0],0x80\n" |
"ldf.fill f33=[%1],0x80\n" |
"ldf.fill f34=[%2],0x80\n" |
338,31 → 272,48 |
:"r" (&((fctx->fr)[0])),"r" (&((fctx->fr)[1])),"r" (&((fctx->fr)[2])),"r" (&((fctx->fr)[3])), |
"r" (&((fctx->fr)[4])),"r" (&((fctx->fr)[5])),"r" (&((fctx->fr)[6])),"r" (&((fctx->fr)[7])) |
); |
} |
void fpu_disable(void) |
void fpu_enable(void) |
{ |
__u64 a = 0 ; |
asm volatile( |
"ssm %0;;\n" |
"rsm %0;;" |
"srlz.i\n" |
"srlz.d;;\n" |
: |
:"i" (PSR_DFL_MASK|PSR_DFH_MASK) |
:"i" (PSR_DFH_MASK) |
); |
asm volatile |
( |
"mov %0=ar.fpsr;;\n" |
"or %0=%0,%1;;\n" |
"mov ar.fpsr=%0;;\n" |
: "+r" (a) |
: "r" (0x38) |
); |
} |
void fpu_enable(void) |
void fpu_disable(void) |
{ |
__u64 a = 0 ; |
asm volatile( |
"rsm %0;;\n" |
"ssm %0;;\n" |
"srlz.i\n" |
"srlz.d;;\n" |
: |
:"i" (PSR_DFL_MASK|PSR_DFH_MASK) |
:"i" (PSR_DFH_MASK) |
); |
asm volatile |
( |
"mov %0=ar.fpsr;;\n" |
"or %0=%0,%1;;\n" |
"mov ar.fpsr=%0;;\n" |
: "+r" (a) |
: "r" (0x38) |
); |
} |
369,7 → 320,6 |
void fpu_init(void) |
{ |
__u64 a = 0; |
fpu_enable(); |
asm volatile |
( |
"mov %0=ar.fpsr;;\n" |
521,6 → 471,5 |
); |
fpu_enable(); |
} |
//kernel/trunk/arch/ia64/src/ivt.S |
---|
32,7 → 32,11 |
#include <arch/mm/page.h> |
#include <align.h> |
#define STACK_ITEMS 19 |
#define FRS_TO_SAVE 30 |
#define STACK_ITEMS (19 + FRS_TO_SAVE*2) |
//#define STACK_ITEMS 19 |
/* 30*2 for FPU registers */ |
#define STACK_FRAME_SIZE ALIGN_UP((STACK_ITEMS*STACK_ITEM_SIZE) + STACK_SCRATCH_AREA_SIZE, STACK_ALIGNMENT) |
#if (STACK_ITEMS % 2 == 0) |
78,6 → 82,19 |
/* |
* Note that r24-r31 from bank 0 can be used only as long as PSR.ic = 0. |
*/ |
/* Set up FPU (PSR.dfh) as in the interrupted context */ |
mov r24=psr |
mov r25=cr.ipsr |
mov r26=(PSR_DFH_MASK) |
mov r27=(~(PSR_DFH_MASK));; |
and r26=r25,r26 |
and r24=r24,r27;; |
or r24=r24,r26;; |
mov psr.l=r24;; |
srlz.i |
srlz.d;; |
mov r24 = cr.iip |
mov r25 = cr.ipsr |
mov r26 = cr.iipa |
195,7 → 212,7 |
/* 16. RSE switch to interrupted context */ |
cover /* allocate zero size frame (step 1 (from Intel Docs)) */ |
add r31 = STACK_SCRATCH_AREA_SIZE, r12 ;; |
add r31 = (STACK_SCRATCH_AREA_SIZE+(FRS_TO_SAVE*2*8)), r12 ;; |
ld8 r30 = [r31], +8 ;; /* load ar.bsp */ |
ld8 r29 = [r31], +8 ;; /* load ar.bspstore */ |
230,12 → 247,20 |
ld8 r25 = [r31], +8 ;; /* load cr.ipsr */ |
ld8 r24 = [r31], +8 ;; /* load cr.iip */ |
mov cr.iip = r24 |
mov cr.ipsr = r25 |
mov cr.iip = r24;; |
mov cr.iipa = r26 |
mov cr.isr = r27 |
mov cr.ifa = r28 |
/*Set up FPU as in exception*/ |
mov r24=psr |
mov r26=(PSR_DFH_MASK) |
mov r27=(~(PSR_DFH_MASK));; |
and r25=r25,r27 |
and r24=r24,r26;; |
or r25=r25,r24;; |
mov cr.ipsr = r25 |
/* 18. restore predicate registers from memory stack */ |
ld8 r29 = [r31], +8 ;; /* load predicate registers */ |
mov pr = r29 |
315,6 → 340,57 |
mov loc45 = r30 |
mov loc46 = r31 |
mov r24=96 + STACK_SCRATCH_AREA_SIZE |
mov r25=112 + STACK_SCRATCH_AREA_SIZE |
mov r26=0 + STACK_SCRATCH_AREA_SIZE |
mov r27=16 + STACK_SCRATCH_AREA_SIZE |
mov r28=32 + STACK_SCRATCH_AREA_SIZE |
mov r29=48 + STACK_SCRATCH_AREA_SIZE |
mov r30=64 + STACK_SCRATCH_AREA_SIZE |
mov r31=80 + STACK_SCRATCH_AREA_SIZE;; |
add r24=r12,r24 |
add r25=r12,r25 |
add r26=r12,r26 |
add r27=r12,r27 |
add r28=r12,r28 |
add r29=r12,r29 |
add r30=r12,r30 |
add r31=r12,r31;; |
stf.spill [r26]=f2,0x80 |
stf.spill [r27]=f3,0x80 |
stf.spill [r28]=f4,0x80 |
stf.spill [r29]=f5,0x80 |
stf.spill [r30]=f6,0x80 |
stf.spill [r31]=f7,0x80;; |
stf.spill [r24]=f8,0x80 |
stf.spill [r25]=f9,0x80 |
stf.spill [r26]=f10,0x80 |
stf.spill [r27]=f11,0x80 |
stf.spill [r28]=f12,0x80 |
stf.spill [r29]=f13,0x80 |
stf.spill [r30]=f14,0x80 |
stf.spill [r31]=f15,0x80;; |
stf.spill [r24]=f16,0x80 |
stf.spill [r25]=f17,0x80 |
stf.spill [r26]=f18,0x80 |
stf.spill [r27]=f19,0x80 |
stf.spill [r28]=f20,0x80 |
stf.spill [r29]=f21,0x80 |
stf.spill [r30]=f22,0x80 |
stf.spill [r31]=f23,0x80;; |
stf.spill [r24]=f24,0x80 |
stf.spill [r25]=f25,0x80 |
stf.spill [r26]=f26,0x80 |
stf.spill [r27]=f27,0x80 |
stf.spill [r28]=f28,0x80 |
stf.spill [r29]=f29,0x80 |
stf.spill [r30]=f30,0x80 |
stf.spill [r31]=f31,0x80;; |
/* preserve Floating point status register */ |
mov loc47 = ar.fpsr |
343,6 → 419,59 |
/* 13. restore general and floating-point registers */ |
/* TODO: restore floating-point context */ |
mov r24=96 + STACK_SCRATCH_AREA_SIZE |
mov r25=112 + STACK_SCRATCH_AREA_SIZE |
mov r26=0 + STACK_SCRATCH_AREA_SIZE |
mov r27=16 + STACK_SCRATCH_AREA_SIZE |
mov r28=32 + STACK_SCRATCH_AREA_SIZE |
mov r29=48 + STACK_SCRATCH_AREA_SIZE |
mov r30=64 + STACK_SCRATCH_AREA_SIZE |
mov r31=80 + STACK_SCRATCH_AREA_SIZE;; |
add r24=r12,r24 |
add r25=r12,r25 |
add r26=r12,r26 |
add r27=r12,r27 |
add r28=r12,r28 |
add r29=r12,r29 |
add r30=r12,r30 |
add r31=r12,r31;; |
ldf.fill f2=[r26],0x80 |
ldf.fill f3=[r27],0x80 |
ldf.fill f4=[r28],0x80 |
ldf.fill f5=[r29],0x80 |
ldf.fill f6=[r30],0x80 |
ldf.fill f7=[r31],0x80;; |
ldf.fill f8=[r24],0x80 |
ldf.fill f9=[r25],0x80 |
ldf.fill f10=[r26],0x80 |
ldf.fill f11=[r27],0x80 |
ldf.fill f12=[r28],0x80 |
ldf.fill f13=[r29],0x80 |
ldf.fill f14=[r30],0x80 |
ldf.fill f15=[r31],0x80;; |
ldf.fill f16=[r24],0x80 |
ldf.fill f17=[r25],0x80 |
ldf.fill f18=[r26],0x80 |
ldf.fill f19=[r27],0x80 |
ldf.fill f20=[r28],0x80 |
ldf.fill f21=[r29],0x80 |
ldf.fill f22=[r30],0x80 |
ldf.fill f23=[r31],0x80;; |
ldf.fill f24=[r24],0x80 |
ldf.fill f25=[r25],0x80 |
ldf.fill f26=[r26],0x80 |
ldf.fill f27=[r27],0x80 |
ldf.fill f28=[r28],0x80 |
ldf.fill f29=[r29],0x80 |
ldf.fill f30=[r30],0x80 |
ldf.fill f31=[r31],0x80;; |
mov r1 = loc17 |
mov r2 = loc18 |
mov r3 = loc19 |
478,3 → 607,7 |
HEAVYWEIGHT_HANDLER 0x7d00 |
HEAVYWEIGHT_HANDLER 0x7e00 |
HEAVYWEIGHT_HANDLER 0x7f00 |
//kernel/trunk/arch/ia64/src/context.S |
---|
104,8 → 104,32 |
* Save predicate registers |
*/ |
mov loc2 = pr ;; |
st8 [in0] = loc2, 8 |
st8 [in0] = loc2, 16;; /*Next fpu registers should be spilled to 16B aligned address*/ |
stf.spill [in0]=f2,16;; |
stf.spill [in0]=f3,16;; |
stf.spill [in0]=f4,16;; |
stf.spill [in0]=f5,16;; |
stf.spill [in0]=f16,16;; |
stf.spill [in0]=f17,16;; |
stf.spill [in0]=f18,16;; |
stf.spill [in0]=f19,16;; |
stf.spill [in0]=f20,16;; |
stf.spill [in0]=f21,16;; |
stf.spill [in0]=f22,16;; |
stf.spill [in0]=f23,16;; |
stf.spill [in0]=f24,16;; |
stf.spill [in0]=f25,16;; |
stf.spill [in0]=f26,16;; |
stf.spill [in0]=f27,16;; |
stf.spill [in0]=f28,16;; |
stf.spill [in0]=f29,16;; |
stf.spill [in0]=f30,16;; |
stf.spill [in0]=f31,16;; |
mov ar.unat = loc1 |
add r8 = r0, r0, 1 /* context_save returns 1 */ |
187,9 → 211,33 |
/* |
* Restore predicate registers |
*/ |
ld8 loc2 = [in0], 8 ;; |
ld8 loc2 = [in0], 16 ;; |
mov pr = loc2, ~0 |
ldf.fill f2=[in0],16;; |
ldf.fill f3=[in0],16;; |
ldf.fill f4=[in0],16;; |
ldf.fill f5=[in0],16;; |
ldf.fill f16=[in0],16;; |
ldf.fill f17=[in0],16;; |
ldf.fill f18=[in0],16;; |
ldf.fill f19=[in0],16;; |
ldf.fill f20=[in0],16;; |
ldf.fill f21=[in0],16;; |
ldf.fill f22=[in0],16;; |
ldf.fill f23=[in0],16;; |
ldf.fill f24=[in0],16;; |
ldf.fill f25=[in0],16;; |
ldf.fill f26=[in0],16;; |
ldf.fill f27=[in0],16;; |
ldf.fill f28=[in0],16;; |
ldf.fill f29=[in0],16;; |
ldf.fill f30=[in0],16;; |
ldf.fill f31=[in0],16;; |
mov ar.unat = loc1 |
mov r8 = r0 /* context_restore returns 0 */ |
//kernel/trunk/arch/ia64/src/interrupt.c |
---|
174,11 → 174,15 |
panic("General Exception (%s)\n", desc); |
} |
void fpu_enable(void); |
void disabled_fp_register(__u64 vector, istate_t *istate) |
{ |
#ifdef CONFIG_CPU_LAZY |
#ifdef CONFIG_FPU_LAZY |
scheduler_fpu_lazy_request(); |
#else |
dump_interrupted_context(istate); |
panic("Interruption: %W (%s)\n", (__u16) vector, vector_to_string(vector)); |
#endif |
} |
//kernel/trunk/arch/ia64/src/start.S |
---|
123,9 → 123,16 |
st8 [r18] = r15 |
st8 [r19] = r16 |
ssm (1<<19);; /*Disable f32 - f127*/ |
srlz.i; |
srlz.d;; |
movl r18=main_bsp ;; |
mov b1=r18 ;; |
br.call.sptk.many b0=b1 |
0: |
br 0b |