forked from Imagelibrary/rtems
2009-11-09 Till Straumann <strauman@slac.stanford.edu>
* cpu.c, cpu_asm.S, rtems/score/cpu.h, sse_test.c: Added experimental SSE support.
This commit is contained in:
@@ -56,9 +56,36 @@ void _CPU_Initialize(void)
|
||||
|
||||
fp_context = &_CPU_Null_fp_context;
|
||||
|
||||
#ifdef __SSE__
|
||||
asm volatile( "fstcw %0":"=m"(fp_context->fpucw) );
|
||||
#else
|
||||
asm volatile( "fsave (%0)" : "=r" (fp_context)
|
||||
: "0" (fp_context)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __SSE__
|
||||
|
||||
asm volatile("stmxcsr %0":"=m"(fp_context->mxcsr));
|
||||
|
||||
/* The BSP must enable the SSE extensions (early).
|
||||
* If any SSE instruction was already attempted
|
||||
* then that crashed the system.
|
||||
* As a courtesy, we double-check here but it
|
||||
* may be too late (which is also why we don't
|
||||
* enable SSE here).
|
||||
*/
|
||||
{
|
||||
uint32_t cr4;
|
||||
__asm__ __volatile__("mov %%cr4, %0":"=r"(cr4));
|
||||
if ( 0x600 != (cr4 & 0x600) ) {
|
||||
printk("PANIC: RTEMS was compiled for SSE but BSP did not enable it (CR4: 0x%08x)\n", cr4);
|
||||
while ( 1 ) {
|
||||
__asm__ __volatile__("hlt");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -165,6 +192,9 @@ extern void rtems_exception_prologue_14(void);
|
||||
extern void rtems_exception_prologue_16(void);
|
||||
extern void rtems_exception_prologue_17(void);
|
||||
extern void rtems_exception_prologue_18(void);
|
||||
#ifdef __SSE__
|
||||
extern void rtems_exception_prologue_19(void);
|
||||
#endif
|
||||
|
||||
static rtems_raw_irq_hdl tbl[] = {
|
||||
rtems_exception_prologue_0,
|
||||
@@ -186,6 +216,9 @@ static rtems_raw_irq_hdl tbl[] = {
|
||||
rtems_exception_prologue_16,
|
||||
rtems_exception_prologue_17,
|
||||
rtems_exception_prologue_18,
|
||||
#ifdef __SSE__
|
||||
rtems_exception_prologue_19,
|
||||
#endif
|
||||
};
|
||||
|
||||
void rtems_exception_init_mngt(void)
|
||||
|
||||
@@ -97,6 +97,7 @@ SYM (_CPU_Context_restore):
|
||||
|
||||
.set FPCONTEXT_ARG, 4 /* FP context argument */
|
||||
|
||||
#ifndef __SSE__
|
||||
.p2align 1
|
||||
PUBLIC (_CPU_Context_save_fp)
|
||||
SYM (_CPU_Context_save_fp):
|
||||
@@ -112,18 +113,44 @@ SYM (_CPU_Context_restore_fp):
|
||||
movl (eax),eax /* eax = FP context area */
|
||||
frstor (eax) /* restore FP context */
|
||||
ret
|
||||
#endif
|
||||
|
||||
#ifdef __SSE__
|
||||
#define SSE_OFF 16
|
||||
#endif
|
||||
|
||||
/*
 * _Exception_Handler
 *
 * Common exception entry: builds the frame handed to the C-level handler
 * stored in _currentExcHandler, calls it, and returns to the interrupted
 * code.
 *
 * The frame starts with a pointer slot (NULL without SSE support) followed
 * by the registers pushed by 'pusha'.  With SSE support a 512-byte area is
 * carved out of the stack, aligned, filled by 'fxsave' and its address is
 * stored in the pointer slot; the FPU and MXCSR are reset before the
 * handler runs and the saved state is reloaded afterwards.
 */
	PUBLIC (_Exception_Handler)
SYM (_Exception_Handler):
	pusha				/* Push general purpose registers    */
	pushl	$0			/* Null pointer to SSE area          */
	movl	esp, ebp		/* Save original SP                  */
#ifndef __SSE__
	subl	$4, esp			/* Reserve space for argument        */
					/* Align stack (courtesy for C/gcc)  */
	andl	$ - CPU_STACK_ALIGNMENT, esp
#else
	subl	$512, esp		/* Space for SSE area                */
					/* Align stack (courtesy for C/gcc)  */
	andl	$ - CPU_STACK_ALIGNMENT, esp
/* Doing fwait here will re-throw an already pending FP exception!
	fwait
 */
	fxsave	0(esp)
	fninit				/* Clean-slate FPU                   */
	/* The pointer slot doubles as scratch memory for the MXCSR value */
	movl	$0x1f80, 0(ebp)
	ldmxcsr	0(ebp)			/* Clean-slate MXCSR                 */
	movl	esp, 0(ebp)		/* Store pointer to SSE area         */
	subl	$SSE_OFF, esp		/* Aligned space for argument        */
#endif
	movl	ebp, (esp)		/* Store argument                    */
	movl	_currentExcHandler, eax	/* Call function stored in _currentExcHandler */
	call	* eax
#ifdef __SSE__
	fwait
	/* SSE area sits SSE_OFF bytes above the argument slot reserved above */
	fxrstor	SSE_OFF(esp)
#endif
	movl	ebp, esp		/* Restore original SP               */
	addl	$4, esp			/* Discard pointer to SSE area       */
	popa				/* Restore general purpose registers */
	addl	$8, esp			/* Discard vector number and faultCode */
	iret
|
||||
@@ -216,6 +243,13 @@ DISTINCT_EXCEPTION_WITH_FAULTCODE_ENTRY (17)
|
||||
*/
|
||||
DISTINCT_EXCEPTION_WITH_FAULTCODE_ENTRY (18)
|
||||
|
||||
#ifdef __SSE__
|
||||
/*
|
||||
* SIMD FP Exception
|
||||
*/
|
||||
DISTINCT_EXCEPTION_WITHOUT_FAULTCODE_ENTRY (19)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* void *i386_Logical_to_physical(
|
||||
|
||||
@@ -61,6 +61,15 @@ extern "C" {
|
||||
* for the i386, others have it built in (i486DX, Pentium).
|
||||
*/
|
||||
|
||||
#ifdef __SSE__
|
||||
#define CPU_HARDWARE_FP TRUE
|
||||
#define CPU_SOFTWARE_FP FALSE
|
||||
|
||||
#define CPU_ALL_TASKS_ARE_FP TRUE
|
||||
#define CPU_IDLE_TASK_IS_FP TRUE
|
||||
#define CPU_USE_DEFERRED_FP_SWITCH FALSE
|
||||
#else /* __SSE__ */
|
||||
|
||||
#if ( I386_HAS_FPU == 1 )
|
||||
#define CPU_HARDWARE_FP TRUE /* i387 for i386 */
|
||||
#else
|
||||
@@ -71,6 +80,7 @@ extern "C" {
|
||||
#define CPU_ALL_TASKS_ARE_FP FALSE
|
||||
#define CPU_IDLE_TASK_IS_FP FALSE
|
||||
#define CPU_USE_DEFERRED_FP_SWITCH TRUE
|
||||
#endif /* __SSE__ */
|
||||
|
||||
#define CPU_STACK_GROWS_UP FALSE
|
||||
#define CPU_STRUCTURE_ALIGNMENT
|
||||
@@ -119,12 +129,38 @@ typedef struct {
|
||||
/*
|
||||
* FP context save area for the i387 numeric coprocessors.
|
||||
*/
|
||||
#ifdef __SSE__
/*
 * With SSE enabled all FPU and SSE registers are treated as volatile.
 * While normal C code is executing (a task switch included) none of
 * them needs to be saved or restored.
 *
 * The complete FPU/SSE context must nevertheless be preserved across
 * interrupts and exceptions because
 *   - a _Thread_Dispatch() may follow ISR execution, so the FPU/SSE
 *     registers have to be saved and restored when control returns
 *     to the interrupted task;
 *   - gcc may implicitly use FPU/SSE instructions in an ISR.
 *
 * The SYSV ABI (i386) does not explicitly mention the FPU control word
 * as being non-volatile; MXCSR and the FPU control word are maintained
 * for each task regardless.
 */
typedef struct {
  uint32_t  mxcsr;     /* per-task MXCSR value            */
  uint16_t  fpucw;     /* per-task FPU control word value */
} Context_Control_fp;

#else

typedef struct {
  /* context size area for I80387; 28 bytes of it are the environment */
  uint8_t   fp_save_area[108];
} Context_Control_fp;

#endif
|
||||
|
||||
|
||||
/*
|
||||
* The following structure defines the set of information saved
|
||||
@@ -132,9 +168,20 @@ typedef struct {
|
||||
*
|
||||
* idtIndex is either the interrupt number or the trap/exception number.
|
||||
* faultCode is the code pushed by the processor on some exceptions.
|
||||
*
|
||||
* Since the first registers are directly pushed by the CPU they
|
||||
* may not respect 16-byte stack alignment, which is, however,
|
||||
* mandatory for the SSE register area.
|
||||
* Therefore, these registers are stored at an aligned address
|
||||
* and a pointer is stored in the CPU_Exception_frame.
|
||||
* If the executive was compiled without SSE support then
|
||||
* this pointer is NULL.
|
||||
*/
|
||||
|
||||
struct Context_Control_sse;
|
||||
|
||||
typedef struct {
|
||||
struct Context_Control_sse *fp_ctxt;
|
||||
uint32_t edi;
|
||||
uint32_t esi;
|
||||
uint32_t ebp;
|
||||
@@ -150,6 +197,32 @@ typedef struct {
|
||||
uint32_t eflags;
|
||||
} CPU_Exception_frame;
|
||||
|
||||
#ifdef __SSE__
/*
 * Image of the 512-byte area written by 'fxsave' and read by 'fxrstor'.
 * The area must be 16-byte aligned, hence the alignment attribute.
 * Members named res_N are padding.
 */
typedef struct Context_Control_sse {
  uint16_t  fcw;                /* FPU control word           */
  uint16_t  fsw;                /* FPU status word            */
  uint8_t   ftw;                /* FPU tag bits               */
  uint8_t   res_1;
  uint16_t  fop;
  uint32_t  fpu_ip;
  uint16_t  cs;
  uint16_t  res_2;
  uint32_t  fpu_dp;
  uint16_t  ds;
  uint16_t  res_3;
  uint32_t  mxcsr;
  uint32_t  mxcsr_mask;
  struct {
    uint8_t fpreg[10];          /* 80-bit register image      */
    uint8_t res_4[ 6];
  }         fp_mmregs[8];
  uint8_t   xmmregs[8][16];     /* XMM0..XMM7, 16 bytes each  */
  uint8_t   res_5[224];
} Context_Control_sse
__attribute__((aligned(16)))
;
#endif
|
||||
|
||||
typedef void (*cpuExcHandlerType) (CPU_Exception_frame*);
|
||||
extern cpuExcHandlerType _currentExcHandler;
|
||||
extern void rtems_exception_init_mngt(void);
|
||||
@@ -510,19 +583,61 @@ void _CPU_Context_restore(
|
||||
* This routine saves the floating point context passed to it.
|
||||
*/
|
||||
|
||||
#ifdef __SSE__
/*
 * SSE variant: only the FPU control word and MXCSR are kept per task.
 * 'fp_context_pp' is the address of a pointer to the Context_Control_fp
 * that receives both values.
 */
#define _CPU_Context_save_fp(fp_context_pp)                        \
  do {                                                             \
    Context_Control_fp *_cpu_fp_save_ctx_ = *(fp_context_pp);      \
    __asm__ __volatile__(                                          \
      "fstcw %0"                                                   \
      : "=m"(_cpu_fp_save_ctx_->fpucw)                             \
    );                                                             \
    __asm__ __volatile__(                                          \
      "stmxcsr %0"                                                 \
      : "=m"(_cpu_fp_save_ctx_->mxcsr)                             \
    );                                                             \
  } while (0)
#else
/* Non-SSE variant: implemented out of line. */
void _CPU_Context_save_fp(
  Context_Control_fp **fp_context_ptr
);
#endif
|
||||
|
||||
/*
|
||||
* _CPU_Context_restore_fp
|
||||
*
|
||||
* This routine restores the floating point context passed to it.
|
||||
*/
|
||||
|
||||
#ifdef __SSE__
|
||||
#define _CPU_Context_restore_fp(fp_context_pp) \
|
||||
do { \
|
||||
__asm__ __volatile__( \
|
||||
"fldcw %0" \
|
||||
::"m"((*(fp_context_pp))->fpucw) \
|
||||
:"fpcr" \
|
||||
); \
|
||||
__builtin_ia32_ldmxcsr(_Thread_Executing->fp_context->mxcsr); \
|
||||
} while (0)
|
||||
#else
|
||||
void _CPU_Context_restore_fp(
|
||||
Context_Control_fp **fp_context_ptr
|
||||
);
|
||||
#endif
|
||||
|
||||
#ifdef __SSE__
/*
 * Reset the FPU with 'finit' and, if the executing thread owns an FP
 * context, load the FPU control word and MXCSR from that context.
 */
#define _CPU_Context_Initialization_at_thread_begin()              \
  do {                                                             \
    __asm__ __volatile__(                                          \
      "finit"                                                      \
      : /* no outputs */                                           \
      : /* no inputs  */                                           \
      : "st",    "st(1)", "st(2)", "st(3)",                        \
        "st(4)", "st(5)", "st(6)", "st(7)",                        \
        "fpsr",  "fpcr"                                            \
    );                                                             \
    if ( _Thread_Executing->fp_context ) {                         \
      _CPU_Context_restore_fp( &_Thread_Executing->fp_context );   \
    }                                                              \
  } while (0)
#endif
|
||||
|
||||
#endif /* ASM */
|
||||
|
||||
|
||||
951
cpukit/score/cpu/i386/sse_test.c
Normal file
951
cpukit/score/cpu/i386/sse_test.c
Normal file
@@ -0,0 +1,951 @@
|
||||
/* $Id$ */
|
||||
|
||||
/*
|
||||
* Authorship
|
||||
* ----------
|
||||
* This software was created by
|
||||
* Till Straumann <strauman@slac.stanford.edu>, 2009,
|
||||
* Stanford Linear Accelerator Center, Stanford University.
|
||||
*
|
||||
* Acknowledgement of sponsorship
|
||||
* ------------------------------
|
||||
* This software was produced by
|
||||
* the Stanford Linear Accelerator Center, Stanford University,
|
||||
* under Contract DE-AC03-76SFO0515 with the Department of Energy.
|
||||
*
|
||||
* Government disclaimer of liability
|
||||
* ----------------------------------
|
||||
* Neither the United States nor the United States Department of Energy,
|
||||
* nor any of their employees, makes any warranty, express or implied, or
|
||||
* assumes any legal liability or responsibility for the accuracy,
|
||||
* completeness, or usefulness of any data, apparatus, product, or process
|
||||
* disclosed, or represents that its use would not infringe privately owned
|
||||
* rights.
|
||||
*
|
||||
* Stanford disclaimer of liability
|
||||
* --------------------------------
|
||||
* Stanford University makes no representations or warranties, express or
|
||||
* implied, nor assumes any liability for the use of this software.
|
||||
*
|
||||
* Stanford disclaimer of copyright
|
||||
* --------------------------------
|
||||
* Stanford University, owner of the copyright, hereby disclaims its
|
||||
* copyright and all other rights in this software. Hence, anyone may
|
||||
* freely use it for any purpose without restriction.
|
||||
*
|
||||
* Maintenance of notices
|
||||
* ----------------------
|
||||
* In the interest of clarity regarding the origin and status of this
|
||||
* SLAC software, this and all the preceding Stanford University notices
|
||||
* are to remain affixed to any copy or derivative of this software made
|
||||
* or distributed by the recipient and are to be affixed to any copy of
|
||||
* software made or distributed by the recipient that contains a copy or
|
||||
* derivative of this software.
|
||||
*
|
||||
* ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
|
||||
*/
|
||||
|
||||
|
||||
/* Code for testing FPU/SSE context save/restore across exceptions
|
||||
* (including interrupts).
|
||||
*
|
||||
* There are two tasks and an IRQ/EXC handler involved. One task (LP)
|
||||
* is of lower priority than the other (HP) task.
|
||||
*
|
||||
* 1) LP task sets up a context area in memory (known contents; every
|
||||
* register is loaded with different values)
|
||||
*
|
||||
* 2) LP task
|
||||
* 2a saves original FP/SSE context
|
||||
* 2b loads context from 1) into FPU/SSE
|
||||
* 2c raises an exception or interrupt
|
||||
*
|
||||
* * (2d save FPU/SSE context after irq/exception returns to
|
||||
* separate area for verification
|
||||
* 2e reload original FP/SSE context.)
|
||||
*
|
||||
* * All these five steps are coded in assembly to prevent
|
||||
* gcc from manipulating the FP/SSE state. The last two steps,
|
||||
* however, are effectively executed during 6 when control is
|
||||
* returned to the LP task.
|
||||
*
|
||||
* 3) IRQ/EXC handler OS wrapper saves context, initializes FPU and
|
||||
* MXCSR.
|
||||
*
|
||||
* 4) user (our) irq/exc handler clears exception condition, clobbers
|
||||
* FPU and XMM regs and finally releases a semaphore on which HP
|
||||
* task is waiting.
|
||||
*
|
||||
* 5) context switch to HP task. HP task clobbers FPU and XMM regs.
|
||||
* Then it tries to re-acquire the synchronization semaphore and
|
||||
* blocks.
|
||||
*
|
||||
* 6) task switch back to (interrupted) LP task. Original context is
|
||||
* restored and verified against the context that was setup in 1).
|
||||
*
|
||||
*
|
||||
* Three methods for interrupting the LP task are tested
|
||||
*
|
||||
* a) FP exception (by setting an exception status in the context from 1))
* b) SSE exception (by computing the square root of a vector of negative
*    numbers)
|
||||
* c) IRQ (software IRQ via 'INT xx' instruction)
|
||||
*
|
||||
*/
|
||||
#ifdef __rtems__
|
||||
|
||||
#include <rtems.h>
|
||||
#include <rtems/score/cpu.h>
|
||||
#include <rtems/irq.h>
|
||||
#include <rtems/error.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
/* This is currently hardcoded (int xx opcode requires immediate operand) */
|
||||
#define SSE_TEST_IRQ 10
|
||||
|
||||
typedef uint8_t __v8 __attribute__((vector_size(16)));
|
||||
typedef uint32_t __v32 __attribute__((vector_size(16)));
|
||||
typedef float __vf __attribute__((vector_size(16)));
|
||||
|
||||
#ifndef __rtems__
/*
 * Clone of what is defined in rtems/score/cpu.h (for testing under linux):
 * image of the 512-byte area used by 'fxsave'/'fxrstor'.
 * Members named res_N are padding.
 */
typedef struct Context_Control_sse {
	uint16_t  fcw;              /* FPU control word          */
	uint16_t  fsw;              /* FPU status word           */
	uint8_t   ftw;              /* FPU tag bits              */
	uint8_t   res_1;
	uint16_t  fop;
	uint32_t  fpu_ip;
	uint16_t  cs;
	uint16_t  res_2;
	uint32_t  fpu_dp;
	uint16_t  ds;
	uint16_t  res_3;
	uint32_t  mxcsr;
	uint32_t  mxcsr_mask;
	struct {
		uint8_t fpreg[10];      /* 80-bit register image     */
		uint8_t res_4[ 6];
	}         fp_mmregs[8];
	uint8_t   xmmregs[8][16];   /* XMM0..XMM7, 16 bytes each */
	uint8_t   res_5[224];
} Context_Control_sse
__attribute__((aligned(16)))
;
#endif
|
||||
|
||||
#define MXCSR_FZ (1<<15) /* Flush to zero */
|
||||
#define MXCSR_RC(x) (((x)&3)<<13) /* Rounding ctrl */
|
||||
#define MXCSR_PM (1<<12) /* Precision msk */
|
||||
#define MXCSR_UM (1<<11) /* Underflow msk */
|
||||
#define MXCSR_OM (1<<10) /* Overflow msk */
|
||||
#define MXCSR_ZM (1<< 9) /* Divbyzero msk */
|
||||
#define MXCSR_DM (1<< 8) /* Denormal msk */
|
||||
#define MXCSR_IM (1<< 7) /* Invalidop msk */
|
||||
#define MXCSR_DAZ (1<< 6) /* Denorml are 0 */
|
||||
#define MXCSR_PE (1<< 5) /* Precision flg */
|
||||
#define MXCSR_UE (1<< 4) /* Underflow flg */
|
||||
#define MXCSR_OE (1<< 3) /* Overflow flg */
|
||||
#define MXCSR_ZE (1<< 2) /* Divbyzero flg */
|
||||
#define MXCSR_DE (1<< 1) /* Denormal flg */
|
||||
#define MXCSR_IE (1<< 0) /* Invalidop flg */
|
||||
|
||||
#define MXCSR_ALLM (MXCSR_PM | MXCSR_UM | MXCSR_OM | MXCSR_ZM | MXCSR_DM | MXCSR_IM)
|
||||
#define MXCSR_ALLE (MXCSR_PE | MXCSR_UE | MXCSR_OE | MXCSR_ZE | MXCSR_DE | MXCSR_IE)
|
||||
|
||||
#define FPSR_B (1<<15) /* FPU busy */
|
||||
#define FPSR_C3 (1<<14) /* Cond code C3 */
|
||||
#define FPSR_TOP(x) (((x)&7)<<11) /* TOP */
|
||||
#define FPSR_C2 (1<<10) /* Cond code C2 */
|
||||
#define FPSR_C1 (1<< 9) /* Cond code C1 */
|
||||
#define FPSR_C0 (1<< 8) /* Cond code C0 */
|
||||
#define FPSR_ES (1<< 7) /* Error summary */
|
||||
#define FPSR_SF (1<< 6) /* Stack fault */
|
||||
#define FPSR_PE (1<< 5) /* Precision flg */
|
||||
#define FPSR_UE (1<< 4) /* Underflow flg */
|
||||
#define FPSR_OE (1<< 3) /* Overflow flg */
|
||||
#define FPSR_ZE (1<< 2) /* Divbyzero flg */
|
||||
#define FPSR_DE (1<< 1) /* Denormal flg */
|
||||
#define FPSR_IE (1<< 0) /* Invalidop flg */
|
||||
|
||||
#define FPCW_X (1<<12) /* Infinity ctrl */
|
||||
#define FPCW_RC(x) (((x)&3)<<10) /* Rounding ctrl */
|
||||
#define FPCW_PC(x) (((x)&3)<< 8) /* Precision ctl */
|
||||
#define FPCW_PM (1<< 5) /* Precision msk */
|
||||
#define FPCW_UM (1<< 4) /* Underflow msk */
|
||||
#define FPCW_OM (1<< 3) /* Overflow msk */
|
||||
#define FPCW_ZM (1<< 2) /* Divbyzero msk */
|
||||
#define FPCW_DM (1<< 1) /* Denormal msk */
|
||||
#define FPCW_IM (1<< 0) /* Invalidop msk */
|
||||
|
||||
#define FPCW_ALLM (FPCW_PM | FPCW_UM | FPCW_OM | FPCW_ZM | FPCW_DM | FPCW_IM)
|
||||
#define FPSR_ALLE (FPSR_ES | FPSR_SF | FPSR_PE | FPSR_UE | FPSR_OE | FPSR_ZE | FPSR_DE | FPSR_IE)
|
||||
|
||||
/* Convert 'v' to an 80-bit register image and write it to *p_dst */
void
fp_st1(uint8_t (*p_dst)[10], double v)
{
	/*
	 * 'v' is handed over on top of the FPU stack ("t"); 'fstpt' stores
	 * and pops it, which is why "st" is listed as clobbered.
	 */
	__asm__ __volatile__(
		"fstpt %0"
		: "=m"(*p_dst)
		: "t"(v)
		: "st"
	);
}
|
||||
|
||||
/* Store 'double' into 80-bit register image #i in context */
|
||||
void
|
||||
fp_st(Context_Control_sse *p_ctxt, int i, double v)
|
||||
{
|
||||
fp_st1(&p_ctxt->fp_mmregs[i].fpreg,v);
|
||||
}
|
||||
|
||||
/* Read an 80-bit register image from *p_src and return it as 'double' */
double
fp_ld1(uint8_t (*p_src)[10])
{
	double result;

	/*
	 * The first memory operand is the address used by 'fldt'; the second
	 * one names the complete 10-byte array as an input of the asm.
	 */
	__asm__ __volatile__(
		"fldt %1"
		: "=t"(result)
		: "m"((*p_src)[0]), "m"(*p_src)
	);
	return result;
}
|
||||
|
||||
/* Load 'double' from 80-bit register image #i in context */
|
||||
double
|
||||
fp_ld(Context_Control_sse *p_ctxt, int i)
|
||||
{
|
||||
return fp_ld1(&p_ctxt->fp_mmregs[i].fpreg);
|
||||
}
|
||||
|
||||
#define FPUCLOBBER \
|
||||
"st","st(1)","st(2)","st(3)", \
|
||||
"st(4)","st(5)","st(6)","st(7)",\
|
||||
"fpsr","fpcr"
|
||||
|
||||
/* There seems to be no way to say that mxcsr was clobbered */
|
||||
|
||||
#define SSECLOBBER \
|
||||
"xmm0","xmm1","xmm2","xmm3", \
|
||||
"xmm4","xmm5","xmm6","xmm7"
|
||||
|
||||
static void
|
||||
sse_clobber(uint32_t x)
|
||||
{
|
||||
__v32 v = { x, x, x, x };
|
||||
asm volatile (
|
||||
" movdqa %0, %%xmm0 \n"
|
||||
" movdqa %%xmm0, %%xmm1 \n"
|
||||
" movdqa %%xmm0, %%xmm2 \n"
|
||||
" movdqa %%xmm0, %%xmm3 \n"
|
||||
" movdqa %%xmm0, %%xmm4 \n"
|
||||
" movdqa %%xmm0, %%xmm5 \n"
|
||||
" movdqa %%xmm0, %%xmm6 \n"
|
||||
" movdqa %%xmm0, %%xmm7 \n"
|
||||
:
|
||||
:"m"(v)
|
||||
:SSECLOBBER
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * Reset the FPU ('finit') and fill XMM0..XMM7 with the pattern
 * { v1, v2, v1, v2 }.
 *
 * Written as a plain assembly routine; the arguments are fetched from
 * the stack.  On entry 0(%esp) holds the return address, so the two
 * 32-bit arguments are the 8 bytes starting at 4(%esp) (same offset
 * that init_ctxt uses for its first argument).
 */
void
all_clobber(uint32_t v1, uint32_t v2);

__asm__(
"all_clobber:               \n"
"   finit                   \n"
"   movq 4(%esp), %xmm0     \n"	/* low quadword  <- v1 | v2          */
"   punpcklqdq %xmm0, %xmm0 \n"	/* high quadword <- low quadword     */
"   movdqa %xmm0, %xmm1     \n"
"   movdqa %xmm0, %xmm2     \n"
"   movdqa %xmm0, %xmm3     \n"
"   movdqa %xmm0, %xmm4     \n"
"   movdqa %xmm0, %xmm5     \n"
"   movdqa %xmm0, %xmm6     \n"
"   movdqa %xmm0, %xmm7     \n"
"   ret                     \n"
);
|
||||
|
||||
/* Clear FPU and save FPU/SSE registers to context area */
|
||||
|
||||
void
|
||||
init_ctxt(Context_Control_sse *p_ctxt);
|
||||
|
||||
asm(
|
||||
"init_ctxt: \n"
|
||||
" finit \n"
|
||||
" mov 4(%esp), %eax\n"
|
||||
" fxsave (%eax) \n"
|
||||
" fwait \n"
|
||||
" ret \n"
|
||||
);
|
||||
|
||||
/* Save FPU/SSE registers to context area */
|
||||
|
||||
static void
|
||||
stor_ctxt(Context_Control_sse *p_ctxt)
|
||||
{
|
||||
memset(p_ctxt, 0, sizeof(*p_ctxt));
|
||||
asm volatile(
|
||||
/* " finit \n" */
|
||||
" fxsave %0 \n"
|
||||
" fwait \n"
|
||||
: "=m"(*p_ctxt)
|
||||
:
|
||||
: FPUCLOBBER
|
||||
);
|
||||
}
|
||||
|
||||
/* printf formats for 8/16/32-bit hexadecimal fields */
#define H08 "0x%02" PRIx8
#define H16 "0x%04" PRIx16
#define H32 "0x%08" PRIx32

#define F16 "mismatch (" H16 " != " H16 ")\n"

/*
 * The comparison macros below operate on the variables of the function
 * expanding them:
 *   a, b   pointers to the two context areas being compared
 *   quiet  when nonzero, no message is printed
 *   rval   set to 1 whenever a difference is found
 *
 * All of them are wrapped in do { } while (0) so that they behave like
 * a single statement (e.g. in an unbraced if/else).
 */

/* Compare scalar member 'fld'; 'fmt' is the printf format for its value */
#define FLDCMP(fld, fmt) \
	do { \
		if ( a->fld != b->fld ) { \
			rval = 1; \
			if ( !quiet ) \
				fprintf(stderr,#fld" mismatch ("fmt" != "fmt")\n",a->fld, b->fld); \
		} \
	} while (0)

/*
 * Compare FP register image #i.  A difference is flagged if the tag bits
 * #i differ, or if both tag bits are set and the 10-byte images differ.
 */
#define FLTCMP(i) \
	do { \
		if (    ( (a->ftw ^ b->ftw) & (1<<(i)) ) \
		     || ( (a->ftw & b->ftw & (1<<(i))) && \
		          memcmp(a->fp_mmregs[(i)].fpreg, \
		                 b->fp_mmregs[(i)].fpreg, \
		                 sizeof(a->fp_mmregs[(i)].fpreg)) \
		        ) \
		   ) { \
			rval = 1; \
			if ( !quiet ) { \
				double fa = fp_ld(a, (i)); \
				double fb = fp_ld(b, (i)); \
				if ( ((a->ftw ^ b->ftw) & (1<<(i))) ) \
					fprintf(stderr,"fpreg[%u] TAG mismatch (%u != %u)\n",(unsigned)(i),(a->ftw & (1<<(i))) ? 1u : 0u,(b->ftw & (1<<(i))) ? 1u : 0u); \
				else \
					fprintf(stderr,"fpreg[%u] mismatch (%g != %g)\n",(unsigned)(i),fa,fb); \
			} \
		} \
	} while (0)

/* Compare XMM register image #i; on mismatch dump both 16-byte images */
#define XMMCMP(i) \
	do { \
		if ( memcmp(&a->xmmregs[(i)], \
		            &b->xmmregs[(i)], \
		            sizeof(a->xmmregs[(i)])) \
		   ) { \
			rval = 1; \
			if ( !quiet ) { \
				int _jj; \
				fprintf(stderr,"xmmreg[%u] mismatch:\n", (unsigned)(i)); \
				fprintf(stderr," "); \
				for (_jj=0; _jj<16; _jj++) \
					fprintf(stderr,"%02x ",a->xmmregs[(i)][_jj]); \
				fprintf(stderr,"\n !=\n"); \
				fprintf(stderr," "); \
				for (_jj=0; _jj<16; _jj++) \
					fprintf(stderr,"%02x ",b->xmmregs[(i)][_jj]); \
				fprintf(stderr,"\n"); \
			} \
		} \
	} while (0)
|
||||
|
||||
|
||||
/* Compare two FPU/SSE context areas and flag differences;
|
||||
* RETURNS: zero if the contexts match and nonzero otherwise
|
||||
*/
|
||||
static int
|
||||
cmp_ctxt(Context_Control_sse *a, Context_Control_sse *b, int quiet)
|
||||
{
|
||||
int rval = 0;
|
||||
int i;
|
||||
FLDCMP(fcw,H16);
|
||||
FLDCMP(fsw,H16);
|
||||
FLDCMP(ftw,H08);
|
||||
FLDCMP(fop,H16);
|
||||
FLDCMP(fpu_ip,H32);
|
||||
FLDCMP(cs,H16);
|
||||
FLDCMP(fpu_dp,H32);
|
||||
FLDCMP(ds,H16);
|
||||
FLDCMP(mxcsr,H32);
|
||||
FLDCMP(mxcsr_mask,H32);
|
||||
for ( i=0; i<8; i++ ) {
|
||||
FLTCMP(i);
|
||||
}
|
||||
for ( i=0; i<8; i++ ) {
|
||||
XMMCMP(i);
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
/* Possible arguments to exc_raise() */
#define FP_EXC   0
#define IRQ_EXC  1
#define SSE_EXC -1

/* Check stack alignment by raising the interrupt from a
 * non-16-byte aligned section of code. The exception/IRQ
 * handler must align the stack and SSE context area
 * properly or it will crash.
 */

/* Two levels so that 'x' is macro-expanded before being stringified */
#define __INTRAISE(x) " int $32+"#x" \n"
#define INTRAISE(x)   __INTRAISE(x)

/*
 * do_raise: expects the kind of event (see above) in %eax
 *    zero     -> nothing beyond the initial 'fwait' is executed
 *    negative -> 'sqrtps' on XMM0
 *    positive -> software interrupt 32 + SSE_TEST_IRQ
 */
__asm__(
"do_raise:               \n"
"   fwait                \n"
"   test %eax, %eax      \n"
"   je   2f              \n"
"   jl   1f              \n"
INTRAISE(SSE_TEST_IRQ)
"   jmp  2f              \n"
"1: sqrtps %xmm0, %xmm0  \n"
"2:                      \n"
"   ret                  \n"
);
|
||||
|
||||
/* Failure flags OR-ed into the status returned by exc_raise() */
#define SSE_TEST_HP_FAILED       1
#define SSE_TEST_FSPR_FAILED     2
#define SSE_TEST_CTXTCMP_FAILED  4

/* Message #n describes the failure flag with value (1<<n) */
static const char *fail_msgs[] = {
	"Seems that HP task was not executing",
	"FPSR 'Invalid-operation' flag should be clear",
	"Restored context does NOT match the saved one",
};

/*
 * Print one line on stderr for every failure flag set in 'st';
 * 'where' names the test that produced the status.
 */
static void prstat(int st, const char *where)
{
	const size_t n_msgs = sizeof(fail_msgs)/sizeof(fail_msgs[0]);
	size_t       bit;

	for ( bit = 0; bit < n_msgs; bit++ ) {
		if ( 0 == (st & (1 << bit)) )
			continue;
		fprintf(stderr,"sse_test ERROR: %s (testing: %s)\n", fail_msgs[bit], where);
	}
}
|
||||
|
||||
int sse_test_debug = 0;
|
||||
|
||||
static int
|
||||
exc_raise(int kind)
|
||||
{
|
||||
Context_Control_sse nctxt;
|
||||
Context_Control_sse octxt;
|
||||
Context_Control_sse orig_ctxt;
|
||||
int i,j,rval;
|
||||
double s2;
|
||||
uint16_t fsw;
|
||||
__vf f4 = { -1., -2., -3., -4. };
|
||||
__vf tmp;
|
||||
__v32 sgn = { (1<<31), (1<<31), (1<<31), (1<<31) };
|
||||
|
||||
stor_ctxt(&octxt);
|
||||
|
||||
octxt.fsw &= ~FPSR_ALLE;
|
||||
octxt.mxcsr &= ~MXCSR_ALLE;
|
||||
|
||||
for ( i=0; i<8; i++ ) {
|
||||
fp_st(&octxt, i, (double)i+0.1);
|
||||
for (j=0; j<16; j++) {
|
||||
octxt.xmmregs[i][j]=(i<<4)+j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ( SSE_EXC == kind ) {
|
||||
memcpy(octxt.xmmregs[0], &f4, sizeof(f4));
|
||||
octxt.mxcsr &= ~MXCSR_IM;
|
||||
}
|
||||
|
||||
/* set tags to 'valid' */
|
||||
octxt.ftw = 0xff;
|
||||
|
||||
/* enable 'invalid arg' exception */
|
||||
octxt.fcw &= ~ ( FPCW_IM );
|
||||
|
||||
if ( FP_EXC == kind ) {
|
||||
octxt.fsw |= ( FPSR_IE | FPSR_ES );
|
||||
}
|
||||
|
||||
if ( sse_test_debug )
|
||||
printk("RAISE (fsw was 0x%04x)\n", orig_ctxt.fsw);
|
||||
asm volatile(
|
||||
" fxsave %2 \n"
|
||||
#ifdef __rtems__
|
||||
" movl %4, sse_test_check\n"
|
||||
#endif
|
||||
" fxrstor %3 \n"
|
||||
" call do_raise \n"
|
||||
#ifdef __rtems__
|
||||
" movl sse_test_check, %1\n"
|
||||
#else
|
||||
" movl $0, %1 \n"
|
||||
#endif
|
||||
#ifdef TEST_MISMATCH
|
||||
" pxor %%xmm0, %%xmm0 \n"
|
||||
#endif
|
||||
" fxsave %0 \n"
|
||||
" fxrstor %2 \n"
|
||||
: "=m"(nctxt),"=&r"(rval),"=m"(orig_ctxt)
|
||||
: "m"(octxt), "i"(SSE_TEST_HP_FAILED),"a"(kind)
|
||||
: "xmm0"
|
||||
);
|
||||
|
||||
if ( ( FPSR_IE & nctxt.fsw ) ) {
|
||||
rval |= SSE_TEST_FSPR_FAILED;
|
||||
}
|
||||
if ( FP_EXC == kind )
|
||||
nctxt.fsw |= (FPSR_IE | FPSR_ES);
|
||||
else if ( SSE_EXC == kind ) {
|
||||
tmp = __builtin_ia32_sqrtps( (__vf)(~sgn & (__v32)f4) );
|
||||
/* sqrt raises PE; just clear it */
|
||||
nctxt.mxcsr &= ~MXCSR_PE;
|
||||
memcpy( octxt.xmmregs[0], &tmp, sizeof(tmp) );
|
||||
}
|
||||
|
||||
if ( cmp_ctxt(&nctxt, &octxt, 0) ) {
|
||||
rval |= SSE_TEST_CTXTCMP_FAILED;
|
||||
}
|
||||
|
||||
s2 = sqrt(2.0);
|
||||
|
||||
asm volatile("fstsw %0":"=m"(fsw));
|
||||
|
||||
if ( sse_test_debug )
|
||||
printf("sqrt(2): %f (FSTW: 0x%02"PRIx16")\n", sqrt(2.0), fsw);
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
#ifdef __rtems__
|
||||
static void
|
||||
sse_test_ehdl(CPU_Exception_frame *p_f);
|
||||
|
||||
rtems_id sse_test_sync = 0;
|
||||
cpuExcHandlerType sse_test_ohdl = 0;
|
||||
|
||||
CPU_Exception_frame *sse_test_frame = 0;
|
||||
volatile int sse_test_check = SSE_TEST_HP_FAILED;
|
||||
unsigned sse_tests = 0;
|
||||
|
||||
rtems_task
|
||||
sse_test_hp_task(rtems_task_argument arg)
|
||||
{
|
||||
rtems_id sync = (rtems_id)arg;
|
||||
|
||||
uint16_t fp_cw;
|
||||
uint32_t mxcsr;
|
||||
rtems_status_code sc;
|
||||
const char * msgs[] = {"FPU_EXC", "SSE_EXC", "IRQ_EXC"};
|
||||
int i;
|
||||
|
||||
/* verify that FPU control word is default value */
|
||||
asm volatile("fstcw %0":"=m"(fp_cw));
|
||||
if ( fp_cw != _CPU_Null_fp_context.fpucw ) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"ERROR: FPU CW initialization mismatch: got 0x%04"PRIx16"; expected 0x%04"PRIx16"\n",
|
||||
fp_cw,
|
||||
_CPU_Null_fp_context.fpucw
|
||||
);
|
||||
}
|
||||
|
||||
/* check MXCSR default value */
|
||||
asm volatile("stmxcsr %0":"=m"(mxcsr));
|
||||
if ( mxcsr != _CPU_Null_fp_context.mxcsr ) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"ERROR: MXCSR initialization mismatch: got 0x%08"PRIx32"; expected 0x%08"PRIx32"\n",
|
||||
mxcsr,
|
||||
_CPU_Null_fp_context.mxcsr
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
for (i=0; i<sizeof(msgs)/sizeof(msgs[0]); i++ ) {
|
||||
if ( ( sse_tests & (1<<i) ) ) {
|
||||
if ( sse_test_debug )
|
||||
printk("HP task will now block for %s\n",msgs[i]);
|
||||
|
||||
/* Blocking here lets the low-priority task continue */
|
||||
sc = rtems_semaphore_obtain(sync, RTEMS_WAIT, 500);
|
||||
|
||||
all_clobber(0xaffeaffe, 0xcafecafe);
|
||||
|
||||
if ( RTEMS_SUCCESSFUL != sc ) {
|
||||
rtems_error(sc,"ERROR: sse_test hp task wasn't notified of exception\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* set flag indicating that we executed until here */
|
||||
sse_test_check = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bail:
|
||||
rtems_task_suspend(RTEMS_SELF);
|
||||
}
|
||||
|
||||
/* Flags to skip individual tests */
|
||||
#define SSE_TEST_FPU_EXC (1<<0)
|
||||
#define SSE_TEST_SSE_EXC (1<<1)
|
||||
#define SSE_TEST_IRQ_EXC (1<<2)
|
||||
|
||||
#define SSE_TEST_ALL 7
|
||||
|
||||
/* If this flag is given the executing task is not deleted
|
||||
* when the test finishes. This is useful if you want to
|
||||
* execute from a shell or similar.
|
||||
*/
|
||||
#define SSE_TEST_NO_DEL (1<<0)
|
||||
|
||||
/* Task arg is bitmask of these flags */
|
||||
rtems_task
|
||||
sse_test_lp_task(rtems_task_argument arg)
|
||||
{
|
||||
rtems_id hp_task = 0;
|
||||
rtems_status_code sc;
|
||||
rtems_task_priority pri;
|
||||
uint16_t fp_cw,fp_cw_set;
|
||||
uint32_t mxcsr, mxcsr_set;
|
||||
rtems_irq_connect_data irqd;
|
||||
int flags = (int)arg;
|
||||
int st;
|
||||
int errs = 0;
|
||||
|
||||
sse_tests = SSE_TEST_ALL & ~(flags>>1);
|
||||
|
||||
sse_test_ohdl = 0;
|
||||
|
||||
fp_cw_set = _CPU_Null_fp_context.fpucw | FPCW_RC(3) ;
|
||||
mxcsr_set = _CPU_Null_fp_context.mxcsr | MXCSR_RC(3) ;
|
||||
asm volatile("ldmxcsr %0"::"m"(mxcsr_set));
|
||||
asm volatile("fldcw %0"::"m"(fp_cw_set));
|
||||
|
||||
sc = rtems_semaphore_create(
|
||||
rtems_build_name('s','s','e','S'),
|
||||
0,
|
||||
RTEMS_SIMPLE_BINARY_SEMAPHORE,
|
||||
0,
|
||||
&sse_test_sync
|
||||
);
|
||||
if ( RTEMS_SUCCESSFUL != sc ) {
|
||||
rtems_error(sc, "sse_test ERROR: creation of 'sync' semaphore failed");
|
||||
errs++;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
rtems_task_set_priority( RTEMS_SELF, RTEMS_CURRENT_PRIORITY, &pri );
|
||||
|
||||
sc = rtems_task_create(
|
||||
rtems_build_name('s','s','e','H'),
|
||||
pri - 2,
|
||||
20000,
|
||||
RTEMS_DEFAULT_MODES,
|
||||
RTEMS_FLOATING_POINT,
|
||||
&hp_task
|
||||
);
|
||||
if ( RTEMS_SUCCESSFUL != sc ) {
|
||||
hp_task = 0;
|
||||
rtems_error( sc, "sse_test ERROR: creation of high-priority task failed");
|
||||
errs++;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
sc = rtems_task_start( hp_task, sse_test_hp_task, (rtems_task_argument)sse_test_sync );
|
||||
if ( RTEMS_SUCCESSFUL != sc ) {
|
||||
rtems_error( sc, "sse_test ERROR: start of high-priority task failed");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Test if FP/SSE context is saved/restored across an exception */
|
||||
sse_test_ohdl = _currentExcHandler;
|
||||
_currentExcHandler = sse_test_ehdl;
|
||||
|
||||
if ( (sse_tests & SSE_TEST_FPU_EXC) ) {
|
||||
if ( (st = exc_raise(FP_EXC)) ) {
|
||||
prstat(st,"FP_EXC");
|
||||
errs++;
|
||||
}
|
||||
|
||||
/* Test modified FPCW/MXCSR */
|
||||
asm volatile("fstcw %0":"=m"(fp_cw));
|
||||
asm volatile("stmxcsr %0":"=m"(mxcsr));
|
||||
mxcsr &= ~(MXCSR_ALLE);
|
||||
if ( fp_cw != fp_cw_set ) {
|
||||
fprintf(stderr,"sse_test ERROR: FPCW mismatch (after FP_EXC): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
|
||||
errs++;
|
||||
}
|
||||
if ( mxcsr != mxcsr_set ) {
|
||||
fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after FP_EXC): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
|
||||
errs++;
|
||||
}
|
||||
}
|
||||
|
||||
if ( (sse_tests & SSE_TEST_SSE_EXC) ) {
|
||||
if ( (st = exc_raise(SSE_EXC)) ) {
|
||||
prstat(st, "SSE_EXC");
|
||||
errs++;
|
||||
}
|
||||
|
||||
/* Test modified FPCW/MXCSR */
|
||||
asm volatile("fstcw %0":"=m"(fp_cw));
|
||||
asm volatile("stmxcsr %0":"=m"(mxcsr));
|
||||
mxcsr &= ~(MXCSR_ALLE);
|
||||
if ( fp_cw != fp_cw_set ) {
|
||||
fprintf(stderr,"sse_test ERROR: FPCW mismatch (after SSE_EXC): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
|
||||
errs++;
|
||||
}
|
||||
if ( mxcsr != mxcsr_set ) {
|
||||
fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after SSE_EXC): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
|
||||
errs++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ( (sse_tests & SSE_TEST_IRQ_EXC) ) {
|
||||
memset( &irqd, 0, sizeof(irqd) );
|
||||
irqd.name = SSE_TEST_IRQ;
|
||||
irqd.hdl = (void*)sse_test_ehdl;
|
||||
irqd.handle = 0;
|
||||
|
||||
if ( ! BSP_install_rtems_irq_handler( &irqd ) ) {
|
||||
fprintf(stderr, "sse_test ERROR: Unable to install ISR\n");
|
||||
errs++;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Test if FP/SSE context is saved/restored across an interrupt */
|
||||
if ( (st = exc_raise(IRQ_EXC)) ) {
|
||||
prstat(st, "IRQ");
|
||||
errs++;
|
||||
}
|
||||
|
||||
if ( ! BSP_remove_rtems_irq_handler( &irqd ) ) {
|
||||
fprintf(stderr, "sse_test ERROR: Unable to uninstall ISR\n");
|
||||
}
|
||||
|
||||
/* Test modified FPCW/MXCSR */
|
||||
asm volatile("fstcw %0":"=m"(fp_cw));
|
||||
asm volatile("stmxcsr %0":"=m"(mxcsr));
|
||||
mxcsr &= ~(MXCSR_ALLE);
|
||||
if ( fp_cw != fp_cw_set ) {
|
||||
fprintf(stderr,"sse_test ERROR: FPCW mismatch (after IRQ): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", fp_cw_set, fp_cw);
|
||||
errs++;
|
||||
}
|
||||
if ( mxcsr != mxcsr_set ) {
|
||||
fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after IRQ): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", mxcsr_set, mxcsr);
|
||||
errs++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bail:
|
||||
/* Wait for console to calm down... */
|
||||
rtems_task_wake_after(5);
|
||||
fprintf(stderr,"SSE/FPU Test %s (%u errors)\n", errs ? "FAILED":"PASSED", errs);
|
||||
if ( sse_test_ohdl ) {
|
||||
_currentExcHandler = sse_test_ohdl;
|
||||
sse_test_ohdl = 0;
|
||||
}
|
||||
if ( sse_test_sync )
|
||||
rtems_semaphore_delete( sse_test_sync );
|
||||
sse_test_sync = 0;
|
||||
if ( hp_task )
|
||||
rtems_task_delete( hp_task );
|
||||
|
||||
if ( ! (flags & SSE_TEST_NO_DEL) )
|
||||
rtems_task_delete( RTEMS_SELF );
|
||||
}
|
||||
|
||||
static void
|
||||
sse_test_ehdl(CPU_Exception_frame *p_f)
|
||||
{
|
||||
int i,j,start = 0;
|
||||
int mismatch;
|
||||
__vf f4;
|
||||
|
||||
if ( p_f ) {
|
||||
printk("Got exception #%u\n", p_f->idtIndex);
|
||||
printk("EIP: 0x%08x, ESP: 0x%08x\n", p_f->eip, p_f->esp0);
|
||||
printk("TID: 0x%08x\n", _Thread_Executing->Object.id);
|
||||
|
||||
if ( ! p_f->fp_ctxt ) {
|
||||
printk("ERROR: NO FP/SSE CONTEXT ATTACHED ??\n");
|
||||
sse_test_ohdl(p_f);
|
||||
}
|
||||
if ( 16 == p_f->idtIndex ) {
|
||||
printk("Resetting FP status (0x%04"PRIx16")\n", p_f->fp_ctxt->fsw);
|
||||
p_f->fp_ctxt->fsw = 0;
|
||||
} else if ( 19 == p_f->idtIndex ) {
|
||||
start = 1;
|
||||
memcpy(&f4, p_f->fp_ctxt->xmmregs[0], sizeof(f4));
|
||||
f4 = -f4;
|
||||
memcpy(p_f->fp_ctxt->xmmregs[0], &f4, sizeof(f4));
|
||||
p_f->fp_ctxt->mxcsr &= ~MXCSR_ALLE;
|
||||
} else {
|
||||
printk("(skipping non-FP exception)\n");
|
||||
sse_test_ohdl(p_f);
|
||||
}
|
||||
|
||||
printk("Checking XMM regs -- ");
|
||||
for ( mismatch=0, i=start; i<8; i++ ) {
|
||||
for ( j=0; j<16; j++ ) {
|
||||
if ( p_f->fp_ctxt->xmmregs[i][j] != ((i<<4) | j) )
|
||||
mismatch++;
|
||||
}
|
||||
}
|
||||
if ( mismatch ) {
|
||||
printk("%u mismatches; dump:\n", mismatch);
|
||||
for ( i=0; i<8; i++ ) {
|
||||
for ( j=0; j<16; j++ ) {
|
||||
printk("0x%02x ", p_f->fp_ctxt->xmmregs[i][j]);
|
||||
}
|
||||
printk("\n");
|
||||
}
|
||||
} else {
|
||||
printk("OK\n");
|
||||
}
|
||||
} else {
|
||||
printk("IRQ %u\n", SSE_TEST_IRQ);
|
||||
}
|
||||
printk("Clobbering FPU/SSE state\n");
|
||||
asm volatile("finit");
|
||||
sse_clobber(0xdeadbeef);
|
||||
printk("Notifying task\n");
|
||||
rtems_semaphore_release( sse_test_sync );
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Code using signals for testing under linux; unfortunately, 32-bit
|
||||
* linux seems to pass no SSE context info to the sigaction...
|
||||
*/
|
||||
|
||||
#include <signal.h>
|
||||
#include <ucontext.h>
|
||||
|
||||
/* Expands to a 'case FPE_<X>:' label that sets the local variable 'msg'
 * to the string "FPE_<X>" and breaks out of the enclosing switch.
 */
#define MKCASE(X) case FPE_##X: msg="FPE_"#X; break;
|
||||
|
||||
/* Zeroes SSE register xmm<i> by XOR-ing it with itself (pxor); the register
 * is listed as clobbered so the compiler knows its contents changed.
 */
#define CLRXMM(i) asm volatile("pxor %%xmm"#i", %%xmm"#i:::"xmm"#i)
|
||||
|
||||
static void
|
||||
fpe_act(int signum, siginfo_t *p_info, void *arg3)
|
||||
{
|
||||
ucontext_t *p_ctxt = arg3;
|
||||
const char *msg = "FPE_UNKNOWN";
|
||||
uint16_t *p_fst;
|
||||
|
||||
if ( SIGFPE != signum ) {
|
||||
fprintf(stderr,"WARNING: fpe_act handles SIGFPE\n");
|
||||
return;
|
||||
}
|
||||
switch ( p_info->si_code ) {
|
||||
default:
|
||||
fprintf(stderr,"WARNING: fpe_act got unkown code %u\n", p_info->si_code);
|
||||
return;
|
||||
MKCASE(INTDIV);
|
||||
MKCASE(INTOVF);
|
||||
MKCASE(FLTDIV);
|
||||
MKCASE(FLTOVF);
|
||||
MKCASE(FLTUND);
|
||||
MKCASE(FLTRES);
|
||||
MKCASE(FLTINV);
|
||||
MKCASE(FLTSUB);
|
||||
}
|
||||
fprintf(stderr,"Got SIGFPE (%s) @%p\n", msg, p_info->si_addr);
|
||||
#ifdef __linux__
|
||||
fprintf(stderr,"Resetting FP status 0x%02lx\n", p_ctxt->uc_mcontext.fpregs->sw);
|
||||
p_ctxt->uc_mcontext.fpregs->sw = 0;
|
||||
#ifdef TEST_MISMATCH
|
||||
fp_st1((void*)&p_ctxt->uc_mcontext.fpregs->_st[3],2.345);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Clear FPU; if context is properly saved/restored around exception
|
||||
* then this shouldn't disturb the register contents of the interrupted
|
||||
* task/process.
|
||||
*/
|
||||
asm volatile("finit");
|
||||
sse_clobber(0xdeadbeef);
|
||||
}
|
||||
|
||||
static void
|
||||
test(void)
|
||||
{
|
||||
Context_Control_sse ctxt;
|
||||
|
||||
stor_ctxt(&ctxt);
|
||||
printf("FPCW: 0x%"PRIx16"\nFPSW: 0x%"PRIx16"\n", ctxt.fcw, ctxt.fsw);
|
||||
printf("FTAG: 0x%"PRIx8"\n",ctxt.ftw);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
struct sigaction a1, a2;
|
||||
uint32_t mxcsr;
|
||||
|
||||
memset(&a1, 0, sizeof(a1));
|
||||
|
||||
a1.sa_sigaction = fpe_act;
|
||||
a1.sa_flags = SA_SIGINFO;
|
||||
|
||||
if ( sigaction(SIGFPE, &a1, &a2) ) {
|
||||
perror("sigaction");
|
||||
return 1;
|
||||
}
|
||||
|
||||
asm volatile("stmxcsr %0":"=m"(mxcsr));
|
||||
printf("MXCSR: 0x%08"PRIx32"\n", mxcsr);
|
||||
|
||||
test();
|
||||
exc_raise(0);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Helpers to access CR4 and MXCSR */
|
||||
|
||||
/* Read control register CR4 and return its value. */
uint32_t
mfcr4()
{
	uint32_t cr4;

	__asm__ __volatile__("mov %%cr4, %0":"=r"(cr4));
	return cr4;
}
|
||||
|
||||
/* Write 'rval' to control register CR4. */
void
mtcr4(uint32_t rval)
{
	__asm__ __volatile__(
		"mov %0, %%cr4"
		: /* no outputs */
		: "r"(rval)
	);
}
|
||||
|
||||
/* Store the SSE control/status register MXCSR to memory and return it. */
uint32_t
mfmxcsr()
{
	uint32_t csr;

	__asm__ __volatile__("stmxcsr %0":"=m"(csr));
	return csr;
}
|
||||
|
||||
/* Load the SSE control/status register MXCSR with 'rval'. */
void
mtmxcsr(uint32_t rval)
{
	__asm__ __volatile__(
		"ldmxcsr %0"
		: /* no outputs */
		: "m"(rval)
	);
}
|
||||
|
||||
|
||||
float
|
||||
sseraise()
|
||||
{
|
||||
__vf f4={-2., -2., -2. -2.};
|
||||
float f;
|
||||
f4 = __builtin_ia32_sqrtps( f4 );
|
||||
memcpy(&f,&f4,sizeof(f));
|
||||
return f;
|
||||
}
|
||||
Reference in New Issue
Block a user