sparc: Add lazy floating point switch

The SPARC ABI is a bit special with respect to the floating point context.
The complete floating point context is volatile.  Thus, from an ABI point
of view nothing needs to be saved and restored during a context switch.
Instead the floating point context must be saved and restored during
interrupt processing.  Historically, the deferred floating point switch was
used for SPARC and the complete floating point context is saved and
restored during a context switch to the new floating point unit owner.
This is a bit dangerous since post-switch actions (e.g. signal handlers)
and context switch extensions may silently corrupt the floating point
context.

The floating point unit is disabled for interrupt handlers.  Thus, if an
interrupt handler uses the floating point unit, this results in a trap and
a fatal error (INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT).

In uniprocessor configurations, a lazy floating point context switch is
used.  In case an active floating point thread is interrupted (PSR[EF] == 1)
and a thread dispatch is carried out, then this thread is registered as the
floating point owner.  When a floating point owner is present during a
context switch, the floating point unit is disabled for the heir thread
(PSR[EF] == 0).  The floating point disabled trap checks that the use of the
floating point unit is allowed and saves/restores the floating point context
on demand.

Update #3077.
This commit is contained in:
Sebastian Huber
2017-07-17 07:30:46 +02:00
parent 600d88dfd7
commit 146adb1edf
15 changed files with 343 additions and 236 deletions

View File

@@ -133,9 +133,11 @@ static rtems_isr bsp_spurious_handler(
.isf = isf
};
#if !defined(SPARC_USE_LAZY_FP_SWITCH)
if ( SPARC_REAL_TRAP_NUMBER( trap ) == 4 ) {
_Internal_error( INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT );
}
#endif
rtems_fatal(
RTEMS_FATAL_SOURCE_EXCEPTION,
@@ -170,10 +172,13 @@ void bsp_spurious_initialize()
*/
if (( trap == 5 || trap == 6 ) ||
#if defined(SPARC_USE_LAZY_FP_SWITCH)
( trap == 4 ) ||
#endif
(( trap >= 0x11 ) && ( trap <= 0x1f )) ||
(( trap >= 0x70 ) && ( trap <= 0x83 )) ||
( trap == 0x80 + SPARC_SWTRAP_IRQDIS ) ||
#if SPARC_HAS_FPU == 1
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
( trap == 0x80 + SPARC_SWTRAP_IRQDIS_FP ) ||
#endif
( trap == 0x80 + SPARC_SWTRAP_IRQEN ))

View File

@@ -120,9 +120,11 @@ static rtems_isr bsp_spurious_handler(
.isf = isf
};
#if !defined(SPARC_USE_LAZY_FP_SWITCH)
if ( SPARC_REAL_TRAP_NUMBER( trap ) == 4 ) {
_Internal_error( INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT );
}
#endif
rtems_fatal(
RTEMS_FATAL_SOURCE_EXCEPTION,
@@ -157,10 +159,13 @@ void bsp_spurious_initialize()
*/
if (( trap == 5 || trap == 6 ) ||
#if defined(SPARC_USE_LAZY_FP_SWITCH)
( trap == 4 ) ||
#endif
(( trap >= 0x11 ) && ( trap <= 0x1f )) ||
(( trap >= 0x70 ) && ( trap <= 0x83 )) ||
( trap == 0x80 + SPARC_SWTRAP_IRQDIS ) ||
#if SPARC_HAS_FPU == 1
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
( trap == 0x80 + SPARC_SWTRAP_IRQDIS_FP ) ||
#endif
( trap == 0x80 + SPARC_SWTRAP_IRQEN ))

View File

@@ -119,9 +119,11 @@ static rtems_isr bsp_spurious_handler(
.isf = isf
};
#if !defined(SPARC_USE_LAZY_FP_SWITCH)
if ( SPARC_REAL_TRAP_NUMBER( trap ) == 4 ) {
_Internal_error( INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT );
}
#endif
rtems_fatal(
RTEMS_FATAL_SOURCE_EXCEPTION,
@@ -156,10 +158,13 @@ void bsp_spurious_initialize()
*/
if (( trap == 5 ) || ( trap == 6 ) ||
#if defined(SPARC_USE_LAZY_FP_SWITCH)
( trap == 4 ) ||
#endif
(( trap >= 0x11 ) && ( trap <= 0x1f )) ||
(( trap >= 0x70 ) && ( trap <= 0x83 )) ||
( trap == 0x80 + SPARC_SWTRAP_IRQDIS ) ||
#if SPARC_HAS_FPU == 1
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
( trap == 0x80 + SPARC_SWTRAP_IRQDIS_FP ) ||
#endif
( trap == 0x80 + SPARC_SWTRAP_IRQEN ))

View File

@@ -7,7 +7,7 @@
* COPYRIGHT (c) 1989-2011.
* On-Line Applications Research Corporation (OAR).
*
* Copyright (c) 2014, 2016 embedded brains GmbH
* Copyright (c) 2014, 2017 embedded brains GmbH
*
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
@@ -218,7 +218,17 @@ done_flushing:
! The next load is in a delay slot, which is all right
#endif
#if defined(SPARC_USE_LAZY_FP_SWITCH)
ld [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET], %g2
#endif
ld [%o1 + PSR_OFFSET], %g1 ! g1 = heir psr with traps enabled
#if defined(SPARC_USE_LAZY_FP_SWITCH)
sethi %hi(SPARC_PSR_EF_MASK), %g5
cmp %g2, %g0
bne,a .Lclear_psr_ef_done
andn %g1, %g5, %g1 ! g1 = heir psr w/o PSR[EF]
.Lclear_psr_ef_done:
#endif
andn %g1, SPARC_PSR_CWP_MASK, %g1 ! g1 = heir psr w/o cwp
or %g1, %g3, %g1 ! g1 = heir psr with cwp
mov %g1, %psr ! restore status register and
@@ -583,7 +593,7 @@ pil_fixed:
nop ! delay slot
#endif
#if SPARC_HAS_FPU == 1
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
mov %l0, %g1 ! PSR[EF] value of interrupted context
ta SPARC_SWTRAP_IRQDIS_FP ! **** DISABLE INTERRUPTS ****
#else
@@ -652,10 +662,38 @@ isr_dispatch:
mov 0, %o1 ! ISR level for _Thread_Do_dispatch()
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
#if defined(SPARC_USE_LAZY_FP_SWITCH)
/* Test if we interrupted a floating point thread (PSR[EF] == 1) */
andcc %l0, %l5, %g0
be non_fp_thread_dispatch
be .Lnon_fp_thread_dispatch
ld [%g6 + PER_CPU_OFFSET_EXECUTING], %l6
/* Set new floating point unit owner to executing thread */
st %l6, [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET]
call SYM(_Thread_Do_dispatch)
mov %g6, %o0
/*
* If we are still the floating point unit owner, then reset the
* floating point unit owner to NULL, otherwise clear PSR[EF] in the
* interrupt frame and let the FP disabled system call do the floating
* point context save/restore.
*/
ld [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET], %l7
cmp %l6, %l7
bne .Ldisable_fp
andn %l0, %l5, %l0
ba .Lthread_dispatch_done
st %g0, [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET]
.Ldisable_fp:
ba .Lthread_dispatch_done
st %l0, [%fp + ISF_PSR_OFFSET]
.Lnon_fp_thread_dispatch:
#elif defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
/* Test if we interrupted a floating point thread (PSR[EF] == 1) */
andcc %l0, %l5, %g0
be .Lnon_fp_thread_dispatch
nop
/*
@@ -722,17 +760,17 @@ isr_dispatch:
ldd [%sp + FP_FRAME_OFFSET_F28_F29], %f28
ldd [%sp + FP_FRAME_OFFSET_F3O_F31], %f30
ld [%sp + FP_FRAME_OFFSET_FSR], %fsr
ba thread_dispatch_done
ba .Lthread_dispatch_done
add %sp, FP_FRAME_SIZE, %sp
non_fp_thread_dispatch:
.Lnon_fp_thread_dispatch:
#endif
call SYM(_Thread_Do_dispatch)
mov %g6, %o0
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
thread_dispatch_done:
#if SPARC_HAS_FPU == 1
.Lthread_dispatch_done:
#endif
ta SPARC_SWTRAP_IRQDIS ! **** DISABLE INTERRUPTS ****

View File

@@ -32,6 +32,15 @@
nop; \
nop;
/*
* Floating point disabled trap table entry
*
* Jumps to _handler with the following registers prepared for it:
*
*   l0 = PSR (read by the first instruction of the entry)
*   l3 = %hi(SPARC_PSR_EF_MASK), loaded in the delay slot of the jump
*   l4 = scratch, holds %hi(_handler) for the jump
*
* The entry consists of exactly four instructions.
*/
#define FPDIS_TRAP(_handler) \
mov %psr, %l0 ; \
sethi %hi(_handler), %l4 ; \
jmp %l4+%lo(_handler); \
sethi %hi(SPARC_PSR_EF_MASK), %l3
/*
* System call optimized trap table entry
*/
@@ -100,7 +109,11 @@ SYM(trap_table):
! exception
BAD_TRAP; ! 02 illegal instruction
BAD_TRAP; ! 03 privileged instruction
#if defined(SPARC_USE_LAZY_FP_SWITCH)
FPDIS_TRAP(SYM(syscall_lazy_fp_switch)); ! 04 fp disabled
#else
BAD_TRAP; ! 04 fp disabled
#endif
WOTRAP(5, SYM(window_overflow_trap_handler)); ! 05 window overflow
WUTRAP(6, SYM(window_underflow_trap_handler));! 06 window underflow
BAD_TRAP; ! 07 memory address not aligned
@@ -209,7 +222,7 @@ SYM(CLOCK_SPEED):
*/
IRQDIS_TRAP(SYM(syscall_irqdis)); ! 89 IRQ Disable syscall trap
IRQEN_TRAP(SYM(syscall_irqen)); ! 8A IRQ Enable syscall trap
#if SPARC_HAS_FPU == 1
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
IRQDIS_TRAP(SYM(syscall_irqdis_fp)); ! 8B IRQ disable
! and set PSR[EF] syscall trap
#else

View File

@@ -11,12 +11,15 @@
* COPYRIGHT:
*
* COPYRIGHT (c) 1995. European Space Agency.
* Copyright (c) 2016, 2017 embedded brains GmbH
*
* This terms of the RTEMS license apply to this file.
*
*/
#include <rtems/asm.h>
#include <rtems/score/cpuimpl.h>
#include <rtems/score/percpu.h>
#include "syscall.h"
.section ".text"
@@ -95,7 +98,7 @@ SYM(syscall_irqen):
jmp %l2 ! Return to after TA 10.
rett %l2 + 4
#if SPARC_HAS_FPU == 1
#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
/*
* system call - Interrupt disable and set PSR[EF] according to caller
* specified %g1
@@ -132,6 +135,127 @@ SYM(syscall_irqdis_fp):
rett %l2 + 4
#endif
#if defined(SPARC_USE_LAZY_FP_SWITCH)
	/*
	 * Floating point disabled trap handler - perform a lazy floating
	 * point switch.
	 *
	 * On entry (set up by the hardware and the FPDIS_TRAP table entry):
	 *
	 *   l0 = psr (from trap table)
	 *   l1 = pc  (address of the floating point instruction that trapped)
	 *   l2 = npc (address of the instruction following it in execution
	 *             order; not necessarily pc + 4)
	 *   l3 = SPARC_PSR_EF_MASK
	 *
	 * Register usage inside the handler:
	 *
	 *   l4 = executing thread (from the per-CPU control in g6)
	 *   l5 = ISR nest level, later the floating point context area of the
	 *        current floating point owner
	 *   l6 = floating point context pointer of the executing thread (zero
	 *        means the thread must not use the FPU), later the pointer to
	 *        its saved floating point context (zero means nothing to
	 *        restore)
	 *   l7 = current floating point owner (zero means no owner)
	 *
	 * On success the trapped instruction is re-executed with
	 * PSR[EF] == 1.  On misuse (interrupt context or non floating point
	 * thread) control is transferred to _Internal_error().
	 */
	.align 32				! Align to 32-byte cache-line
	PUBLIC(syscall_lazy_fp_switch)
SYM(syscall_lazy_fp_switch):
	ld	[%g6 + PER_CPU_OFFSET_EXECUTING], %l4
	ld	[%g6 + PER_CPU_ISR_NEST_LEVEL], %l5
	ld	[%l4 + %lo(SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET)], %l6
	ld	[%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET], %l7

	/* Ensure that we are not in interrupt context */
	cmp	%l5, 0
	bne	.Lillegal_use_of_floating_point_unit
	 or	%l0, %l3, %l0		! delay slot: l0 = psr | PSR[EF]

	/* Ensure that we are a proper floating point thread */
	cmp	%l6, 0
	be	.Lillegal_use_of_floating_point_unit
	 ld	[%l4 + %lo(SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET)], %l6
					! delay slot: l6 = saved context to
					! restore later (or zero)

	/* Set PSR[EF] to 1, PSR write delay 3 instructions! */
	mov	%l0, %psr

	/*
	 * Check if there is a floating point owner.  We have to check this
	 * here, since the floating point owner may have been deleted in the
	 * meantime.  Save the floating point context if necessary.
	 */
	cmp	%l7, 0
	be	.Lfp_save_done
	 nop
	ld	[%l7 + %lo(SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET)], %l5
	std	%f0, [%l5 + SPARC_FP_CONTEXT_OFFSET_F0_F1]
	SPARC_LEON3FT_B2BST_NOP
	std	%f2, [%l5 + SPARC_FP_CONTEXT_OFFSET_F2_F3]
	SPARC_LEON3FT_B2BST_NOP
	std	%f4, [%l5 + SPARC_FP_CONTEXT_OFFSET_F4_F5]
	SPARC_LEON3FT_B2BST_NOP
	std	%f6, [%l5 + SPARC_FP_CONTEXT_OFFSET_F6_F7]
	SPARC_LEON3FT_B2BST_NOP
	std	%f8, [%l5 + SPARC_FP_CONTEXT_OFFSET_F8_F9]
	SPARC_LEON3FT_B2BST_NOP
	std	%f10, [%l5 + SPARC_FP_CONTEXT_OFFSET_F10_F11]
	SPARC_LEON3FT_B2BST_NOP
	std	%f12, [%l5 + SPARC_FP_CONTEXT_OFFSET_F12_F13]
	SPARC_LEON3FT_B2BST_NOP
	std	%f14, [%l5 + SPARC_FP_CONTEXT_OFFSET_F14_F15]
	SPARC_LEON3FT_B2BST_NOP
	std	%f16, [%l5 + SPARC_FP_CONTEXT_OFFSET_F16_F17]
	SPARC_LEON3FT_B2BST_NOP
	std	%f18, [%l5 + SPARC_FP_CONTEXT_OFFSET_F18_F19]
	SPARC_LEON3FT_B2BST_NOP
	std	%f20, [%l5 + SPARC_FP_CONTEXT_OFFSET_F20_F21]
	SPARC_LEON3FT_B2BST_NOP
	std	%f22, [%l5 + SPARC_FP_CONTEXT_OFFSET_F22_F23]
	SPARC_LEON3FT_B2BST_NOP
	std	%f24, [%l5 + SPARC_FP_CONTEXT_OFFSET_F24_F25]
	SPARC_LEON3FT_B2BST_NOP
	std	%f26, [%l5 + SPARC_FP_CONTEXT_OFFSET_F26_F27]
	SPARC_LEON3FT_B2BST_NOP
	std	%f28, [%l5 + SPARC_FP_CONTEXT_OFFSET_F28_F29]
	SPARC_LEON3FT_B2BST_NOP
	std	%f30, [%l5 + SPARC_FP_CONTEXT_OFFSET_F30_F31]
	SPARC_LEON3FT_B2BST_NOP
	st	%fsr, [%l5 + SPARC_FP_CONTEXT_OFFSET_FSR]
	SPARC_LEON3FT_B2BST_NOP
	/* There is no floating point owner any more ... */
	st	%g0, [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET]
	SPARC_LEON3FT_B2BST_NOP
	/* ... and the previous owner now has a saved context at l5 */
	st	%l5, [%l7 + %lo(SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET)]
.Lfp_save_done:

	/*
	 * Restore the floating point context if necessary.  The store in the
	 * delay slot is executed on both paths and clears the saved context
	 * pointer of the executing thread.
	 */
	cmp	%l6, 0
	be	.Lfp_restore_done
	 st	%g0, [%l4 + %lo(SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET)]
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F0_F1], %f0
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F2_F3], %f2
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F4_F5], %f4
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F6_F7], %f6
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F8_F9], %f8
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F10_F11], %f10
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F12_F13], %f12
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F14_F15], %f14
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F16_F17], %f16
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F18_F19], %f18
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F20_F21], %f20
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F22_F23], %f22
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F24_F25], %f24
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F26_F27], %f26
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F28_F29], %f28
	ldd	[%l6 + SPARC_FP_CONTEXT_OFFSET_F30_F31], %f30
	ld	[%l6 + SPARC_FP_CONTEXT_OFFSET_FSR], %fsr
.Lfp_restore_done:

	/*
	 * Now, retry the floating point instruction with PSR[EF] == 1.
	 *
	 * Return to the trapped pc/npc pair (l1/l2) and not to l1/l1 + 4:
	 * if the floating point instruction is located in the delay slot of
	 * a taken control transfer, then npc is the transfer target and
	 * l1 + 4 would wrongly continue on the fall-through path.
	 */
	jmp	%l1
	 rett	%l2
.Lillegal_use_of_floating_point_unit:

	/*
	 * Leave the trap and continue in _Internal_error().  The i0 of the
	 * trap window is the o0 of the interrupted window, so this passes 38
	 * as the first argument.
	 * NOTE(review): 38 is presumably the numeric value of
	 * INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT - confirm against
	 * the Internal_errors_Core_list enumeration.
	 */
	sethi	%hi(_Internal_error), %l1
	or	%l1, %lo(_Internal_error), %l1
	mov	38, %i0
	jmp	%l1
	 rett	%l1 + 4
#endif
#if defined(RTEMS_PARAVIRT)
PUBLIC(_SPARC_Get_PSR)