2008-03-13 Till Straumann <strauman@slac.stanford.edu>

* new-exceptions/bspsupport/ppc_exc_asm_macros.h,
	new-exceptions/bspsupport/ppc_exc.S,
	new-exceptions/bspsupport/README,
	new-exceptions/bspsupport/ppc_exc_hdl.c:
	Thomas Doerfler clarified (thanks!) that raising an
	exception and executing the 1st instruction is not
an atomic operation. I added a fix to the code that
	checks if a lower-priority interrupt is under way:
	we now not only test if the 'lock' variable was set
	but also check if the interrupted PC points to the
	'write lock' instruction.
	Added more comments and updated README.
commit 35506215c9 (parent bd7a91b01a)
5 changed files with 301 additions and 43 deletions

File: ChangeLog

@@ -1,3 +1,18 @@
2008-03-13 Till Straumann <strauman@slac.stanford.edu>
* new-exceptions/bspsupport/ppc_exc_asm_macros.h,
new-exceptions/bspsupport/ppc_exc.S,
new-exceptions/bspsupport/README,
new-exceptions/bspsupport/ppc_exc_hdl.c:
Thomas Doerfler clarified (thanks!) that raising an
exception and executing the 1st instruction is not
an atomic operation. I added a fix to the code that
checks if a lower-priority interrupt is under way:
we now not only test if the 'lock' variable was set
but also check if the interrupted PC points to the
'write lock' instruction.
Added more comments and updated README.
2008-03-11 Till Straumann <strauman@slac.stanford.edu>
* new-exceptions/bspsupport/ppc_exc_asm_macros.h: bugfix;

File: new-exceptions/bspsupport/README

@@ -93,7 +93,7 @@ that they could is beyond doubt...):
- some PPCs don't fit into the classic scheme where
the exception vector addresses all were multiples of
0x100 (some are spaced as closely as 0x10).
0x100 (some vectors are spaced as closely as 0x10).
The API should not expose vector offsets but only
vector numbers which can be considered an abstract
entity. The mapping from vector numbers to actual
@@ -323,10 +323,6 @@ RACE CONDITION WHEN DEALING WITH CRITICAL INTERRUPTS
.. increase thread-dispatch-disable-level
.. clear 'ee_lock' variable
The earliest a critical exception could interrupt
the 'external' exception handler is after the
'stw r1, ee_lock@sdarel(r13)' instruction.
After the HPI decrements the dispatch-disable level
it checks 'ee_lock' and refrains from performing
a context switch if 'ee_lock' is nonzero. Since
@@ -341,3 +337,54 @@ RACE CONDITION WHEN DEALING WITH CRITICAL INTERRUPTS
loading any registers. The short-data area
pointer R13 is appropriate.
CAVEAT: unfortunately, this method by itself
is *NOT* enough because raising a low-priority
exception and executing the first instruction
of the handler is *NOT* atomic. Hence, the following
could occur:
1) LPI is taken
2) PC is saved in SRR0, PC is loaded with
address of 'locking instruction'
stw r1, ee_lock@sdarel(r13)
3) ==> critical interrupt happens
4) PC (containing address of locking instruction)
is saved in CSRR0
5) HPI is dispatched
For the HPI to correctly handle this situation
it does the following:
a) increase thread-dispatch disable level
b) do interrupt work
c) decrease thread-dispatch disable level
d) if ( dispatch-disable level == 0 )
d1) check ee_lock
d2) check instruction at *CSRR0
d3) do a context switch if necessary ONLY IF
ee_lock is NOT set AND *CSRR0 is NOT the
'locking instruction'
this works because the address of 'ee_lock'
is embedded in the locking instruction
'stw r1, ee_lock@sdarel(r13)' and because the
registers r1/r13 have a special purpose
(stack-pointer, SDA-pointer). Hence it is safe
to assume that the particular instruction
'stw r1, ee_lock@sdarel(r13)' never occurs
anywhere else.
Another note: this algorithm also makes sure
that ONLY nested ASYNCHRONOUS interrupts which
enable/disable thread-dispatching and check if
thread-dispatching is required before returning
control engage in this locking protocol. It is
important that when a critical, asynchronous
interrupt interrupts a 'synchronous' exception
(which does not disable thread-dispatching)
the thread-dispatching operation upon return of
the HPI is NOT deferred (because the synchronous
handler would not, eventually, check for a
dispatch requirement).
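
To make the HPI's deferral check concrete, here is a minimal C sketch of
steps a)-d) above. The names dispatch_disable_level, read_csrr0(),
do_interrupt_work(), context_switch() and ee_lock_sdarel are illustrative
stand-ins, not the actual RTEMS symbols; the opcode constant mirrors the
STW_R1_R13() macro defined in ppc_exc_asm_macros.h further below.

#include <stdint.h>

/* 'stw r1, off(r13)' opcode, as built by STW_R1_R13() */
#define STW_R1_R13(off) (0x902D0000u | ((off) & 0xffffu))

volatile uint32_t ee_lock;                 /* written by the LPI's 1st insn */
volatile uint32_t dispatch_disable_level;  /* thread-dispatch disable level */
extern uint16_t ee_lock_sdarel;            /* SDA offset of ee_lock         */
extern const uint32_t *read_csrr0(void);   /* PC saved when HPI was taken   */
extern void do_interrupt_work(void);
extern void context_switch(void);

void hpi_handler(void)
{
    dispatch_disable_level++;                       /* a) */
    do_interrupt_work();                            /* b) */
    if (--dispatch_disable_level == 0) {            /* c) + d) */
        const uint32_t *pc = read_csrr0();
        int lpi_under_way =
               (ee_lock != 0)                       /* d1) lock already set */
            || (*pc == STW_R1_R13(ee_lock_sdarel)); /* d2) about to set it  */
        if (!lpi_under_way)
            context_switch();                       /* d3) */
    }
}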

File: new-exceptions/bspsupport/ppc_exc.S

@@ -40,7 +40,7 @@ ppc_exc_min_prolog_size = 4 * 4
* that off before we can use the stack pointer. Note that this is
* ONLY safe if the shadowing is actually active -- otherwise, r1
* is destroyed. We deliberately use r1 so problems become obvious
* if this is abused!
* if this is misused!
*/
.global ppc_exc_tgpr_clr_prolog
ppc_exc_tgpr_clr_prolog:
@@ -246,15 +246,16 @@ wrap_common:
skip_save_nonvolatile_regs:
/* store address of exception frame in r4; vector is in r3 */
addi r4, r14, FRAME_LINK_SPACE
/* load hi-halfword of C wrapper address */
lis r5, ppc_exc_C_wrapper@h
/* clear CR[6] to make sure no vararg callee assumes that
* there are any valid FP regs
*/
crxor 6,6,6
/* Far branch to ppc_C_wrapper */
lis r5, ppc_exc_C_wrapper@h
addi r4, r14, FRAME_LINK_SPACE
/* merge lo-halfword of C wrapper address */
ori r5, r5, ppc_exc_C_wrapper@l
/* Far branch to ppc_C_wrapper */
mtlr r5
blrl
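
A side note on the far branch: the lis/ori pair assembles the 32-bit
address of ppc_exc_C_wrapper from its two halfwords. Because the halves
are merged with OR rather than an add, the plain @h halfword (no @ha
carry adjustment) is the right choice. A one-function C analogue of the
arithmetic, for illustration only:

#include <stdint.h>

/* lis r5, sym@h ; ori r5, r5, sym@l */
uint32_t compose_address(uint32_t sym)
{
    uint32_t r5 = sym & 0xffff0000u; /* lis: high halfword, low bits clear */
    r5 |= sym & 0xffffu;             /* ori: merge in the low halfword     */
    return r5;                       /* always equals sym                  */
}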
@@ -279,7 +280,10 @@ skip_save_nonvolatile_regs:
addi r4, r4, -1
stw r4, _ISR_Nest_level@sdarel(r13)
/* switch back to original stack */
/*
* switch back to original stack (r14 == r1 if we are
* still on the IRQ stack).
*/
mr r1, r14
/* restore interrupt mask */
@@ -342,8 +346,10 @@ skip_restore_nonvolatile_regs:
lwz r8, GPR8_OFFSET(r1)
lwz r7, GPR7_OFFSET(r1)
lwz r6, GPR6_OFFSET(r1)
/* r4, r5 are eventually restored by caller */
lwz r3, GPR3_OFFSET(r1)
lwz r2, GPR2_OFFSET(r1)
/* r1 is eventually restored by caller */
lwz r0, GPR0_OFFSET(r1)
beq cr3, 2f

File: new-exceptions/bspsupport/ppc_exc_asm_macros.h

@@ -1,7 +1,7 @@
/*
* (c) 1999, Eric Valette valette@crf.canon.fr
*
* Modified and partially rewritten by Till Straumann, 2007
* Modified and partially rewritten by Till Straumann, 2007-2008
*
* Low-level assembly code for PPC exceptions (macros).
*
@@ -17,6 +17,7 @@
#define EXC_MIN_GPR1 0
#define FRAME_LINK_SPACE 8
#define r0 0
#define r1 1
#define r2 2
@@ -60,9 +61,18 @@
#define NOFRAME 0xffff8000
/* Switch r1 to interrupt stack if not already there.
/* Opcode of 'stw r1, off(r13)' */
#define STW_R1_R13(off) ((((36<<10)|(r1<<5)|(r13))<<16) | ((off)&0xffff))
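
A quick sanity check of this constant: 'stw rS,d(rA)' is a D-form
instruction (primary opcode 36, then 5 bits each for rS and rA, then a
16-bit displacement), so the macro should produce 0x902D0000 plus the
offset. A small, self-contained test of that arithmetic:

#include <assert.h>
#include <stdint.h>

#define r1  1
#define r13 13
#define STW_R1_R13(off) ((((36u << 10) | (r1 << 5) | (r13)) << 16) | ((off) & 0xffffu))

int main(void)
{
    /* opcd<<26 | rS<<21 | rA<<16 | d  ==  0x902D0000 | d */
    assert(STW_R1_R13(0x0123) == ((36u << 26) | (1u << 21) | (13u << 16) | 0x0123u));
    assert(STW_R1_R13(0x0123) == 0x902D0123u);
    return 0;
}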
/*
**********************************************************************
* MACRO: SWITCH_STACK
**********************************************************************
*
* USES: RA, RB
* Increment _ISR_Nest_level and switch r1 to interrupt
* stack if not already there.
*
* USES: RA, RB, cr0
* ON EXIT: RA, RB available, r1 points into interrupt
* stack.
*
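
A rough C model of SWITCH_STACK as documented above; _ISR_Nest_level is
the real RTEMS variable, but irq_stack_top and the exact "already there"
test are assumptions (the hunk below does not show how the macro decides):

#include <stdint.h>

extern volatile uint32_t _ISR_Nest_level;
extern uintptr_t irq_stack_top;      /* hypothetical stand-in */

uintptr_t switch_stack(uintptr_t r1)
{
    if (_ISR_Nest_level++ == 0)      /* outermost interrupt only...   */
        r1 = irq_stack_top;          /* ...hops onto the IRQ stack    */
    return r1;                       /* nested case: r1 already there */
}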
@@ -131,11 +141,39 @@ no_r1_reload_\FLVR:
* 4. branch
*
*/
/*
**********************************************************************
* MACRO: PPC_EXC_MIN_PROLOG_ASYNC
**********************************************************************
* USES: r3
* ON EXIT: vector in r3
*
* NOTES: r3 saved in special variable 'ppc_exc_gpr3_\_PRI'
*
*/
.macro PPC_EXC_MIN_PROLOG_ASYNC _NAME _VEC _PRI _FLVR
.global ppc_exc_min_prolog_async_\_NAME
ppc_exc_min_prolog_async_\_NAME:
/* Atomically write lock variable in 1st instruction with non-zero value
* (r1 is always nonzero; r13 could also be used)
*
* NOTE: raising an exception and executing this first instruction
* of the exception handler is apparently NOT atomic, i.e.,
* a low-priority IRQ could set the PC to this location and
* a critical IRQ could intervene just at this point.
*
* We check against this pathological case by checking the
* opcode/instruction at the interrupted PC for matching
*
* stw r1, ppc_exc_lock_XXX@sdarel(r13)
*
* ASSUMPTION:
* 1) ALL 'asynchronous' exceptions (which disable thread-
* dispatching) execute THIS 'magical' instruction
* FIRST.
* 2) This instruction (including the address offset)
* is not used anywhere else (probably a safe assumption).
*/
stw r1, ppc_exc_lock_\_PRI@sdarel(r13)
/* We have no stack frame yet; store r3 in special area;
@@ -151,6 +189,16 @@ ppc_exc_min_prolog_async_\_NAME:
ba wrap_\_FLVR
.endm
/*
**********************************************************************
* MACRO: PPC_EXC_MIN_PROLOG_SYNC
**********************************************************************
* USES: r3
* ON EXIT: vector in r3
*
* NOTES: exception stack frame pushed; r3 saved in frame
*
*/
.macro PPC_EXC_MIN_PROLOG_SYNC _NAME _VEC _PRI _FLVR
.global ppc_exc_min_prolog_sync_\_NAME
ppc_exc_min_prolog_sync_\_NAME:
@@ -160,33 +208,156 @@ ppc_exc_min_prolog_sync_\_NAME:
ba wrap_nopush_\_FLVR
.endm
.macro TEST_LOCK_std
/*
**********************************************************************
* MACRO: TEST_1ST_OPCODE_crit
**********************************************************************
*
* USES: REG, cr4
* ON EXIT: REG available (contains *pc - STW_R1_R13(0)), return value in cr4
*
* test opcode interrupted by critical (asynchronous) exception;
* set cr4 if
*
*SRR0 == 'stw r1, ppc_exc_lock_std@sdarel(r13)'
*
*/
.macro TEST_1ST_OPCODE_crit _REG _SRR0
mf\_SRR0 \_REG
lwz \_REG, 0(\_REG)
/* opcode now in REG */
/* subtract upper 16bits of 'stw r1, 0(r13)' instruction */
subis \_REG, \_REG, STW_R1_R13(0)@h
/*
* if what's left compares against the 'ppc_exc_lock_std@sdarel'
* address offset then we have a match...
*/
cmpli cr4, \_REG, ppc_exc_lock_std@sdarel
.endm
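
Read as C, the mfSRR0/lwz/subis/cmpli sequence is a single full-word
equality test against the complete lock-write instruction. A hedged
rendering, where lock_std_sdarel models the link-time
'ppc_exc_lock_std@sdarel' offset:

#include <stdint.h>

#define STW_R1_R13_HI 0x902D0000u    /* STW_R1_R13(0): high halfword only */
extern uint16_t lock_std_sdarel;     /* models ppc_exc_lock_std@sdarel    */

int opcode_is_std_lock_write(const uint32_t *srr0)
{
    uint32_t reg = *srr0;            /* mfSRR0 + lwz: fetch the opcode  */
    reg -= STW_R1_R13_HI;            /* subis reg, reg, STW_R1_R13(0)@h */
    return reg == lock_std_sdarel;   /* cmpli cr4, reg, ...@sdarel      */
}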
/*
**********************************************************************
* MACRO: TEST_1ST_OPCODE_mchk
**********************************************************************
* USES: REG, cr0, cr4
* ON EXIT: REG, cr0 available, return value in cr4
*
* test opcode interrupted by (asynchronous) machine-check exception;
* set cr4 if
*
*SRR0 == 'stw r1, ppc_exc_lock_std@sdarel(r13)'
*
* OR
*
*SRR0 == 'stw r1, ppc_exc_lock_crit@sdarel(r13)'
*
*/
.macro TEST_1ST_OPCODE_mchk _REG _SRR0
TEST_1ST_OPCODE_crit _REG=\_REG _SRR0=\_SRR0
cmpli cr0, \_REG, ppc_exc_lock_crit@sdarel
/* cr4 set if 1st opcode matches writing either lock */
cror EQ(cr4), EQ(cr4), EQ(cr0)
.endm
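
The machine-check variant just adds a second compare and ORs the results
(the cror above); in the same hedged C terms:

#include <stdint.h>

#define STW_R1_R13_HI 0x902D0000u
extern uint16_t lock_std_sdarel, lock_crit_sdarel;  /* modeled offsets */

int opcode_is_any_lock_write(const uint32_t *srr0)
{
    uint32_t rem = *srr0 - STW_R1_R13_HI;  /* TEST_1ST_OPCODE_crit part */
    return rem == lock_std_sdarel          /* cmpli cr4 (std lock)      */
        || rem == lock_crit_sdarel;        /* cmpli cr0 + cror          */
}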
/*
**********************************************************************
* MACRO: TEST_LOCK_std
**********************************************************************
*
* USES: cr4
* ON EXIT: cr4 is set (indicates no lower-priority locks are engaged)
*
*/
.macro TEST_LOCK_std _SRR0
/* 'std' is lowest level, i.e., cannot be locked -> EQ(cr4) = 1 */
creqv EQ(cr4), EQ(cr4), EQ(cr4)
.endm
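
The creqv may look cryptic: EQV is the complement of XOR, so 'x eqv x' is
always true and the instruction simply sets EQ(cr4) to 1 without needing
a scratch GPR. In C:

int always_true(int b) { return !(b ^ b); } /* b eqv b == 1 for any b */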
/* critical-exception wrapper has to check 'std' lock: */
.macro TEST_LOCK_crit
lwz r5, ppc_exc_lock_std@sdarel(r13)
cmpli cr4, r5, 0
.endm
/*
**********************************************************************
* MACRO: TEST_LOCK_crit
**********************************************************************
*
* USES: cr4, cr0, r4, r5
* ON EXIT: cr0, r4, r5 available, returns result in cr4
*
* critical-exception wrapper has to check 'std' lock:
*
* Return cr4 = ( ppc_exc_lock_std == 0
* && * _SRR0 != <write std lock instruction> )
*
*/
.macro TEST_LOCK_crit _SRR0
/* STD interrupt could have been interrupted before
* executing the 1st instruction which sets the lock;
* check this case by looking at the opcode present
* at the interrupted PC location.
*/
TEST_1ST_OPCODE_crit _REG=r4 _SRR0=\_SRR0
/*
* At this point cr4 is set if
*
* *(PC) == 'stw r1, ppc_exc_lock_std@sdarel(r13)'
*
*/
/* machine-check wrapper has to check 'std' and 'crit' locks */
.macro TEST_LOCK_mchk
/* check lock */
lwz r5, ppc_exc_lock_std@sdarel(r13)
cmpli cr4, r5, 0
lwz r5, ppc_exc_lock_crit@sdarel(r13)
cmpli cr0, r5, 0
crand EQ(cr4), EQ(cr4), EQ(cr0)
/*
*
* cr4 = ( *pc != <write std lock instruction>
* && ppc_exc_lock_std == 0 )
*/
crandc EQ(cr4), EQ(cr0), EQ(cr4)
.endm
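
Putting the pieces together, TEST_LOCK_crit in hedged C form: the crandc
computes cr4 = cr0 && !cr4, i.e., "lock clear AND opcode does not match".
opcode_is_std_lock_write() is the sketch from further above:

#include <stdint.h>

extern volatile uint32_t ppc_exc_lock_std;
extern int opcode_is_std_lock_write(const uint32_t *srr0); /* sketch above */

int no_std_section_active(const uint32_t *srr0)
{
    int lock_clear   = (ppc_exc_lock_std == 0);        /* lwz + cmpli cr0   */
    int about_to_set = opcode_is_std_lock_write(srr0); /* cr4 before crandc */
    return lock_clear && !about_to_set;                /* crandc cr4,cr0,cr4 */
}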
/* Minimal prologue snippets jump into WRAP
/*
**********************************************************************
* MACRO: TEST_LOCK_mchk
**********************************************************************
*
* USES: cr4, cr0, r4, r5
* ON EXIT: cr0, r4, r5 available, returns result in cr4
*
* machine-check wrapper has to check 'std' and 'crit' locks, i.e.,
*
* Return cr4 = ( * _SRR0 != <write std lock instruction>
*               && * _SRR0 != <write crit lock instruction>
*               && ppc_exc_lock_std == 0
*               && ppc_exc_lock_crit == 0 )
*/
.macro TEST_LOCK_mchk _SRR0
TEST_1ST_OPCODE_mchk _REG=r4 _SRR0=\_SRR0
/* cr4 set if 1st opcode matches writing either lock */
/* proceed checking the locks */
lwz r5, ppc_exc_lock_std@sdarel(r13)
lwz r4, ppc_exc_lock_crit@sdarel(r13)
/* set cr0 if neither lock is set */
or. r4, r4, r5
/* set cr4 if
* cr0 is set (neither lock set)
* AND cr4 is clear (interrupted opcode doesn't match writing any lock)
*/
crandc EQ(cr4), EQ(cr0), EQ(cr4)
.endm
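
And TEST_LOCK_mchk, with the same caveats: both lower-priority locks must
be clear ('or.' sets cr0 from std|crit) and the interrupted opcode must
not be either lock write:

#include <stdint.h>

extern volatile uint32_t ppc_exc_lock_std, ppc_exc_lock_crit;
extern int opcode_is_any_lock_write(const uint32_t *srr0);  /* sketch above */

int no_lower_priority_section_active(const uint32_t *srr0)
{
    int locks_clear = ((ppc_exc_lock_std | ppc_exc_lock_crit) == 0); /* or. */
    return locks_clear && !opcode_is_any_lock_write(srr0);          /* crandc */
}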
/*
**********************************************************************
* MACRO: WRAP
**********************************************************************
*
* Minimal prologue snippets jump into WRAP
* which prepares calling code common to all
* flavors of exceptions.
* We must have this macro instantiated for
* each possible flavor of exception so that
* we use the proper lock variable, SRR register pair and
* RFI instruction.
*
*/
.macro WRAP _FLVR _PRI _SRR0 _SRR1 _RFI
wrap_\_FLVR:
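
One would expect instantiations along the lines of
'WRAP _FLVR=std _PRI=std _SRR0=srr0 _SRR1=srr1 _RFI=rfi', with the
critical and machine-check flavors substituting their own SRR pair and
return instruction; the actual instantiations live in ppc_exc.S and are
not shown in this hunk.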
@@ -223,6 +394,9 @@ wrap_no_save_r14_\_FLVR:
* increment the thread-dispatch disable level
* in case a higher priority exception occurs
* we don't want it to run the scheduler.
* (It is safe to increment this w/o disabling
* higher priority interrupts since those will
* see that we wrote the lock anyways).
*/
lwz r5, _Thread_Dispatch_disable_level@sdarel(r13)
addi r5, r5, 1
@@ -235,7 +409,7 @@ wrap_no_save_r14_\_FLVR:
stw r5, ppc_exc_lock_\_PRI@sdarel(r13)
/* test lower-priority locks; result in (non-volatile) cr4 */
TEST_LOCK_\_PRI
TEST_LOCK_\_PRI _SRR0=\_SRR0
/* Perform stack switch if necessary */
SWITCH_STACK RA=r4 RB=r5 FLVR=\_FLVR
@@ -262,10 +436,23 @@ no_thread_dispatch_disable_\_FLVR:
mf\_SRR0 r4
mf\_SRR1 r5
/* branch to common routine */
/*
* branch to common routine;
*
* r1, r3, r4, r5, cr, lr and r14 are saved on the
* stack at this point.
*/
bl wrap_common
/* restore SRR, r4, r5, r1 (stack pointer) and lr */
/*
* restore SRRs, r4, r5, r1 (stack pointer) and lr;
* wrap_common restores r3, r14 and cr for us.
*
* NOTE: we restore r1 from the frame rather than
* just popping (adding to current r1) since the
* exception handler might have done strange things
* (e.g., a debugger moving and relocating the stack).
*/
mt\_SRR0 r4
mt\_SRR1 r5
/* restore lr */
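
To illustrate the r1 note above with an invented frame layout (backchain
is a stand-in for the saved-GPR1 slot; the real offsets differ):

#include <stdint.h>

struct exc_frame { uintptr_t backchain; /* stand-in for the saved r1 slot */ };

uintptr_t epilogue_sp(const struct exc_frame *f)
{
    /* reload the SP the frame recorded -- NOT (uintptr_t)f + sizeof *f --
     * so a handler that relocated the stack still returns correctly */
    return f->backchain;
}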

File: new-exceptions/bspsupport/ppc_exc_hdl.c

@@ -118,9 +118,12 @@ ppc_exc_wrapup(int ll_rval, BSP_Exception_frame *f)
*/
}
/* dispatch_disable level is decremented from assembly code. */
if ( _Context_Switch_necessary )
if ( _Context_Switch_necessary ) {
/* FIXME: I believe it should be OK to re-enable
* interrupts around the execution of _Thread_Dispatch();
*/
_Thread_Dispatch();
else if ( _ISR_Signals_to_thread_executing ) {
} else if ( _ISR_Signals_to_thread_executing ) {
_ISR_Signals_to_thread_executing = 0;
/*
* Process pending signals that have not already been