forked from Imagelibrary/rtems
SMP: Fix and optimize thread dispatching
According to the C11 and C++11 memory models only a read-modify-write operation guarantees that we read the last value written in modification order. Avoid the sequential consistent thread fence and instead use the inter-processor interrupt to set the thread dispatch necessary indicator.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013 embedded brains GmbH. All rights reserved.
|
||||
* Copyright (c) 2013-2015 embedded brains GmbH. All rights reserved.
|
||||
*
|
||||
* embedded brains GmbH
|
||||
* Dornierstr. 4
|
||||
@@ -62,6 +62,7 @@ void _CPU_SMP_Prepare_start_multitasking( void )
|
||||
|
||||
void _CPU_SMP_Send_interrupt( uint32_t target_processor_index )
|
||||
{
|
||||
_ARM_Data_memory_barrier();
|
||||
arm_gic_irq_generate_software_irq(
|
||||
ARM_GIC_IRQ_SGI_0,
|
||||
ARM_GIC_IRQ_SOFTWARE_IRQ_TO_ALL_IN_LIST,
|
||||
|
||||
@@ -235,5 +235,10 @@ void _CPU_SMP_Prepare_start_multitasking(void)
|
||||
|
||||
void _CPU_SMP_Send_interrupt(uint32_t target_processor_index)
|
||||
{
|
||||
#ifdef __PPC_CPU_E6500__
|
||||
ppc_light_weight_synchronize();
|
||||
#else
|
||||
ppc_synchronize_data();
|
||||
#endif
|
||||
qoriq.pic.ipidr [IPI_INDEX].reg = 1U << target_processor_index;
|
||||
}
|
||||
|
||||
@@ -236,24 +236,18 @@ check_is_executing:
|
||||
beq try_update_is_executing
|
||||
mov 1, %g1
|
||||
|
||||
! Check if a thread dispatch is necessary
|
||||
ldub [%g6 + PER_CPU_DISPATCH_NEEDED], %g1
|
||||
cmp %g1, 0
|
||||
beq check_is_executing
|
||||
nop
|
||||
|
||||
! We have a new heir
|
||||
|
||||
! Clear the thread dispatch necessary flag
|
||||
stub %g0, [%g6 + PER_CPU_DISPATCH_NEEDED]
|
||||
|
||||
! Here we assume a strong memory order, otherwise a memory barrier must
|
||||
! be inserted here
|
||||
! We may have a new heir
|
||||
|
||||
! Read the executing and heir
|
||||
ld [%g6 + PER_CPU_OFFSET_EXECUTING], %g1
|
||||
ld [%g6 + PER_CPU_OFFSET_HEIR], %g2
|
||||
|
||||
! Update the executing only if necessary to avoid cache line
|
||||
! monopolization.
|
||||
cmp %g1, %g2
|
||||
beq try_update_is_executing
|
||||
mov 1, %g1
|
||||
|
||||
! Calculate the heir context pointer
|
||||
sub %o1, %g1, %g1
|
||||
add %g1, %g2, %o1
|
||||
|
||||
@@ -413,12 +413,12 @@ check_is_executing:
|
||||
addi r6, r5, PPC_CONTEXT_OFFSET_IS_EXECUTING
|
||||
lwarx r7, r0, r6
|
||||
cmpwi r7, 0
|
||||
bne check_thread_dispatch_necessary
|
||||
bne get_potential_new_heir
|
||||
|
||||
/* Try to update the is executing indicator of the heir context */
|
||||
li r7, 1
|
||||
stwcx. r7, r0, r6
|
||||
bne check_thread_dispatch_necessary
|
||||
bne get_potential_new_heir
|
||||
isync
|
||||
#endif
|
||||
|
||||
@@ -536,26 +536,23 @@ PROC (_CPU_Context_restore):
|
||||
b restore_context
|
||||
|
||||
#ifdef RTEMS_SMP
|
||||
check_thread_dispatch_necessary:
|
||||
get_potential_new_heir:
|
||||
|
||||
GET_SELF_CPU_CONTROL r6
|
||||
|
||||
/* Check if a thread dispatch is necessary */
|
||||
lbz r7, PER_CPU_DISPATCH_NEEDED(r6)
|
||||
cmpwi r7, 0
|
||||
beq check_is_executing
|
||||
|
||||
/* We have a new heir */
|
||||
|
||||
/* Clear the thread dispatch necessary flag */
|
||||
li r7, 0
|
||||
stb r7, PER_CPU_DISPATCH_NEEDED(r6)
|
||||
msync
|
||||
/* We may have a new heir */
|
||||
|
||||
/* Read the executing and heir */
|
||||
lwz r7, PER_CPU_OFFSET_EXECUTING(r6)
|
||||
lwz r8, PER_CPU_OFFSET_HEIR(r6)
|
||||
|
||||
/*
|
||||
* Update the executing only if necessary to avoid cache line
|
||||
* monopolization.
|
||||
*/
|
||||
cmpw r7, r8
|
||||
beq check_is_executing
|
||||
|
||||
/* Calculate the heir context pointer */
|
||||
sub r7, r4, r7
|
||||
add r4, r8, r7
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2008-2014 embedded brains GmbH.
|
||||
* Copyright (c) 2008-2015 embedded brains GmbH.
|
||||
*
|
||||
* embedded brains GmbH
|
||||
* Dornierstr. 4
|
||||
@@ -206,6 +206,13 @@ static inline void ppc_synchronize_data(void)
|
||||
__asm__ volatile ("sync");
|
||||
}
|
||||
|
||||
static inline void ppc_light_weight_synchronize(void)
|
||||
{
|
||||
RTEMS_COMPILER_MEMORY_BARRIER();
|
||||
|
||||
__asm__ volatile ("lwsync");
|
||||
}
|
||||
|
||||
static inline void ppc_synchronize_instructions(void)
|
||||
{
|
||||
RTEMS_COMPILER_MEMORY_BARRIER();
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
* COPYRIGHT (c) 2000 Canon Research Centre France SA.
|
||||
* Emmanuel Raguet, mailto:raguet@crf.canon.fr
|
||||
*
|
||||
* Copyright (c) 2013-2014 embedded brains GmbH
|
||||
* Copyright (c) 2013-2015 embedded brains GmbH
|
||||
*
|
||||
* The license and distribution terms for this file may be
|
||||
* found in the file LICENSE in this distribution or at
|
||||
@@ -80,13 +80,13 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
|
||||
add r3, r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET
|
||||
ldrexb r4, [r3]
|
||||
cmp r4, #0
|
||||
bne .L_check_thread_dispatch_necessary
|
||||
bne .L_get_potential_new_heir
|
||||
|
||||
/* Try to update the is executing indicator of the heir context */
|
||||
mov r4, #1
|
||||
strexb r5, r4, [r3]
|
||||
cmp r5, #0
|
||||
bne .L_check_thread_dispatch_necessary
|
||||
bne .L_get_potential_new_heir
|
||||
dmb
|
||||
#endif
|
||||
|
||||
@@ -126,26 +126,23 @@ DEFINE_FUNCTION_ARM(_CPU_Context_restore)
|
||||
b .L_restore
|
||||
|
||||
#ifdef RTEMS_SMP
|
||||
.L_check_thread_dispatch_necessary:
|
||||
.L_get_potential_new_heir:
|
||||
|
||||
GET_SELF_CPU_CONTROL r2, r3
|
||||
|
||||
/* Check if a thread dispatch is necessary */
|
||||
ldrb r4, [r2, #PER_CPU_DISPATCH_NEEDED]
|
||||
cmp r4, #0
|
||||
beq .L_check_is_executing
|
||||
|
||||
/* We have a new heir */
|
||||
|
||||
/* Clear the thread dispatch necessary flag */
|
||||
mov r4, #0
|
||||
strb r4, [r2, #PER_CPU_DISPATCH_NEEDED]
|
||||
dmb
|
||||
/* We may have a new heir */
|
||||
|
||||
/* Read the executing and heir */
|
||||
ldr r4, [r2, #PER_CPU_OFFSET_EXECUTING]
|
||||
ldr r5, [r2, #PER_CPU_OFFSET_HEIR]
|
||||
|
||||
/*
|
||||
* Update the executing only if necessary to avoid cache line
|
||||
* monopolization.
|
||||
*/
|
||||
cmp r4, r5
|
||||
beq .L_check_is_executing
|
||||
|
||||
/* Calculate the heir context pointer */
|
||||
sub r4, r1, r4
|
||||
add r1, r5, r4
|
||||
|
||||
@@ -559,34 +559,41 @@ typedef struct {
|
||||
*
|
||||
* This field must be updated during a context switch. The context switch
|
||||
* to the heir must wait until the heir context indicates that it is no
|
||||
* longer executing on a processor. The context switch must also check if
|
||||
* a thread dispatch is necessary to honor updates of the heir thread for
|
||||
* this processor. This indicator must be updated using an atomic test and
|
||||
* set operation to ensure that at most one processor uses the heir
|
||||
* context at the same time.
|
||||
* longer executing on a processor. This indicator must be updated using
|
||||
* an atomic test and set operation to ensure that at most one processor
|
||||
* uses the heir context at the same time. The context switch must also
|
||||
* check for a potential new heir thread for this processor in case the
|
||||
* heir context is not immediately available. Update the executing thread
|
||||
* for this processor only if necessary to avoid a cache line
|
||||
* monopolization.
|
||||
*
|
||||
* @code
|
||||
* void _CPU_Context_switch(
|
||||
* Context_Control *executing,
|
||||
* Context_Control *heir
|
||||
* Context_Control *executing_context,
|
||||
* Context_Control *heir_context
|
||||
* )
|
||||
* {
|
||||
* save( executing );
|
||||
* save( executing_context );
|
||||
*
|
||||
* executing->is_executing = false;
|
||||
* executing_context->is_executing = false;
|
||||
* memory_barrier();
|
||||
*
|
||||
* if ( test_and_set( &heir->is_executing ) ) {
|
||||
* if ( test_and_set( &heir_context->is_executing ) ) {
|
||||
* do {
|
||||
* Per_CPU_Control *cpu_self = _Per_CPU_Get_snapshot();
|
||||
* Thread_Control *executing = cpu_self->executing;
|
||||
* Thread_Control *heir = cpu_self->heir;
|
||||
*
|
||||
* if ( cpu_self->dispatch_necessary ) {
|
||||
* heir = _Thread_Get_heir_and_make_it_executing( cpu_self );
|
||||
* if ( heir != executing ) {
|
||||
* cpu_self->executing = heir;
|
||||
* heir_context = (Context_Control *)
|
||||
* ((uintptr_t) heir + (uintptr_t) executing_context
|
||||
* - (uintptr_t) executing)
|
||||
* }
|
||||
* } while ( test_and_set( &heir->is_executing ) );
|
||||
* } while ( test_and_set( &heir_context->is_executing ) );
|
||||
* }
|
||||
*
|
||||
* restore( heir );
|
||||
* restore( heir_context );
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
@@ -1578,6 +1585,10 @@ register struct Per_CPU_Control *_CPU_Per_CPU_current asm( "rX" );
|
||||
* @brief Sends an inter-processor interrupt to the specified target
|
||||
* processor.
|
||||
*
|
||||
* This interrupt send and the corresponding inter-processor interrupt must
|
||||
* act as an release/acquire barrier so that all values written by the
|
||||
* sending processor are visible to the target processor.
|
||||
*
|
||||
* This operation is undefined for target processor indices out of range.
|
||||
*
|
||||
* @param[in] target_processor_index The target processor index.
|
||||
|
||||
@@ -279,9 +279,11 @@ typedef struct Per_CPU_Control {
|
||||
* @brief This is the heir thread for this processor.
|
||||
*
|
||||
* This field is not protected by a lock. The only writer after multitasking
|
||||
* start is the scheduler owning this processor. This processor will set the
|
||||
* dispatch necessary indicator to false, before it reads the heir. This
|
||||
* field is used in combination with the dispatch necessary indicator.
|
||||
* start is the scheduler owning this processor. It is assumed that stores
|
||||
* to pointers are atomic on all supported SMP architectures. The CPU port
|
||||
* specific code (inter-processor interrupt handling and
|
||||
* _CPU_SMP_Send_interrupt()) must guarantee that this processor observes the
|
||||
* last value written.
|
||||
*
|
||||
* A thread can be a heir on at most one processor in the system.
|
||||
*
|
||||
@@ -290,16 +292,15 @@ typedef struct Per_CPU_Control {
|
||||
struct _Thread_Control *heir;
|
||||
|
||||
/**
|
||||
* @brief This is set to true when this processor needs to run the
|
||||
* @brief This is set to true when this processor needs to run the thread
|
||||
* dispatcher.
|
||||
*
|
||||
* It is volatile since interrupts may alter this flag.
|
||||
*
|
||||
* This field is not protected by a lock. There are two writers after
|
||||
* multitasking start. The scheduler owning this processor sets this
|
||||
* indicator to true, after it updated the heir field. This processor sets
|
||||
* this indicator to false, before it reads the heir. This field is used in
|
||||
* combination with the heir field.
|
||||
* This field is not protected by a lock and must be accessed only by this
|
||||
* processor. Code (e.g. scheduler and post-switch action requests) running
|
||||
* on another processors must use an inter-processor interrupt to set the
|
||||
* thread dispatch necessary indicator to true.
|
||||
*
|
||||
* @see _Thread_Get_heir_and_make_it_executing().
|
||||
*/
|
||||
|
||||
@@ -146,6 +146,12 @@ static inline void _SMP_Inter_processor_interrupt_handler( void )
|
||||
{
|
||||
Per_CPU_Control *cpu_self = _Per_CPU_Get();
|
||||
|
||||
/*
|
||||
* In the common case the inter-processor interrupt is issued to carry out a
|
||||
* thread dispatch.
|
||||
*/
|
||||
cpu_self->dispatch_necessary = true;
|
||||
|
||||
if ( _Atomic_Load_ulong( &cpu_self->message, ATOMIC_ORDER_RELAXED ) != 0 ) {
|
||||
unsigned long message = _Atomic_Exchange_ulong(
|
||||
&cpu_self->message,
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
* COPYRIGHT (c) 1989-2008.
|
||||
* On-Line Applications Research Corporation (OAR).
|
||||
*
|
||||
* Copyright (c) 2014 embedded brains GmbH.
|
||||
* Copyright (c) 2014-2015 embedded brains GmbH.
|
||||
*
|
||||
* The license and distribution terms for this file may be
|
||||
* found in the file LICENSE in this distribution or at
|
||||
@@ -793,7 +793,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Internal_allocate( void )
|
||||
/**
|
||||
* @brief Gets the heir of the processor and makes it executing.
|
||||
*
|
||||
* The thread dispatch necessary indicator is cleared as a side-effect.
|
||||
* Must be called with interrupts disabled. The thread dispatch necessary
|
||||
* indicator is cleared as a side-effect.
|
||||
*
|
||||
* @return The heir thread.
|
||||
*
|
||||
@@ -806,18 +807,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Get_heir_and_make_it_executing(
|
||||
{
|
||||
Thread_Control *heir;
|
||||
|
||||
cpu_self->dispatch_necessary = false;
|
||||
|
||||
#if defined( RTEMS_SMP )
|
||||
/*
|
||||
* It is critical that we first update the dispatch necessary and then the
|
||||
* read the heir so that we don't miss an update by
|
||||
* _Thread_Dispatch_update_heir().
|
||||
*/
|
||||
_Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
|
||||
#endif
|
||||
|
||||
heir = cpu_self->heir;
|
||||
cpu_self->dispatch_necessary = false;
|
||||
cpu_self->executing = heir;
|
||||
|
||||
return heir;
|
||||
@@ -832,24 +823,11 @@ RTEMS_INLINE_ROUTINE void _Thread_Dispatch_update_heir(
|
||||
{
|
||||
cpu_for_heir->heir = heir;
|
||||
|
||||
/*
|
||||
* It is critical that we first update the heir and then the dispatch
|
||||
* necessary so that _Thread_Get_heir_and_make_it_executing() cannot miss an
|
||||
* update.
|
||||
*/
|
||||
_Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
|
||||
|
||||
/*
|
||||
* Only update the dispatch necessary indicator if not already set to
|
||||
* avoid superfluous inter-processor interrupts.
|
||||
*/
|
||||
if ( !cpu_for_heir->dispatch_necessary ) {
|
||||
cpu_for_heir->dispatch_necessary = true;
|
||||
|
||||
if ( cpu_for_heir != cpu_self ) {
|
||||
if ( cpu_for_heir == cpu_self ) {
|
||||
cpu_self->dispatch_necessary = true;
|
||||
} else {
|
||||
_Per_CPU_Send_interrupt( cpu_for_heir );
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -930,12 +908,15 @@ RTEMS_INLINE_ROUTINE void _Thread_Add_post_switch_action(
|
||||
ISR_Level level;
|
||||
|
||||
cpu_of_thread = _Thread_Action_ISR_disable_and_acquire( thread, &level );
|
||||
cpu_of_thread->dispatch_necessary = true;
|
||||
|
||||
#if defined(RTEMS_SMP)
|
||||
if ( _Per_CPU_Get() != cpu_of_thread ) {
|
||||
if ( _Per_CPU_Get() == cpu_of_thread ) {
|
||||
cpu_of_thread->dispatch_necessary = true;
|
||||
} else {
|
||||
_Per_CPU_Send_interrupt( cpu_of_thread );
|
||||
}
|
||||
#else
|
||||
cpu_of_thread->dispatch_necessary = true;
|
||||
#endif
|
||||
|
||||
_Chain_Append_if_is_off_chain_unprotected(
|
||||
|
||||
@@ -201,7 +201,6 @@ static void delay_ipi_task(rtems_task_argument variant)
|
||||
ISR_Level level;
|
||||
|
||||
_ISR_Disable_without_giant(level);
|
||||
(void) level;
|
||||
|
||||
/* (C) */
|
||||
barrier(ctx, &ctx->worker_barrier_state);
|
||||
@@ -216,6 +215,8 @@ static void delay_ipi_task(rtems_task_argument variant)
|
||||
_Thread_Disable_dispatch();
|
||||
}
|
||||
|
||||
_ISR_Enable_without_giant(level);
|
||||
|
||||
/*
|
||||
* We get deleted as a side effect of enabling the thread life protection or
|
||||
* later if we enable the thread dispatching.
|
||||
|
||||
Reference in New Issue
Block a user