SMP: Fix and optimize thread dispatching

According to the C11 and C++11 memory models, only a read-modify-write
operation guarantees that we read the last value written in modification
order.  Avoid the sequentially consistent thread fence and instead use
the inter-processor interrupt to set the thread dispatch necessary
indicator.
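
A minimal C11 sketch of this distinction (illustrative only, not part of
the change; the interrupt handler below consumes messages the same way
via _Atomic_Exchange_ulong()):

#include <stdatomic.h>

static _Atomic unsigned long message;

/* A sequentially consistent fence orders this load, but the load may
 * still observe a stale value in the modification order of "message". */
unsigned long peek_message( void )
{
  atomic_thread_fence( memory_order_seq_cst );
  return atomic_load_explicit( &message, memory_order_relaxed );
}

/* A read-modify-write is guaranteed to read the last value written in
 * modification order and consumes the message in one step. */
unsigned long take_message( void )
{
  return atomic_exchange_explicit( &message, 0, memory_order_relaxed );
}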
Sebastian Huber
2015-09-25 14:34:24 +02:00
parent aee6a1d05f
commit 258ad71e96
11 changed files with 101 additions and 100 deletions


@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 embedded brains GmbH. All rights reserved.
+ * Copyright (c) 2013-2015 embedded brains GmbH. All rights reserved.
  *
  *  embedded brains GmbH
  *  Dornierstr. 4
@@ -62,6 +62,7 @@ void _CPU_SMP_Prepare_start_multitasking( void )
 void _CPU_SMP_Send_interrupt( uint32_t target_processor_index )
 {
+  _ARM_Data_memory_barrier();
   arm_gic_irq_generate_software_irq(
     ARM_GIC_IRQ_SGI_0,
     ARM_GIC_IRQ_SOFTWARE_IRQ_TO_ALL_IN_LIST,


@@ -235,5 +235,10 @@ void _CPU_SMP_Prepare_start_multitasking(void)
 void _CPU_SMP_Send_interrupt(uint32_t target_processor_index)
 {
+#ifdef __PPC_CPU_E6500__
+  ppc_light_weight_synchronize();
+#else
+  ppc_synchronize_data();
+#endif
   qoriq.pic.ipidr [IPI_INDEX].reg = 1U << target_processor_index;
 }


@@ -236,24 +236,18 @@ check_is_executing:
 	beq	try_update_is_executing
 	 mov	1, %g1
 
-	! Check if a thread dispatch is necessary
-	ldub	[%g6 + PER_CPU_DISPATCH_NEEDED], %g1
-	cmp	%g1, 0
-	beq	check_is_executing
-	 nop
-
-	! We have a new heir
-
-	! Clear the thread dispatch necessary flag
-	stub	%g0, [%g6 + PER_CPU_DISPATCH_NEEDED]
-
-	! Here we assume a strong memory order, otherwise a memory barrier must
-	! be inserted here
+	! We may have a new heir
 
 	! Read the executing and heir
 	ld	[%g6 + PER_CPU_OFFSET_EXECUTING], %g1
 	ld	[%g6 + PER_CPU_OFFSET_HEIR], %g2
 
+	! Update the executing only if necessary to avoid cache line
+	! monopolization.
+	cmp	%g1, %g2
+	beq	check_is_executing
+	 mov	1, %g1
+
 	! Calculate the heir context pointer
 	sub	%o1, %g1, %g1
 	add	%g1, %g2, %o1


@@ -413,12 +413,12 @@ check_is_executing:
 	addi	r6, r5, PPC_CONTEXT_OFFSET_IS_EXECUTING
 	lwarx	r7, r0, r6
 	cmpwi	r7, 0
-	bne	check_thread_dispatch_necessary
+	bne	get_potential_new_heir
 
 	/* Try to update the is executing indicator of the heir context */
 	li	r7, 1
 	stwcx.	r7, r0, r6
-	bne	check_thread_dispatch_necessary
+	bne	get_potential_new_heir
 	isync
 #endif
@@ -536,26 +536,23 @@ PROC (_CPU_Context_restore):
 	b	restore_context
 
 #ifdef RTEMS_SMP
-check_thread_dispatch_necessary:
+get_potential_new_heir:
 
 	GET_SELF_CPU_CONTROL	r6
 
-	/* Check if a thread dispatch is necessary */
-	lbz	r7, PER_CPU_DISPATCH_NEEDED(r6)
-	cmpwi	r7, 0
-	beq	check_is_executing
-
-	/* We have a new heir */
-
-	/* Clear the thread dispatch necessary flag */
-	li	r7, 0
-	stb	r7, PER_CPU_DISPATCH_NEEDED(r6)
-	msync
+	/* We may have a new heir */
 
 	/* Read the executing and heir */
 	lwz	r7, PER_CPU_OFFSET_EXECUTING(r6)
 	lwz	r8, PER_CPU_OFFSET_HEIR(r6)
 
+	/*
+	 * Update the executing only if necessary to avoid cache line
+	 * monopolization.
+	 */
+	cmpw	r7, r8
+	beq	check_is_executing
+
 	/* Calculate the heir context pointer */
 	sub	r7, r4, r7
 	add	r4, r8, r7


@@ -8,7 +8,7 @@
  */
 
 /*
- * Copyright (c) 2008-2014 embedded brains GmbH.
+ * Copyright (c) 2008-2015 embedded brains GmbH.
  *
  *  embedded brains GmbH
  *  Dornierstr. 4
@@ -206,6 +206,13 @@ static inline void ppc_synchronize_data(void)
 	__asm__ volatile ("sync");
 }
 
+static inline void ppc_light_weight_synchronize(void)
+{
+	RTEMS_COMPILER_MEMORY_BARRIER();
+	__asm__ volatile ("lwsync");
+}
+
 static inline void ppc_synchronize_instructions(void)
 {
 	RTEMS_COMPILER_MEMORY_BARRIER();


@@ -19,7 +19,7 @@
  *  COPYRIGHT (c) 2000 Canon Research Centre France SA.
  *  Emmanuel Raguet, mailto:raguet@crf.canon.fr
  *
- *  Copyright (c) 2013-2014 embedded brains GmbH
+ *  Copyright (c) 2013-2015 embedded brains GmbH
  *
  *  The license and distribution terms for this file may be
  *  found in the file LICENSE in this distribution or at
@@ -80,13 +80,13 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
 	add	r3, r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET
 	ldrexb	r4, [r3]
 	cmp	r4, #0
-	bne	.L_check_thread_dispatch_necessary
+	bne	.L_get_potential_new_heir
 
 	/* Try to update the is executing indicator of the heir context */
 	mov	r4, #1
 	strexb	r5, r4, [r3]
 	cmp	r5, #0
-	bne	.L_check_thread_dispatch_necessary
+	bne	.L_get_potential_new_heir
 	dmb
 #endif
@@ -126,26 +126,23 @@ DEFINE_FUNCTION_ARM(_CPU_Context_restore)
 	b	.L_restore
 
 #ifdef RTEMS_SMP
-.L_check_thread_dispatch_necessary:
+.L_get_potential_new_heir:
 
 	GET_SELF_CPU_CONTROL	r2, r3
 
-	/* Check if a thread dispatch is necessary */
-	ldrb	r4, [r2, #PER_CPU_DISPATCH_NEEDED]
-	cmp	r4, #0
-	beq	.L_check_is_executing
-
-	/* We have a new heir */
-
-	/* Clear the thread dispatch necessary flag */
-	mov	r4, #0
-	strb	r4, [r2, #PER_CPU_DISPATCH_NEEDED]
-	dmb
+	/* We may have a new heir */
 
 	/* Read the executing and heir */
 	ldr	r4, [r2, #PER_CPU_OFFSET_EXECUTING]
 	ldr	r5, [r2, #PER_CPU_OFFSET_HEIR]
 
+	/*
+	 * Update the executing only if necessary to avoid cache line
+	 * monopolization.
+	 */
+	cmp	r4, r5
+	beq	.L_check_is_executing
+
 	/* Calculate the heir context pointer */
 	sub	r4, r1, r4
 	add	r1, r5, r4


@@ -559,34 +559,41 @@ typedef struct {
  *
  * This field must be updated during a context switch.  The context switch
  * to the heir must wait until the heir context indicates that it is no
- * longer executing on a processor.  The context switch must also check if
- * a thread dispatch is necessary to honor updates of the heir thread for
- * this processor.  This indicator must be updated using an atomic test and
- * set operation to ensure that at most one processor uses the heir
- * context at the same time.
+ * longer executing on a processor.  This indicator must be updated using
+ * an atomic test and set operation to ensure that at most one processor
+ * uses the heir context at the same time.  The context switch must also
+ * check for a potential new heir thread for this processor in case the
+ * heir context is not immediately available.  Update the executing thread
+ * for this processor only if necessary to avoid a cache line
+ * monopolization.
  *
  * @code
  * void _CPU_Context_switch(
- *   Context_Control *executing,
- *   Context_Control *heir
+ *   Context_Control *executing_context,
+ *   Context_Control *heir_context
  * )
  * {
- *   save( executing );
+ *   save( executing_context );
  *
- *   executing->is_executing = false;
+ *   executing_context->is_executing = false;
  *   memory_barrier();
  *
- *   if ( test_and_set( &heir->is_executing ) ) {
+ *   if ( test_and_set( &heir_context->is_executing ) ) {
  *     do {
  *       Per_CPU_Control *cpu_self = _Per_CPU_Get_snapshot();
+ *       Thread_Control *executing = cpu_self->executing;
+ *       Thread_Control *heir = cpu_self->heir;
  *
- *       if ( cpu_self->dispatch_necessary ) {
- *         heir = _Thread_Get_heir_and_make_it_executing( cpu_self );
+ *       if ( heir != executing ) {
+ *         cpu_self->executing = heir;
+ *         heir_context = (Context_Control *)
+ *           ((uintptr_t) heir + (uintptr_t) executing_context
+ *             - (uintptr_t) executing);
  *       }
- *     } while ( test_and_set( &heir->is_executing ) );
+ *     } while ( test_and_set( &heir_context->is_executing ) );
  *   }
  *
- *   restore( heir );
+ *   restore( heir_context );
  * }
 * @endcode
 */
@@ -1578,6 +1585,10 @@ register struct Per_CPU_Control *_CPU_Per_CPU_current asm( "rX" );
  * @brief Sends an inter-processor interrupt to the specified target
  *   processor.
  *
+ * This interrupt send and the corresponding inter-processor interrupt must
+ * act as a release/acquire barrier so that all values written by the
+ * sending processor are visible to the target processor.
+ *
  * This operation is undefined for target processor indices out of range.
  *
  * @param[in] target_processor_index The target processor index.
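
The ARM and QoriQ hunks above meet this contract by issuing a data memory
barrier or a (lw)sync right before raising the interrupt.  A minimal
sketch of the pattern (hypothetical trigger register; GCC __atomic
builtins assumed):

#include <stdint.h>

/* Hypothetical memory-mapped register that raises the IPI. */
extern volatile uint32_t *ipi_trigger;

void cpu_smp_send_interrupt( uint32_t target_processor_index )
{
  /* Release side: make all prior stores (e.g. the heir update) visible
   * before the target processor can observe the interrupt. */
  __atomic_thread_fence( __ATOMIC_RELEASE );
  *ipi_trigger = UINT32_C( 1 ) << target_processor_index;
}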


@@ -279,9 +279,11 @@ typedef struct Per_CPU_Control {
    * @brief This is the heir thread for this processor.
    *
    * This field is not protected by a lock.  The only writer after multitasking
-   * start is the scheduler owning this processor.  This processor will set the
-   * dispatch necessary indicator to false, before it reads the heir.  This
-   * field is used in combination with the dispatch necessary indicator.
+   * start is the scheduler owning this processor.  It is assumed that stores
+   * to pointers are atomic on all supported SMP architectures.  The CPU port
+   * specific code (inter-processor interrupt handling and
+   * _CPU_SMP_Send_interrupt()) must guarantee that this processor observes
+   * the last value written.
    *
    * A thread can be a heir on at most one processor in the system.
    *
@@ -290,16 +292,15 @@ typedef struct Per_CPU_Control {
   struct _Thread_Control *heir;
 
   /**
-   * @brief This is set to true when this processor needs to run the
-   *   dispatcher.
+   * @brief This is set to true when this processor needs to run the thread
+   *   dispatcher.
    *
    * It is volatile since interrupts may alter this flag.
    *
-   * This field is not protected by a lock.  There are two writers after
-   * multitasking start.  The scheduler owning this processor sets this
-   * indicator to true, after it updated the heir field.  This processor sets
-   * this indicator to false, before it reads the heir.  This field is used in
-   * combination with the heir field.
+   * This field is not protected by a lock and must be accessed only by this
+   * processor.  Code (e.g. scheduler and post-switch action requests) running
+   * on other processors must use an inter-processor interrupt to set the
+   * thread dispatch necessary indicator to true.
    *
    * @see _Thread_Get_heir_and_make_it_executing().
    */
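
A sketch of the resulting ownership protocol (illustrative names; the
actual code is _Thread_Dispatch_update_heir() and
_SMP_Inter_processor_interrupt_handler() in the hunks below):

#include <stdbool.h>

struct thread;

typedef struct per_cpu {
  struct thread *volatile heir;      /* pointer stores assumed atomic */
  volatile bool dispatch_necessary;  /* written only by this processor */
} per_cpu;

/* Hypothetical stand-in for _Per_CPU_Send_interrupt(). */
extern void send_interrupt( per_cpu *cpu );

/* Scheduler path: publish the heir, then let the owning processor set
 * its own flag, via an inter-processor interrupt if it is remote. */
void update_heir( per_cpu *cpu_for_heir, per_cpu *cpu_self, struct thread *heir )
{
  cpu_for_heir->heir = heir;

  if ( cpu_for_heir == cpu_self ) {
    cpu_self->dispatch_necessary = true;
  } else {
    send_interrupt( cpu_for_heir );
  }
}

/* Handler on the target processor: the only path by which another
 * processor causes this flag to become true. */
void interrupt_handler( per_cpu *cpu_self )
{
  cpu_self->dispatch_necessary = true;
}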


@@ -146,6 +146,12 @@ static inline void _SMP_Inter_processor_interrupt_handler( void )
 {
   Per_CPU_Control *cpu_self = _Per_CPU_Get();
 
+  /*
+   * In the common case the inter-processor interrupt is issued to carry out a
+   * thread dispatch.
+   */
+  cpu_self->dispatch_necessary = true;
+
   if ( _Atomic_Load_ulong( &cpu_self->message, ATOMIC_ORDER_RELAXED ) != 0 ) {
     unsigned long message = _Atomic_Exchange_ulong(
       &cpu_self->message,


@@ -11,7 +11,7 @@
  *  COPYRIGHT (c) 1989-2008.
  *  On-Line Applications Research Corporation (OAR).
  *
- *  Copyright (c) 2014 embedded brains GmbH.
+ *  Copyright (c) 2014-2015 embedded brains GmbH.
  *
  *  The license and distribution terms for this file may be
  *  found in the file LICENSE in this distribution or at
@@ -793,7 +793,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Internal_allocate( void )
 /**
  * @brief Gets the heir of the processor and makes it executing.
  *
- * The thread dispatch necessary indicator is cleared as a side-effect.
+ * Must be called with interrupts disabled.  The thread dispatch necessary
+ * indicator is cleared as a side-effect.
  *
  * @return The heir thread.
 *
@@ -806,18 +807,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Get_heir_and_make_it_executing(
 {
   Thread_Control *heir;
 
-  cpu_self->dispatch_necessary = false;
-
-#if defined( RTEMS_SMP )
-  /*
-   * It is critical that we first update the dispatch necessary and then
-   * read the heir so that we don't miss an update by
-   * _Thread_Dispatch_update_heir().
-   */
-  _Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
-#endif
-
   heir = cpu_self->heir;
+  cpu_self->dispatch_necessary = false;
   cpu_self->executing = heir;
 
   return heir;
@@ -832,24 +823,11 @@ RTEMS_INLINE_ROUTINE void _Thread_Dispatch_update_heir(
 {
   cpu_for_heir->heir = heir;
 
-  /*
-   * It is critical that we first update the heir and then the dispatch
-   * necessary so that _Thread_Get_heir_and_make_it_executing() cannot miss an
-   * update.
-   */
-  _Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
-
-  /*
-   * Only update the dispatch necessary indicator if not already set to
-   * avoid superfluous inter-processor interrupts.
-   */
-  if ( !cpu_for_heir->dispatch_necessary ) {
-    cpu_for_heir->dispatch_necessary = true;
-
-    if ( cpu_for_heir != cpu_self ) {
+  if ( cpu_for_heir == cpu_self ) {
+    cpu_self->dispatch_necessary = true;
+  } else {
     _Per_CPU_Send_interrupt( cpu_for_heir );
   }
-  }
 }
 
 #endif
@@ -930,12 +908,15 @@ RTEMS_INLINE_ROUTINE void _Thread_Add_post_switch_action(
   ISR_Level level;
 
   cpu_of_thread = _Thread_Action_ISR_disable_and_acquire( thread, &level );
-  cpu_of_thread->dispatch_necessary = true;
 
 #if defined(RTEMS_SMP)
-  if ( _Per_CPU_Get() != cpu_of_thread ) {
+  if ( _Per_CPU_Get() == cpu_of_thread ) {
+    cpu_of_thread->dispatch_necessary = true;
+  } else {
     _Per_CPU_Send_interrupt( cpu_of_thread );
   }
+#else
+  cpu_of_thread->dispatch_necessary = true;
 #endif
 
   _Chain_Append_if_is_off_chain_unprotected(


@@ -201,7 +201,6 @@ static void delay_ipi_task(rtems_task_argument variant)
   ISR_Level level;
 
   _ISR_Disable_without_giant(level);
-  (void) level;
 
   /* (C) */
   barrier(ctx, &ctx->worker_barrier_state);
@@ -216,6 +215,8 @@ static void delay_ipi_task(rtems_task_argument variant)
     _Thread_Disable_dispatch();
   }
 
+  _ISR_Enable_without_giant(level);
+
   /*
   * We get deleted as a side effect of enabling the thread life protection or
   * later if we enable the thread dispatching.