[rtems commit] SMP: Fix and optimize thread dispatching
Sebastian Huber
sebh at rtems.org
Mon Sep 28 12:03:31 UTC 2015
Module: rtems
Branch: master
Commit: 258ad71e9626c16f30b40e06c321326636c976ff
Changeset: http://git.rtems.org/rtems/commit/?id=258ad71e9626c16f30b40e06c321326636c976ff
Author: Sebastian Huber <sebastian.huber at embedded-brains.de>
Date: Fri Sep 25 14:34:24 2015 +0200
SMP: Fix and optimize thread dispatching
According to the C11 and C++11 memory models only a read-modify-write
operation guarantees that we read the last value written in modification
order. Avoid the sequential consistent thread fence and instead use the
inter-processor interrupt to set the thread dispatch necessary
indicator.
---
c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c | 3 +-
c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c | 5 +++
c/src/lib/libbsp/sparc/shared/irq_asm.S | 20 ++++------
c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S | 25 ++++++------
.../powerpc/shared/include/powerpc-utility.h | 9 ++++-
cpukit/score/cpu/arm/cpu_asm.S | 27 ++++++-------
cpukit/score/cpu/no_cpu/rtems/score/cpu.h | 39 ++++++++++++-------
cpukit/score/include/rtems/score/percpu.h | 19 ++++-----
cpukit/score/include/rtems/score/smpimpl.h | 6 +++
cpukit/score/include/rtems/score/threadimpl.h | 45 +++++++---------------
testsuites/smptests/smpthreadlife01/init.c | 3 +-
11 files changed, 101 insertions(+), 100 deletions(-)
diff --git a/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c b/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c
index f2c0201..7e939ff 100644
--- a/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c
+++ b/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 embedded brains GmbH. All rights reserved.
+ * Copyright (c) 2013-2015 embedded brains GmbH. All rights reserved.
*
* embedded brains GmbH
* Dornierstr. 4
@@ -62,6 +62,7 @@ void _CPU_SMP_Prepare_start_multitasking( void )
void _CPU_SMP_Send_interrupt( uint32_t target_processor_index )
{
+ _ARM_Data_memory_barrier();
arm_gic_irq_generate_software_irq(
ARM_GIC_IRQ_SGI_0,
ARM_GIC_IRQ_SOFTWARE_IRQ_TO_ALL_IN_LIST,
diff --git a/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c b/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c
index 0b0743b..8952d3e 100644
--- a/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c
+++ b/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c
@@ -235,5 +235,10 @@ void _CPU_SMP_Prepare_start_multitasking(void)
void _CPU_SMP_Send_interrupt(uint32_t target_processor_index)
{
+#ifdef __PPC_CPU_E6500__
+ ppc_light_weight_synchronize();
+#else
+ ppc_synchronize_data();
+#endif
qoriq.pic.ipidr [IPI_INDEX].reg = 1U << target_processor_index;
}
diff --git a/c/src/lib/libbsp/sparc/shared/irq_asm.S b/c/src/lib/libbsp/sparc/shared/irq_asm.S
index f7222e7..9d8600e 100644
--- a/c/src/lib/libbsp/sparc/shared/irq_asm.S
+++ b/c/src/lib/libbsp/sparc/shared/irq_asm.S
@@ -236,24 +236,18 @@ check_is_executing:
beq try_update_is_executing
mov 1, %g1
- ! Check if a thread dispatch is necessary
- ldub [%g6 + PER_CPU_DISPATCH_NEEDED], %g1
- cmp %g1, 0
- beq check_is_executing
- nop
-
- ! We have a new heir
-
- ! Clear the thread dispatch necessary flag
- stub %g0, [%g6 + PER_CPU_DISPATCH_NEEDED]
-
- ! Here we assume a strong memory order, otherwise a memory barrier must
- ! be inserted here
+ ! We may have a new heir
! Read the executing and heir
ld [%g6 + PER_CPU_OFFSET_EXECUTING], %g1
ld [%g6 + PER_CPU_OFFSET_HEIR], %g2
+ ! Update the executing only if necessary to avoid cache line
+ ! monopolization.
+ cmp %g1, %g2
+ beq try_update_is_executing
+ mov 1, %g1
+
! Calculate the heir context pointer
sub %o1, %g1, %g1
add %g1, %g2, %o1
diff --git a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
index 5d8c70d..8bfef20 100644
--- a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
+++ b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
@@ -413,12 +413,12 @@ check_is_executing:
addi r6, r5, PPC_CONTEXT_OFFSET_IS_EXECUTING
lwarx r7, r0, r6
cmpwi r7, 0
- bne check_thread_dispatch_necessary
+ bne get_potential_new_heir
/* Try to update the is executing indicator of the heir context */
li r7, 1
stwcx. r7, r0, r6
- bne check_thread_dispatch_necessary
+ bne get_potential_new_heir
isync
#endif
@@ -536,26 +536,23 @@ PROC (_CPU_Context_restore):
b restore_context
#ifdef RTEMS_SMP
-check_thread_dispatch_necessary:
+get_potential_new_heir:
GET_SELF_CPU_CONTROL r6
- /* Check if a thread dispatch is necessary */
- lbz r7, PER_CPU_DISPATCH_NEEDED(r6)
- cmpwi r7, 0
- beq check_is_executing
-
- /* We have a new heir */
-
- /* Clear the thread dispatch necessary flag */
- li r7, 0
- stb r7, PER_CPU_DISPATCH_NEEDED(r6)
- msync
+ /* We may have a new heir */
/* Read the executing and heir */
lwz r7, PER_CPU_OFFSET_EXECUTING(r6)
lwz r8, PER_CPU_OFFSET_HEIR(r6)
+ /*
+ * Update the executing only if necessary to avoid cache line
+ * monopolization.
+ */
+ cmpw r7, r8
+ beq check_is_executing
+
/* Calculate the heir context pointer */
sub r7, r4, r7
add r4, r8, r7
diff --git a/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h b/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h
index 331aa58..b8dc5f4 100644
--- a/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h
+++ b/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h
@@ -8,7 +8,7 @@
*/
/*
- * Copyright (c) 2008-2014 embedded brains GmbH.
+ * Copyright (c) 2008-2015 embedded brains GmbH.
*
* embedded brains GmbH
* Dornierstr. 4
@@ -206,6 +206,13 @@ static inline void ppc_synchronize_data(void)
__asm__ volatile ("sync");
}
+static inline void ppc_light_weight_synchronize(void)
+{
+ RTEMS_COMPILER_MEMORY_BARRIER();
+
+ __asm__ volatile ("lwsync");
+}
+
static inline void ppc_synchronize_instructions(void)
{
RTEMS_COMPILER_MEMORY_BARRIER();
diff --git a/cpukit/score/cpu/arm/cpu_asm.S b/cpukit/score/cpu/arm/cpu_asm.S
index 344512b..6ee9e74 100644
--- a/cpukit/score/cpu/arm/cpu_asm.S
+++ b/cpukit/score/cpu/arm/cpu_asm.S
@@ -19,7 +19,7 @@
* COPYRIGHT (c) 2000 Canon Research Centre France SA.
* Emmanuel Raguet, mailto:raguet at crf.canon.fr
*
- * Copyright (c) 2013-2014 embedded brains GmbH
+ * Copyright (c) 2013-2015 embedded brains GmbH
*
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
@@ -80,13 +80,13 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
add r3, r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET
ldrexb r4, [r3]
cmp r4, #0
- bne .L_check_thread_dispatch_necessary
+ bne .L_get_potential_new_heir
/* Try to update the is executing indicator of the heir context */
mov r4, #1
strexb r5, r4, [r3]
cmp r5, #0
- bne .L_check_thread_dispatch_necessary
+ bne .L_get_potential_new_heir
dmb
#endif
@@ -126,26 +126,23 @@ DEFINE_FUNCTION_ARM(_CPU_Context_restore)
b .L_restore
#ifdef RTEMS_SMP
-.L_check_thread_dispatch_necessary:
+.L_get_potential_new_heir:
GET_SELF_CPU_CONTROL r2, r3
- /* Check if a thread dispatch is necessary */
- ldrb r4, [r2, #PER_CPU_DISPATCH_NEEDED]
- cmp r4, #0
- beq .L_check_is_executing
-
- /* We have a new heir */
-
- /* Clear the thread dispatch necessary flag */
- mov r4, #0
- strb r4, [r2, #PER_CPU_DISPATCH_NEEDED]
- dmb
+ /* We may have a new heir */
/* Read the executing and heir */
ldr r4, [r2, #PER_CPU_OFFSET_EXECUTING]
ldr r5, [r2, #PER_CPU_OFFSET_HEIR]
+ /*
+ * Update the executing only if necessary to avoid cache line
+ * monopolization.
+ */
+ cmp r4, r5
+ beq .L_check_is_executing
+
/* Calculate the heir context pointer */
sub r4, r1, r4
add r1, r5, r4
diff --git a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
index f556087..3755709 100644
--- a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
+++ b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
@@ -559,34 +559,41 @@ typedef struct {
*
* This field must be updated during a context switch. The context switch
* to the heir must wait until the heir context indicates that it is no
- * longer executing on a processor. The context switch must also check if
- * a thread dispatch is necessary to honor updates of the heir thread for
- * this processor. This indicator must be updated using an atomic test and
- * set operation to ensure that at most one processor uses the heir
- * context at the same time.
+ * longer executing on a processor. This indicator must be updated using
+ * an atomic test and set operation to ensure that at most one processor
+ * uses the heir context at the same time. The context switch must also
+ * check for a potential new heir thread for this processor in case the
+ * heir context is not immediately available. Update the executing thread
+ * for this processor only if necessary to avoid a cache line
+ * monopolization.
*
* @code
* void _CPU_Context_switch(
- * Context_Control *executing,
- * Context_Control *heir
+ * Context_Control *executing_context,
+ * Context_Control *heir_context
* )
* {
- * save( executing );
+ * save( executing_context );
*
- * executing->is_executing = false;
+ * executing_context->is_executing = false;
* memory_barrier();
*
- * if ( test_and_set( &heir->is_executing ) ) {
+ * if ( test_and_set( &heir_context->is_executing ) ) {
* do {
* Per_CPU_Control *cpu_self = _Per_CPU_Get_snapshot();
+ * Thread_Control *executing = cpu_self->executing;
+ * Thread_Control *heir = cpu_self->heir;
*
- * if ( cpu_self->dispatch_necessary ) {
- * heir = _Thread_Get_heir_and_make_it_executing( cpu_self );
+ * if ( heir != executing ) {
+ * cpu_self->executing = heir;
+ * heir_context = (Context_Control *)
+ * ((uintptr_t) heir + (uintptr_t) executing_context
+ * - (uintptr_t) executing)
* }
- * } while ( test_and_set( &heir->is_executing ) );
+ * } while ( test_and_set( &heir_context->is_executing ) );
* }
*
- * restore( heir );
+ * restore( heir_context );
* }
* @endcode
*/
@@ -1578,6 +1585,10 @@ register struct Per_CPU_Control *_CPU_Per_CPU_current asm( "rX" );
* @brief Sends an inter-processor interrupt to the specified target
* processor.
*
+ * This interrupt send and the corresponding inter-processor interrupt must
+ * act as an release/acquire barrier so that all values written by the
+ * sending processor are visible to the target processor.
+ *
* This operation is undefined for target processor indices out of range.
*
* @param[in] target_processor_index The target processor index.
diff --git a/cpukit/score/include/rtems/score/percpu.h b/cpukit/score/include/rtems/score/percpu.h
index f1dad90..806c290 100644
--- a/cpukit/score/include/rtems/score/percpu.h
+++ b/cpukit/score/include/rtems/score/percpu.h
@@ -279,9 +279,11 @@ typedef struct Per_CPU_Control {
* @brief This is the heir thread for this processor.
*
* This field is not protected by a lock. The only writer after multitasking
- * start is the scheduler owning this processor. This processor will set the
- * dispatch necessary indicator to false, before it reads the heir. This
- * field is used in combination with the dispatch necessary indicator.
+ * start is the scheduler owning this processor. It is assumed that stores
+ * to pointers are atomic on all supported SMP architectures. The CPU port
+ * specific code (inter-processor interrupt handling and
+ * _CPU_SMP_Send_interrupt()) must guarantee that this processor observes the
+ * last value written.
*
* A thread can be a heir on at most one processor in the system.
*
@@ -290,16 +292,15 @@ typedef struct Per_CPU_Control {
struct _Thread_Control *heir;
/**
- * @brief This is set to true when this processor needs to run the
+ * @brief This is set to true when this processor needs to run the thread
* dispatcher.
*
* It is volatile since interrupts may alter this flag.
*
- * This field is not protected by a lock. There are two writers after
- * multitasking start. The scheduler owning this processor sets this
- * indicator to true, after it updated the heir field. This processor sets
- * this indicator to false, before it reads the heir. This field is used in
- * combination with the heir field.
+ * This field is not protected by a lock and must be accessed only by this
+ * processor. Code (e.g. scheduler and post-switch action requests) running
+ * on another processors must use an inter-processor interrupt to set the
+ * thread dispatch necessary indicator to true.
*
* @see _Thread_Get_heir_and_make_it_executing().
*/
diff --git a/cpukit/score/include/rtems/score/smpimpl.h b/cpukit/score/include/rtems/score/smpimpl.h
index 97c78b0..3167e82 100644
--- a/cpukit/score/include/rtems/score/smpimpl.h
+++ b/cpukit/score/include/rtems/score/smpimpl.h
@@ -146,6 +146,12 @@ static inline void _SMP_Inter_processor_interrupt_handler( void )
{
Per_CPU_Control *cpu_self = _Per_CPU_Get();
+ /*
+ * In the common case the inter-processor interrupt is issued to carry out a
+ * thread dispatch.
+ */
+ cpu_self->dispatch_necessary = true;
+
if ( _Atomic_Load_ulong( &cpu_self->message, ATOMIC_ORDER_RELAXED ) != 0 ) {
unsigned long message = _Atomic_Exchange_ulong(
&cpu_self->message,
diff --git a/cpukit/score/include/rtems/score/threadimpl.h b/cpukit/score/include/rtems/score/threadimpl.h
index 68c26c3..7412bd9 100644
--- a/cpukit/score/include/rtems/score/threadimpl.h
+++ b/cpukit/score/include/rtems/score/threadimpl.h
@@ -11,7 +11,7 @@
* COPYRIGHT (c) 1989-2008.
* On-Line Applications Research Corporation (OAR).
*
- * Copyright (c) 2014 embedded brains GmbH.
+ * Copyright (c) 2014-2015 embedded brains GmbH.
*
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
@@ -793,7 +793,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Internal_allocate( void )
/**
* @brief Gets the heir of the processor and makes it executing.
*
- * The thread dispatch necessary indicator is cleared as a side-effect.
+ * Must be called with interrupts disabled. The thread dispatch necessary
+ * indicator is cleared as a side-effect.
*
* @return The heir thread.
*
@@ -806,18 +807,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Get_heir_and_make_it_executing(
{
Thread_Control *heir;
- cpu_self->dispatch_necessary = false;
-
-#if defined( RTEMS_SMP )
- /*
- * It is critical that we first update the dispatch necessary and then the
- * read the heir so that we don't miss an update by
- * _Thread_Dispatch_update_heir().
- */
- _Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
-#endif
-
heir = cpu_self->heir;
+ cpu_self->dispatch_necessary = false;
cpu_self->executing = heir;
return heir;
@@ -832,23 +823,10 @@ RTEMS_INLINE_ROUTINE void _Thread_Dispatch_update_heir(
{
cpu_for_heir->heir = heir;
- /*
- * It is critical that we first update the heir and then the dispatch
- * necessary so that _Thread_Get_heir_and_make_it_executing() cannot miss an
- * update.
- */
- _Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
-
- /*
- * Only update the dispatch necessary indicator if not already set to
- * avoid superfluous inter-processor interrupts.
- */
- if ( !cpu_for_heir->dispatch_necessary ) {
- cpu_for_heir->dispatch_necessary = true;
-
- if ( cpu_for_heir != cpu_self ) {
- _Per_CPU_Send_interrupt( cpu_for_heir );
- }
+ if ( cpu_for_heir == cpu_self ) {
+ cpu_self->dispatch_necessary = true;
+ } else {
+ _Per_CPU_Send_interrupt( cpu_for_heir );
}
}
#endif
@@ -930,12 +908,15 @@ RTEMS_INLINE_ROUTINE void _Thread_Add_post_switch_action(
ISR_Level level;
cpu_of_thread = _Thread_Action_ISR_disable_and_acquire( thread, &level );
- cpu_of_thread->dispatch_necessary = true;
#if defined(RTEMS_SMP)
- if ( _Per_CPU_Get() != cpu_of_thread ) {
+ if ( _Per_CPU_Get() == cpu_of_thread ) {
+ cpu_of_thread->dispatch_necessary = true;
+ } else {
_Per_CPU_Send_interrupt( cpu_of_thread );
}
+#else
+ cpu_of_thread->dispatch_necessary = true;
#endif
_Chain_Append_if_is_off_chain_unprotected(
diff --git a/testsuites/smptests/smpthreadlife01/init.c b/testsuites/smptests/smpthreadlife01/init.c
index 12b6bd9..4597520 100644
--- a/testsuites/smptests/smpthreadlife01/init.c
+++ b/testsuites/smptests/smpthreadlife01/init.c
@@ -201,7 +201,6 @@ static void delay_ipi_task(rtems_task_argument variant)
ISR_Level level;
_ISR_Disable_without_giant(level);
- (void) level;
/* (C) */
barrier(ctx, &ctx->worker_barrier_state);
@@ -216,6 +215,8 @@ static void delay_ipi_task(rtems_task_argument variant)
_Thread_Disable_dispatch();
}
+ _ISR_Enable_without_giant(level);
+
/*
* We get deleted as a side effect of enabling the thread life protection or
* later if we enable the thread dispatching.
More information about the vc
mailing list