[rtems commit] SMP: Fix and optimize thread dispatching

Sebastian Huber sebh at rtems.org
Mon Sep 28 12:03:31 UTC 2015


Module:    rtems
Branch:    master
Commit:    258ad71e9626c16f30b40e06c321326636c976ff
Changeset: http://git.rtems.org/rtems/commit/?id=258ad71e9626c16f30b40e06c321326636c976ff

Author:    Sebastian Huber <sebastian.huber at embedded-brains.de>
Date:      Fri Sep 25 14:34:24 2015 +0200

SMP: Fix and optimize thread dispatching

According to the C11 and C++11 memory models, only a read-modify-write
operation guarantees that we read the last value written in modification
order.  Avoid the sequentially consistent thread fence and instead use
the inter-processor interrupt to set the thread dispatch necessary
indicator.
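
A minimal standalone C11 sketch of the distinction referenced above
(illustration only, not RTEMS code; the object and function names are
invented for this example):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool dispatch_needed;

/*
 * A plain atomic load is only guaranteed to read some value from the
 * modification order of dispatch_needed, not necessarily the most recent
 * value written by another processor.
 */
static bool peek_dispatch_needed( void )
{
  return atomic_load_explicit( &dispatch_needed, memory_order_acquire );
}

/*
 * An atomic read-modify-write reads the last value (in modification order)
 * written before the write it performs, at the price of exclusive access to
 * the cache line.
 */
static bool consume_dispatch_needed( void )
{
  return atomic_exchange_explicit(
    &dispatch_needed,
    false,
    memory_order_acq_rel
  );
}

The change below avoids both the read-modify-write and the sequentially
consistent fence on the dispatch path: only the owning processor writes its
own dispatch necessary indicator, and remote processors request a dispatch
solely through _Per_CPU_Send_interrupt(), whose handler sets the indicator
locally.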

---

 c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c     |  3 +-
 c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c    |  5 +++
 c/src/lib/libbsp/sparc/shared/irq_asm.S            | 20 ++++------
 c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S  | 25 ++++++------
 .../powerpc/shared/include/powerpc-utility.h       |  9 ++++-
 cpukit/score/cpu/arm/cpu_asm.S                     | 27 ++++++-------
 cpukit/score/cpu/no_cpu/rtems/score/cpu.h          | 39 ++++++++++++-------
 cpukit/score/include/rtems/score/percpu.h          | 19 ++++-----
 cpukit/score/include/rtems/score/smpimpl.h         |  6 +++
 cpukit/score/include/rtems/score/threadimpl.h      | 45 +++++++---------------
 testsuites/smptests/smpthreadlife01/init.c         |  3 +-
 11 files changed, 101 insertions(+), 100 deletions(-)

diff --git a/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c b/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c
index f2c0201..7e939ff 100644
--- a/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c
+++ b/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 embedded brains GmbH.  All rights reserved.
+ * Copyright (c) 2013-2015 embedded brains GmbH.  All rights reserved.
  *
  *  embedded brains GmbH
  *  Dornierstr. 4
@@ -62,6 +62,7 @@ void _CPU_SMP_Prepare_start_multitasking( void )
 
 void _CPU_SMP_Send_interrupt( uint32_t target_processor_index )
 {
+  _ARM_Data_memory_barrier();
   arm_gic_irq_generate_software_irq(
     ARM_GIC_IRQ_SGI_0,
     ARM_GIC_IRQ_SOFTWARE_IRQ_TO_ALL_IN_LIST,
diff --git a/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c b/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c
index 0b0743b..8952d3e 100644
--- a/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c
+++ b/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c
@@ -235,5 +235,10 @@ void _CPU_SMP_Prepare_start_multitasking(void)
 
 void _CPU_SMP_Send_interrupt(uint32_t target_processor_index)
 {
+#ifdef __PPC_CPU_E6500__
+  ppc_light_weight_synchronize();
+#else
+  ppc_synchronize_data();
+#endif
   qoriq.pic.ipidr [IPI_INDEX].reg = 1U << target_processor_index;
 }
diff --git a/c/src/lib/libbsp/sparc/shared/irq_asm.S b/c/src/lib/libbsp/sparc/shared/irq_asm.S
index f7222e7..9d8600e 100644
--- a/c/src/lib/libbsp/sparc/shared/irq_asm.S
+++ b/c/src/lib/libbsp/sparc/shared/irq_asm.S
@@ -236,24 +236,18 @@ check_is_executing:
         beq     try_update_is_executing
          mov    1, %g1
 
-        ! Check if a thread dispatch is necessary
-        ldub    [%g6 + PER_CPU_DISPATCH_NEEDED], %g1
-        cmp     %g1, 0
-        beq     check_is_executing
-         nop
-
-        ! We have a new heir
-
-        ! Clear the thread dispatch necessary flag
-        stub    %g0, [%g6 + PER_CPU_DISPATCH_NEEDED]
-
-        ! Here we assume a strong memory order, otherwise a memory barrier must
-        ! be inserted here
+        ! We may have a new heir
 
         ! Read the executing and heir
         ld      [%g6 + PER_CPU_OFFSET_EXECUTING], %g1
         ld      [%g6 + PER_CPU_OFFSET_HEIR], %g2
 
+        ! Update the executing only if necessary to avoid cache line
+        ! monopolization.
+        cmp     %g1, %g2
+        beq     try_update_is_executing
+         mov    1, %g1
+
         ! Calculate the heir context pointer
         sub     %o1, %g1, %g1
         add     %g1, %g2, %o1
diff --git a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
index 5d8c70d..8bfef20 100644
--- a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
+++ b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
@@ -413,12 +413,12 @@ check_is_executing:
 	addi	r6, r5, PPC_CONTEXT_OFFSET_IS_EXECUTING
 	lwarx	r7, r0, r6
 	cmpwi	r7, 0
-	bne	check_thread_dispatch_necessary
+	bne	get_potential_new_heir
 
 	/* Try to update the is executing indicator of the heir context */
 	li	r7, 1
 	stwcx.	r7, r0, r6
-	bne	check_thread_dispatch_necessary
+	bne	get_potential_new_heir
 	isync
 #endif
 
@@ -536,26 +536,23 @@ PROC (_CPU_Context_restore):
 	b	restore_context
 
 #ifdef RTEMS_SMP
-check_thread_dispatch_necessary:
+get_potential_new_heir:
 
 	GET_SELF_CPU_CONTROL	r6
 
-	/* Check if a thread dispatch is necessary */
-	lbz	r7, PER_CPU_DISPATCH_NEEDED(r6)
-	cmpwi	r7, 0
-	beq	check_is_executing
-
-	/* We have a new heir */
-
-	/* Clear the thread dispatch necessary flag */
-	li	r7, 0
-	stb	r7, PER_CPU_DISPATCH_NEEDED(r6)
-	msync
+	/* We may have a new heir */
 
 	/* Read the executing and heir */
 	lwz	r7, PER_CPU_OFFSET_EXECUTING(r6)
 	lwz	r8, PER_CPU_OFFSET_HEIR(r6)
 
+	/*
+	 * Update the executing only if necessary to avoid cache line
+	 * monopolization.
+	 */
+	cmpw	r7, r8
+	beq	check_is_executing
+
 	/* Calculate the heir context pointer */
 	sub	r7, r4, r7
 	add	r4, r8, r7
diff --git a/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h b/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h
index 331aa58..b8dc5f4 100644
--- a/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h
+++ b/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h
@@ -8,7 +8,7 @@
  */
 
 /*
- * Copyright (c) 2008-2014 embedded brains GmbH.
+ * Copyright (c) 2008-2015 embedded brains GmbH.
  *
  *  embedded brains GmbH
  *  Dornierstr. 4
@@ -206,6 +206,13 @@ static inline void ppc_synchronize_data(void)
   __asm__ volatile ("sync");
 }
 
+static inline void ppc_light_weight_synchronize(void)
+{
+  RTEMS_COMPILER_MEMORY_BARRIER();
+
+  __asm__ volatile ("lwsync");
+}
+
 static inline void ppc_synchronize_instructions(void)
 {
   RTEMS_COMPILER_MEMORY_BARRIER();
diff --git a/cpukit/score/cpu/arm/cpu_asm.S b/cpukit/score/cpu/arm/cpu_asm.S
index 344512b..6ee9e74 100644
--- a/cpukit/score/cpu/arm/cpu_asm.S
+++ b/cpukit/score/cpu/arm/cpu_asm.S
@@ -19,7 +19,7 @@
  *  COPYRIGHT (c) 2000 Canon Research Centre France SA.
  *  Emmanuel Raguet, mailto:raguet at crf.canon.fr
  *
- *  Copyright (c) 2013-2014 embedded brains GmbH
+ *  Copyright (c) 2013-2015 embedded brains GmbH
  *
  *  The license and distribution terms for this file may be
  *  found in the file LICENSE in this distribution or at
@@ -80,13 +80,13 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
 	add	r3, r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET
 	ldrexb	r4, [r3]
 	cmp	r4, #0
-	bne	.L_check_thread_dispatch_necessary
+	bne	.L_get_potential_new_heir
 
 	/* Try to update the is executing indicator of the heir context */
 	mov	r4, #1
 	strexb	r5, r4, [r3]
 	cmp	r5, #0
-	bne	.L_check_thread_dispatch_necessary
+	bne	.L_get_potential_new_heir
 	dmb
 #endif
 
@@ -126,26 +126,23 @@ DEFINE_FUNCTION_ARM(_CPU_Context_restore)
         b       .L_restore
 
 #ifdef RTEMS_SMP
-.L_check_thread_dispatch_necessary:
+.L_get_potential_new_heir:
 
 	GET_SELF_CPU_CONTROL	r2, r3
 
-	/* Check if a thread dispatch is necessary */
-	ldrb	r4, [r2, #PER_CPU_DISPATCH_NEEDED]
-	cmp	r4, #0
-	beq	.L_check_is_executing
-
-	/* We have a new heir */
-
-	/* Clear the thread dispatch necessary flag */
-	mov	r4, #0
-	strb	r4, [r2, #PER_CPU_DISPATCH_NEEDED]
-	dmb
+	/* We may have a new heir */
 
 	/* Read the executing and heir */
 	ldr	r4, [r2, #PER_CPU_OFFSET_EXECUTING]
 	ldr	r5, [r2, #PER_CPU_OFFSET_HEIR]
 
+	/*
+	 * Update the executing only if necessary to avoid cache line
+	 * monopolization.
+	 */
+	cmp	r4, r5
+	beq	.L_check_is_executing
+
 	/* Calculate the heir context pointer */
 	sub	r4, r1, r4
 	add	r1, r5, r4
diff --git a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
index f556087..3755709 100644
--- a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
+++ b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
@@ -559,34 +559,41 @@ typedef struct {
      *
      * This field must be updated during a context switch.  The context switch
      * to the heir must wait until the heir context indicates that it is no
-     * longer executing on a processor.  The context switch must also check if
-     * a thread dispatch is necessary to honor updates of the heir thread for
-     * this processor.  This indicator must be updated using an atomic test and
-     * set operation to ensure that at most one processor uses the heir
-     * context at the same time.
+     * longer executing on a processor.  This indicator must be updated using
+     * an atomic test and set operation to ensure that at most one processor
+     * uses the heir context at the same time.  The context switch must also
+     * check for a potential new heir thread for this processor in case the
+     * heir context is not immediately available.  Update the executing thread
+     * for this processor only if necessary to avoid cache line
+     * monopolization.
      *
      * @code
      * void _CPU_Context_switch(
-     *   Context_Control *executing,
-     *   Context_Control *heir
+     *   Context_Control *executing_context,
+     *   Context_Control *heir_context
      * )
      * {
-     *   save( executing );
+     *   save( executing_context );
      *
-     *   executing->is_executing = false;
+     *   executing_context->is_executing = false;
      *   memory_barrier();
      *
-     *   if ( test_and_set( &heir->is_executing ) ) {
+     *   if ( test_and_set( &heir_context->is_executing ) ) {
      *     do {
      *       Per_CPU_Control *cpu_self = _Per_CPU_Get_snapshot();
+     *       Thread_Control *executing = cpu_self->executing;
+     *       Thread_Control *heir = cpu_self->heir;
      *
-     *       if ( cpu_self->dispatch_necessary ) {
-     *         heir = _Thread_Get_heir_and_make_it_executing( cpu_self );
+     *       if ( heir != executing ) {
+     *         cpu_self->executing = heir;
+     *         heir_context = (Context_Control *)
+     *           ((uintptr_t) heir + (uintptr_t) executing_context
+     *             - (uintptr_t) executing);
      *       }
-     *     } while ( test_and_set( &heir->is_executing ) );
+     *     } while ( test_and_set( &heir_context->is_executing ) );
      *   }
      *
-     *   restore( heir );
+     *   restore( heir_context );
      * }
      * @endcode
      */
@@ -1578,6 +1585,10 @@ register struct Per_CPU_Control *_CPU_Per_CPU_current asm( "rX" );
    * @brief Sends an inter-processor interrupt to the specified target
    * processor.
    *
+   * This interrupt send and the corresponding inter-processor interrupt must
+   * act as a release/acquire barrier so that all values written by the
+   * sending processor are visible to the target processor.
+   *
    * This operation is undefined for target processor indices out of range.
    *
    * @param[in] target_processor_index The target processor index.
diff --git a/cpukit/score/include/rtems/score/percpu.h b/cpukit/score/include/rtems/score/percpu.h
index f1dad90..806c290 100644
--- a/cpukit/score/include/rtems/score/percpu.h
+++ b/cpukit/score/include/rtems/score/percpu.h
@@ -279,9 +279,11 @@ typedef struct Per_CPU_Control {
    * @brief This is the heir thread for this processor.
    *
    * This field is not protected by a lock.  The only writer after multitasking
-   * start is the scheduler owning this processor.  This processor will set the
-   * dispatch necessary indicator to false, before it reads the heir.  This
-   * field is used in combination with the dispatch necessary indicator.
+   * start is the scheduler owning this processor.  It is assumed that stores
+   * to pointers are atomic on all supported SMP architectures.  The CPU port
+   * specific code (inter-processor interrupt handling and
+   * _CPU_SMP_Send_interrupt()) must guarantee that this processor observes the
+   * last value written.
    *
    * A thread can be a heir on at most one processor in the system.
    *
@@ -290,16 +292,15 @@ typedef struct Per_CPU_Control {
   struct _Thread_Control *heir;
 
   /**
-   * @brief This is set to true when this processor needs to run the
+   * @brief This is set to true when this processor needs to run the thread
    * dispatcher.
    *
    * It is volatile since interrupts may alter this flag.
    *
-   * This field is not protected by a lock.  There are two writers after
-   * multitasking start.  The scheduler owning this processor sets this
-   * indicator to true, after it updated the heir field.  This processor sets
-   * this indicator to false, before it reads the heir.  This field is used in
-   * combination with the heir field.
+   * This field is not protected by a lock and must be accessed only by this
+   * processor.  Code (e.g. scheduler and post-switch action requests) running
+   * on another processors must use an inter-processor interrupt to set the
+   * thread dispatch necessary indicator to true.
    *
    * @see _Thread_Get_heir_and_make_it_executing().
    */
diff --git a/cpukit/score/include/rtems/score/smpimpl.h b/cpukit/score/include/rtems/score/smpimpl.h
index 97c78b0..3167e82 100644
--- a/cpukit/score/include/rtems/score/smpimpl.h
+++ b/cpukit/score/include/rtems/score/smpimpl.h
@@ -146,6 +146,12 @@ static inline void _SMP_Inter_processor_interrupt_handler( void )
 {
   Per_CPU_Control *cpu_self = _Per_CPU_Get();
 
+  /*
+   * In the common case the inter-processor interrupt is issued to carry out a
+   * thread dispatch.
+   */
+  cpu_self->dispatch_necessary = true;
+
   if ( _Atomic_Load_ulong( &cpu_self->message, ATOMIC_ORDER_RELAXED ) != 0 ) {
     unsigned long message = _Atomic_Exchange_ulong(
       &cpu_self->message,
diff --git a/cpukit/score/include/rtems/score/threadimpl.h b/cpukit/score/include/rtems/score/threadimpl.h
index 68c26c3..7412bd9 100644
--- a/cpukit/score/include/rtems/score/threadimpl.h
+++ b/cpukit/score/include/rtems/score/threadimpl.h
@@ -11,7 +11,7 @@
  *  COPYRIGHT (c) 1989-2008.
  *  On-Line Applications Research Corporation (OAR).
  *
- *  Copyright (c) 2014 embedded brains GmbH.
+ *  Copyright (c) 2014-2015 embedded brains GmbH.
  *
  *  The license and distribution terms for this file may be
  *  found in the file LICENSE in this distribution or at
@@ -793,7 +793,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Internal_allocate( void )
 /**
  * @brief Gets the heir of the processor and makes it executing.
  *
- * The thread dispatch necessary indicator is cleared as a side-effect.
+ * Must be called with interrupts disabled.  The thread dispatch necessary
+ * indicator is cleared as a side-effect.
  *
  * @return The heir thread.
  *
@@ -806,18 +807,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Get_heir_and_make_it_executing(
 {
   Thread_Control *heir;
 
-  cpu_self->dispatch_necessary = false;
-
-#if defined( RTEMS_SMP )
-  /*
-   * It is critical that we first update the dispatch necessary and then the
-   * read the heir so that we don't miss an update by
-   * _Thread_Dispatch_update_heir().
-   */
-  _Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
-#endif
-
   heir = cpu_self->heir;
+  cpu_self->dispatch_necessary = false;
   cpu_self->executing = heir;
 
   return heir;
@@ -832,23 +823,10 @@ RTEMS_INLINE_ROUTINE void _Thread_Dispatch_update_heir(
 {
   cpu_for_heir->heir = heir;
 
-  /*
-   * It is critical that we first update the heir and then the dispatch
-   * necessary so that _Thread_Get_heir_and_make_it_executing() cannot miss an
-   * update.
-   */
-  _Atomic_Fence( ATOMIC_ORDER_SEQ_CST );
-
-  /*
-   * Only update the dispatch necessary indicator if not already set to
-   * avoid superfluous inter-processor interrupts.
-   */
-  if ( !cpu_for_heir->dispatch_necessary ) {
-    cpu_for_heir->dispatch_necessary = true;
-
-    if ( cpu_for_heir != cpu_self ) {
-      _Per_CPU_Send_interrupt( cpu_for_heir );
-    }
+  if ( cpu_for_heir == cpu_self ) {
+    cpu_self->dispatch_necessary = true;
+  } else {
+    _Per_CPU_Send_interrupt( cpu_for_heir );
   }
 }
 #endif
@@ -930,12 +908,15 @@ RTEMS_INLINE_ROUTINE void _Thread_Add_post_switch_action(
   ISR_Level        level;
 
   cpu_of_thread = _Thread_Action_ISR_disable_and_acquire( thread, &level );
-  cpu_of_thread->dispatch_necessary = true;
 
 #if defined(RTEMS_SMP)
-  if ( _Per_CPU_Get() != cpu_of_thread ) {
+  if ( _Per_CPU_Get() == cpu_of_thread ) {
+    cpu_of_thread->dispatch_necessary = true;
+  } else {
     _Per_CPU_Send_interrupt( cpu_of_thread );
   }
+#else
+  cpu_of_thread->dispatch_necessary = true;
 #endif
 
   _Chain_Append_if_is_off_chain_unprotected(
diff --git a/testsuites/smptests/smpthreadlife01/init.c b/testsuites/smptests/smpthreadlife01/init.c
index 12b6bd9..4597520 100644
--- a/testsuites/smptests/smpthreadlife01/init.c
+++ b/testsuites/smptests/smpthreadlife01/init.c
@@ -201,7 +201,6 @@ static void delay_ipi_task(rtems_task_argument variant)
   ISR_Level level;
 
   _ISR_Disable_without_giant(level);
-  (void) level;
 
   /* (C) */
   barrier(ctx, &ctx->worker_barrier_state);
@@ -216,6 +215,8 @@ static void delay_ipi_task(rtems_task_argument variant)
     _Thread_Disable_dispatch();
   }
 
+  _ISR_Enable_without_giant(level);
+
   /*
    * We get deleted as a side effect of enabling the thread life protection or
    * later if we enable the thread dispatching.
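
For reference, the threadimpl.h and smpimpl.h hunks above reduce the remote
dispatch request to the following pattern.  This is a simplified sketch of
the changed inline routines under an RTEMS_SMP build; the sketch_* function
names are placeholders, message processing and the uniprocessor paths are
omitted, and the real routines are _Thread_Dispatch_update_heir() and
_SMP_Inter_processor_interrupt_handler() as shown in the diff.

#include <rtems/score/percpu.h>
#include <stdbool.h>

/* Sketch of _Thread_Dispatch_update_heir() after this commit. */
static void update_heir_sketch(
  Per_CPU_Control        *cpu_self,
  Per_CPU_Control        *cpu_for_heir,
  struct _Thread_Control *heir
)
{
  cpu_for_heir->heir = heir;

  if ( cpu_for_heir == cpu_self ) {
    /* Only the owning processor writes its own indicator directly. */
    cpu_self->dispatch_necessary = true;
  } else {
    /*
     * Remote requests go through the inter-processor interrupt, which must
     * provide the release/acquire ordering (see _CPU_SMP_Send_interrupt()).
     */
    _Per_CPU_Send_interrupt( cpu_for_heir );
  }
}

/* Sketch of the corresponding part of the inter-processor interrupt handler. */
static void ipi_handler_sketch( void )
{
  Per_CPU_Control *cpu_self = _Per_CPU_Get();

  /* In the common case the interrupt was issued to carry out a thread dispatch. */
  cpu_self->dispatch_necessary = true;
}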



