[PATCH] score: PR2183: Fix context switch on SMP

Sebastian Huber sebastian.huber at embedded-brains.de
Tue Jul 1 14:18:43 UTC 2014


Fix context switch on SMP for ARM, PowerPC and SPARC.

Atomically test and set the is-executing indicator of the heir context
to ensure that at most one processor uses the heir context.  Also break
out of the busy wait loop when the heir of this processor is updated.
---
 c/src/lib/libbsp/sparc/shared/irq_asm.S           |   55 +++++++--
 c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S |  140 +++++++++++++--------
 cpukit/score/cpu/arm/arm_exc_interrupt.S          |   12 --
 cpukit/score/cpu/arm/cpu_asm.S                    |   54 +++++++--
 cpukit/score/cpu/arm/rtems/asm.h                  |   14 ++-
 cpukit/score/cpu/no_cpu/rtems/score/cpu.h         |   33 +++++-
 cpukit/score/cpu/powerpc/rtems/score/cpu.h        |    2 +-
 cpukit/score/cpu/sparc/rtems/score/cpu.h          |    2 +-
 8 files changed, 226 insertions(+), 86 deletions(-)

diff --git a/c/src/lib/libbsp/sparc/shared/irq_asm.S b/c/src/lib/libbsp/sparc/shared/irq_asm.S
index bf2dca9..7ba06bb 100644
--- a/c/src/lib/libbsp/sparc/shared/irq_asm.S
+++ b/c/src/lib/libbsp/sparc/shared/irq_asm.S
@@ -165,17 +165,18 @@ done_flushing:
 
 #if defined(RTEMS_SMP)
         ! The executing context no longer executes on this processor
-        stb     %g0, [%o0 + SPARC_CONTEXT_CONTROL_IS_EXECUTING_OFFSET]
+        st      %g0, [%o0 + SPARC_CONTEXT_CONTROL_IS_EXECUTING_OFFSET]
 
-        ! Wait for heir context to stop execution
-1:
-        ldub    [%o1 + SPARC_CONTEXT_CONTROL_IS_EXECUTING_OFFSET], %g1
+        ! Try to update the is executing indicator of the heir context
+        mov     1, %g1
+
+try_update_is_executing:
+
+        swap    [%o1 + SPARC_CONTEXT_CONTROL_IS_EXECUTING_OFFSET], %g1
         cmp     %g1, 0
-        bne     1b
-         mov    1, %g1
+        bne     check_is_executing
 
-        ! The heir context executes now on this processor
-        stb     %g1, [%o1 + SPARC_CONTEXT_CONTROL_IS_EXECUTING_OFFSET]
+        ! The next load is in a delay slot, which is all right
 #endif
 
         ld      [%o1 + G5_OFFSET], %g5        ! restore the global registers
@@ -203,6 +204,44 @@ done_flushing:
         jmp     %o7 + 8                       ! return
         nop                                   ! delay slot
 
+#if defined(RTEMS_SMP)
+check_is_executing:
+
+        ! Check the is executing indicator of the heir context
+        ld      [%o1 + SPARC_CONTEXT_CONTROL_IS_EXECUTING_OFFSET], %g1
+        cmp     %g1, 0
+        beq     try_update_is_executing
+         mov    1, %g1
+
+        ! Check if a thread dispatch is necessary
+        ldub    [%g6 + PER_CPU_DISPATCH_NEEDED], %g1
+        cmp     %g1, 0
+        beq     check_is_executing
+         nop
+
+        ! We have a new heir
+
+        ! Clear the thread dispatch necessary flag
+        stub    %g0, [%g6 + PER_CPU_DISPATCH_NEEDED]
+
+        ! Here we assume a strong memory order, otherwise a memory barrier must
+        ! be inserted here
+
+        ! Read the executing and heir
+        ld      [%g6 + PER_CPU_OFFSET_EXECUTING], %g1
+        ld      [%g6 + PER_CPU_OFFSET_HEIR], %g2
+
+        ! Calculate the heir context pointer
+        sub     %o1, %g1, %g1
+        add     %g1, %g2, %o1
+
+        ! Update the executing
+        st      %g2, [%g6 + PER_CPU_OFFSET_EXECUTING]
+
+        ba      try_update_is_executing
+         mov    1, %g1
+#endif
+
 /*
  *  void _CPU_Context_restore(
  *    Context_Control *new_context
diff --git a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
index 6bde8bd..35ceccc 100644
--- a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
+++ b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
@@ -23,7 +23,7 @@
  *  COPYRIGHT (c) 1989-1997.
  *  On-Line Applications Research Corporation (OAR).
  *
- *  Copyright (c) 2011-2013 embedded brains GmbH.
+ *  Copyright (c) 2011-2014 embedded brains GmbH.
  *
  *  The license and distribution terms for this file may in
  *  the file LICENSE in this distribution or at
@@ -32,7 +32,8 @@
 
 #include <rtems/asm.h>
 #include <rtems/powerpc/powerpc.h>
-#include <rtems/score/cpu.h>
+#include <rtems/score/percpu.h>
+#include <libcpu/powerpc-utility.h>
 #include <bspopts.h>
 
 #if PPC_DEFAULT_CACHE_LINE_SIZE != 32
@@ -257,33 +258,37 @@ PROC (_CPU_Context_switch):
 
 	/* Align to a cache line */
 	clrrwi	r3, r3, 5
-	clrrwi	r4, r4, 5
+	clrrwi	r5, r4, 5
 
 	DATA_CACHE_ZERO_AND_TOUCH(r10, PPC_CONTEXT_CACHE_LINE_0)
 	DATA_CACHE_ZERO_AND_TOUCH(r11, PPC_CONTEXT_CACHE_LINE_1)
 
 	/* Save context to r3 */
 
-	mfmsr	r5
-	mflr	r6
-	mfcr	r7
+	mfmsr	r6
+	mflr	r7
+	mfcr	r8
 
 	/*
 	 * We have to clear the reservation of the executing thread.  See also
 	 * Book E section 6.1.6.2 "Atomic Update Primitives".  Recent GCC
-	 * versions use atomic operations in the C++ library for example.
+	 * versions use atomic operations in the C++ library for example.  On
+	 * SMP configurations the reservation is cleared later during the
+	 * context switch.
 	 */
 #if PPC_CONTEXT_OFFSET_GPR1 != PPC_CONTEXT_CACHE_LINE_0 \
   || !BSP_DATA_CACHE_ENABLED \
   || PPC_CACHE_ALIGNMENT != 32
 	li	r10, PPC_CONTEXT_OFFSET_GPR1
 #endif
+#ifndef RTEMS_SMP
 	stwcx.	r1, r3, r10
+#endif
 
 	stw	r1, PPC_CONTEXT_OFFSET_GPR1(r3)
-	stw	r5, PPC_CONTEXT_OFFSET_MSR(r3)
-	stw	r6, PPC_CONTEXT_OFFSET_LR(r3)
-	stw	r7, PPC_CONTEXT_OFFSET_CR(r3)
+	stw	r6, PPC_CONTEXT_OFFSET_MSR(r3)
+	stw	r7, PPC_CONTEXT_OFFSET_LR(r3)
+	stw	r8, PPC_CONTEXT_OFFSET_CR(r3)
 	PPC_GPR_STORE	r14, PPC_CONTEXT_OFFSET_GPR14(r3)
 	PPC_GPR_STORE	r15, PPC_CONTEXT_OFFSET_GPR15(r3)
 
@@ -329,66 +334,69 @@ PROC (_CPU_Context_switch):
 #ifdef RTEMS_SMP
 	/* The executing context no longer executes on this processor */
 	msync
-	li	r5, 0
-	stb	r5, PPC_CONTEXT_OFFSET_IS_EXECUTING(r3)
+	li	r6, 0
+	stw	r6, PPC_CONTEXT_OFFSET_IS_EXECUTING(r3)
 
-	/* Wait for heir context to stop execution */
-1:
-	lbz	r5, PPC_CONTEXT_OFFSET_IS_EXECUTING(r4)
-	cmpwi	r5, 0
-	bne	1b
+check_is_executing:
 
-	/* The heir context executes now on this processor */
-	li	r5, 1
-	stb	r5, PPC_CONTEXT_OFFSET_IS_EXECUTING(r4)
+	/* Check the is executing indicator of the heir context */
+	addi	r6, r5, PPC_CONTEXT_OFFSET_IS_EXECUTING
+	lwarx	r7, r0, r6
+	cmpwi	r7, 0
+	bne	check_thread_dispatch_necessary
+
+	/* Try to update the is executing indicator of the heir context */
+	li	r7, 1
+	stwcx.	r7, r0, r6
+	bne	check_thread_dispatch_necessary
 	isync
 #endif
 
-	/* Restore context from r4 */
+	/* Restore context from r5 */
 restore_context:
 
 #ifdef __ALTIVEC__
-	mr	r14, r4 
+	mr	r14, r5
 	.extern	_CPU_Context_switch_altivec
 	bl	_CPU_Context_switch_altivec
-	mr	r4, r14
+	mr	r5, r14
 #endif
 
-	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r4)
-	lwz	r5, PPC_CONTEXT_OFFSET_MSR(r4)
-	lwz	r6, PPC_CONTEXT_OFFSET_LR(r4)
-	lwz	r7, PPC_CONTEXT_OFFSET_CR(r4)
+	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)
+	lwz	r6, PPC_CONTEXT_OFFSET_MSR(r5)
+	lwz	r7, PPC_CONTEXT_OFFSET_LR(r5)
+	lwz	r8, PPC_CONTEXT_OFFSET_CR(r5)
 
-	PPC_GPR_LOAD	r14, PPC_CONTEXT_OFFSET_GPR14(r4)
-	PPC_GPR_LOAD	r15, PPC_CONTEXT_OFFSET_GPR15(r4)
+	PPC_GPR_LOAD	r14, PPC_CONTEXT_OFFSET_GPR14(r5)
+	PPC_GPR_LOAD	r15, PPC_CONTEXT_OFFSET_GPR15(r5)
 
 	DATA_CACHE_TOUCH(r0, r1)
 
-	PPC_GPR_LOAD	r16, PPC_CONTEXT_OFFSET_GPR16(r4)
-	PPC_GPR_LOAD	r17, PPC_CONTEXT_OFFSET_GPR17(r4)
-	PPC_GPR_LOAD	r18, PPC_CONTEXT_OFFSET_GPR18(r4)
-	PPC_GPR_LOAD	r19, PPC_CONTEXT_OFFSET_GPR19(r4)
+	PPC_GPR_LOAD	r16, PPC_CONTEXT_OFFSET_GPR16(r5)
+	PPC_GPR_LOAD	r17, PPC_CONTEXT_OFFSET_GPR17(r5)
+	PPC_GPR_LOAD	r18, PPC_CONTEXT_OFFSET_GPR18(r5)
+	PPC_GPR_LOAD	r19, PPC_CONTEXT_OFFSET_GPR19(r5)
 
-	PPC_GPR_LOAD	r20, PPC_CONTEXT_OFFSET_GPR20(r4)
-	PPC_GPR_LOAD	r21, PPC_CONTEXT_OFFSET_GPR21(r4)
-	PPC_GPR_LOAD	r22, PPC_CONTEXT_OFFSET_GPR22(r4)
-	PPC_GPR_LOAD	r23, PPC_CONTEXT_OFFSET_GPR23(r4)
+	PPC_GPR_LOAD	r20, PPC_CONTEXT_OFFSET_GPR20(r5)
+	PPC_GPR_LOAD	r21, PPC_CONTEXT_OFFSET_GPR21(r5)
+	PPC_GPR_LOAD	r22, PPC_CONTEXT_OFFSET_GPR22(r5)
+	PPC_GPR_LOAD	r23, PPC_CONTEXT_OFFSET_GPR23(r5)
 
-	PPC_GPR_LOAD	r24, PPC_CONTEXT_OFFSET_GPR24(r4)
-	PPC_GPR_LOAD	r25, PPC_CONTEXT_OFFSET_GPR25(r4)
-	PPC_GPR_LOAD	r26, PPC_CONTEXT_OFFSET_GPR26(r4)
-	PPC_GPR_LOAD	r27, PPC_CONTEXT_OFFSET_GPR27(r4)
+	PPC_GPR_LOAD	r24, PPC_CONTEXT_OFFSET_GPR24(r5)
+	PPC_GPR_LOAD	r25, PPC_CONTEXT_OFFSET_GPR25(r5)
+	PPC_GPR_LOAD	r26, PPC_CONTEXT_OFFSET_GPR26(r5)
+	PPC_GPR_LOAD	r27, PPC_CONTEXT_OFFSET_GPR27(r5)
 
-	PPC_GPR_LOAD	r28, PPC_CONTEXT_OFFSET_GPR28(r4)
-	PPC_GPR_LOAD	r29, PPC_CONTEXT_OFFSET_GPR29(r4)
-	PPC_GPR_LOAD	r30, PPC_CONTEXT_OFFSET_GPR30(r4)
-	PPC_GPR_LOAD	r31, PPC_CONTEXT_OFFSET_GPR31(r4)
+	PPC_GPR_LOAD	r28, PPC_CONTEXT_OFFSET_GPR28(r5)
+	PPC_GPR_LOAD	r29, PPC_CONTEXT_OFFSET_GPR29(r5)
+	PPC_GPR_LOAD	r30, PPC_CONTEXT_OFFSET_GPR30(r5)
+	PPC_GPR_LOAD	r31, PPC_CONTEXT_OFFSET_GPR31(r5)
 
-	lwz	r2, PPC_CONTEXT_OFFSET_GPR2(r4)
+	lwz	r2, PPC_CONTEXT_OFFSET_GPR2(r5)
 
-	mtcr	r7
-	mtlr	r6
-	mtmsr	r5
+	mtcr	r8
+	mtlr	r7
+	mtmsr	r6
 
 #ifdef BSP_USE_SYNC_IN_CONTEXT_SWITCH
 	isync
@@ -399,10 +407,42 @@ restore_context:
 	PUBLIC_PROC (_CPU_Context_restore)
 PROC (_CPU_Context_restore):
 	/* Align to a cache line */
-	clrrwi	r4, r3, 5
+	clrrwi	r5, r3, 5
 
 #ifdef __ALTIVEC__
 	li	r3, 0
 #endif
 
 	b	restore_context
+
+#ifdef RTEMS_SMP
+check_thread_dispatch_necessary:
+
+	GET_SELF_CPU_CONTROL	r6
+
+	/* Check if a thread dispatch is necessary */
+	lbz	r7, PER_CPU_DISPATCH_NEEDED(r6)
+	cmpwi	r7, 0
+	beq	check_is_executing
+
+	/* We have a new heir */
+
+	/* Clear the thread dispatch necessary flag */
+	li	r7, 0
+	stb	r7, PER_CPU_DISPATCH_NEEDED(r6)
+	msync
+
+	/* Read the executing and heir */
+	lwz	r7, PER_CPU_OFFSET_EXECUTING(r6)
+	lwz	r8, PER_CPU_OFFSET_HEIR(r6)
+
+	/* Calculate the heir context pointer */
+	sub	r7, r4, r7
+	add	r4, r8, r7
+	clrrwi	r5, r4, 5
+
+	/* Update the executing */
+	stw	r8, PER_CPU_OFFSET_EXECUTING(r6)
+
+	b	check_is_executing
+#endif
diff --git a/cpukit/score/cpu/arm/arm_exc_interrupt.S b/cpukit/score/cpu/arm/arm_exc_interrupt.S
index c80a404..e8026c8 100644
--- a/cpukit/score/cpu/arm/arm_exc_interrupt.S
+++ b/cpukit/score/cpu/arm/arm_exc_interrupt.S
@@ -31,7 +31,6 @@
 #endif
 
 #include <rtems/asm.h>
-#include <rtems/score/percpu.h>
 
 #ifdef ARM_MULTILIB_ARCH_V4
 
@@ -49,17 +48,6 @@
 #define CONTEXT_LIST {r0, r1, r2, r3, EXCHANGE_LR, EXCHANGE_SPSR, SELF_CPU_CONTROL, r12}
 #define CONTEXT_SIZE 32
 
-.macro GET_SELF_CPU_CONTROL REG, TMP
-	ldr	\REG, =_Per_CPU_Information
-#ifdef RTEMS_SMP
-	/* Use ARMv7 Multiprocessor Affinity Register (MPIDR) */
-	mrc	p15, 0, \TMP, c0, c0, 5
-
-	and	\TMP, \TMP, #0xff
-	add	\REG, \REG, \TMP, asl #PER_CPU_CONTROL_SIZE_LOG2
-#endif
-.endm
-
 .arm
 .globl _ARMV4_Exception_interrupt
 _ARMV4_Exception_interrupt:
diff --git a/cpukit/score/cpu/arm/cpu_asm.S b/cpukit/score/cpu/arm/cpu_asm.S
index 7502923..306dfbf 100644
--- a/cpukit/score/cpu/arm/cpu_asm.S
+++ b/cpukit/score/cpu/arm/cpu_asm.S
@@ -30,7 +30,6 @@
 #endif
 
 #include <rtems/asm.h>
-#include <rtems/score/cpu.h>
 
 #ifdef ARM_MULTILIB_ARCH_V4
 
@@ -73,21 +72,25 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
 	mov	r3, #0
 	strb	r3, [r0, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET]
 
-	/* Wait for heir context to stop execution */
-1:
-	ldrb	r3, [r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET]
-	cmp	r3, #0
-	bne	1b
+.L_check_is_executing:
 
-	/* The heir context executes now on this processor */
+	/* Check the is executing indicator of the heir context */
+	add	r3, r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET
+	ldrexb	r4, [r3]
+	cmp	r4, #0
+	bne	.L_check_thread_dispatch_necessary
+
+	/* Try to update the is executing indicator of the heir context */
+	mov	r4, #1
+	strexb	r5, r4, [r3]
+	cmp	r5, #0
+	bne	.L_check_thread_dispatch_necessary
 	dmb
-	mov	r3, #1
-	strb	r3, [r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET]
 #endif
 
 /* Start restoring context */
 .L_restore:
-#ifdef ARM_MULTILIB_HAS_LOAD_STORE_EXCLUSIVE
+#if !defined(RTEMS_SMP) && defined(ARM_MULTILIB_HAS_LOAD_STORE_EXCLUSIVE)
 	clrex
 #endif
 
@@ -120,4 +123,35 @@ DEFINE_FUNCTION_ARM(_CPU_Context_restore)
         mov     r1, r0
         b       .L_restore
 
+#ifdef RTEMS_SMP
+.L_check_thread_dispatch_necessary:
+
+	GET_SELF_CPU_CONTROL	r2, r3
+
+	/* Check if a thread dispatch is necessary */
+	ldrb	r4, [r2, #PER_CPU_DISPATCH_NEEDED]
+	cmp	r4, #0
+	beq	.L_check_is_executing
+
+	/* We have a new heir */
+
+	/* Clear the thread dispatch necessary flag */
+	mov	r4, #0
+	strb	r4, [r2, #PER_CPU_DISPATCH_NEEDED]
+	dmb
+
+	/* Read the executing and heir */
+	ldr	r4, [r2, #PER_CPU_OFFSET_EXECUTING]
+	ldr	r5, [r2, #PER_CPU_OFFSET_HEIR]
+
+	/* Calculate the heir context pointer */
+	sub	r4, r1, r4
+	add	r1, r5, r4
+
+	/* Update the executing */
+	str	r5, [r2, #PER_CPU_OFFSET_EXECUTING]
+
+	b	.L_check_is_executing
+#endif
+
 #endif /* ARM_MULTILIB_ARCH_V4 */
diff --git a/cpukit/score/cpu/arm/rtems/asm.h b/cpukit/score/cpu/arm/rtems/asm.h
index 6539a38..d22514d 100644
--- a/cpukit/score/cpu/arm/rtems/asm.h
+++ b/cpukit/score/cpu/arm/rtems/asm.h
@@ -41,8 +41,7 @@
 #ifndef ASM
 #define ASM
 #endif
-#include <rtems/score/cpuopts.h>
-#include <rtems/score/arm.h>
+#include <rtems/score/percpu.h>
 
 /**
  * @defgroup ScoreCPUARMASM ARM Assembler Support
@@ -188,6 +187,17 @@
 #endif /* __thumb__ */
 .endm
 
+.macro GET_SELF_CPU_CONTROL REG, TMP
+	ldr	\REG, =_Per_CPU_Information
+#ifdef RTEMS_SMP
+	/* Use ARMv7 Multiprocessor Affinity Register (MPIDR) */
+	mrc	p15, 0, \TMP, c0, c0, 5
+
+	and	\TMP, \TMP, #0xff
+	add	\REG, \REG, \TMP, asl #PER_CPU_CONTROL_SIZE_LOG2
+#endif
+.endm
+
 /** @} */
 
 #endif /* _RTEMS_ASM_H */
diff --git a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
index 9570fb6..9699033 100644
--- a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
+++ b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h
@@ -578,11 +578,40 @@ typedef struct {
 #ifdef RTEMS_SMP
     /**
      * @brief On SMP configurations the thread context must contain a boolean
-     * indicator if this context is executing on a processor.
+     * indicator to signal if this context is executing on a processor.
      *
      * This field must be updated during a context switch.  The context switch
      * to the heir must wait until the heir context indicates that it is no
-     * longer executing on a processor.
+     * longer executing on a processor.  The context switch must also check if
+     * a thread dispatch is necessary to honor updates of the heir thread for
+     * this processor.  This indicator must be updated using an atomic test and
+     * set operation to ensure that at most one processor uses the heir
+     * context at the same time.
+     *
+     * @code
+     * void _CPU_Context_switch(
+     *   Context_Control *executing,
+     *   Context_Control *heir
+     * )
+     * {
+     *   save(executing);
+     *
+     *   executing->is_executing = false;
+     *   memory_barrier();
+     *
+     *   if (test_and_set(&heir->is_executing)) {
+     *     do {
+     *       Per_CPU_Control *cpu_self = _Per_CPU_Get_snapshot();
+     *
+     *       if (cpu_self->dispatch_necessary) {
+     *         heir = _Thread_Get_heir_and_make_it_executing(cpu_self);
+     *       }
+     *     } while (test_and_set(&heir->is_executing));
+     *   }
+     *
+     *   restore(heir);
+     * }
+     * @endcode
      */
     volatile bool is_executing;
 #endif
diff --git a/cpukit/score/cpu/powerpc/rtems/score/cpu.h b/cpukit/score/cpu/powerpc/rtems/score/cpu.h
index 3cad329..13f50ad 100644
--- a/cpukit/score/cpu/powerpc/rtems/score/cpu.h
+++ b/cpukit/score/cpu/powerpc/rtems/score/cpu.h
@@ -303,7 +303,7 @@ typedef struct {
   PPC_GPR_TYPE gpr31;
   uint32_t gpr2;
   #ifdef RTEMS_SMP
-    volatile bool is_executing;
+    volatile uint32_t is_executing;
   #endif
   #ifdef __ALTIVEC__
     /*
diff --git a/cpukit/score/cpu/sparc/rtems/score/cpu.h b/cpukit/score/cpu/sparc/rtems/score/cpu.h
index c010005..39b7825 100644
--- a/cpukit/score/cpu/sparc/rtems/score/cpu.h
+++ b/cpukit/score/cpu/sparc/rtems/score/cpu.h
@@ -475,7 +475,7 @@ typedef struct {
   uint32_t   isr_dispatch_disable;
 
 #if defined(RTEMS_SMP)
-  volatile bool is_executing;
+  volatile uint32_t is_executing;
 #endif
 } Context_Control;
 
-- 
1.7.7



More information about the devel mailing list