[PATCH 4/4] sparc: Add _CPU_Get_current_per_CPU_control()

Sebastian Huber sebastian.huber at embedded-brains.de
Tue Apr 22 08:46:46 UTC 2014


Use register g6 to hold a pointer to the per-CPU control of the current
processor.  The register g6 is reserved for the operating system by the
SPARC ABI.  Linux has used register g6 for a similar purpose, with the
same method, since 1996.

The register g6 must be initialized during system startup and then must
remain unchanged.

Since the per-CPU control is used in all critical sections of the
operating system, this is a performance optimization for the operating
system core procedures.  An additional benefit is that the low-level
context switch and interrupt processing code is now identical on non-SMP
and SMP configurations.
---
 c/src/lib/libbsp/sparc/shared/irq_asm.S     |   68 ++++++++-------------------
 c/src/lib/libbsp/sparc/shared/start/start.S |   25 +++++-----
 cpukit/score/cpu/sparc/cpu.c                |    1 -
 cpukit/score/cpu/sparc/rtems/score/cpu.h    |   68 ++++++++++++++-------------
 doc/cpu_supplement/sparc.t                  |    5 ++
 5 files changed, 74 insertions(+), 93 deletions(-)

diff --git a/c/src/lib/libbsp/sparc/shared/irq_asm.S b/c/src/lib/libbsp/sparc/shared/irq_asm.S
index fd8269f..c0ba479 100644
--- a/c/src/lib/libbsp/sparc/shared/irq_asm.S
+++ b/c/src/lib/libbsp/sparc/shared/irq_asm.S
@@ -23,23 +23,6 @@
 #include <rtems/score/percpu.h>
 #include <bspopts.h>
 
-.macro GET_SELF_CPU_CONTROL REG, TMP
-        sethi    %hi(_Per_CPU_Information), \REG
-        add      \REG, %lo(_Per_CPU_Information), \REG
-
-#if defined( RTEMS_SMP )
-#if BSP_LEON3_SMP
-        /* LEON3 SMP support */
-        rd       %asr17, \TMP
-        srl      \TMP, LEON3_ASR17_PROCESSOR_INDEX_SHIFT, \TMP
-#else
-        mov      0, \TMP
-#endif
-        sll      \TMP, PER_CPU_CONTROL_SIZE_LOG2, \TMP
-        add      \REG, \TMP, \REG
-#endif /* defined( RTEMS_SMP ) */
-.endm
-
 /*
  *  void _CPU_Context_switch(
  *    Context_Control  *run,
@@ -53,7 +36,7 @@
         PUBLIC(_CPU_Context_switch)
 SYM(_CPU_Context_switch):
         st      %g5, [%o0 + G5_OFFSET]       ! save the global registers
-        std     %g6, [%o0 + G6_OFFSET]
+        st      %g7, [%o0 + G7_OFFSET]
 
         std     %l0, [%o0 + L0_OFFSET]       ! save the local registers
         std     %l2, [%o0 + L2_OFFSET]
@@ -67,11 +50,8 @@ SYM(_CPU_Context_switch):
 
         std     %o6, [%o0 + O6_SP_OFFSET]    ! save the output registers
 
-        ! o3 = self per-CPU control
-        GET_SELF_CPU_CONTROL %o3, %o4
-
         ! load the ISR stack nesting prevention flag
-        ld      [%o3 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE], %o4
+        ld      [%g6 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE], %o4
         ! save it a bit later so we do not waste a couple of cycles
 
         rd      %psr, %o2
@@ -84,7 +64,6 @@ SYM(_CPU_Context_switch):
          *  This is entered from _CPU_Context_restore with:
          *    o1 = context to restore
          *    o2 = psr
-         *    o3 = self per-CPU control
          */
 
         PUBLIC(_CPU_Context_restore_heir)
@@ -185,7 +164,7 @@ done_flushing:
         nop
 
         ld      [%o1 + G5_OFFSET], %g5        ! restore the global registers
-        ldd     [%o1 + G6_OFFSET], %g6
+        ld      [%o1 + G7_OFFSET], %g7
 
         ! Load thread specific ISR dispatch prevention flag
         ld      [%o1 + ISR_DISPATCH_DISABLE_STACK_OFFSET], %o2
@@ -197,7 +176,7 @@ done_flushing:
         ldd     [%o1 + L6_OFFSET], %l6
 
         ! Now restore thread specific ISR dispatch prevention flag
-        st      %o2, [%o3 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE]
+        st      %o2, [%g6 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE]
 
         ldd     [%o1 + I0_OFFSET], %i0        ! restore the input registers
         ldd     [%o1 + I2_OFFSET], %i2
@@ -223,7 +202,6 @@ done_flushing:
 SYM(_CPU_Context_restore):
         save    %sp, -CPU_MINIMUM_STACK_FRAME_SIZE, %sp
         rd      %psr, %o2
-        GET_SELF_CPU_CONTROL %o3, %o4
         ba      SYM(_CPU_Context_restore_heir)
         mov     %i0, %o1                      ! in the delay slot
 
@@ -352,7 +330,7 @@ save_isf:
         st      %g1, [%sp + ISF_G1_OFFSET]     ! save g1
         std     %g2, [%sp + ISF_G2_OFFSET]     ! save g2, g3
         std     %l4, [%sp + ISF_G4_OFFSET]     ! save g4, g5 -- see above
-        std     %g6, [%sp + ISF_G6_OFFSET]     ! save g6, g7
+        st      %g7, [%sp + ISF_G7_OFFSET]     ! save g7
 
         std     %i0, [%sp + ISF_I0_OFFSET]     ! save i0, i1
         std     %i2, [%sp + ISF_I2_OFFSET]     ! save i2, i3
@@ -370,24 +348,21 @@ save_isf:
          *
          *  Register usage for this section:
          *
-         *    l5 = per cpu info pointer
          *    l6 = _Thread_Dispatch_disable_level value
          *    l7 = _ISR_Nest_level value
          *
-         *  NOTE: It is assumed that l5 - l7 will be preserved until the ISR
+         *  NOTE: It is assumed that l6 - l7 will be preserved until the ISR
          *        nest and thread dispatch disable levels are unnested.
          */
 
-	GET_SELF_CPU_CONTROL %l5, %l7
-
-        ld       [%l5 + PER_CPU_ISR_NEST_LEVEL], %l7
-        ld       [%l5 + PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL], %l6
+        ld       [%g6 + PER_CPU_ISR_NEST_LEVEL], %l7
+        ld       [%g6 + PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL], %l6
 
         add      %l7, 1, %l7
-        st       %l7, [%l5 + PER_CPU_ISR_NEST_LEVEL]
+        st       %l7, [%g6 + PER_CPU_ISR_NEST_LEVEL]
 
         add      %l6, 1, %l6
-        st       %l6, [%l5 + PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL]
+        st       %l6, [%g6 + PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL]
 
         /*
          *  If ISR nest level was zero (now 1), then switch stack.
@@ -405,7 +380,7 @@ save_isf:
          nop
 #endif
 
-	ld       [%l5 + PER_CPU_INTERRUPT_STACK_HIGH], %sp
+	ld       [%g6 + PER_CPU_INTERRUPT_STACK_HIGH], %sp
 
 dont_switch_stacks:
         /*
@@ -468,7 +443,7 @@ dont_fix_pil2:
         ld       [%l4], %o2             ! o2 = 3rd arg = interrupt exit instant
         mov      %l3, %o1               ! o1 = 2nd arg = interrupt entry instant
         call     SYM(_Profiling_Outer_most_interrupt_entry_and_exit), 0
-         mov     %l5, %o0               ! o0 = 1st arg = per-CPU control
+         mov     %g6, %o0               ! o0 = 1st arg = per-CPU control
 profiling_not_outer_most_exit:
 #else
          nop                            ! delay slot
@@ -489,15 +464,14 @@ profiling_not_outer_most_exit:
          *
          *  Register usage for this section:
          *
-         *    l5 = per cpu info pointer
          *    l6 = _Thread_Dispatch_disable_level value
          *    l7 = _ISR_Nest_level value
          */
 
-        st       %l7, [%l5 + PER_CPU_ISR_NEST_LEVEL]
+        st       %l7, [%g6 + PER_CPU_ISR_NEST_LEVEL]
 
         sub      %l6, 1, %l6
-        st       %l6, [%l5 + PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL]
+        st       %l6, [%g6 + PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL]
 
         /*
          *  If dispatching is disabled (includes nested interrupt case),
@@ -509,7 +483,7 @@ profiling_not_outer_most_exit:
         nop
 
         ! Are we dispatching from a previous ISR in the interrupted thread?
-        ld       [%l5 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE], %l7
+        ld       [%g6 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE], %l7
         orcc     %l7, %g0, %g0   ! Is this thread already doing an ISR?
         bnz      simple_return   ! Yes, then do a "simple" exit
         nop
@@ -520,7 +494,7 @@ profiling_not_outer_most_exit:
          *  return to the interrupt dispatcher.
          */
 
-        ldub     [%l5 + PER_CPU_DISPATCH_NEEDED], %l6
+        ldub     [%g6 + PER_CPU_DISPATCH_NEEDED], %l6
 
         orcc     %l6, %g0, %g0   ! Is thread switch necessary?
         bz       simple_return   ! no, then do a simple return
@@ -532,7 +506,7 @@ profiling_not_outer_most_exit:
 
         ! Set ISR dispatch nesting prevention flag
         mov      1,%l6
-        st       %l6, [%l5 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE]
+        st       %l6, [%g6 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE]
 
         /*
          *  The following subtract should get us back on the interrupted
@@ -569,9 +543,7 @@ isr_dispatch:
          *  _Thread_Dispatch before leaving this ISR Dispatch context.
          */
 
-	GET_SELF_CPU_CONTROL %l5, %l7
-
-        ldub     [%l5 + PER_CPU_DISPATCH_NEEDED], %l7
+        ldub     [%g6 + PER_CPU_DISPATCH_NEEDED], %l7
 
         orcc     %l7, %g0, %g0    ! Is thread switch necesary?
         bz       allow_nest_again ! No, then clear out and return
@@ -587,7 +559,7 @@ dispatchAgain:
 allow_nest_again:
 
         ! Zero out ISR stack nesting prevention flag
-        st       %g0, [%l5 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE]
+        st       %g0, [%g6 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE]
 
         /*
          *  The CWP in place at this point may be different from
@@ -619,7 +591,7 @@ simple_return:
                                               ! g1 is restored later
         ldd     [%fp + ISF_G2_OFFSET], %g2    ! restore g2, g3
         ldd     [%fp + ISF_G4_OFFSET], %g4    ! restore g4, g5
-        ldd     [%fp + ISF_G6_OFFSET], %g6    ! restore g6, g7
+        ld      [%fp + ISF_G7_OFFSET], %g7    ! restore g7
 
         ldd     [%fp + ISF_I0_OFFSET], %i0    ! restore i0, i1
         ldd     [%fp + ISF_I2_OFFSET], %i2    ! restore i2, i3
diff --git a/c/src/lib/libbsp/sparc/shared/start/start.S b/c/src/lib/libbsp/sparc/shared/start/start.S
index be3a4cd..99abf4f 100644
--- a/c/src/lib/libbsp/sparc/shared/start/start.S
+++ b/c/src/lib/libbsp/sparc/shared/start/start.S
@@ -222,6 +222,9 @@ SYM(hard_reset):
         nop
         nop
 
+	sethi	%hi(_Per_CPU_Information), %g6 ! get per-CPU control
+	add	%g6, %lo(_Per_CPU_Information), %g6
+
 #if defined(START_LEON3_ENABLE_SMP)
 	flush				! invalidate L1 caches of this CPU
 
@@ -232,12 +235,10 @@ SYM(hard_reset):
 	beq	cpu0
 	 nop
 
-	sethi	%hi(_Per_CPU_Information), %o1 ! get per-CPU control
-	add	%o1, %lo(_Per_CPU_Information), %o1
-	sll	%o0, PER_CPU_CONTROL_SIZE_LOG2, %o2
-	add	%o1, %o2, %o1
+	sll	%o0, PER_CPU_CONTROL_SIZE_LOG2, %l0
+	add	%g6, %l0, %g6
 
-	ld	[%o1 + PER_CPU_INTERRUPT_STACK_HIGH], %sp ! set stack pointer
+	ld	[%g6 + PER_CPU_INTERRUPT_STACK_HIGH], %sp ! set stack pointer
 	sub	%sp, 4, %sp		! stack starts at end of area - 4
 	andn	%sp, 0x0f, %sp		! align stack on 16-byte boundary
 	mov	%sp, %fp		! set frame pointer
@@ -249,8 +250,8 @@ SYM(hard_reset):
 cpu0:
 #endif
 
-	set	(SYM(rdb_start)), %g6	! End of RAM
-	st	%sp, [%g6]
+	set	(SYM(rdb_start)), %g5	! End of RAM
+	st	%sp, [%g5]
 	sub	%sp, 4, %sp		! stack starts at end of RAM - 4
 	andn	%sp, 0x0f, %sp		! align stack on 16-byte boundary
         mov     %sp, %fp                ! Set frame pointer
@@ -302,12 +303,12 @@ cpu0:
  	set	SYM(RAM_START), %l1  ! Cannot use RAM_END due to bug in linker
  	set	SYM(RAM_SIZE), %l2
  	add	%l1, %l2, %sp
- 	st	%sp, [%g6]
+ 	st	%sp, [%g5]
 
 
-	set	SYM(CLOCK_SPEED), %g6	! Use 14 MHz in simulator
+	set	SYM(CLOCK_SPEED), %g5	! Use 14 MHz in simulator
 	set	14, %g1
-	st	%g1, [%g6]
+	st	%g1, [%g5]
 
 2:
 #endif
@@ -334,8 +335,8 @@ cpu0:
 	nop
 
 copy_data:
-        ldd   [ %g2 ], %g6
-        std   %g6 , [ %g3 ]             ! copy this double word
+        ldd   [ %g2 ], %g5
+        std   %g5 , [ %g3 ]             ! copy this double word
         add   %g3, 8, %g3               ! bump the destination pointer
         add   %g2, 8, %g2               ! bump the source pointer
         cmp   %g3, %g4                  ! Is the pointer past the end of dest?
diff --git a/cpukit/score/cpu/sparc/cpu.c b/cpukit/score/cpu/sparc/cpu.c
index 463ff47..6c124db 100644
--- a/cpukit/score/cpu/sparc/cpu.c
+++ b/cpukit/score/cpu/sparc/cpu.c
@@ -36,7 +36,6 @@ RTEMS_STATIC_ASSERT(
   )
 
 SPARC_ASSERT_OFFSET(g5, G5);
-SPARC_ASSERT_OFFSET(g6, G6);
 SPARC_ASSERT_OFFSET(g7, G7);
 
 RTEMS_STATIC_ASSERT(
diff --git a/cpukit/score/cpu/sparc/rtems/score/cpu.h b/cpukit/score/cpu/sparc/rtems/score/cpu.h
index dde29fc..7a88d7c 100644
--- a/cpukit/score/cpu/sparc/rtems/score/cpu.h
+++ b/cpukit/score/cpu/sparc/rtems/score/cpu.h
@@ -408,14 +408,14 @@ typedef struct {
  * The registers g2 through g4 are reserved for applications.  GCC uses them as
  * volatile registers by default.  So they are treated like volatile registers
  * in RTEMS as well.
+ *
+ * The register g6 holds the address of the per-CPU control of the current
+ * processor.  It is an invariant of the processor context and must not be
+ * saved and restored during context switches or interrupt processing.
  */
 typedef struct {
-  /** This will contain reserved space for alignment. */
-  uint32_t   reserved_for_alignment;
   /** This will contain the contents of the g5 register. */
   uint32_t   g5;
-  /** This will contain the contents of the g6 register. */
-  uint32_t   g6;
   /** This will contain the contents of the g7 register. */
   uint32_t   g7;
 
@@ -490,55 +490,53 @@ typedef struct {
  */
 
 /** This macro defines an offset into the context for use in assembly. */
-#define G5_OFFSET    0x04
-/** This macro defines an offset into the context for use in assembly. */
-#define G6_OFFSET    0x08
+#define G5_OFFSET    0x00
 /** This macro defines an offset into the context for use in assembly. */
-#define G7_OFFSET    0x0C
+#define G7_OFFSET    0x04
 
 /** This macro defines an offset into the context for use in assembly. */
-#define L0_OFFSET    0x10
+#define L0_OFFSET    0x08
 /** This macro defines an offset into the context for use in assembly. */
-#define L1_OFFSET    0x14
+#define L1_OFFSET    0x0C
 /** This macro defines an offset into the context for use in assembly. */
-#define L2_OFFSET    0x18
+#define L2_OFFSET    0x10
 /** This macro defines an offset into the context for use in assembly. */
-#define L3_OFFSET    0x1C
+#define L3_OFFSET    0x14
 /** This macro defines an offset into the context for use in assembly. */
-#define L4_OFFSET    0x20
+#define L4_OFFSET    0x18
 /** This macro defines an offset into the context for use in assembly. */
-#define L5_OFFSET    0x24
+#define L5_OFFSET    0x1C
 /** This macro defines an offset into the context for use in assembly. */
-#define L6_OFFSET    0x28
+#define L6_OFFSET    0x20
 /** This macro defines an offset into the context for use in assembly. */
-#define L7_OFFSET    0x2C
+#define L7_OFFSET    0x24
 
 /** This macro defines an offset into the context for use in assembly. */
-#define I0_OFFSET    0x30
+#define I0_OFFSET    0x28
 /** This macro defines an offset into the context for use in assembly. */
-#define I1_OFFSET    0x34
+#define I1_OFFSET    0x2C
 /** This macro defines an offset into the context for use in assembly. */
-#define I2_OFFSET    0x38
+#define I2_OFFSET    0x30
 /** This macro defines an offset into the context for use in assembly. */
-#define I3_OFFSET    0x3C
+#define I3_OFFSET    0x34
 /** This macro defines an offset into the context for use in assembly. */
-#define I4_OFFSET    0x40
+#define I4_OFFSET    0x38
 /** This macro defines an offset into the context for use in assembly. */
-#define I5_OFFSET    0x44
+#define I5_OFFSET    0x3C
 /** This macro defines an offset into the context for use in assembly. */
-#define I6_FP_OFFSET 0x48
+#define I6_FP_OFFSET 0x40
 /** This macro defines an offset into the context for use in assembly. */
-#define I7_OFFSET    0x4C
+#define I7_OFFSET    0x44
 
 /** This macro defines an offset into the context for use in assembly. */
-#define O6_SP_OFFSET 0x50
+#define O6_SP_OFFSET 0x48
 /** This macro defines an offset into the context for use in assembly. */
-#define O7_OFFSET    0x54
+#define O7_OFFSET    0x4C
 
 /** This macro defines an offset into the context for use in assembly. */
-#define PSR_OFFSET   0x58
+#define PSR_OFFSET   0x50
 /** This macro defines an offset into the context for use in assembly. */
-#define ISR_DISPATCH_DISABLE_STACK_OFFSET 0x5C
+#define ISR_DISPATCH_DISABLE_STACK_OFFSET 0x54
 
 /** This defines the size of the context area for use in assembly. */
 #define CONTEXT_CONTROL_SIZE 0x68
@@ -661,8 +659,8 @@ typedef struct {
   uint32_t                 g4;
   /** This is the offset of the g5 register on an ISF. */
   uint32_t                 g5;
-  /** This is the offset of the g6 register on an ISF. */
-  uint32_t                 g6;
+  /** This offset is reserved for alignment on an ISF. */
+  uint32_t                 reserved_for_alignment;
   /** This is the offset of the g7 register on an ISF. */
   uint32_t                 g7;
   /** This is the offset of the i0 register on an ISF. */
@@ -712,8 +710,6 @@ typedef struct {
 /** This macro defines an offset into the ISF for use in assembly. */
 #define ISF_G5_OFFSET          CPU_MINIMUM_STACK_FRAME_SIZE + 0x1c
 /** This macro defines an offset into the ISF for use in assembly. */
-#define ISF_G6_OFFSET          CPU_MINIMUM_STACK_FRAME_SIZE + 0x20
-/** This macro defines an offset into the ISF for use in assembly. */
 #define ISF_G7_OFFSET          CPU_MINIMUM_STACK_FRAME_SIZE + 0x24
 /** This macro defines an offset into the ISF for use in assembly. */
 #define ISF_I0_OFFSET          CPU_MINIMUM_STACK_FRAME_SIZE + 0x28
@@ -1156,6 +1152,14 @@ void _CPU_Context_restore(
   Context_Control *new_context
 ) RTEMS_COMPILER_NO_RETURN_ATTRIBUTE;
 
+/**
+ * @brief The pointer to the current per-CPU control is available via register
+ * g6.
+ */
+register struct Per_CPU_Control *_SPARC_Per_CPU_current asm( "g6" );
+
+#define _CPU_Get_current_per_CPU_control() ( _SPARC_Per_CPU_current )
+
 #if defined(RTEMS_SMP)
   uint32_t _CPU_SMP_Initialize( void );
 
diff --git a/doc/cpu_supplement/sparc.t b/doc/cpu_supplement/sparc.t
index a6862c8..d0b7641 100644
--- a/doc/cpu_supplement/sparc.t
+++ b/doc/cpu_supplement/sparc.t
@@ -401,6 +401,11 @@ The registers g2 through g4 are reserved for applications.  GCC uses them as
 volatile registers by default.  So they are treated like volatile registers in
 RTEMS as well.
 
+The register g6 is reserved for the operating system and contains the address
+of the per-CPU control block of the current processor.  This register is
+initialized during system start and then remains unchanged.  It is not
+saved/restored by the context switch or interrupt processing code.
+
 The register g7 is reserved for the operating system and contains the thread
 pointer used for thread-local storage (TLS) as mandated by the SPARC ABI.
 
-- 
1.7.7




More information about the devel mailing list