[PATCH 4/4] sparc: Add lazy floating point switch
Sebastian Huber
sebastian.huber at embedded-brains.de
Wed Jul 19 08:21:18 UTC 2017
The SPARC ABI is a bit special with respect to the floating point context.
The complete floating point context is volatile. Thus, from an ABI point
of view nothing needs to be saved and restored during a context switch.
Instead the floating point context must be saved and restored during
interrupt processing. Historically, the deferred floating point switch was
used for SPARC and the complete floating point context was saved and
restored during a context switch to the new floating point unit owner.
This is a bit dangerous since post-switch actions (e.g. signal handlers)
and context switch extensions may silently corrupt the floating point
context.
The floating point unit is disabled for interrupt handlers. Thus, if an
interrupt handler uses the floating point unit, this results in a trap
(INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT).
In uniprocessor configurations, a lazy floating point context switch is
used. In case an active floating point thread is interrupted (PSR[EF] == 1)
and a thread dispatch is carried out, then this thread is registered as the
floating point owner. When a floating point owner is present during a
context switch, the floating point unit is disabled for the heir thread
(PSR[EF] == 0). The floating point disabled trap checks that the use of the
floating point unit is allowed and saves/restores the floating point context
on demand.
Update #3077.
---
c/src/lib/libbsp/sparc/erc32/startup/spurious.c | 7 +-
c/src/lib/libbsp/sparc/leon2/startup/spurious.c | 7 +-
c/src/lib/libbsp/sparc/leon3/startup/spurious.c | 7 +-
c/src/lib/libbsp/sparc/shared/irq_asm.S | 54 ++++++++--
c/src/lib/libbsp/sparc/shared/start/start.S | 15 ++-
c/src/lib/libcpu/sparc/syscall/syscall.S | 126 ++++++++++++++++++++++-
cpukit/score/cpu/sparc/Makefile.am | 2 +-
cpukit/score/cpu/sparc/cpu.c | 65 ++++++++----
cpukit/score/cpu/sparc/cpu_asm.S | 127 ------------------------
cpukit/score/cpu/sparc/rtems/score/cpu.h | 115 +++++++++------------
cpukit/score/cpu/sparc/rtems/score/cpuimpl.h | 38 ++++++-
cpukit/score/cpu/sparc/sparc-context-validate.S | 13 ++-
testsuites/sptests/spsize/size.c | 3 +-
13 files changed, 344 insertions(+), 235 deletions(-)
delete mode 100644 cpukit/score/cpu/sparc/cpu_asm.S
diff --git a/c/src/lib/libbsp/sparc/erc32/startup/spurious.c b/c/src/lib/libbsp/sparc/erc32/startup/spurious.c
index 2b262af169..b022f96f5c 100644
--- a/c/src/lib/libbsp/sparc/erc32/startup/spurious.c
+++ b/c/src/lib/libbsp/sparc/erc32/startup/spurious.c
@@ -133,9 +133,11 @@ static rtems_isr bsp_spurious_handler(
.isf = isf
};
+#if !defined(SPARC_USE_LAZY_FP_SWITCH)
if ( SPARC_REAL_TRAP_NUMBER( trap ) == 4 ) {
_Internal_error( INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT );
}
+#endif
rtems_fatal(
RTEMS_FATAL_SOURCE_EXCEPTION,
@@ -170,10 +172,13 @@ void bsp_spurious_initialize()
*/
if (( trap == 5 || trap == 6 ) ||
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ ( trap == 4 ) ||
+#endif
(( trap >= 0x11 ) && ( trap <= 0x1f )) ||
(( trap >= 0x70 ) && ( trap <= 0x83 )) ||
( trap == 0x80 + SPARC_SWTRAP_IRQDIS ) ||
-#if SPARC_HAS_FPU == 1
+#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
( trap == 0x80 + SPARC_SWTRAP_IRQDIS_FP ) ||
#endif
( trap == 0x80 + SPARC_SWTRAP_IRQEN ))
diff --git a/c/src/lib/libbsp/sparc/leon2/startup/spurious.c b/c/src/lib/libbsp/sparc/leon2/startup/spurious.c
index 58ba4fc797..971c3cd44b 100644
--- a/c/src/lib/libbsp/sparc/leon2/startup/spurious.c
+++ b/c/src/lib/libbsp/sparc/leon2/startup/spurious.c
@@ -120,9 +120,11 @@ static rtems_isr bsp_spurious_handler(
.isf = isf
};
+#if !defined(SPARC_USE_LAZY_FP_SWITCH)
if ( SPARC_REAL_TRAP_NUMBER( trap ) == 4 ) {
_Internal_error( INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT );
}
+#endif
rtems_fatal(
RTEMS_FATAL_SOURCE_EXCEPTION,
@@ -157,10 +159,13 @@ void bsp_spurious_initialize()
*/
if (( trap == 5 || trap == 6 ) ||
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ ( trap == 4 ) ||
+#endif
(( trap >= 0x11 ) && ( trap <= 0x1f )) ||
(( trap >= 0x70 ) && ( trap <= 0x83 )) ||
( trap == 0x80 + SPARC_SWTRAP_IRQDIS ) ||
-#if SPARC_HAS_FPU == 1
+#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
( trap == 0x80 + SPARC_SWTRAP_IRQDIS_FP ) ||
#endif
( trap == 0x80 + SPARC_SWTRAP_IRQEN ))
diff --git a/c/src/lib/libbsp/sparc/leon3/startup/spurious.c b/c/src/lib/libbsp/sparc/leon3/startup/spurious.c
index 88d977d603..23ac4bf4cf 100644
--- a/c/src/lib/libbsp/sparc/leon3/startup/spurious.c
+++ b/c/src/lib/libbsp/sparc/leon3/startup/spurious.c
@@ -119,9 +119,11 @@ static rtems_isr bsp_spurious_handler(
.isf = isf
};
+#if !defined(SPARC_USE_LAZY_FP_SWITCH)
if ( SPARC_REAL_TRAP_NUMBER( trap ) == 4 ) {
_Internal_error( INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT );
}
+#endif
rtems_fatal(
RTEMS_FATAL_SOURCE_EXCEPTION,
@@ -156,10 +158,13 @@ void bsp_spurious_initialize()
*/
if (( trap == 5 ) || ( trap == 6 ) ||
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ ( trap == 4 ) ||
+#endif
(( trap >= 0x11 ) && ( trap <= 0x1f )) ||
(( trap >= 0x70 ) && ( trap <= 0x83 )) ||
( trap == 0x80 + SPARC_SWTRAP_IRQDIS ) ||
-#if SPARC_HAS_FPU == 1
+#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
( trap == 0x80 + SPARC_SWTRAP_IRQDIS_FP ) ||
#endif
( trap == 0x80 + SPARC_SWTRAP_IRQEN ))
diff --git a/c/src/lib/libbsp/sparc/shared/irq_asm.S b/c/src/lib/libbsp/sparc/shared/irq_asm.S
index 1d76e7e728..7d0312b7e4 100644
--- a/c/src/lib/libbsp/sparc/shared/irq_asm.S
+++ b/c/src/lib/libbsp/sparc/shared/irq_asm.S
@@ -7,7 +7,7 @@
* COPYRIGHT (c) 1989-2011.
* On-Line Applications Research Corporation (OAR).
*
- * Copyright (c) 2014, 2016 embedded brains GmbH
+ * Copyright (c) 2014, 2017 embedded brains GmbH
*
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
@@ -218,7 +218,17 @@ done_flushing:
! The next load is in a delay slot, which is all right
#endif
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ ld [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET], %g2
+#endif
ld [%o1 + PSR_OFFSET], %g1 ! g1 = heir psr with traps enabled
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ sethi %hi(SPARC_PSR_EF_MASK), %g5
+ cmp %g2, %g0
+ bne,a .Lclear_psr_ef_done
+ andn %g1, %g5, %g1 ! g1 = heir psr w/o PSR[EF]
+.Lclear_psr_ef_done:
+#endif
andn %g1, SPARC_PSR_CWP_MASK, %g1 ! g1 = heir psr w/o cwp
or %g1, %g3, %g1 ! g1 = heir psr with cwp
mov %g1, %psr ! restore status register and
@@ -583,7 +593,7 @@ pil_fixed:
nop ! delay slot
#endif
-#if SPARC_HAS_FPU == 1
+#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
mov %l0, %g1 ! PSR[EF] value of interrupted context
ta SPARC_SWTRAP_IRQDIS_FP ! **** DISABLE INTERRUPTS ****
#else
@@ -652,10 +662,38 @@ isr_dispatch:
mov 0, %o1 ! ISR level for _Thread_Do_dispatch()
-#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
/* Test if we interrupted a floating point thread (PSR[EF] == 1) */
andcc %l0, %l5, %g0
- be non_fp_thread_dispatch
+ be .Lnon_fp_thread_dispatch
+ ld [%g6 + PER_CPU_OFFSET_EXECUTING], %l6
+
+ /* Set new floating point unit owner to executing thread */
+ st %l6, [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET]
+
+ call SYM(_Thread_Do_dispatch)
+ mov %g6, %o0
+
+ /*
+ * If we are still the floating point unit owner, then reset the
+ * floating point unit owner to NULL, otherwise clear PSR[EF] in the
+ * interrupt frame and let the FP disabled system call do the floating
+ * point context save/restore.
+ */
+ ld [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET], %l7
+ cmp %l6, %l7
+ bne .Ldisable_fp
+ andn %l0, %l5, %l0
+ ba .Lthread_dispatch_done
+ st %g0, [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET]
+.Ldisable_fp:
+ ba .Lthread_dispatch_done
+ st %l0, [%fp + ISF_PSR_OFFSET]
+.Lnon_fp_thread_dispatch:
+#elif defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
+ /* Test if we interrupted a floating point thread (PSR[EF] == 1) */
+ andcc %l0, %l5, %g0
+ be .Lnon_fp_thread_dispatch
nop
/*
@@ -722,17 +760,17 @@ isr_dispatch:
ldd [%sp + FP_FRAME_OFFSET_F28_F29], %f28
ldd [%sp + FP_FRAME_OFFSET_F3O_F31], %f30
ld [%sp + FP_FRAME_OFFSET_FSR], %fsr
- ba thread_dispatch_done
+ ba .Lthread_dispatch_done
add %sp, FP_FRAME_SIZE, %sp
-non_fp_thread_dispatch:
+.Lnon_fp_thread_dispatch:
#endif
call SYM(_Thread_Do_dispatch)
mov %g6, %o0
-#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
-thread_dispatch_done:
+#if SPARC_HAS_FPU == 1
+.Lthread_dispatch_done:
#endif
ta SPARC_SWTRAP_IRQDIS ! **** DISABLE INTERRUPTS ****
diff --git a/c/src/lib/libbsp/sparc/shared/start/start.S b/c/src/lib/libbsp/sparc/shared/start/start.S
index 3b9f841223..64498c6110 100644
--- a/c/src/lib/libbsp/sparc/shared/start/start.S
+++ b/c/src/lib/libbsp/sparc/shared/start/start.S
@@ -35,6 +35,15 @@
/*
* System call optimized trap table entry
*/
+#define FPDIS_TRAP(_handler) \
+ mov %psr, %l0 ; \
+ sethi %hi(_handler), %l4 ; \
+ jmp %l4+%lo(_handler); \
+ sethi %hi(SPARC_PSR_EF_MASK), %l3
+
+/*
+ * System call optimized trap table entry
+ */
#define IRQDIS_TRAP(_handler) \
mov %psr, %l0 ; \
sethi %hi(_handler), %l4 ; \
@@ -100,7 +109,11 @@ SYM(trap_table):
! exception
BAD_TRAP; ! 02 illegal instruction
BAD_TRAP; ! 03 privileged instruction
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ FPDIS_TRAP(SYM(syscall_lazy_fp_switch)); ! 04 fp disabled
+#else
BAD_TRAP; ! 04 fp disabled
+#endif
WOTRAP(5, SYM(window_overflow_trap_handler)); ! 05 window overflow
WUTRAP(6, SYM(window_underflow_trap_handler));! 06 window underflow
BAD_TRAP; ! 07 memory address not aligned
@@ -209,7 +222,7 @@ SYM(CLOCK_SPEED):
*/
IRQDIS_TRAP(SYM(syscall_irqdis)); ! 89 IRQ Disable syscall trap
IRQEN_TRAP(SYM(syscall_irqen)); ! 8A IRQ Enable syscall trap
-#if SPARC_HAS_FPU == 1
+#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
IRQDIS_TRAP(SYM(syscall_irqdis_fp)); ! 8B IRQ disable
! and set PSR[EF] syscall trap
#else
diff --git a/c/src/lib/libcpu/sparc/syscall/syscall.S b/c/src/lib/libcpu/sparc/syscall/syscall.S
index d5e1f43403..da0ee43889 100644
--- a/c/src/lib/libcpu/sparc/syscall/syscall.S
+++ b/c/src/lib/libcpu/sparc/syscall/syscall.S
@@ -11,12 +11,15 @@
* COPYRIGHT:
*
* COPYRIGHT (c) 1995. European Space Agency.
+ * Copyright (c) 2016, 2017 embedded brains GmbH
*
* This terms of the RTEMS license apply to this file.
*
*/
#include <rtems/asm.h>
+#include <rtems/score/cpuimpl.h>
+#include <rtems/score/percpu.h>
#include "syscall.h"
.section ".text"
@@ -95,7 +98,7 @@ SYM(syscall_irqen):
jmp %l2 ! Return to after TA 10.
rett %l2 + 4
-#if SPARC_HAS_FPU == 1
+#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
/*
* system call - Interrupt disable and set PSR[EF] according to caller
* specified %g1
@@ -132,6 +135,127 @@ SYM(syscall_irqdis_fp):
rett %l2 + 4
#endif
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+
+ /*
+ * system call - Perform a lazy floating point switch
+ *
+ * On entry:
+ *
+ * l0 = psr (from trap table)
+ * l1 = pc
+ * l2 = npc
+ * l3 = SPARC_PSR_EF_MASK
+ */
+
+.align 32 ! Align to 32-byte cache-line
+ PUBLIC(syscall_lazy_fp_switch)
+
+SYM(syscall_lazy_fp_switch):
+ ld [%g6 + PER_CPU_OFFSET_EXECUTING], %l4
+ ld [%g6 + PER_CPU_ISR_NEST_LEVEL], %l5
+ ld [%l4 + %lo(SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET)], %l6
+ ld [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET], %l7
+
+ /* Ensure that we are not in interrupt context */
+ cmp %l5, 0
+ bne .Lillegal_use_of_floating_point_unit
+ or %l0, %l3, %l0
+
+ /* Ensure that we are a proper floating point thread */
+ cmp %l6, 0
+ be .Lillegal_use_of_floating_point_unit
+ ld [%l4 + %lo(SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET)], %l6
+
+ /* Set PSR[EF] to 1, PSR write delay 3 instructions! */
+ mov %l0, %psr
+
+ /*
+ * Check if there is a floating point owner. We have to check this
+ * here, since the floating point owner may have been deleted in the
+ * meantime. Save the floating point context if necessary.
+ */
+ cmp %l7, 0
+ be .Lfp_save_done
+ nop
+ ld [%l7 + %lo(SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET)], %l5
+ std %f0, [%l5 + SPARC_FP_CONTEXT_OFFSET_F0_F1]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f2, [%l5 + SPARC_FP_CONTEXT_OFFSET_F2_F3]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f4, [%l5 + SPARC_FP_CONTEXT_OFFSET_F4_F5]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f6, [%l5 + SPARC_FP_CONTEXT_OFFSET_F6_F7]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f8, [%l5 + SPARC_FP_CONTEXT_OFFSET_F8_F9]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f10, [%l5 + SPARC_FP_CONTEXT_OFFSET_F10_F11]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f12, [%l5 + SPARC_FP_CONTEXT_OFFSET_F12_F13]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f14, [%l5 + SPARC_FP_CONTEXT_OFFSET_F14_F15]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f16, [%l5 + SPARC_FP_CONTEXT_OFFSET_F16_F17]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f18, [%l5 + SPARC_FP_CONTEXT_OFFSET_F18_F19]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f20, [%l5 + SPARC_FP_CONTEXT_OFFSET_F20_F21]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f22, [%l5 + SPARC_FP_CONTEXT_OFFSET_F22_F23]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f24, [%l5 + SPARC_FP_CONTEXT_OFFSET_F24_F25]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f26, [%l5 + SPARC_FP_CONTEXT_OFFSET_F26_F27]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f28, [%l5 + SPARC_FP_CONTEXT_OFFSET_F28_F29]
+ SPARC_LEON3FT_B2BST_NOP
+ std %f30, [%l5 + SPARC_FP_CONTEXT_OFFSET_F30_F31]
+ SPARC_LEON3FT_B2BST_NOP
+ st %fsr, [%l5 + SPARC_FP_CONTEXT_OFFSET_FSR]
+ SPARC_LEON3FT_B2BST_NOP
+ st %g0, [%g6 + SPARC_PER_CPU_FP_OWNER_OFFSET]
+ SPARC_LEON3FT_B2BST_NOP
+ st %l5, [%l7 + %lo(SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET)]
+
+.Lfp_save_done:
+
+ /* Restore the floating point context if necessary */
+ cmp %l6, 0
+ be .Lfp_restore_done
+ st %g0, [%l4 + %lo(SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET)]
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F0_F1], %f0
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F2_F3], %f2
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F4_F5], %f4
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F6_F7], %f6
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F8_F9], %f8
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F10_F11], %f10
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F12_F13], %f12
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F14_F15], %f14
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F16_F17], %f16
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F18_F19], %f18
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F20_F21], %f20
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F22_F23], %f22
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F24_F25], %f24
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F26_F27], %f26
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F28_F29], %f28
+ ldd [%l6 + SPARC_FP_CONTEXT_OFFSET_F30_F31], %f30
+ ld [%l6 + SPARC_FP_CONTEXT_OFFSET_FSR], %fsr
+
+.Lfp_restore_done:
+
+ /* Now, retry the floating point instruction with PSR[EF] == 1 */
+ jmp %l1
+ rett %l1 + 4
+
+.Lillegal_use_of_floating_point_unit:
+
+ sethi %hi(_Internal_error), %l1
+ or %l1, %lo(_Internal_error), %l1
+ mov 38, %i0
+ jmp %l1
+ rett %l1 + 4
+#endif
+
#if defined(RTEMS_PARAVIRT)
PUBLIC(_SPARC_Get_PSR)
diff --git a/cpukit/score/cpu/sparc/Makefile.am b/cpukit/score/cpu/sparc/Makefile.am
index ec600b6cad..96d29e0e11 100644
--- a/cpukit/score/cpu/sparc/Makefile.am
+++ b/cpukit/score/cpu/sparc/Makefile.am
@@ -12,7 +12,7 @@ include_rtems_score_HEADERS += rtems/score/types.h
include_rtems_score_HEADERS += rtems/score/cpuatomic.h
noinst_LIBRARIES = libscorecpu.a
-libscorecpu_a_SOURCES = cpu.c cpu_asm.S
+libscorecpu_a_SOURCES = cpu.c
libscorecpu_a_SOURCES += sparc-context-volatile-clobber.S
libscorecpu_a_SOURCES += sparc-context-validate.S
libscorecpu_a_SOURCES += sparc-counter.c
diff --git a/cpukit/score/cpu/sparc/cpu.c b/cpukit/score/cpu/sparc/cpu.c
index 5bea27fda5..8916a7d988 100644
--- a/cpukit/score/cpu/sparc/cpu.c
+++ b/cpukit/score/cpu/sparc/cpu.c
@@ -8,6 +8,8 @@
* COPYRIGHT (c) 1989-2007.
* On-Line Applications Research Corporation (OAR).
*
+ * Copyright (c) 2017 embedded brains GmbH
+ *
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
* http://www.rtems.org/license/LICENSE.
@@ -21,6 +23,7 @@
#include <rtems/score/isr.h>
#include <rtems/score/percpu.h>
#include <rtems/score/tls.h>
+#include <rtems/score/thread.h>
#include <rtems/rtems/cache.h>
#if SPARC_HAS_FPU == 1
@@ -29,6 +32,14 @@
== SPARC_PER_CPU_FSR_OFFSET,
SPARC_PER_CPU_FSR_OFFSET
);
+
+ #if defined(SPARC_USE_LAZY_FP_SWITCH)
+ RTEMS_STATIC_ASSERT(
+ offsetof( Per_CPU_Control, cpu_per_cpu.fp_owner)
+ == SPARC_PER_CPU_FP_OWNER_OFFSET,
+ SPARC_PER_CPU_FP_OWNER_OFFSET
+ );
+ #endif
#endif
#define SPARC_ASSERT_OFFSET(field, off) \
@@ -99,6 +110,30 @@ SPARC_ASSERT_ISF_OFFSET(i7, I7);
SPARC_ASSERT_ISF_OFFSET(y, Y);
SPARC_ASSERT_ISF_OFFSET(tpc, TPC);
+#define SPARC_ASSERT_FP_OFFSET(field, off) \
+ RTEMS_STATIC_ASSERT( \
+ offsetof(Context_Control_fp, field) == SPARC_FP_CONTEXT_OFFSET_ ## off, \
+ Context_Control_fp_offset_ ## field \
+ )
+
+SPARC_ASSERT_FP_OFFSET(f0_f1, F0_F1);
+SPARC_ASSERT_FP_OFFSET(f2_f3, F2_F3);
+SPARC_ASSERT_FP_OFFSET(f4_f5, F4_F5);
+SPARC_ASSERT_FP_OFFSET(f6_f7, F6_F7);
+SPARC_ASSERT_FP_OFFSET(f8_f9, F8_F9);
+SPARC_ASSERT_FP_OFFSET(f10_f11, F10_F11);
+SPARC_ASSERT_FP_OFFSET(f12_f13, F12_F13);
+SPARC_ASSERT_FP_OFFSET(f14_f15, F14_F15);
+SPARC_ASSERT_FP_OFFSET(f16_f17, F16_F17);
+SPARC_ASSERT_FP_OFFSET(f18_f19, F18_F19);
+SPARC_ASSERT_FP_OFFSET(f20_f21, F20_F21);
+SPARC_ASSERT_FP_OFFSET(f22_f23, F22_F23);
+SPARC_ASSERT_FP_OFFSET(f24_f25, F24_F25);
+SPARC_ASSERT_FP_OFFSET(f26_f27, F26_F27);
+SPARC_ASSERT_FP_OFFSET(f28_f29, F28_F29);
+SPARC_ASSERT_FP_OFFSET(f30_f31, F30_F31);
+SPARC_ASSERT_FP_OFFSET(fsr, FSR);
+
RTEMS_STATIC_ASSERT(
sizeof(SPARC_Minimum_stack_frame) == SPARC_MINIMUM_STACK_FRAME_SIZE,
SPARC_MINIMUM_STACK_FRAME_SIZE
@@ -110,10 +145,6 @@ RTEMS_STATIC_ASSERT(
CPU_Interrupt_frame_alignment
);
-#if (SPARC_HAS_FPU == 1) && !defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
-Context_Control_fp _CPU_Null_fp_context;
-#endif
-
/*
* _CPU_Initialize
*
@@ -129,22 +160,16 @@ Context_Control_fp _CPU_Null_fp_context;
void _CPU_Initialize(void)
{
-#if (SPARC_HAS_FPU == 1) && !defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
- Context_Control_fp *pointer;
- uint32_t psr;
-
- sparc_get_psr( psr );
- psr |= SPARC_PSR_EF_MASK;
- sparc_set_psr( psr );
-
- /*
- * This seems to be the most appropriate way to obtain an initial
- * FP context on the SPARC. The NULL fp context is copied it to
- * the task's FP context during Context_Initialize.
- */
-
- pointer = &_CPU_Null_fp_context;
- _CPU_Context_save_fp( &pointer );
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ __asm__ volatile (
+ ".global SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET\n"
+ ".set SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET, %0\n"
+ ".global SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET\n"
+ ".set SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET, %1\n"
+ :
+ : "i" (offsetof(Thread_Control, Registers.fp_context)),
+ "i" (offsetof(Thread_Control, fp_context))
+ );
#endif
}
diff --git a/cpukit/score/cpu/sparc/cpu_asm.S b/cpukit/score/cpu/sparc/cpu_asm.S
deleted file mode 100644
index 66d36935a3..0000000000
--- a/cpukit/score/cpu/sparc/cpu_asm.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/* cpu_asm.s
- *
- * This file contains the basic algorithms for all assembly code used
- * in an specific CPU port of RTEMS. These algorithms must be implemented
- * in assembly language.
- *
- * COPYRIGHT (c) 1989-2011.
- * On-Line Applications Research Corporation (OAR).
- *
- * The license and distribution terms for this file may be
- * found in the file LICENSE in this distribution or at
- * http://www.rtems.org/license/LICENSE.
- *
- * Ported to ERC32 implementation of the SPARC by On-Line Applications
- * Research Corporation (OAR) under contract to the European Space
- * Agency (ESA).
- *
- * ERC32 modifications of respective RTEMS file: COPYRIGHT (c) 1995.
- * European Space Agency.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <rtems/asm.h>
-#include <rtems/system.h>
-
-#if (SPARC_HAS_FPU == 1) && !defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
-
-/*
- * void _CPU_Context_save_fp(
- * void **fp_context_ptr
- * )
- *
- * This routine is responsible for saving the FP context
- * at *fp_context_ptr. If the point to load the FP context
- * from is changed then the pointer is modified by this routine.
- *
- * NOTE: See the README in this directory for information on the
- * management of the "EF" bit in the PSR.
- */
-
- .align 4
- PUBLIC(_CPU_Context_save_fp)
-SYM(_CPU_Context_save_fp):
- ld [%o0], %o1
- std %f0, [%o1 + FO_F1_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f2, [%o1 + F2_F3_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f4, [%o1 + F4_F5_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f6, [%o1 + F6_F7_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f8, [%o1 + F8_F9_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f10, [%o1 + F1O_F11_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f12, [%o1 + F12_F13_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f14, [%o1 + F14_F15_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f16, [%o1 + F16_F17_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f18, [%o1 + F18_F19_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f20, [%o1 + F2O_F21_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f22, [%o1 + F22_F23_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f24, [%o1 + F24_F25_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f26, [%o1 + F26_F27_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f28, [%o1 + F28_F29_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
- std %f30, [%o1 + F3O_F31_OFFSET]
- SPARC_LEON3FT_B2BST_NOP
-#if defined(__FIX_LEON3FT_B2BST)
- st %fsr, [%o1 + FSR_OFFSET]
- jmp %o7 + 8
- nop
-#else
- jmp %o7 + 8
- st %fsr, [%o1 + FSR_OFFSET]
-#endif
-
-/*
- * void _CPU_Context_restore_fp(
- * void **fp_context_ptr
- * )
- *
- * This routine is responsible for restoring the FP context
- * at *fp_context_ptr. If the point to load the FP context
- * from is changed then the pointer is modified by this routine.
- *
- * NOTE: See the README in this directory for information on the
- * management of the "EF" bit in the PSR.
- */
-
- .align 4
- PUBLIC(_CPU_Context_restore_fp)
-SYM(_CPU_Context_restore_fp):
- ld [%o0], %o1
- ldd [%o1 + FO_F1_OFFSET], %f0
- ldd [%o1 + F2_F3_OFFSET], %f2
- ldd [%o1 + F4_F5_OFFSET], %f4
- ldd [%o1 + F6_F7_OFFSET], %f6
- ldd [%o1 + F8_F9_OFFSET], %f8
- ldd [%o1 + F1O_F11_OFFSET], %f10
- ldd [%o1 + F12_F13_OFFSET], %f12
- ldd [%o1 + F14_F15_OFFSET], %f14
- ldd [%o1 + F16_F17_OFFSET], %f16
- ldd [%o1 + F18_F19_OFFSET], %f18
- ldd [%o1 + F2O_F21_OFFSET], %f20
- ldd [%o1 + F22_F23_OFFSET], %f22
- ldd [%o1 + F24_F25_OFFSET], %f24
- ldd [%o1 + F26_F27_OFFSET], %f26
- ldd [%o1 + F28_F29_OFFSET], %f28
- ldd [%o1 + F3O_F31_OFFSET], %f30
- jmp %o7 + 8
- ld [%o1 + FSR_OFFSET], %fsr
-
-#endif /* SPARC_HAS_FPU */
-
-/* end of file */
diff --git a/cpukit/score/cpu/sparc/rtems/score/cpu.h b/cpukit/score/cpu/sparc/rtems/score/cpu.h
index 48db756ea6..2b50592e08 100644
--- a/cpukit/score/cpu/sparc/rtems/score/cpu.h
+++ b/cpukit/score/cpu/sparc/rtems/score/cpu.h
@@ -30,17 +30,28 @@ extern "C" {
/*
* The SPARC ABI is a bit special with respect to the floating point context.
- * The complete floating point context is volatile. Thus from an ABI point
+ * The complete floating point context is volatile. Thus, from an ABI point
* of view nothing needs to be saved and restored during a context switch.
* Instead the floating point context must be saved and restored during
- * interrupt processing. Historically, the deferred floating point switch is
+ * interrupt processing. Historically, the deferred floating point switch was
* used for SPARC and the complete floating point context is saved and
* restored during a context switch to the new floating point unit owner.
* This is a bit dangerous since post-switch actions (e.g. signal handlers)
* and context switch extensions may silently corrupt the floating point
- * context. The floating point unit is disabled for interrupt handlers.
- * Thus in case an interrupt handler uses the floating point unit then this
- * will result in a trap.
+ * context.
+ *
+ * The floating point unit is disabled for interrupt handlers. Thus, in case
+ * an interrupt handler uses the floating point unit then this will result in a
+ * trap (INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT).
+ *
+ * In uniprocessor configurations, a lazy floating point context switch is
+ * used. In case an active floating point thread is interrupted (PSR[EF] == 1)
+ * and a thread dispatch is carried out, then this thread is registered as the
+ * floating point owner. When a floating point owner is present during a
+ * context switch, the floating point unit is disabled for the heir thread
+ * (PSR[EF] == 0). The floating point disabled trap checks that the use of the
+ * floating point unit is allowed and saves/restores the floating point context
+ * on demand.
*
* In SMP configurations, the deferred floating point switch is not supported
* in principle. So, use here a synchronous floating point switching.
@@ -52,6 +63,8 @@ extern "C" {
#if SPARC_HAS_FPU == 1
#if defined(RTEMS_SMP)
#define SPARC_USE_SYNCHRONOUS_FP_SWITCH
+ #else
+ #define SPARC_USE_LAZY_FP_SWITCH
#endif
#endif
@@ -152,28 +165,7 @@ extern "C" {
*/
#define CPU_IDLE_TASK_IS_FP FALSE
-/**
- * Should the saving of the floating point registers be deferred
- * until a context switch is made to another different floating point
- * task?
- *
- * - If TRUE, then the floating point context will not be stored until
- * necessary. It will remain in the floating point registers and not
- * disturned until another floating point task is switched to.
- *
- * - If FALSE, then the floating point context is saved when a floating
- * point task is switched out and restored when the next floating point
- * task is restored. The state of the floating point registers between
- * those two operations is not specified.
- *
- * On the SPARC, we can disable the FPU for integer only tasks so
- * it is safe to defer floating point context switches.
- */
-#if defined(SPARC_USE_SYNCHRONOUS_FP_SWITCH)
- #define CPU_USE_DEFERRED_FP_SWITCH FALSE
-#else
- #define CPU_USE_DEFERRED_FP_SWITCH TRUE
-#endif
+#define CPU_USE_DEFERRED_FP_SWITCH FALSE
#define CPU_ENABLE_ROBUST_THREAD_DISPATCH FALSE
@@ -356,6 +348,7 @@ typedef struct {
/**@{**/
#ifndef ASM
+typedef struct Context_Control_fp Context_Control_fp;
/**
* @brief SPARC basic context.
@@ -433,6 +426,10 @@ typedef struct {
*/
uint32_t isr_dispatch_disable;
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ Context_Control_fp *fp_context;
+#endif
+
#if defined(RTEMS_SMP)
volatile uint32_t is_executing;
#endif
@@ -528,7 +525,7 @@ typedef struct {
*
* This structure defines floating point context area.
*/
-typedef struct {
+struct Context_Control_fp {
/** This will contain the contents of the f0 and f1 register. */
double f0_f1;
/** This will contain the contents of the f2 and f3 register. */
@@ -563,7 +560,7 @@ typedef struct {
double f30_f31;
/** This will contain the contents of the floating point status register. */
uint32_t fsr;
-} Context_Control_fp;
+};
#endif /* ASM */
@@ -670,13 +667,6 @@ typedef struct {
#ifndef ASM
/**
- * This variable is contains the initialize context for the FP unit.
- * It is filled in by _CPU_Initialize and copied into the task's FP
- * context area during _CPU_Context_Initialize.
- */
-extern Context_Control_fp _CPU_Null_fp_context;
-
-/**
* The following type defines an entry in the SPARC's trap table.
*
* NOTE: The instructions chosen are RTEMS dependent although one is
@@ -958,18 +948,22 @@ void _CPU_Context_Initialize(
_CPU_Context_restore( (_the_context) );
/**
- * This routine initializes the FP context area passed to it to.
- *
- * The SPARC allows us to use the simple initialization model
- * in which an "initial" FP context was saved into _CPU_Null_fp_context
- * at CPU initialization and it is simply copied into the destination
- * context.
+ * @brief Nothing to do due to the synchronous or lazy floating point switch.
*/
#define _CPU_Context_Initialize_fp( _destination ) \
- do { \
- *(*(_destination)) = _CPU_Null_fp_context; \
- } while (0)
+ do { } while ( 0 )
+/**
+ * @brief Nothing to do due to the synchronous or lazy floating point switch.
+ */
+#define _CPU_Context_save_fp( _fp_context_ptr ) \
+ do { } while ( 0 )
+
+/**
+ * @brief Nothing to do due to the synchronous or lazy floating point switch.
+ */
+#define _CPU_Context_restore_fp( _fp_context_ptr ) \
+ do { } while ( 0 )
/* end of Context handler macros */
/* Fatal Error manager macros */
@@ -1095,27 +1089,16 @@ void _CPU_Context_restore(
}
#endif
-/**
- * @brief SPARC specific save FPU method.
- *
- * This routine saves the floating point context passed to it.
- *
- * @param[in] fp_context_ptr is the area to save into
- */
-void _CPU_Context_save_fp(
- Context_Control_fp **fp_context_ptr
-);
-
-/**
- * @brief SPARC specific restore FPU method.
- *
- * This routine restores the floating point context passed to it.
- *
- * @param[in] fp_context_ptr is the area to restore from
- */
-void _CPU_Context_restore_fp(
- Context_Control_fp **fp_context_ptr
-);
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+#define _CPU_Context_Destroy( _the_thread, _the_context ) \
+ do { \
+ Per_CPU_Control *cpu_self = _Per_CPU_Get(); \
+ Thread_Control *_fp_owner = cpu_self->cpu_per_cpu.fp_owner; \
+ if ( _fp_owner == _the_thread ) { \
+ cpu_self->cpu_per_cpu.fp_owner = NULL; \
+ } \
+ } while ( 0 )
+#endif
void _CPU_Context_volatile_clobber( uintptr_t pattern );
diff --git a/cpukit/score/cpu/sparc/rtems/score/cpuimpl.h b/cpukit/score/cpu/sparc/rtems/score/cpuimpl.h
index 5563db8911..6ee913f28c 100644
--- a/cpukit/score/cpu/sparc/rtems/score/cpuimpl.h
+++ b/cpukit/score/cpu/sparc/rtems/score/cpuimpl.h
@@ -67,7 +67,28 @@
/** This defines the size of the ISF area for use in assembly. */
#define CPU_INTERRUPT_FRAME_SIZE SPARC_MINIMUM_STACK_FRAME_SIZE + 0x50
-#if ( SPARC_HAS_FPU == 1 )
+#define SPARC_FP_CONTEXT_OFFSET_F0_F1 0
+#define SPARC_FP_CONTEXT_OFFSET_F2_F3 8
+#define SPARC_FP_CONTEXT_OFFSET_F4_F5 16
+#define SPARC_FP_CONTEXT_OFFSET_F6_F7 24
+#define SPARC_FP_CONTEXT_OFFSET_F8_F9 32
+#define SPARC_FP_CONTEXT_OFFSET_F10_F11 40
+#define SPARC_FP_CONTEXT_OFFSET_F12_F13 48
+#define SPARC_FP_CONTEXT_OFFSET_F14_F15 56
+#define SPARC_FP_CONTEXT_OFFSET_F16_F17 64
+#define SPARC_FP_CONTEXT_OFFSET_F18_F19 72
+#define SPARC_FP_CONTEXT_OFFSET_F20_F21 80
+#define SPARC_FP_CONTEXT_OFFSET_F22_F23 88
+#define SPARC_FP_CONTEXT_OFFSET_F24_F25 96
+#define SPARC_FP_CONTEXT_OFFSET_F26_F27 104
+#define SPARC_FP_CONTEXT_OFFSET_F28_F29 112
+#define SPARC_FP_CONTEXT_OFFSET_F30_F31 120
+#define SPARC_FP_CONTEXT_OFFSET_FSR 128
+
+
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ #define CPU_PER_CPU_CONTROL_SIZE 8
+#elif ( SPARC_HAS_FPU == 1 )
#define CPU_PER_CPU_CONTROL_SIZE 8
#else
#define CPU_PER_CPU_CONTROL_SIZE 0
@@ -79,6 +100,14 @@
* Per_CPU_Control begin.
*/
#define SPARC_PER_CPU_FSR_OFFSET 0
+
+ #if defined(SPARC_USE_LAZY_FP_SWITCH)
+ /**
+ * @brief Offset of the CPU_Per_CPU_control::fp_owner field relative to the
+ * Per_CPU_Control begin.
+ */
+ #define SPARC_PER_CPU_FP_OWNER_OFFSET 4
+ #endif
#endif
#ifndef ASM
@@ -98,9 +127,16 @@ typedef struct {
*/
uint32_t fsr;
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ /**
+ * @brief The current floating point owner.
+ */
+ struct _Thread_Control *fp_owner;
+#else
/* See Per_CPU_Control::Interrupt_frame */
uint32_t reserved_for_alignment_of_interrupt_frame;
#endif
+#endif
} CPU_Per_CPU_control;
/**
diff --git a/cpukit/score/cpu/sparc/sparc-context-validate.S b/cpukit/score/cpu/sparc/sparc-context-validate.S
index 777f4dd8a6..83ecc54156 100644
--- a/cpukit/score/cpu/sparc/sparc-context-validate.S
+++ b/cpukit/score/cpu/sparc/sparc-context-validate.S
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015 embedded brains GmbH. All rights reserved.
+ * Copyright (c) 2015, 2017 embedded brains GmbH. All rights reserved.
*
* embedded brains GmbH
* Dornierstr. 4
@@ -18,6 +18,7 @@
#include <rtems/asm.h>
#include <rtems/score/cpuimpl.h>
+#include <rtems/score/percpu.h>
#define FRAME_OFFSET_BUFFER_0 (SPARC_MINIMUM_STACK_FRAME_SIZE)
#define FRAME_OFFSET_BUFFER_1 (FRAME_OFFSET_BUFFER_0 + 0x04)
@@ -73,13 +74,15 @@
PUBLIC(_CPU_Context_validate)
SYM(_CPU_Context_validate):
- /*
- * g2 checks if the Floating Point Unit in the Processor Status
- * Register (PSR) is set.
- */
+ /* g2 indicates if the FPU should be checked */
+#if defined(SPARC_USE_LAZY_FP_SWITCH)
+ ld [%g6 + PER_CPU_OFFSET_EXECUTING], %g2
+ ld [%g2 + %lo(SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET)], %g2
+#else
mov %psr, %g2
sethi %hi(SPARC_PSR_EF_MASK), %g3
and %g2, %g3, %g2
+#endif
/* g1 is used to save the original pattern */
mov %o0, %g1
diff --git a/testsuites/sptests/spsize/size.c b/testsuites/sptests/spsize/size.c
index 3414ec0c80..7b7b3a5dbb 100644
--- a/testsuites/sptests/spsize/size.c
+++ b/testsuites/sptests/spsize/size.c
@@ -410,8 +410,7 @@ uninitialized += (sizeof _CPU_Interrupt_stack_low) +
/* cpu.h */
uninitialized += (sizeof _CPU_Interrupt_stack_low) +
- (sizeof _CPU_Interrupt_stack_high) +
- (sizeof _CPU_Null_fp_context);
+ (sizeof _CPU_Interrupt_stack_high);
#endif
--
2.12.3
More information about the devel
mailing list