[PATCH 4/4] sparc: Optimize non-deferred FP save/restore
Gedare Bloom
gedare at gwu.edu
Fri May 29 14:22:57 UTC 2015
On Fri, May 29, 2015 at 8:54 AM, Sebastian Huber
<sebastian.huber at embedded-brains.de> wrote:
> Update #2270.
> ---
> cpukit/score/cpu/sparc/cpu.c | 16 ++++++++++++
> cpukit/score/cpu/sparc/cpu_asm.S | 44 ++++++++++++++++++++++++++++++++
> cpukit/score/cpu/sparc/rtems/score/cpu.h | 22 ++++++++++++++--
> 3 files changed, 80 insertions(+), 2 deletions(-)
>
> diff --git a/cpukit/score/cpu/sparc/cpu.c b/cpukit/score/cpu/sparc/cpu.c
> index bab0040..b2f67c4 100644
> --- a/cpukit/score/cpu/sparc/cpu.c
> +++ b/cpukit/score/cpu/sparc/cpu.c
> @@ -134,6 +134,10 @@ void _CPU_Initialize(void)
> #if (SPARC_HAS_FPU == 1)
> Context_Control_fp *pointer;
> uint32_t psr;
> +#if CPU_USE_DEFERRED_FP_SWITCH == FALSE
> + Per_CPU_Control *cpu_self;
> + uint32_t isr_dispatch_disable;
> +#endif
>
> sparc_get_psr( psr );
> psr |= SPARC_PSR_EF_MASK;
> @@ -146,7 +150,19 @@ void _CPU_Initialize(void)
> */
>
> pointer = &_CPU_Null_fp_context;
> +
> +#if CPU_USE_DEFERRED_FP_SWITCH == FALSE
> + cpu_self = _Per_CPU_Get();
> + isr_dispatch_disable = cpu_self->cpu_per_cpu.isr_dispatch_disable;
> + cpu_self->cpu_per_cpu.isr_dispatch_disable = 1;
> + pointer->is_valid = 1;
> +#endif
> +
> _CPU_Context_save_fp( &pointer );
> +
> +#if CPU_USE_DEFERRED_FP_SWITCH == FALSE
> + cpu_self->cpu_per_cpu.isr_dispatch_disable = isr_dispatch_disable;
> +#endif
> #endif
> }
>
> diff --git a/cpukit/score/cpu/sparc/cpu_asm.S b/cpukit/score/cpu/sparc/cpu_asm.S
> index 3fa0532..fd63f19 100644
> --- a/cpukit/score/cpu/sparc/cpu_asm.S
> +++ b/cpukit/score/cpu/sparc/cpu_asm.S
> @@ -44,7 +44,26 @@
> .align 4
> PUBLIC(_CPU_Context_save_fp)
> SYM(_CPU_Context_save_fp):
> +#if CPU_USE_DEFERRED_FP_SWITCH == TRUE
> ld [%o0], %o1
> +#else
> + /*
> + * On SMP configurations we cannot use the deferred floating point
> + * switch. Save the FP context only in case we have an interrupted
> + * thread (indicated by a non-zero per-CPU ISR dispatch disable flag).
> + * This works since the complete floating point context is volatile on
> + * SPARC.
> + */
> + ld [%g6 + SPARC_PER_CPU_ISR_DISPATCH_DISABLE], %o2
> + cmp %o2, 0
> + bne do_save_fp
> + ld [%o0], %o1
> + jmp %o7 + 8
> + nop
> +
> +do_save_fp:
> +#endif
> +
> std %f0, [%o1 + FO_F1_OFFSET]
> std %f2, [%o1 + F2_F3_OFFSET]
> std %f4, [%o1 + F4_F5_OFFSET]
> @@ -61,6 +80,15 @@ SYM(_CPU_Context_save_fp):
> std %f26, [%o1 + F26_F27_OFFSET]
> std %f28, [%o1 + F28_F29_OFFSET]
> std %f30, [%o1 + F3O_F31_OFFSET]
> +
> +#if CPU_USE_DEFERRED_FP_SWITCH == FALSE
> + /*
> + * Set the is valid flag of the floating point context to true. It is
> + * store here to store the fields in sequence.
> + */
> + st %o2, [%o1 + SPARC_FP_CONTEXT_IS_VALID_OFFSET]
> +#endif
> +
> jmp %o7 + 8
> st %fsr, [%o1 + FSR_OFFSET]
>
> @@ -81,6 +109,22 @@ SYM(_CPU_Context_save_fp):
> PUBLIC(_CPU_Context_restore_fp)
> SYM(_CPU_Context_restore_fp):
> ld [%o0], %o1
> +
> +#if CPU_USE_DEFERRED_FP_SWITCH == FALSE
> + /*
> + * Check if we have a valid floating point context. Clear the is valid
> + * flag. Restore the floating point context if necessary.
> + */
> + ld [%o1 + SPARC_FP_CONTEXT_IS_VALID_OFFSET], %o2
> + cmp %o2, 0
> + bne do_restore_fp
> + st %g0, [%o1 + SPARC_FP_CONTEXT_IS_VALID_OFFSET]
> + jmp %o7 + 8
> + nop
> +
> +do_restore_fp:
> +#endif
> +
> ldd [%o1 + FO_F1_OFFSET], %f0
> ldd [%o1 + F2_F3_OFFSET], %f2
> ldd [%o1 + F4_F5_OFFSET], %f4
> diff --git a/cpukit/score/cpu/sparc/rtems/score/cpu.h b/cpukit/score/cpu/sparc/rtems/score/cpu.h
> index 02891b0..e51a4ab 100644
> --- a/cpukit/score/cpu/sparc/rtems/score/cpu.h
> +++ b/cpukit/score/cpu/sparc/rtems/score/cpu.h
> @@ -609,6 +609,10 @@ typedef struct {
> double f28_f29;
> /** This will contain the contents of the f30 and f31 register. */
> double f30_f31;
> +#if CPU_USE_DEFERRED_FP_SWITCH == FALSE
> + /** Indicates if the context is valid. */
> + uint32_t is_valid;
Is it the case that is_valid is only true during a context switch
that happens while handling an interrupt (dispatch disabled)? I'd
appreciate a bit more clarity in this comment. Is there a need to put
is_valid before fsr? If the #if-controlled field goes to the end of the
block, then that can simplify the offset calculations below.
> +#endif
> /** This will contain the contents of the floating point status register. */
> uint32_t fsr;
> } Context_Control_fp;
> @@ -651,11 +655,25 @@ typedef struct {
> #define F28_F29_OFFSET 0x70
> /** This macro defines an offset into the FPU context for use in assembly. */
> #define F3O_F31_OFFSET 0x78
> +
> +#if CPU_USE_DEFERRED_FP_SWITCH == FALSE
> + /** This macro defines an offset into the FPU context for use in assembly. */
> + #define SPARC_FP_CONTEXT_IS_VALID_OFFSET 0x80
> +#endif
> +
> /** This macro defines an offset into the FPU context for use in assembly. */
> -#define FSR_OFFSET 0x80
> +#if CPU_USE_DEFERRED_FP_SWITCH == TRUE
> + #define FSR_OFFSET 0x80
> +#else
> + #define FSR_OFFSET 0x84
> +#endif
>
> /** This defines the size of the FPU context area for use in assembly. */
> -#define CONTEXT_CONTROL_FP_SIZE 0x84
> +#if CPU_USE_DEFERRED_FP_SWITCH == TRUE
> + #define CONTEXT_CONTROL_FP_SIZE 0x84
> +#else
> + #define CONTEXT_CONTROL_FP_SIZE 0x88
> +#endif
>
I'd prefer that the #if checks always go in the same direction, i.e.,
always check whether it is FALSE.
> #ifndef ASM
>
> --
> 1.8.4.5
>
> _______________________________________________
> devel mailing list
> devel at rtems.org
> http://lists.rtems.org/mailman/listinfo/devel
More information about the devel
mailing list