[PATCH 2/4] sparc: Avoid new window for FP save/restore
Gedare Bloom
gedare at gwu.edu
Fri May 29 14:07:14 UTC 2015
This is a straightforward leaf optimization. One question: is there a reason to
use `jmp %o7 + 8` instead of `ret`?
On Fri, May 29, 2015 at 8:54 AM, Sebastian Huber
<sebastian.huber at embedded-brains.de> wrote:
> Update #2270.
> ---
> cpukit/score/cpu/sparc/cpu_asm.S | 102 ++++++++++++++++++---------------------
> 1 file changed, 48 insertions(+), 54 deletions(-)
>
> diff --git a/cpukit/score/cpu/sparc/cpu_asm.S b/cpukit/score/cpu/sparc/cpu_asm.S
> index 92674de..ddb2a13 100644
> --- a/cpukit/score/cpu/sparc/cpu_asm.S
> +++ b/cpukit/score/cpu/sparc/cpu_asm.S
> @@ -44,38 +44,35 @@
> .align 4
> PUBLIC(_CPU_Context_save_fp)
> SYM(_CPU_Context_save_fp):
> - save %sp, -CPU_MINIMUM_STACK_FRAME_SIZE, %sp
> -
> /*
> * The following enables the floating point unit.
> */
>
> - mov %psr, %l0
> - sethi %hi(SPARC_PSR_EF_MASK), %l1
> - or %l1, %lo(SPARC_PSR_EF_MASK), %l1
> - or %l0, %l1, %l0
> - mov %l0, %psr ! **** ENABLE FLOAT ACCESS ****
> + mov %psr, %o1
> + sethi %hi(SPARC_PSR_EF_MASK), %o2
> + or %o2, %lo(SPARC_PSR_EF_MASK), %o2
> + or %o1, %o2, %o1
> + mov %o1, %psr ! **** ENABLE FLOAT ACCESS ****
> nop; nop; nop; ! Need three nops before EF is
> - ld [%i0], %l0 ! active due to pipeline delay!!!
> - std %f0, [%l0 + FO_F1_OFFSET]
> - std %f2, [%l0 + F2_F3_OFFSET]
> - std %f4, [%l0 + F4_F5_OFFSET]
> - std %f6, [%l0 + F6_F7_OFFSET]
> - std %f8, [%l0 + F8_F9_OFFSET]
> - std %f10, [%l0 + F1O_F11_OFFSET]
> - std %f12, [%l0 + F12_F13_OFFSET]
> - std %f14, [%l0 + F14_F15_OFFSET]
> - std %f16, [%l0 + F16_F17_OFFSET]
> - std %f18, [%l0 + F18_F19_OFFSET]
> - std %f20, [%l0 + F2O_F21_OFFSET]
> - std %f22, [%l0 + F22_F23_OFFSET]
> - std %f24, [%l0 + F24_F25_OFFSET]
> - std %f26, [%l0 + F26_F27_OFFSET]
> - std %f28, [%l0 + F28_F29_OFFSET]
> - std %f30, [%l0 + F3O_F31_OFFSET]
> - st %fsr, [%l0 + FSR_OFFSET]
> - ret
> - restore
> + ld [%o0], %o1 ! active due to pipeline delay!!!
> + std %f0, [%o1 + FO_F1_OFFSET]
> + std %f2, [%o1 + F2_F3_OFFSET]
> + std %f4, [%o1 + F4_F5_OFFSET]
> + std %f6, [%o1 + F6_F7_OFFSET]
> + std %f8, [%o1 + F8_F9_OFFSET]
> + std %f10, [%o1 + F1O_F11_OFFSET]
> + std %f12, [%o1 + F12_F13_OFFSET]
> + std %f14, [%o1 + F14_F15_OFFSET]
> + std %f16, [%o1 + F16_F17_OFFSET]
> + std %f18, [%o1 + F18_F19_OFFSET]
> + std %f20, [%o1 + F2O_F21_OFFSET]
> + std %f22, [%o1 + F22_F23_OFFSET]
> + std %f24, [%o1 + F24_F25_OFFSET]
> + std %f26, [%o1 + F26_F27_OFFSET]
> + std %f28, [%o1 + F28_F29_OFFSET]
> + std %f30, [%o1 + F3O_F31_OFFSET]
> + jmp %o7 + 8
> + st %fsr, [%o1 + FSR_OFFSET]
>
> /*
> * void _CPU_Context_restore_fp(
> @@ -93,38 +90,35 @@ SYM(_CPU_Context_save_fp):
> .align 4
> PUBLIC(_CPU_Context_restore_fp)
> SYM(_CPU_Context_restore_fp):
> - save %sp, -CPU_MINIMUM_STACK_FRAME_SIZE , %sp
> -
> /*
> * The following enables the floating point unit.
> */
>
> - mov %psr, %l0
> - sethi %hi(SPARC_PSR_EF_MASK), %l1
> - or %l1, %lo(SPARC_PSR_EF_MASK), %l1
> - or %l0, %l1, %l0
> - mov %l0, %psr ! **** ENABLE FLOAT ACCESS ****
> + mov %psr, %o1
> + sethi %hi(SPARC_PSR_EF_MASK), %o2
> + or %o2, %lo(SPARC_PSR_EF_MASK), %o2
> + or %o1, %o2, %o1
> + mov %o1, %psr ! **** ENABLE FLOAT ACCESS ****
> nop; nop; nop; ! Need three nops before EF is
> - ld [%i0], %l0 ! active due to pipeline delay!!!
> - ldd [%l0 + FO_F1_OFFSET], %f0
> - ldd [%l0 + F2_F3_OFFSET], %f2
> - ldd [%l0 + F4_F5_OFFSET], %f4
> - ldd [%l0 + F6_F7_OFFSET], %f6
> - ldd [%l0 + F8_F9_OFFSET], %f8
> - ldd [%l0 + F1O_F11_OFFSET], %f10
> - ldd [%l0 + F12_F13_OFFSET], %f12
> - ldd [%l0 + F14_F15_OFFSET], %f14
> - ldd [%l0 + F16_F17_OFFSET], %f16
> - ldd [%l0 + F18_F19_OFFSET], %f18
> - ldd [%l0 + F2O_F21_OFFSET], %f20
> - ldd [%l0 + F22_F23_OFFSET], %f22
> - ldd [%l0 + F24_F25_OFFSET], %f24
> - ldd [%l0 + F26_F27_OFFSET], %f26
> - ldd [%l0 + F28_F29_OFFSET], %f28
> - ldd [%l0 + F3O_F31_OFFSET], %f30
> - ld [%l0 + FSR_OFFSET], %fsr
> - ret
> - restore
> + ld [%o0], %o1 ! active due to pipeline delay!!!
> + ldd [%o1 + FO_F1_OFFSET], %f0
> + ldd [%o1 + F2_F3_OFFSET], %f2
> + ldd [%o1 + F4_F5_OFFSET], %f4
> + ldd [%o1 + F6_F7_OFFSET], %f6
> + ldd [%o1 + F8_F9_OFFSET], %f8
> + ldd [%o1 + F1O_F11_OFFSET], %f10
> + ldd [%o1 + F12_F13_OFFSET], %f12
> + ldd [%o1 + F14_F15_OFFSET], %f14
> + ldd [%o1 + F16_F17_OFFSET], %f16
> + ldd [%o1 + F18_F19_OFFSET], %f18
> + ldd [%o1 + F2O_F21_OFFSET], %f20
> + ldd [%o1 + F22_F23_OFFSET], %f22
> + ldd [%o1 + F24_F25_OFFSET], %f24
> + ldd [%o1 + F26_F27_OFFSET], %f26
> + ldd [%o1 + F28_F29_OFFSET], %f28
> + ldd [%o1 + F3O_F31_OFFSET], %f30
> + jmp %o7 + 8
> + ld [%o1 + FSR_OFFSET], %fsr
>
> #endif /* SPARC_HAS_FPU */
>
> --
> 1.8.4.5
>
> _______________________________________________
> devel mailing list
> devel at rtems.org
> http://lists.rtems.org/mailman/listinfo/devel
More information about the devel mailing list