[PATCH 1/2] arm: Add support for FPv4-SP floating point unit
Gedare Bloom
gedare at rtems.org
Mon Aug 11 21:02:55 UTC 2014
On Mon, Aug 11, 2014 at 4:48 PM, Sebastian Huber
<sebastian.huber at embedded-brains.de> wrote:
> This floating point unit is available in Cortex-M4F processors and
> defined by ARMv7-M. This adds basic support for other VFP-D16 variants.
> ---
> c/src/lib/libbsp/arm/shared/start/start.S | 20 ++++++++++++++
> cpukit/score/cpu/arm/arm-context-validate.S | 31 +++++++++++++---------
> .../score/cpu/arm/arm-context-volatile-clobber.S | 8 +++---
> cpukit/score/cpu/arm/arm_exc_interrupt.S | 12 ++++++---
> cpukit/score/cpu/arm/armv4-exception-default.S | 6 +++--
> cpukit/score/cpu/arm/armv7m-context-switch.c | 13 +++++++--
> cpukit/score/cpu/arm/armv7m-isr-dispatch.c | 30 +++++++++++++++++----
> cpukit/score/cpu/arm/cpu.c | 2 +-
> cpukit/score/cpu/arm/cpu_asm.S | 4 +--
> cpukit/score/cpu/arm/rtems/score/arm.h | 17 +++++++++---
> cpukit/score/cpu/arm/rtems/score/armv7m.h | 22 ++++++++++++++-
> cpukit/score/cpu/arm/rtems/score/cpu.h | 8 +++---
> 12 files changed, 133 insertions(+), 40 deletions(-)
>
> diff --git a/c/src/lib/libbsp/arm/shared/start/start.S b/c/src/lib/libbsp/arm/shared/start/start.S
> index 096e9bd..8c73b8d 100644
> --- a/c/src/lib/libbsp/arm/shared/start/start.S
> +++ b/c/src/lib/libbsp/arm/shared/start/start.S
> @@ -300,6 +300,26 @@ bsp_start_vector_table_end:
>
> _start:
>
> +#ifdef ARM_MULTILIB_VFP
> + /* CPACR is located at address 0xe000ed88 */
> + ldr.w r0, =0xe000ed88
Maybe the constant should be provided by a header file? The comments
are nice, though. I didn't see anything else in the rest of the patch,
although my ARM assembly is rusty.
> +
> + /* Read CPACR */
> + ldr r1, [r0]
> +
> + /* Set bits 20-23 to enable CP10 and CP11 co-processors */
> + orr r1, r1, #(0xf << 20)
> +
> + /* Write back the modified value to the CPACR */
> + str r1, [r0]
> +
> + /* Wait for store to complete */
> + dsb
> +
> + /* Reset pipeline now the FPU is enabled */
> + isb
> +#endif
> +
> ldr sp, =bsp_stack_main_end
> ldr lr, =bsp_start_hook_0_done + 1
> b bsp_start_hook_0
> diff --git a/cpukit/score/cpu/arm/arm-context-validate.S b/cpukit/score/cpu/arm/arm-context-validate.S
> index f2772b4..fdfb6c1 100644
> --- a/cpukit/score/cpu/arm/arm-context-validate.S
> +++ b/cpukit/score/cpu/arm/arm-context-validate.S
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2013 embedded brains GmbH. All rights reserved.
> + * Copyright (c) 2013-2014 embedded brains GmbH. All rights reserved.
> *
> * embedded brains GmbH
> * Dornierstr. 4
> @@ -29,7 +29,7 @@
> #define FRAME_OFFSET_R11 28
> #define FRAME_OFFSET_LR 32
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> #define FRAME_OFFSET_D8 40
> #define FRAME_OFFSET_D9 48
> #define FRAME_OFFSET_D10 56
> @@ -71,7 +71,7 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
> mov r1, lr
> str r1, [sp, #FRAME_OFFSET_LR]
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> vstr d8, [sp, #FRAME_OFFSET_D8]
> vstr d9, [sp, #FRAME_OFFSET_D9]
> vstr d10, [sp, #FRAME_OFFSET_D10]
> @@ -96,11 +96,15 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
> .endm
>
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> /* R3 contains the FPSCR */
> vmrs r3, FPSCR
> movs r4, #0x001f
> +#ifdef ARM_MULTILIB_ARCH_V7M
> + movt r4, #0xf000
> +#else
> movt r4, #0xf800
> +#endif
> bic r3, r3, r4
> and r4, r4, r0
> orr r3, r3, r4
> @@ -120,7 +124,7 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
> fill_register r12
> fill_register lr
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> .macro fill_vfp_register reg
> add r1, r1, #1
> vmov \reg, r1, r1
> @@ -142,6 +146,7 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
> fill_vfp_register d13
> fill_vfp_register d14
> fill_vfp_register d15
> +#ifdef ARM_MULTILIB_VFP_D32
> fill_vfp_register d16
> fill_vfp_register d17
> fill_vfp_register d18
> @@ -158,7 +163,8 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
> fill_vfp_register d29
> fill_vfp_register d30
> fill_vfp_register d31
> -#endif
> +#endif /* ARM_MULTILIB_VFP_D32 */
> +#endif /* ARM_MULTILIB_VFP */
>
> /* Check */
> check:
> @@ -174,7 +180,7 @@ check:
>
> mov r1, r0
>
> -#ifndef ARM_MULTILIB_VFP_D32
> +#ifndef ARM_MULTILIB_VFP
> check_register r3
> #endif
>
> @@ -189,7 +195,7 @@ check:
> check_register r12
> check_register lr
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> b check_vfp
> #endif
>
> @@ -217,7 +223,7 @@ restore:
> ldr r1, [sp, #FRAME_OFFSET_LR]
> mov lr, r1
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> vldr d8, [sp, #FRAME_OFFSET_D8]
> vldr d9, [sp, #FRAME_OFFSET_D9]
> vldr d10, [sp, #FRAME_OFFSET_D10]
> @@ -234,7 +240,7 @@ restore:
>
> FUNCTION_END(_CPU_Context_validate)
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> check_vfp:
>
> .macro check_vfp_register reg
> @@ -270,6 +276,7 @@ check_vfp:
> check_vfp_register d13
> check_vfp_register d14
> check_vfp_register d15
> +#ifdef ARM_MULTILIB_VFP_D32
> check_vfp_register d16
> check_vfp_register d17
> check_vfp_register d18
> @@ -286,6 +293,7 @@ check_vfp:
> check_vfp_register d29
> check_vfp_register d30
> check_vfp_register d31
> +#endif /* ARM_MULTILIB_VFP_D32 */
>
> /* Restore r4 and r5 */
> mov r1, r0
> @@ -293,5 +301,4 @@ check_vfp:
> fill_register r5
>
> b check
> -
> -#endif
> +#endif /* ARM_MULTILIB_VFP */
> diff --git a/cpukit/score/cpu/arm/arm-context-volatile-clobber.S b/cpukit/score/cpu/arm/arm-context-volatile-clobber.S
> index 459acba..7970b8e 100644
> --- a/cpukit/score/cpu/arm/arm-context-volatile-clobber.S
> +++ b/cpukit/score/cpu/arm/arm-context-volatile-clobber.S
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2013 embedded brains GmbH. All rights reserved.
> + * Copyright (c) 2013-2014 embedded brains GmbH. All rights reserved.
> *
> * embedded brains GmbH
> * Dornierstr. 4
> @@ -27,7 +27,7 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_volatile_clobber)
> mov \reg, r0
> .endm
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> vmrs r1, FPSCR
> movs r2, #0x001f
> movt r2, #0xf800
> @@ -49,6 +49,7 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_volatile_clobber)
> clobber_vfp_register d5
> clobber_vfp_register d6
> clobber_vfp_register d7
> +#ifdef ARM_MULTILIB_VFP_D32
> clobber_vfp_register d16
> clobber_vfp_register d17
> clobber_vfp_register d18
> @@ -65,7 +66,8 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_volatile_clobber)
> clobber_vfp_register d29
> clobber_vfp_register d30
> clobber_vfp_register d31
> -#endif
> +#endif /* ARM_MULTILIB_VFP_D32 */
> +#endif /* ARM_MULTILIB_VFP */
>
> clobber_register r1
> clobber_register r2
> diff --git a/cpukit/score/cpu/arm/arm_exc_interrupt.S b/cpukit/score/cpu/arm/arm_exc_interrupt.S
> index e8026c8..7930c32 100644
> --- a/cpukit/score/cpu/arm/arm_exc_interrupt.S
> +++ b/cpukit/score/cpu/arm/arm_exc_interrupt.S
> @@ -75,13 +75,15 @@ _ARMV4_Exception_interrupt:
> stmdb sp!, CONTEXT_LIST
> stmdb sp!, {SP_OF_INTERRUPTED_CONTEXT, lr}
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> /* Save VFP context */
> vmrs r0, FPSCR
> vstmdb sp!, {d0-d7}
> +#ifdef ARM_MULTILIB_VFP_D32
> vstmdb sp!, {d16-d31}
> - stmdb sp!, {r0, r1}
> #endif
> + stmdb sp!, {r0, r1}
> +#endif /* ARM_MULTILIB_VFP */
>
> /* Get per-CPU control of current processor */
> GET_SELF_CPU_CONTROL SELF_CPU_CONTROL, r1
> @@ -166,13 +168,15 @@ thread_dispatch_done:
> /* Switch to ARM instructions if necessary */
> SWITCH_FROM_THUMB_TO_ARM
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> /* Restore VFP context */
> ldmia sp!, {r0, r1}
> +#ifdef ARM_MULTILIB_VFP_D32
> vldmia sp!, {d16-d31}
> +#endif
> vldmia sp!, {d0-d7}
> vmsr FPSCR, r0
> -#endif
> +#endif /* ARM_MULTILIB_VFP */
>
> /* Restore SP_OF_INTERRUPTED_CONTEXT register and link register */
> ldmia sp!, {SP_OF_INTERRUPTED_CONTEXT, lr}
> diff --git a/cpukit/score/cpu/arm/armv4-exception-default.S b/cpukit/score/cpu/arm/armv4-exception-default.S
> index 950ad67..8a97320 100644
> --- a/cpukit/score/cpu/arm/armv4-exception-default.S
> +++ b/cpukit/score/cpu/arm/armv4-exception-default.S
> @@ -118,7 +118,7 @@ save_more_context:
> /* Argument for high level handler */
> mov r0, sp
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> /* Ensure that the FPU is enabled */
> vmrs r1, FPEXC
> tst r1, #(1 << 30)
> @@ -132,10 +132,12 @@ save_more_context:
> vmrs r2, FPSCR
> stmia r4!, {r1-r2}
> vstmia r4!, {d0-d15}
> +#ifdef ARM_MULTILIB_VFP_D32
> vstmia r4!, {d16-d31}
> +#endif
>
> fpu_save_done:
> -#endif
> +#endif /* ARM_MULTILIB_VFP */
>
> /* Call high level handler */
> SWITCH_FROM_ARM_TO_THUMB r1
> diff --git a/cpukit/score/cpu/arm/armv7m-context-switch.c b/cpukit/score/cpu/arm/armv7m-context-switch.c
> index eabf2c8..359a1a7 100644
> --- a/cpukit/score/cpu/arm/armv7m-context-switch.c
> +++ b/cpukit/score/cpu/arm/armv7m-context-switch.c
> @@ -5,7 +5,7 @@
> */
>
> /*
> - * Copyright (c) 2011 Sebastian Huber. All rights reserved.
> + * Copyright (c) 2011-2014 Sebastian Huber. All rights reserved.
> *
> * embedded brains GmbH
> * Obere Lagerstr. 30
> @@ -37,17 +37,26 @@ void __attribute__((naked)) _CPU_Context_switch(
> "movt r2, #:upper16:_Per_CPU_Information\n"
> "ldr r3, [r2, %[isrpcpuoff]]\n"
> "stm r0, {r4-r11, lr}\n"
> +#ifdef ARM_MULTILIB_VFP
> + "add r4, r0, %[d8off]\n"
> + "vstm r4, {d8-d15}\n"
> +#endif
> "str sp, [r0, %[spctxoff]]\n"
> "str r3, [r0, %[isrctxoff]]\n"
> "ldr r3, [r1, %[isrctxoff]]\n"
> "ldr sp, [r1, %[spctxoff]]\n"
> +#ifdef ARM_MULTILIB_VFP
> + "add r4, r1, %[d8off]\n"
> + "vldm r4, {d8-d15}\n"
> +#endif
> "ldm r1, {r4-r11, lr}\n"
> "str r3, [r2, %[isrpcpuoff]]\n"
> "bx lr\n"
> :
> : [spctxoff] "J" (offsetof(Context_Control, register_sp)),
> [isrctxoff] "J" (offsetof(Context_Control, isr_nest_level)),
> - [isrpcpuoff] "J" (offsetof(Per_CPU_Control, isr_nest_level))
> + [isrpcpuoff] "J" (offsetof(Per_CPU_Control, isr_nest_level)),
> + [d8off] "J" (ARM_CONTEXT_CONTROL_D8_OFFSET)
> );
> }
>
> diff --git a/cpukit/score/cpu/arm/armv7m-isr-dispatch.c b/cpukit/score/cpu/arm/armv7m-isr-dispatch.c
> index 048ffa8..e460e9c 100644
> --- a/cpukit/score/cpu/arm/armv7m-isr-dispatch.c
> +++ b/cpukit/score/cpu/arm/armv7m-isr-dispatch.c
> @@ -5,7 +5,7 @@
> */
>
> /*
> - * Copyright (c) 2011 Sebastian Huber. All rights reserved.
> + * Copyright (c) 2011-2014 Sebastian Huber. All rights reserved.
> *
> * embedded brains GmbH
> * Obere Lagerstr. 30
> @@ -37,13 +37,27 @@ static void __attribute__((naked)) _ARMV7M_Thread_dispatch( void )
> );
> }
>
> +static void _ARMV7M_Trigger_lazy_floating_point_context_save( void )
> +{
> +#ifdef ARM_MULTILIB_VFP
> + __asm__ volatile (
> + "vmov.f32 s0, s0\n"
> + );
> +#endif
> +}
> +
> void _ARMV7M_Pendable_service_call( void )
> {
> + ARMV7M_Exception_frame *ef;
> +
> _ISR_Nest_level = 1;
> +
> _ARMV7M_SCB->icsr = ARMV7M_SCB_ICSR_PENDSVCLR;
> - ARMV7M_Exception_frame *ef = (ARMV7M_Exception_frame *) _ARMV7M_Get_PSP();
> + _ARMV7M_Trigger_lazy_floating_point_context_save();
> +
> + ef = (ARMV7M_Exception_frame *) _ARMV7M_Get_PSP();
> --ef;
> - _ARMV7M_Set_PSP((uint32_t) ef);
> + _ARMV7M_Set_PSP( (uint32_t) ef );
>
> /*
> * According to "ARMv7-M Architecture Reference Manual" section B1.5.6
> @@ -57,11 +71,17 @@ void _ARMV7M_Pendable_service_call( void )
>
> void _ARMV7M_Supervisor_call( void )
> {
> - ARMV7M_Exception_frame *ef = (ARMV7M_Exception_frame *) _ARMV7M_Get_PSP();
> + ARMV7M_Exception_frame *ef;
> +
> + _ARMV7M_Trigger_lazy_floating_point_context_save();
> +
> + ef = (ARMV7M_Exception_frame *) _ARMV7M_Get_PSP();
> ++ef;
> - _ARMV7M_Set_PSP((uint32_t) ef);
> + _ARMV7M_Set_PSP( (uint32_t) ef );
> +
> _ISR_Nest_level = 0;
> RTEMS_COMPILER_MEMORY_BARRIER();
> +
> if ( _Thread_Dispatch_necessary ) {
> _ARMV7M_Pendable_service_call();
> }
> diff --git a/cpukit/score/cpu/arm/cpu.c b/cpukit/score/cpu/arm/cpu.c
> index 089826e..9942c4a 100644
> --- a/cpukit/score/cpu/arm/cpu.c
> +++ b/cpukit/score/cpu/arm/cpu.c
> @@ -35,7 +35,7 @@
> #include <rtems/score/tls.h>
> #include <rtems/score/cpu.h>
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> RTEMS_STATIC_ASSERT(
> offsetof( Context_Control, register_d8 ) == ARM_CONTEXT_CONTROL_D8_OFFSET,
> ARM_CONTEXT_CONTROL_D8_OFFSET
> diff --git a/cpukit/score/cpu/arm/cpu_asm.S b/cpukit/score/cpu/arm/cpu_asm.S
> index d4355b4..344512b 100644
> --- a/cpukit/score/cpu/arm/cpu_asm.S
> +++ b/cpukit/score/cpu/arm/cpu_asm.S
> @@ -58,7 +58,7 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
> mrs r2, CPSR
> stmia r0, {r2, r4, r5, r6, r7, r8, r9, r10, r11, r13, r14}
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> add r3, r0, #ARM_CONTEXT_CONTROL_D8_OFFSET
> vstm r3, {d8-d15}
> #endif
> @@ -101,7 +101,7 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
> mcr p15, 0, r3, c13, c0, 3
> #endif
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> add r3, r1, #ARM_CONTEXT_CONTROL_D8_OFFSET
> vldm r3, {d8-d15}
> #endif
> diff --git a/cpukit/score/cpu/arm/rtems/score/arm.h b/cpukit/score/cpu/arm/rtems/score/arm.h
> index a105f17..586a8cb 100644
> --- a/cpukit/score/cpu/arm/rtems/score/arm.h
> +++ b/cpukit/score/cpu/arm/rtems/score/arm.h
> @@ -50,10 +50,19 @@ extern "C" {
> #define ARM_MULTILIB_HAS_THREAD_ID_REGISTER
> #endif
>
> -#if defined(__ARM_NEON__)
> - #define ARM_MULTILIB_VFP_D32
> -#elif !defined(__SOFTFP__)
> - #error "FPU support not implemented"
> +#if !defined(__SOFTFP__)
> + #if defined(__ARM_NEON__)
> + #define ARM_MULTILIB_VFP_D32
> + #elif defined(__VFP_FP__)
> + #define ARM_MULTILIB_VFP_D16
> + #else
> + #error "FPU support not implemented"
> + #endif
> +#endif
> +
> +#if defined(ARM_MULTILIB_VFP_D16) \
> + || defined(ARM_MULTILIB_VFP_D32)
> + #define ARM_MULTILIB_VFP
> #endif
>
> /*
> diff --git a/cpukit/score/cpu/arm/rtems/score/armv7m.h b/cpukit/score/cpu/arm/rtems/score/armv7m.h
> index b545859..c1f820b 100644
> --- a/cpukit/score/cpu/arm/rtems/score/armv7m.h
> +++ b/cpukit/score/cpu/arm/rtems/score/armv7m.h
> @@ -5,7 +5,7 @@
> */
>
> /*
> - * Copyright (c) 2011 Sebastian Huber. All rights reserved.
> + * Copyright (c) 2011-2014 Sebastian Huber. All rights reserved.
> *
> * embedded brains GmbH
> * Obere Lagerstr. 30
> @@ -47,6 +47,26 @@ typedef struct {
> void *register_lr;
> void *register_pc;
> uint32_t register_xpsr;
> +#ifdef ARM_MULTILIB_VFP
> + uint32_t register_s0;
> + uint32_t register_s1;
> + uint32_t register_s2;
> + uint32_t register_s3;
> + uint32_t register_s4;
> + uint32_t register_s5;
> + uint32_t register_s6;
> + uint32_t register_s7;
> + uint32_t register_s8;
> + uint32_t register_s9;
> + uint32_t register_s10;
> + uint32_t register_s11;
> + uint32_t register_s12;
> + uint32_t register_s13;
> + uint32_t register_s14;
> + uint32_t register_s15;
> + uint32_t register_fpscr;
> + uint32_t reserved;
> +#endif
> } ARMV7M_Exception_frame;
>
> typedef struct {
> diff --git a/cpukit/score/cpu/arm/rtems/score/cpu.h b/cpukit/score/cpu/arm/rtems/score/cpu.h
> index ad070df..089fc27 100644
> --- a/cpukit/score/cpu/arm/rtems/score/cpu.h
> +++ b/cpukit/score/cpu/arm/rtems/score/cpu.h
> @@ -8,7 +8,7 @@
> * This include file contains information pertaining to the ARM
> * processor.
> *
> - * Copyright (c) 2009-2013 embedded brains GmbH.
> + * Copyright (c) 2009-2014 embedded brains GmbH.
> *
> * Copyright (c) 2007 Ray Xu <Rayx.cn at gmail.com>
> *
> @@ -212,12 +212,12 @@
> #define ARM_CONTEXT_CONTROL_THREAD_ID_OFFSET 44
> #endif
>
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> #define ARM_CONTEXT_CONTROL_D8_OFFSET 48
> #endif
>
> #ifdef RTEMS_SMP
> - #ifdef ARM_MULTILIB_VFP_D32
> + #ifdef ARM_MULTILIB_VFP
> #define ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET 112
> #else
> #define ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET 48
> @@ -278,7 +278,7 @@ typedef struct {
> #ifdef ARM_MULTILIB_HAS_THREAD_ID_REGISTER
> uint32_t thread_id;
> #endif
> -#ifdef ARM_MULTILIB_VFP_D32
> +#ifdef ARM_MULTILIB_VFP
> uint64_t register_d8;
> uint64_t register_d9;
> uint64_t register_d10;
> --
> 1.8.1.4
>
> _______________________________________________
> devel mailing list
> devel at rtems.org
> http://lists.rtems.org/mailman/listinfo/devel
More information about the devel mailing list