[PATCH] powerpc: Add support for VRSAVE
Sebastian Huber
sebastian.huber at embedded-brains.de
Thu Sep 1 08:26:42 UTC 2022
The VRSAVE feature of the AltiVec unit can be used to reduce the number of
AltiVec registers that need to be saved and restored during interrupt
processing and context switches.
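
As an illustrative sketch (not part of this patch), the hypothetical helpers
read_vrsave() and vr_is_live() below show how VRSAVE (SPR 256) can be queried
from C: bit 0 (the most significant bit) corresponds to v0 and bit 31 to v31,
and a set bit marks the register as in use, which is the information the new
save/restore paths use to skip unused registers.

  #include <stdint.h>

  /* Read the VRSAVE special-purpose register (SPR 256) */
  static inline uint32_t read_vrsave( void )
  {
    uint32_t vrsave;

    __asm__ volatile ( "mfvrsave %0" : "=r" ( vrsave ) );
    return vrsave;
  }

  /* Return non-zero if vector register vr (0..31) is marked as in use
     in the given VRSAVE value */
  static inline int vr_is_live( uint32_t vrsave, unsigned int vr )
  {
    return ( vrsave >> ( 31 - vr ) ) & 1;
  }
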
Update #4712.
---
bsps/powerpc/shared/cpu_asm.S | 156 ++++++++++++++-
.../shared/exceptions/ppc_exc_async_normal.S | 189 +++++++++++++++++-
cpukit/score/cpu/powerpc/cpu.c | 5 +-
.../cpu/powerpc/include/rtems/score/cpu.h | 25 ++-
.../score/cpu/powerpc/ppc-context-validate.S | 77 ++++++-
5 files changed, 429 insertions(+), 23 deletions(-)
diff --git a/bsps/powerpc/shared/cpu_asm.S b/bsps/powerpc/shared/cpu_asm.S
index 63f6a3fdfe..21860c8eea 100644
--- a/bsps/powerpc/shared/cpu_asm.S
+++ b/bsps/powerpc/shared/cpu_asm.S
@@ -23,7 +23,7 @@
* COPYRIGHT (c) 1989-1997.
* On-Line Applications Research Corporation (OAR).
*
- * Copyright (c) 2011, 2017 embedded brains GmbH
+ * Copyright (c) 2011, 2020 embedded brains GmbH
*
* The license and distribution terms for this file may in
* the file LICENSE in this distribution or at
@@ -267,6 +267,10 @@ PROC (_CPU_Context_switch_no_return):
isync
#endif
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+ mfvrsave r9
+#endif
+
/* Align to a cache line */
CLEAR_RIGHT_IMMEDIATE r3, r3, PPC_DEFAULT_CACHE_LINE_POWER
CLEAR_RIGHT_IMMEDIATE r5, r4, PPC_DEFAULT_CACHE_LINE_POWER
@@ -284,6 +288,14 @@ PROC (_CPU_Context_switch_no_return):
mfmsr r6
#endif /* END PPC_DISABLE_MSR_ACCESS */
mfcr r7
+#ifdef PPC_MULTILIB_ALTIVEC
+#ifdef __PPC_VRSAVE__
+ /* Mark v0 as used since we need it to get the VSCR */
+ oris r8, r9, 0x8000
+ mtvrsave r8
+#endif
+ mfvscr v0
+#endif
mflr r8
lwz r11, PER_CPU_ISR_DISPATCH_DISABLE(r12)
@@ -356,6 +368,16 @@ PROC (_CPU_Context_switch_no_return):
stw r11, PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE(r3)
#ifdef PPC_MULTILIB_ALTIVEC
+ li r10, PPC_CONTEXT_OFFSET_VSCR
+ stvewx v0, r3, r10
+
+#ifdef __PPC_VRSAVE__
+ stw r9, PPC_CONTEXT_OFFSET_VRSAVE(r3)
+ andi. r9, r9, 0xfff
+ bne .Laltivec_save
+
+.Laltivec_save_continue:
+#else /* __PPC_VRSAVE__ */
li r9, PPC_CONTEXT_OFFSET_V20
stvx v20, r3, r9
li r9, PPC_CONTEXT_OFFSET_V21
@@ -397,7 +419,8 @@ PROC (_CPU_Context_switch_no_return):
stvx v31, r3, r9
mfvrsave r9
stw r9, PPC_CONTEXT_OFFSET_VRSAVE(r3)
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
#ifdef PPC_MULTILIB_FPU
stfd f14, PPC_CONTEXT_OFFSET_F14(r3)
@@ -461,6 +484,14 @@ restore_context:
PPC_REG_LOAD r1, PPC_CONTEXT_OFFSET_GPR1(r5)
PPC_REG_LOAD r8, PPC_CONTEXT_OFFSET_LR(r5)
+#ifdef PPC_MULTILIB_ALTIVEC
+ li r10, PPC_CONTEXT_OFFSET_VSCR
+ lvewx v0, r5, r10
+#ifdef __PPC_VRSAVE__
+ lwz r9, PPC_CONTEXT_OFFSET_VRSAVE(r5)
+#endif
+#endif
+
PPC_GPR_LOAD r14, PPC_CONTEXT_OFFSET_GPR14(r5)
PPC_GPR_LOAD r15, PPC_CONTEXT_OFFSET_GPR15(r5)
@@ -494,6 +525,15 @@ restore_context:
lwz r11, PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE(r5)
#ifdef PPC_MULTILIB_ALTIVEC
+ mtvscr v0
+
+#ifdef __PPC_VRSAVE__
+ mtvrsave r9
+ andi. r9, r9, 0xfff
+ bne .Laltivec_restore
+
+.Laltivec_restore_continue:
+#else /* __PPC_VRSAVE__ */
li r9, PPC_CONTEXT_OFFSET_V20
lvx v20, r5, r9
li r9, PPC_CONTEXT_OFFSET_V21
@@ -520,7 +560,8 @@ restore_context:
lvx v31, r5, r9
lwz r9, PPC_CONTEXT_OFFSET_VRSAVE(r5)
mtvrsave r9
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
#ifdef PPC_MULTILIB_FPU
lfd f14, PPC_CONTEXT_OFFSET_F14(r5)
@@ -567,6 +608,13 @@ PROC (_CPU_Context_restore):
li r3, 0
#endif
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+ /* Mark v0 as used since we need it to get the VSCR */
+ mfvrsave r9
+ oris r8, r9, 0x8000
+ mtvrsave r8
+#endif
+
b restore_context
#ifdef RTEMS_SMP
@@ -595,3 +643,105 @@ PROC (_CPU_Context_restore):
b .Lcheck_is_executing
#endif
+
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+.Laltivec_save:
+
+ /*
+ * Let X be VRSAVE, calculate:
+ *
+ * Z = X & 0x777
+ * Z = Z + 0x777
+ * X = X | Z
+ *
+ * Afterwards, we have in X for each group of four non-volatile VR
+ * registers:
+ *
+ * 0111b, if VRSAVE group of four registers == 0
+ * 1XXXb, if VRSAVE group of four registers != 0
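+ *
+ * For example (illustrative values only): with X = 0x203 (v22, v30,
+ * v31 in use), Z = 0x203 & 0x777 = 0x203, Z + 0x777 = 0x97a, and
+ * X | Z = 0xb7b.  After mtcr, CR bits 20 and 28 are set and bit 24 is
+ * clear, so the v20-v23 and v28-v31 groups are saved and v24-v27 is
+ * skipped.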
+ */
+ andi. r10, r9, 0x777
+ addi r10, r10, 0x777
+ or r9, r9, r10
+ mtcr r9
+
+ bf 20, .Laltivec_save_v24
+ li r9, PPC_CONTEXT_OFFSET_V20
+ stvx v20, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V21
+ stvx v21, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V22
+ stvx v22, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V23
+ stvx v23, r3, r9
+
+.Laltivec_save_v24:
+
+ bf 24, .Laltivec_save_v28
+ li r9, PPC_CONTEXT_OFFSET_V24
+ stvx v24, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V25
+ stvx v25, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V26
+ stvx v26, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V27
+ stvx v27, r3, r9
+
+.Laltivec_save_v28:
+
+ bf 28, .Laltivec_save_continue
+ li r9, PPC_CONTEXT_OFFSET_V28
+ stvx v28, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V29
+ stvx v29, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V30
+ stvx v30, r3, r9
+ li r9, PPC_CONTEXT_OFFSET_V31
+ stvx v31, r3, r9
+
+ b .Laltivec_save_continue
+
+.Laltivec_restore:
+
+ /* See comment at .Laltivec_save */
+ andi. r10, r9, 0x777
+ addi r10, r10, 0x777
+ or r9, r9, r10
+ mtcr r9
+
+ bf 20, .Laltivec_restore_v24
+ li r9, PPC_CONTEXT_OFFSET_V20
+ lvx v20, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V21
+ lvx v21, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V22
+ lvx v22, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V23
+ lvx v23, r5, r9
+
+.Laltivec_restore_v24:
+
+ bf 24, .Laltivec_restore_v28
+ li r9, PPC_CONTEXT_OFFSET_V24
+ lvx v24, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V25
+ lvx v25, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V26
+ lvx v26, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V27
+ lvx v27, r5, r9
+
+.Laltivec_restore_v28:
+
+ bf 28, .Laltivec_restore_continue
+ li r9, PPC_CONTEXT_OFFSET_V28
+ lvx v28, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V29
+ lvx v29, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V30
+ lvx v30, r5, r9
+ li r9, PPC_CONTEXT_OFFSET_V31
+ lvx v31, r5, r9
+
+ b .Laltivec_restore_continue
+#endif /* PPC_MULTILIB_ALTIVEC && __PPC_VRSAVE__ */
diff --git a/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S b/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S
index de4621ef55..f6318de256 100644
--- a/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S
+++ b/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-2-Clause */
/*
- * Copyright (c) 2011, 2017 embedded brains GmbH. All rights reserved.
+ * Copyright (c) 2011, 2020 embedded brains GmbH. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -173,8 +173,15 @@ ppc_exc_interrupt:
evstdd SCRATCH_5_REGISTER, PPC_EXC_ACC_OFFSET(r1)
#endif
-#ifdef PPC_MULTILIB_ALTIVEC
/* Save volatile AltiVec context */
+#ifdef PPC_MULTILIB_ALTIVEC
+#ifdef __PPC_VRSAVE__
+ mfvrsave SCRATCH_0_REGISTER
+ cmpwi SCRATCH_0_REGISTER, 0
+ bne .Laltivec_save
+
+.Laltivec_save_continue:
+#else /* __PPC_VRSAVE__ */
li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0)
stvx v0, r1, SCRATCH_0_REGISTER
mfvscr v0
@@ -218,7 +225,8 @@ ppc_exc_interrupt:
stvx v19, r1, SCRATCH_0_REGISTER
li SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
stvewx v0, r1, SCRATCH_0_REGISTER
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
#ifdef PPC_MULTILIB_FPU
/* Save volatile FPU context */
@@ -334,8 +342,15 @@ ppc_exc_interrupt:
.Lthread_dispatch_done:
-#ifdef PPC_MULTILIB_ALTIVEC
/* Restore volatile AltiVec context */
+#ifdef PPC_MULTILIB_ALTIVEC
+#ifdef __PPC_VRSAVE__
+ mfvrsave SCRATCH_0_REGISTER
+ cmpwi SCRATCH_0_REGISTER, 0
+ bne .Laltivec_restore
+
+.Laltivec_restore_continue:
+#else /* __PPC_VRSAVE__ */
li SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
lvewx v0, r1, SCRATCH_0_REGISTER
mtvscr v0
@@ -379,7 +394,8 @@ ppc_exc_interrupt:
lvx v18, r1, SCRATCH_0_REGISTER
li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19)
lvx v19, r1, SCRATCH_0_REGISTER
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
#ifdef PPC_MULTILIB_FPU
/* Restore volatile FPU context */
@@ -478,6 +494,169 @@ ppc_exc_interrupt:
/* Return */
rfi
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+.Laltivec_save:
+
+ /*
+ * Let X be VRSAVE, calculate:
+ *
+ * Y = 0x77777777
+ * Z = X & Y
+ * Z = Z + Y
+ * X = X | Z
+ *
+ * Afterwards, we have in X for each group of four VR registers:
+ *
+ * 0111b, if VRSAVE group of four registers == 0
+ * 1XXXb, if VRSAVE group of four registers != 0
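+ *
+ * For example (illustrative values only): with X = 0x00840000 (v8 and
+ * v13 in use), Z = X & Y = 0x00040000, Z + Y = 0x777b7777, and
+ * X | Z = 0x77ff7777.  After mtcr, CR bits 8 and 12 are set while bits
+ * 4 and 16 are clear, so the v8-v11 and v12-v15 groups are saved and
+ * v4-v7 and v16-v19 are skipped (v0-v3 are always saved).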
+ */
+ lis SCRATCH_5_REGISTER, 0x7777
+ ori SCRATCH_5_REGISTER, SCRATCH_5_REGISTER, 0x7777
+ and SCRATCH_6_REGISTER, SCRATCH_0_REGISTER, SCRATCH_5_REGISTER
+ add SCRATCH_6_REGISTER, SCRATCH_5_REGISTER, SCRATCH_6_REGISTER
+ or SCRATCH_0_REGISTER, SCRATCH_0_REGISTER, SCRATCH_6_REGISTER
+ mtcr SCRATCH_0_REGISTER
+
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0)
+ stvx v0, r1, SCRATCH_0_REGISTER
+
+ /* Move VSCR to v0 */
+ mfvscr v0
+
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(1)
+ stvx v1, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(2)
+ stvx v2, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(3)
+ stvx v3, r1, SCRATCH_0_REGISTER
+
+ /* Save VSCR using v0 */
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
+ stvewx v0, r1, SCRATCH_0_REGISTER
+
+ bf 4, .Laltivec_save_v8
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(4)
+ stvx v4, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(5)
+ stvx v5, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(6)
+ stvx v6, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(7)
+ stvx v7, r1, SCRATCH_0_REGISTER
+
+.Laltivec_save_v8:
+
+ bf 8, .Laltivec_save_v12
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(8)
+ stvx v8, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(9)
+ stvx v9, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(10)
+ stvx v10, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(11)
+ stvx v11, r1, SCRATCH_0_REGISTER
+
+.Laltivec_save_v12:
+
+ bf 12, .Laltivec_save_v16
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(12)
+ stvx v12, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(13)
+ stvx v13, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(14)
+ stvx v14, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(15)
+ stvx v15, r1, SCRATCH_0_REGISTER
+
+.Laltivec_save_v16:
+
+ bf 16, .Laltivec_save_continue
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(16)
+ stvx v16, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(17)
+ stvx v17, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(18)
+ stvx v18, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19)
+ stvx v19, r1, SCRATCH_0_REGISTER
+
+ b .Laltivec_save_continue
+
+.Laltivec_restore:
+
+ /* Load VSCR using v0 */
+ li SCRATCH_5_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
+ lvewx v0, r1, SCRATCH_5_REGISTER
+
+ /* See comment at .Laltivec_save */
+ lis SCRATCH_5_REGISTER, 0x7777
+ ori SCRATCH_5_REGISTER, SCRATCH_5_REGISTER, 0x7777
+ and SCRATCH_6_REGISTER, SCRATCH_0_REGISTER, SCRATCH_5_REGISTER
+ add SCRATCH_6_REGISTER, SCRATCH_5_REGISTER, SCRATCH_6_REGISTER
+ or SCRATCH_0_REGISTER, SCRATCH_0_REGISTER, SCRATCH_6_REGISTER
+ mtcr SCRATCH_0_REGISTER
+
+ /* Restore VSCR using v0 */
+ mtvscr v0
+
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0)
+ lvx v0, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(1)
+ lvx v1, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(2)
+ lvx v2, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(3)
+ lvx v3, r1, SCRATCH_0_REGISTER
+
+ bf 4, .Laltivec_restore_v8
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(4)
+ lvx v4, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(5)
+ lvx v5, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(6)
+ lvx v6, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(7)
+ lvx v7, r1, SCRATCH_0_REGISTER
+
+.Laltivec_restore_v8:
+
+ bf 8, .Laltivec_restore_v12
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(8)
+ lvx v8, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(9)
+ lvx v9, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(10)
+ lvx v10, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(11)
+ lvx v11, r1, SCRATCH_0_REGISTER
+
+.Laltivec_restore_v12:
+
+ bf 12, .Laltivec_restore_v16
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(12)
+ lvx v12, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(13)
+ lvx v13, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(14)
+ lvx v14, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(15)
+ lvx v15, r1, SCRATCH_0_REGISTER
+
+.Laltivec_restore_v16:
+
+ bf 16, .Laltivec_restore_continue
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(16)
+ lvx v16, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(17)
+ lvx v17, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(18)
+ lvx v18, r1, SCRATCH_0_REGISTER
+ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19)
+ lvx v19, r1, SCRATCH_0_REGISTER
+
+ b .Laltivec_restore_continue
+#endif /* PPC_MULTILIB_ALTIVEC && __PPC_VRSAVE__ */
+
/* Symbol provided for debugging and tracing */
ppc_exc_interrupt_end:
diff --git a/cpukit/score/cpu/powerpc/cpu.c b/cpukit/score/cpu/powerpc/cpu.c
index 6147d7be74..bdb9cf6ab5 100644
--- a/cpukit/score/cpu/powerpc/cpu.c
+++ b/cpukit/score/cpu/powerpc/cpu.c
@@ -79,8 +79,10 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE);
#endif
#ifdef PPC_MULTILIB_ALTIVEC
+ PPC_ASSERT_OFFSET(vrsave, VRSAVE);
+ PPC_ASSERT_OFFSET(vscr, VSCR);
RTEMS_STATIC_ASSERT(
- PPC_CONTEXT_OFFSET_V20 % 16 == 0,
+ PPC_CONTEXT_OFFSET_V20 % PPC_DEFAULT_CACHE_LINE_SIZE == 0,
ppc_context_altivec
);
PPC_ASSERT_OFFSET(v20, V20);
@@ -95,7 +97,6 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE);
PPC_ASSERT_OFFSET(v29, V29);
PPC_ASSERT_OFFSET(v30, V30);
PPC_ASSERT_OFFSET(v31, V31);
- PPC_ASSERT_OFFSET(vrsave, VRSAVE);
#endif
#ifdef PPC_MULTILIB_FPU
diff --git a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h
index 42900aeb1d..a9f0acac00 100644
--- a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h
+++ b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h
@@ -29,7 +29,7 @@
*
* Copyright (c) 2001 Surrey Satellite Technology Limited (SSTL).
*
- * Copyright (c) 2010, 2017 embedded brains GmbH.
+ * Copyright (c) 2010, 2020 embedded brains GmbH.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -246,6 +246,13 @@ typedef struct {
uint32_t isr_dispatch_disable;
uint32_t reserved_for_alignment;
#if defined(PPC_MULTILIB_ALTIVEC)
+ #if !defined(__powerpc64__)
+ uint32_t reserved_for_alignment_2[4];
+ #endif
+ uint32_t vrsave;
+ uint32_t reserved_for_alignment_3[2];
+ /* This field must take stvewx/lvewx requirements into account */
+ uint32_t vscr;
uint8_t v20[16];
uint8_t v21[16];
uint8_t v22[16];
@@ -258,7 +265,6 @@ typedef struct {
uint8_t v29[16];
uint8_t v30[16];
uint8_t v31[16];
- uint32_t vrsave;
#elif defined(__ALTIVEC__)
/*
* 12 non-volatile vector registers, cache-aligned area for vscr/vrsave
@@ -373,8 +379,16 @@ static inline ppc_context *ppc_get_context( const Context_Control *context )
#define PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE PPC_CONTEXT_GPR_OFFSET( 32 )
#ifdef PPC_MULTILIB_ALTIVEC
+ #ifdef __powerpc64__
+ #define PPC_CONTEXT_OFFSET_VRSAVE \
+ ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 )
+ #else
+ #define PPC_CONTEXT_OFFSET_VRSAVE \
+ ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 24 )
+ #endif
+ #define PPC_CONTEXT_OFFSET_VSCR ( PPC_CONTEXT_OFFSET_VRSAVE + 12 )
#define PPC_CONTEXT_OFFSET_V( v ) \
- ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8)
+ ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_VRSAVE + 16)
#define PPC_CONTEXT_OFFSET_V20 PPC_CONTEXT_OFFSET_V( 20 )
#define PPC_CONTEXT_OFFSET_V21 PPC_CONTEXT_OFFSET_V( 21 )
#define PPC_CONTEXT_OFFSET_V22 PPC_CONTEXT_OFFSET_V( 22 )
@@ -387,9 +401,8 @@ static inline ppc_context *ppc_get_context( const Context_Control *context )
#define PPC_CONTEXT_OFFSET_V29 PPC_CONTEXT_OFFSET_V( 29 )
#define PPC_CONTEXT_OFFSET_V30 PPC_CONTEXT_OFFSET_V( 30 )
#define PPC_CONTEXT_OFFSET_V31 PPC_CONTEXT_OFFSET_V( 31 )
- #define PPC_CONTEXT_OFFSET_VRSAVE PPC_CONTEXT_OFFSET_V( 32 )
#define PPC_CONTEXT_OFFSET_F( f ) \
- ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_VRSAVE + 8 )
+ ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_V( 32 ) )
#else
#define PPC_CONTEXT_OFFSET_F( f ) \
( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 )
@@ -419,7 +432,7 @@ static inline ppc_context *ppc_get_context( const Context_Control *context )
#if defined(PPC_MULTILIB_FPU)
#define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_F( 32 )
#elif defined(PPC_MULTILIB_ALTIVEC)
- #define PPC_CONTEXT_VOLATILE_SIZE (PPC_CONTEXT_OFFSET_VRSAVE + 4)
+ #define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_V( 33 )
#elif defined(__ALTIVEC__)
#define PPC_CONTEXT_VOLATILE_SIZE \
(PPC_CONTEXT_GPR_OFFSET( 32 ) + 8 \
diff --git a/cpukit/score/cpu/powerpc/ppc-context-validate.S b/cpukit/score/cpu/powerpc/ppc-context-validate.S
index e4331b2661..67cb5b45c3 100644
--- a/cpukit/score/cpu/powerpc/ppc-context-validate.S
+++ b/cpukit/score/cpu/powerpc/ppc-context-validate.S
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-2-Clause */
/*
- * Copyright (c) 2013, 2017 embedded brains GmbH. All rights reserved.
+ * Copyright (c) 2013, 2020 embedded brains GmbH. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -99,6 +99,7 @@
#define VTMP_OFFSET VOFFSET(12)
#define VTMP2_OFFSET VOFFSET(13)
#define VRSAVE_OFFSET VOFFSET(14)
+ #define VRSAVE2_OFFSET (VOFFSET(14) + 4)
#define VSCR_OFFSET (VOFFSET(14) + 12)
#define ALTIVECEND VOFFSET(15)
#else
@@ -161,6 +162,13 @@ _CPU_Context_validate:
#endif
#ifdef PPC_MULTILIB_ALTIVEC
+ mfvrsave r0
+ stw r0, VRSAVE_OFFSET(r1)
+ li r0, 0xffffffff
+ mtvrsave r0
+ mfvscr v0
+ li r0, VSCR_OFFSET
+ stvewx v0, r1, r0
li r0, V20_OFFSET
stvx v20, r1, r0
li r0, V21_OFFSET
@@ -185,11 +193,6 @@ _CPU_Context_validate:
stvx v30, r1, r0
li r0, V31_OFFSET
stvx v31, r1, r0
- mfvscr v0
- li r0, VSCR_OFFSET
- stvewx v0, r1, r0
- mfvrsave r0
- stw r0, VRSAVE_OFFSET(r1)
#endif
/* Fill */
@@ -337,8 +340,10 @@ _CPU_Context_validate:
FILL_V 29
FILL_V 30
FILL_V 31
+#ifndef __PPC_VRSAVE__
addi r4, r3, 0x700
mtvrsave r4
+#endif
#endif
/* Check */
@@ -516,6 +521,15 @@ check:
#ifdef PPC_MULTILIB_ALTIVEC
.macro CHECK_V i
+#ifdef __PPC_VRSAVE__
+ mfvrsave r4
+.if (31 - \i) > 15
+ andis. r5, r4, 1 << (31 - \i - 16)
+.else
+ andi. r5, r4, 1 << (31 - \i)
+.endif
+ beq 1f
+#endif
li r4, VTMP_OFFSET
stvx \i, r1, r4
lwz r5, VTMP_OFFSET(r1)
@@ -534,9 +548,43 @@ check:
addi r4, r3, 0x600 + \i
cmpw r5, r4
bne restore
+#ifdef __PPC_VRSAVE__
+ mfvrsave r4
+.if (31 - \i) > 15
+ xoris r4, r4, 1 << (31 - \i - 16)
+.else
+ xori r4, r4, 1 << (31 - \i)
+.endif
+ mtvrsave r4
+ b 2f
+1:
+.if (31 - \i) > 15
+ oris r4, r4, 1 << (31 - \i - 16)
+.else
+ ori r4, r4, 1 << (31 - \i)
+.endif
+ mtvrsave r4
+ addi r4, r3, 0x300 + \i
+ stw r4, VTMP_OFFSET(r1)
+ addi r4, r3, 0x400 + \i
+ stw r4, VTMP_OFFSET + 4(r1)
+ addi r4, r3, 0x500 + \i
+ stw r4, VTMP_OFFSET + 8(r1)
+ addi r4, r3, 0x600 + \i
+ stw r4, VTMP_OFFSET + 12(r1)
+ li r4, VTMP_OFFSET
+ lvx \i, r1, r4
+2:
+#endif
.endm
/* Check VSCR */
+#ifdef __PPC_VRSAVE__
+ mfvrsave r4
+ stw r4, VRSAVE2_OFFSET(r1)
+ oris r4, r4, 0x8000
+ mtvrsave r4
+#endif
li r4, VTMP_OFFSET
stvx v0, r1, r4
mfvscr v0
@@ -548,6 +596,10 @@ check:
bne restore
li r4, VTMP_OFFSET
lvx v0, r1, r4
+#ifdef __PPC_VRSAVE__
+ lwz r4, VRSAVE2_OFFSET(r1)
+ mtvrsave r4
+#endif
CHECK_V 0
CHECK_V 1
@@ -582,9 +634,15 @@ check:
CHECK_V 30
CHECK_V 31
mfvrsave r5
+#ifdef __PPC_VRSAVE__
+ addi r5, r5, 1
+ cmplwi r0, r5, 1
+ bgt restore
+#else
addi r4, r3, 0x700
cmpw r5, r4
bne restore
+#endif
#endif
mtcr r29
@@ -595,7 +653,7 @@ check:
restore:
#ifdef PPC_MULTILIB_ALTIVEC
- lwz r0, VRSAVE_OFFSET(r1)
+ li r0, 0xffffffff
mtvrsave r0
li r0, V31_OFFSET
lvx v31, r1, r0
@@ -621,6 +679,11 @@ restore:
lvx v21, r1, r0
li r0, V20_OFFSET
lvx v20, r1, r0
+ li r0, VSCR_OFFSET
+ lvewx v0, r1, r0
+ mtvscr v0
+ lwz r0, VRSAVE_OFFSET(r1)
+ mtvrsave r0
#endif
#ifdef PPC_MULTILIB_FPU
--
2.35.3