[PATCH] powerpc: Add support for VRSAVE

Sebastian Huber sebastian.huber at embedded-brains.de
Thu Sep 1 08:26:42 UTC 2022


The VRSAVE feature of the AltiVec unit can be used to reduce the number of
AltiVec registers which need to be saved and restored during interrupt
processing and context switches.

Update #4712.
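
As a review aid, here is a minimal C sketch of the idea (illustration only,
not part of the change; the helper name and the sample bitmask are invented).
VRSAVE holds one bit per vector register, with bit 0 (the most significant
bit) corresponding to v0 and bit 31 to v31; registers whose bits are clear
can be skipped:

  #include <stdint.h>
  #include <stdio.h>

  /* Hypothetical helper: VRSAVE bit 0 (MSB) is v0, bit 31 (LSB) is v31 */
  static int vr_in_use(uint32_t vrsave, unsigned vr)
  {
    return (vrsave >> (31u - vr)) & 1u;
  }

  int main(void)
  {
    uint32_t vrsave = 0x00000c01; /* example: v20, v21 and v31 in use */
    unsigned saved = 0;
    unsigned vr;

    /* Only the non-volatile registers v20 to v31 matter for a switch */
    for (vr = 20; vr <= 31; ++vr) {
      if (vr_in_use(vrsave, vr)) {
        ++saved; /* a real context switch would issue a stvx here */
      }
    }

    printf("non-volatile VRs to save: %u of 12\n", saved);
    return 0;
  }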
---
 bsps/powerpc/shared/cpu_asm.S                 | 156 ++++++++++++++-
 .../shared/exceptions/ppc_exc_async_normal.S  | 189 +++++++++++++++++-
 cpukit/score/cpu/powerpc/cpu.c                |   5 +-
 .../cpu/powerpc/include/rtems/score/cpu.h     |  25 ++-
 .../score/cpu/powerpc/ppc-context-validate.S  |  77 ++++++-
 5 files changed, 429 insertions(+), 23 deletions(-)

diff --git a/bsps/powerpc/shared/cpu_asm.S b/bsps/powerpc/shared/cpu_asm.S
index 63f6a3fdfe..21860c8eea 100644
--- a/bsps/powerpc/shared/cpu_asm.S
+++ b/bsps/powerpc/shared/cpu_asm.S
@@ -23,7 +23,7 @@
  *  COPYRIGHT (c) 1989-1997.
  *  On-Line Applications Research Corporation (OAR).
  *
- *  Copyright (c) 2011, 2017 embedded brains GmbH
+ *  Copyright (c) 2011, 2020 embedded brains GmbH
  *
  *  The license and distribution terms for this file may in
  *  the file LICENSE in this distribution or at
@@ -267,6 +267,10 @@ PROC (_CPU_Context_switch_no_return):
 	isync
 #endif
 
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+	mfvrsave	r9
+#endif
+
 	/* Align to a cache line */
 	CLEAR_RIGHT_IMMEDIATE	r3, r3, PPC_DEFAULT_CACHE_LINE_POWER
 	CLEAR_RIGHT_IMMEDIATE	r5, r4, PPC_DEFAULT_CACHE_LINE_POWER
@@ -284,6 +288,14 @@ PROC (_CPU_Context_switch_no_return):
 	mfmsr	r6
 #endif  /* END PPC_DISABLE_MSR_ACCESS */
 	mfcr	r7
+#ifdef PPC_MULTILIB_ALTIVEC
+#ifdef __PPC_VRSAVE__
+	/* Mark v0 as used since we need it to get the VSCR */
+	oris	r8, r9, 0x8000
+	mtvrsave	r8
+#endif
+	mfvscr	v0
+#endif
 	mflr	r8
 	lwz	r11, PER_CPU_ISR_DISPATCH_DISABLE(r12)
 
@@ -356,6 +368,16 @@ PROC (_CPU_Context_switch_no_return):
 	stw	r11, PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE(r3)
 
 #ifdef PPC_MULTILIB_ALTIVEC
+	li	r10, PPC_CONTEXT_OFFSET_VSCR
+	stvewx	v0, r3, r10
+
+#ifdef __PPC_VRSAVE__
+	stw	r9, PPC_CONTEXT_OFFSET_VRSAVE(r3)
+	andi.	r9, r9, 0xfff
+	bne	.Laltivec_save
+
+.Laltivec_save_continue:
+#else /* __PPC_VRSAVE__ */
 	li	r9, PPC_CONTEXT_OFFSET_V20
 	stvx	v20, r3, r9
 	li	r9, PPC_CONTEXT_OFFSET_V21
@@ -397,7 +419,8 @@ PROC (_CPU_Context_switch_no_return):
 	stvx	v31, r3, r9
 	mfvrsave	r9
 	stw	r9, PPC_CONTEXT_OFFSET_VRSAVE(r3)
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
 
 #ifdef PPC_MULTILIB_FPU
 	stfd	f14, PPC_CONTEXT_OFFSET_F14(r3)
@@ -461,6 +484,14 @@ restore_context:
 	PPC_REG_LOAD	r1, PPC_CONTEXT_OFFSET_GPR1(r5)
 	PPC_REG_LOAD	r8, PPC_CONTEXT_OFFSET_LR(r5)
 
+#ifdef PPC_MULTILIB_ALTIVEC
+	li	r10, PPC_CONTEXT_OFFSET_VSCR
+	lvewx	v0, r5, r10
+#ifdef __PPC_VRSAVE__
+	lwz	r9, PPC_CONTEXT_OFFSET_VRSAVE(r5)
+#endif
+#endif
+
 	PPC_GPR_LOAD	r14, PPC_CONTEXT_OFFSET_GPR14(r5)
 	PPC_GPR_LOAD	r15, PPC_CONTEXT_OFFSET_GPR15(r5)
 
@@ -494,6 +525,15 @@ restore_context:
 	lwz	r11, PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE(r5)
 
 #ifdef PPC_MULTILIB_ALTIVEC
+	mtvscr	v0
+
+#ifdef __PPC_VRSAVE__
+	mtvrsave	r9
+	andi.	r9, r9, 0xfff
+	bne	.Laltivec_restore
+
+.Laltivec_restore_continue:
+#else /* __PPC_VRSAVE__ */
 	li	r9, PPC_CONTEXT_OFFSET_V20
 	lvx	v20, r5, r9
 	li	r9, PPC_CONTEXT_OFFSET_V21
@@ -520,7 +560,8 @@ restore_context:
 	lvx	v31, r5, r9
 	lwz	r9, PPC_CONTEXT_OFFSET_VRSAVE(r5)
 	mtvrsave	r9
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
 
 #ifdef PPC_MULTILIB_FPU
 	lfd	f14, PPC_CONTEXT_OFFSET_F14(r5)
@@ -567,6 +608,13 @@ PROC (_CPU_Context_restore):
 	li	r3, 0
 #endif
 
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+	/* Mark v0 as used since we need it to get the VSCR */
+	mfvrsave	r9
+	oris	r8, r9, 0x8000
+	mtvrsave	r8
+#endif
+
 	b	restore_context
 
 #ifdef RTEMS_SMP
@@ -595,3 +643,105 @@ PROC (_CPU_Context_restore):
 
 	b	.Lcheck_is_executing
 #endif
+
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+.Laltivec_save:
+
+	/*
+	 * Let X be VRSAVE, calculate:
+	 *
+	 * Z = X & 0x777
+	 * Z = Z + 0x777
+	 * X = X | Z
+	 *
+	 * Afterwards, we have in X for each group of four non-volatile VR
+	 * registers:
+	 *
+	 * 0111b, if VRSAVE group of four registers == 0
+	 * 1XXXb, if VRSAVE group of four registers != 0
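+	 *
+	 * For example, X = 0x401 (v21 and v31 in use) gives
+	 * Z = 0x401 + 0x777 = 0xb78 and X | Z = 0xf79.  After the mtcr
+	 * below, CR bits 20 and 28 are set and CR bit 24 is clear, so
+	 * the v24 to v27 group is skipped.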
+	 */
+	andi.	r10, r9, 0x777
+	addi	r10, r10, 0x777
+	or	r9, r9, r10
+	mtcr	r9
+
+	bf	20, .Laltivec_save_v24
+	li	r9, PPC_CONTEXT_OFFSET_V20
+	stvx	v20, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V21
+	stvx	v21, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V22
+	stvx	v22, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V23
+	stvx	v23, r3, r9
+
+.Laltivec_save_v24:
+
+	bf	24, .Laltivec_save_v28
+	li	r9, PPC_CONTEXT_OFFSET_V24
+	stvx	v24, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V25
+	stvx	v25, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V26
+	stvx	v26, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V27
+	stvx	v27, r3, r9
+
+.Laltivec_save_v28:
+
+	bf	28, .Laltivec_save_continue
+	li	r9, PPC_CONTEXT_OFFSET_V28
+	stvx	v28, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V29
+	stvx	v29, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V30
+	stvx	v30, r3, r9
+	li	r9, PPC_CONTEXT_OFFSET_V31
+	stvx	v31, r3, r9
+
+	b	.Laltivec_save_continue
+
+.Laltivec_restore:
+
+	/* See comment at .Laltivec_save */
+	andi.	r10, r9, 0x777
+	addi	r10, r10, 0x777
+	or	r9, r9, r10
+	mtcr	r9
+
+	bf	20, .Laltivec_restore_v24
+	li	r9, PPC_CONTEXT_OFFSET_V20
+	lvx	v20, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V21
+	lvx	v21, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V22
+	lvx	v22, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V23
+	lvx	v23, r5, r9
+
+.Laltivec_restore_v24:
+
+	bf	24, .Laltivec_restore_v28
+	li	r9, PPC_CONTEXT_OFFSET_V24
+	lvx	v24, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V25
+	lvx	v25, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V26
+	lvx	v26, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V27
+	lvx	v27, r5, r9
+
+.Laltivec_restore_v28:
+
+	bf	28, .Laltivec_restore_continue
+	li	r9, PPC_CONTEXT_OFFSET_V28
+	lvx	v28, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V29
+	lvx	v29, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V30
+	lvx	v30, r5, r9
+	li	r9, PPC_CONTEXT_OFFSET_V31
+	lvx	v31, r5, r9
+
+	b	.Laltivec_restore_continue
+#endif /* PPC_MULTILIB_ALTIVEC && __PPC_VRSAVE__ */
diff --git a/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S b/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S
index de4621ef55..f6318de256 100644
--- a/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S
+++ b/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-2-Clause */
 
 /*
- * Copyright (c) 2011, 2017 embedded brains GmbH.  All rights reserved.
+ * Copyright (c) 2011, 2020 embedded brains GmbH.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -173,8 +173,15 @@ ppc_exc_interrupt:
 	evstdd	SCRATCH_5_REGISTER, PPC_EXC_ACC_OFFSET(r1)
 #endif
 
-#ifdef PPC_MULTILIB_ALTIVEC
 	/* Save volatile AltiVec context */
+#ifdef PPC_MULTILIB_ALTIVEC
+#ifdef __PPC_VRSAVE__
+	mfvrsave	SCRATCH_0_REGISTER
+	cmpwi	SCRATCH_0_REGISTER, 0
+	bne	.Laltivec_save
+
+.Laltivec_save_continue:
+#else /* __PPC_VRSAVE__ */
 	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0)
 	stvx	v0, r1, SCRATCH_0_REGISTER
 	mfvscr	v0
@@ -218,7 +225,8 @@ ppc_exc_interrupt:
 	stvx	v19, r1, SCRATCH_0_REGISTER
 	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
 	stvewx	v0, r1, SCRATCH_0_REGISTER
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
 
 #ifdef PPC_MULTILIB_FPU
 	/* Save volatile FPU context */
@@ -334,8 +342,15 @@ ppc_exc_interrupt:
 
 .Lthread_dispatch_done:
 
-#ifdef PPC_MULTILIB_ALTIVEC
 	/* Restore volatile AltiVec context */
+#ifdef PPC_MULTILIB_ALTIVEC
+#ifdef __PPC_VRSAVE__
+	mfvrsave	SCRATCH_0_REGISTER
+	cmpwi	SCRATCH_0_REGISTER, 0
+	bne	.Laltivec_restore
+
+.Laltivec_restore_continue:
+#else /* __PPC_VRSAVE__ */
 	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
 	lvewx	v0, r1, SCRATCH_0_REGISTER
 	mtvscr	v0
@@ -379,7 +394,8 @@ ppc_exc_interrupt:
 	lvx	v18, r1, SCRATCH_0_REGISTER
 	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19)
 	lvx	v19, r1, SCRATCH_0_REGISTER
-#endif
+#endif /* __PPC_VRSAVE__ */
+#endif /* PPC_MULTILIB_ALTIVEC */
 
 #ifdef PPC_MULTILIB_FPU
 	/* Restore volatile FPU context */
@@ -478,6 +494,169 @@ ppc_exc_interrupt:
 	/* Return */
 	rfi
 
+#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__)
+.Laltivec_save:
+
+	/*
+	 * Let X be VRSAVE, calculate:
+	 *
+	 * Y = 0x77777777
+	 * Z = X & Y
+	 * Z = Z + Y
+	 * X = X | Z
+	 *
+	 * Afterwards, we have in X for each group of four VR registers:
+	 *
+	 * 0111b, if VRSAVE group of four registers == 0
+	 * 1XXXb, if VRSAVE group of four registers != 0
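+	 *
+	 * After the mtcr below, each CR bit tested by the bf instructions
+	 * (4, 8, 12 and 16) is set if and only if at least one register of
+	 * the corresponding group of four (v4 to v7, v8 to v11, and so on)
+	 * is marked in VRSAVE.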
+	 */
+	lis	SCRATCH_5_REGISTER, 0x7777
+	ori	SCRATCH_5_REGISTER, SCRATCH_5_REGISTER, 0x7777
+	and	SCRATCH_6_REGISTER, SCRATCH_0_REGISTER, SCRATCH_5_REGISTER
+	add	SCRATCH_6_REGISTER, SCRATCH_5_REGISTER, SCRATCH_6_REGISTER
+	or	SCRATCH_0_REGISTER, SCRATCH_0_REGISTER, SCRATCH_6_REGISTER
+	mtcr	SCRATCH_0_REGISTER
+
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0)
+	stvx	v0, r1, SCRATCH_0_REGISTER
+
+	/* Move VSCR to v0 */
+	mfvscr	v0
+
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(1)
+	stvx	v1, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(2)
+	stvx	v2, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(3)
+	stvx	v3, r1, SCRATCH_0_REGISTER
+
+	/* Save VSCR using v0 */
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
+	stvewx	v0, r1, SCRATCH_0_REGISTER
+
+	bf	4, .Laltivec_save_v8
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(4)
+	stvx	v4, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(5)
+	stvx	v5, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(6)
+	stvx	v6, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(7)
+	stvx	v7, r1, SCRATCH_0_REGISTER
+
+.Laltivec_save_v8:
+
+	bf	8, .Laltivec_save_v12
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(8)
+	stvx	v8, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(9)
+	stvx	v9, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(10)
+	stvx	v10, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(11)
+	stvx	v11, r1, SCRATCH_0_REGISTER
+
+.Laltivec_save_v12:
+
+	bf	12, .Laltivec_save_v16
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(12)
+	stvx	v12, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(13)
+	stvx	v13, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(14)
+	stvx	v14, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(15)
+	stvx	v15, r1, SCRATCH_0_REGISTER
+
+.Laltivec_save_v16:
+
+	bf	16, .Laltivec_save_continue
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(16)
+	stvx	v16, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(17)
+	stvx	v17, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(18)
+	stvx	v18, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19)
+	stvx	v19, r1, SCRATCH_0_REGISTER
+
+	b	.Laltivec_save_continue
+
+.Laltivec_restore:
+
+	/* Load VSCR using v0 */
+	li	SCRATCH_5_REGISTER, PPC_EXC_MIN_VSCR_OFFSET
+	lvewx	v0, r1, SCRATCH_5_REGISTER
+
+	/* See comment at .Laltivec_save */
+	lis	SCRATCH_5_REGISTER, 0x7777
+	ori	SCRATCH_5_REGISTER, SCRATCH_5_REGISTER, 0x7777
+	and	SCRATCH_6_REGISTER, SCRATCH_0_REGISTER, SCRATCH_5_REGISTER
+	add	SCRATCH_6_REGISTER, SCRATCH_5_REGISTER, SCRATCH_6_REGISTER
+	or	SCRATCH_0_REGISTER, SCRATCH_0_REGISTER, SCRATCH_6_REGISTER
+	mtcr	SCRATCH_0_REGISTER
+
+	/* Restore VSCR using v0 */
+	mtvscr	v0
+
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0)
+	lvx	v0, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(1)
+	lvx	v1, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(2)
+	lvx	v2, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(3)
+	lvx	v3, r1, SCRATCH_0_REGISTER
+
+	bf	4, .Laltivec_restore_v8
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(4)
+	lvx	v4, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(5)
+	lvx	v5, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(6)
+	lvx	v6, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(7)
+	lvx	v7, r1, SCRATCH_0_REGISTER
+
+.Laltivec_restore_v8:
+
+	bf	8, .Laltivec_restore_v12
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(8)
+	lvx	v8, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(9)
+	lvx	v9, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(10)
+	lvx	v10, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(11)
+	lvx	v11, r1, SCRATCH_0_REGISTER
+
+.Laltivec_restore_v12:
+
+	bf	12, .Laltivec_restore_v16
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(12)
+	lvx	v12, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(13)
+	lvx	v13, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(14)
+	lvx	v14, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(15)
+	lvx	v15, r1, SCRATCH_0_REGISTER
+
+.Laltivec_restore_v16:
+
+	bf	16, .Laltivec_restore_continue
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(16)
+	lvx	v16, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(17)
+	lvx	v17, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(18)
+	lvx	v18, r1, SCRATCH_0_REGISTER
+	li	SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19)
+	lvx	v19, r1, SCRATCH_0_REGISTER
+
+	b	.Laltivec_restore_continue
+#endif /* PPC_MULTILIB_ALTIVEC && __PPC_VRSAVE__ */
+
 /* Symbol provided for debugging and tracing */
 ppc_exc_interrupt_end:
 
diff --git a/cpukit/score/cpu/powerpc/cpu.c b/cpukit/score/cpu/powerpc/cpu.c
index 6147d7be74..bdb9cf6ab5 100644
--- a/cpukit/score/cpu/powerpc/cpu.c
+++ b/cpukit/score/cpu/powerpc/cpu.c
@@ -79,8 +79,10 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE);
 #endif
 
 #ifdef PPC_MULTILIB_ALTIVEC
+  PPC_ASSERT_OFFSET(vrsave, VRSAVE);
+  PPC_ASSERT_OFFSET(vscr, VSCR);
   RTEMS_STATIC_ASSERT(
-    PPC_CONTEXT_OFFSET_V20 % 16 == 0,
+    PPC_CONTEXT_OFFSET_V20 % PPC_DEFAULT_CACHE_LINE_SIZE == 0,
     ppc_context_altivec
   );
   PPC_ASSERT_OFFSET(v20, V20);
@@ -95,7 +97,6 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE);
   PPC_ASSERT_OFFSET(v29, V29);
   PPC_ASSERT_OFFSET(v30, V30);
   PPC_ASSERT_OFFSET(v31, V31);
-  PPC_ASSERT_OFFSET(vrsave, VRSAVE);
 #endif
 
 #ifdef PPC_MULTILIB_FPU
diff --git a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h
index 42900aeb1d..a9f0acac00 100644
--- a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h
+++ b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h
@@ -29,7 +29,7 @@
  *
  *  Copyright (c) 2001 Surrey Satellite Technology Limited (SSTL).
  *
- *  Copyright (c) 2010, 2017 embedded brains GmbH.
+ *  Copyright (c) 2010, 2020 embedded brains GmbH.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -246,6 +246,13 @@ typedef struct {
   uint32_t isr_dispatch_disable;
   uint32_t reserved_for_alignment;
   #if defined(PPC_MULTILIB_ALTIVEC)
+    #if !defined(__powerpc64__)
+      uint32_t reserved_for_alignment_2[4];
+    #endif
+    uint32_t vrsave;
+    uint32_t reserved_for_alignment_3[2];
+    /* This field must take stvewx/lvewx requirements into account */
+    uint32_t vscr;
     uint8_t v20[16];
     uint8_t v21[16];
     uint8_t v22[16];
@@ -258,7 +265,6 @@ typedef struct {
     uint8_t v29[16];
     uint8_t v30[16];
     uint8_t v31[16];
-    uint32_t vrsave;
   #elif defined(__ALTIVEC__)
     /*
      * 12 non-volatile vector registers, cache-aligned area for vscr/vrsave
@@ -373,8 +379,16 @@ static inline ppc_context *ppc_get_context( const Context_Control *context )
 #define PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE PPC_CONTEXT_GPR_OFFSET( 32 )
 
 #ifdef PPC_MULTILIB_ALTIVEC
+  #ifdef __powerpc64__
+    #define PPC_CONTEXT_OFFSET_VRSAVE \
+      ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 )
+  #else
+    #define PPC_CONTEXT_OFFSET_VRSAVE \
+      ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 24 )
+  #endif
+  #define PPC_CONTEXT_OFFSET_VSCR ( PPC_CONTEXT_OFFSET_VRSAVE + 12 )
   #define PPC_CONTEXT_OFFSET_V( v ) \
-    ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8)
+    ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_VRSAVE + 16)
   #define PPC_CONTEXT_OFFSET_V20 PPC_CONTEXT_OFFSET_V( 20 )
   #define PPC_CONTEXT_OFFSET_V21 PPC_CONTEXT_OFFSET_V( 21 )
   #define PPC_CONTEXT_OFFSET_V22 PPC_CONTEXT_OFFSET_V( 22 )
@@ -387,9 +401,8 @@ static inline ppc_context *ppc_get_context( const Context_Control *context )
   #define PPC_CONTEXT_OFFSET_V29 PPC_CONTEXT_OFFSET_V( 29 )
   #define PPC_CONTEXT_OFFSET_V30 PPC_CONTEXT_OFFSET_V( 30 )
   #define PPC_CONTEXT_OFFSET_V31 PPC_CONTEXT_OFFSET_V( 31 )
-  #define PPC_CONTEXT_OFFSET_VRSAVE PPC_CONTEXT_OFFSET_V( 32 )
   #define PPC_CONTEXT_OFFSET_F( f ) \
-    ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_VRSAVE + 8 )
+    ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_V( 32 ) )
 #else
   #define PPC_CONTEXT_OFFSET_F( f ) \
     ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 )
@@ -419,7 +432,7 @@ static inline ppc_context *ppc_get_context( const Context_Control *context )
 #if defined(PPC_MULTILIB_FPU)
   #define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_F( 32 )
 #elif defined(PPC_MULTILIB_ALTIVEC)
-  #define PPC_CONTEXT_VOLATILE_SIZE (PPC_CONTEXT_OFFSET_VRSAVE + 4)
+  #define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_V( 33 )
 #elif defined(__ALTIVEC__)
   #define PPC_CONTEXT_VOLATILE_SIZE \
     (PPC_CONTEXT_GPR_OFFSET( 32 ) + 8 \
diff --git a/cpukit/score/cpu/powerpc/ppc-context-validate.S b/cpukit/score/cpu/powerpc/ppc-context-validate.S
index e4331b2661..67cb5b45c3 100644
--- a/cpukit/score/cpu/powerpc/ppc-context-validate.S
+++ b/cpukit/score/cpu/powerpc/ppc-context-validate.S
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-2-Clause */
 
 /*
- * Copyright (c) 2013, 2017 embedded brains GmbH.  All rights reserved.
+ * Copyright (c) 2013, 2020 embedded brains GmbH.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -99,6 +99,7 @@
   #define VTMP_OFFSET VOFFSET(12)
   #define VTMP2_OFFSET VOFFSET(13)
   #define VRSAVE_OFFSET VOFFSET(14)
+  #define VRSAVE2_OFFSET (VOFFSET(14) + 4)
   #define VSCR_OFFSET (VOFFSET(14) + 12)
   #define ALTIVECEND VOFFSET(15)
 #else
@@ -161,6 +162,13 @@ _CPU_Context_validate:
 #endif
 
 #ifdef PPC_MULTILIB_ALTIVEC
+	mfvrsave	r0
+	stw	r0, VRSAVE_OFFSET(r1)
+	li	r0, -1
+	mtvrsave	r0
+	mfvscr	v0
+	li	r0, VSCR_OFFSET
+	stvewx	v0, r1, r0
 	li	r0, V20_OFFSET
 	stvx	v20, r1, r0
 	li	r0, V21_OFFSET
@@ -185,11 +193,6 @@ _CPU_Context_validate:
 	stvx	v30, r1, r0
 	li	r0, V31_OFFSET
 	stvx	v31, r1, r0
-	mfvscr	v0
-	li	r0, VSCR_OFFSET
-	stvewx	v0, r1, r0
-	mfvrsave	r0
-	stw	r0, VRSAVE_OFFSET(r1)
 #endif
 
 	/* Fill */
@@ -337,8 +340,10 @@ _CPU_Context_validate:
 	FILL_V 29
 	FILL_V 30
 	FILL_V 31
+#ifndef __PPC_VRSAVE__
 	addi	r4, r3, 0x700
 	mtvrsave	r4
+#endif
 #endif
 
 	/* Check */
@@ -516,6 +521,15 @@ check:
 
 #ifdef PPC_MULTILIB_ALTIVEC
 .macro CHECK_V i
+#ifdef __PPC_VRSAVE__
+	mfvrsave	r4
+.if	(31 - \i) > 15
+	andis.	r5, r4, 1 << (31 - \i - 16)
+.else
+	andi.	r5, r4, 1 << (31 - \i)
+.endif
+	beq	1f
+#endif
 	li	r4, VTMP_OFFSET
 	stvx	\i, r1, r4
 	lwz	r5, VTMP_OFFSET(r1)
@@ -534,9 +548,43 @@ check:
 	addi	r4, r3, 0x600 + \i
 	cmpw	r5, r4
 	bne	restore
+#ifdef __PPC_VRSAVE__
+	mfvrsave	r4
+.if	(31 - \i) > 15
+	xoris	r4, r4, 1 << (31 - \i - 16)
+.else
+	xori	r4, r4, 1 << (31 - \i)
+.endif
+	mtvrsave	r4
+	b	2f
+1:
+.if	(31 - \i) > 15
+	oris	r4, r4, 1 << (31 - \i - 16)
+.else
+	ori	r4, r4, 1 << (31 - \i)
+.endif
+	mtvrsave	r4
+	addi	r4, r3, 0x300 + \i
+	stw	r4, VTMP_OFFSET(r1)
+	addi	r4, r3, 0x400 + \i
+	stw	r4, VTMP_OFFSET + 4(r1)
+	addi	r4, r3, 0x500 + \i
+	stw	r4, VTMP_OFFSET + 8(r1)
+	addi	r4, r3, 0x600 + \i
+	stw	r4, VTMP_OFFSET + 12(r1)
+	li	r4, VTMP_OFFSET
+	lvx	\i, r1, r4
+2:
+#endif
 .endm
 
 	/* Check VSCR */
+#ifdef __PPC_VRSAVE__
+	mfvrsave	r4
+	stw	r4, VRSAVE2_OFFSET(r1)
+	oris	r4, r4, 0x8000
+	mtvrsave	r4
+#endif
 	li	r4, VTMP_OFFSET
 	stvx	v0, r1, r4
 	mfvscr	v0
@@ -548,6 +596,10 @@ check:
 	bne	restore
 	li	r4, VTMP_OFFSET
 	lvx	v0, r1, r4
+#ifdef __PPC_VRSAVE__
+	lwz	r4, VRSAVE2_OFFSET(r1)
+	mtvrsave	r4
+#endif
 
 	CHECK_V 0
 	CHECK_V 1
@@ -582,9 +634,15 @@ check:
 	CHECK_V 30
 	CHECK_V 31
 	mfvrsave	r5
+#ifdef __PPC_VRSAVE__
+	addi	r5, r5, 1
+	cmplwi	cr0, r5, 1
+	bgt	restore
+#else
 	addi	r4, r3, 0x700
 	cmpw	r5, r4
 	bne	restore
+#endif
 #endif
 
 	mtcr	r29
@@ -595,7 +653,7 @@ check:
 restore:
 
 #ifdef PPC_MULTILIB_ALTIVEC
-	lwz	r0, VRSAVE_OFFSET(r1)
+	li	r0, -1
 	mtvrsave	r0
 	li	r0, V31_OFFSET
 	lvx	v31, r1, r0
@@ -621,6 +679,11 @@ restore:
 	lvx	v21, r1, r0
 	li	r0, V20_OFFSET
 	lvx	v20, r1, r0
+	li	r0, VSCR_OFFSET
+	lvewx	v0, r1, r0
+	mtvscr	v0
+	lwz	r0, VRSAVE_OFFSET(r1)
+	mtvrsave	r0
 #endif
 
 #ifdef PPC_MULTILIB_FPU
-- 
2.35.3
