[PATCH 05/15] Various updates and improvements
Ric Claus
claus at slac.stanford.edu
Tue Nov 27 03:26:43 UTC 2012
---
c/src/lib/libbsp/powerpc/virtex4/Makefile.am | 15 +-
c/src/lib/libbsp/powerpc/virtex4/clock/clock.c | 265 ---------
c/src/lib/libbsp/powerpc/virtex4/include/mmu.h | 270 ++++++++++
c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c | 2 -
c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S | 143 +++++
c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c | 225 ++++++++
c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c | 568 ++++++++++++++++++++
.../lib/libbsp/powerpc/virtex4/startup/bspstart.c | 85 ++--
c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds | 19 +-
c/src/lib/libbsp/powerpc/virtex4/startup/start.S | 22 +-
10 files changed, 1288 insertions(+), 326 deletions(-)
delete mode 100644 c/src/lib/libbsp/powerpc/virtex4/clock/clock.c
create mode 100644 c/src/lib/libbsp/powerpc/virtex4/include/mmu.h
create mode 100644 c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S
create mode 100644 c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c
create mode 100644 c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c
diff --git a/c/src/lib/libbsp/powerpc/virtex4/Makefile.am b/c/src/lib/libbsp/powerpc/virtex4/Makefile.am
index adce297..eb8204b 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/Makefile.am
+++ b/c/src/lib/libbsp/powerpc/virtex4/Makefile.am
@@ -41,10 +41,6 @@ libbsp_a_SOURCES = startup/bspclean.c \
# start
libbsp_a_SOURCES += startup/start.S
-# clock & timer
-libbsp_a_SOURCES += ../../../libcpu/@RTEMS_CPU@/ppc403/clock/clock.c
-libbsp_a_SOURCES += ../../../libcpu/@RTEMS_CPU@/ppc403/timer/timer.c
-
# console
libbsp_a_SOURCES += startup/dummy_console.c \
../../shared/dummy_printk_support.c
@@ -53,14 +49,23 @@ libbsp_a_SOURCES += startup/dummy_console.c \
include_bsp_HEADERS += include/irq.h
libbsp_a_SOURCES += irq/irq_init.c
+# mmu
+include_bsp_HEADERS += include/mmu.h
+libbsp_a_SOURCES += mmu/mmu.c
+
#vectors
include_bsp_HEADERS += ../../../libcpu/@RTEMS_CPU@/@exceptions@/bspsupport/vectors.h
include_bsp_HEADERS += ../../../libcpu/@RTEMS_CPU@/@exceptions@/bspsupport/irq_supp.h
+# libc
+libbsp_a_SOURCES += libc/memcpy.c libc/fastcopy.S
+
libbsp_a_LIBADD = ../../../libcpu/@RTEMS_CPU@/@exceptions@/rtems-cpu.rel \
../../../libcpu/@RTEMS_CPU@/@exceptions@/exc_bspsupport.rel \
../../../libcpu/@RTEMS_CPU@/shared/cache.rel \
- ../../../libcpu/@RTEMS_CPU@/shared/cpuIdent.rel
+ ../../../libcpu/@RTEMS_CPU@/shared/cpuIdent.rel \
+ ../../../libcpu/@RTEMS_CPU@/ppc403/clock.rel \
+ ../../../libcpu/@RTEMS_CPU@/ppc403/timer.rel
EXTRA_DIST = times
diff --git a/c/src/lib/libbsp/powerpc/virtex4/clock/clock.c b/c/src/lib/libbsp/powerpc/virtex4/clock/clock.c
deleted file mode 100644
index 7617720..0000000
--- a/c/src/lib/libbsp/powerpc/virtex4/clock/clock.c
+++ /dev/null
@@ -1,265 +0,0 @@
-/* clock.c
- *
- * This routine initializes the interval timer on the
- * PowerPC 403 CPU. The tick frequency is specified by the bsp.
- *
- * Author: Andrew Bray <andy at i-cubed.co.uk>
- *
- * COPYRIGHT (c) 1995 by i-cubed ltd.
- *
- * To anyone who acknowledges that this file is provided "AS IS"
- * without any express or implied warranty:
- * permission to use, copy, modify, and distribute this file
- * for any purpose is hereby granted without fee, provided that
- * the above copyright notice and this notice appears in all
- * copies, and that the name of i-cubed limited not be used in
- * advertising or publicity pertaining to distribution of the
- * software without specific, written prior permission.
- * i-cubed limited makes no representations about the suitability
- * of this software for any purpose.
- *
- * Derived from c/src/lib/libcpu/hppa1.1/clock/clock.c:
- *
- * Modifications for deriving timer clock from cpu system clock by
- * Thomas Doerfler <td at imd.m.isar.de>
- * for these modifications:
- * COPYRIGHT (c) 1997 by IMD, Puchheim, Germany.
- *
- * COPYRIGHT (c) 1989-2007.
- * On-Line Applications Research Corporation (OAR).
- *
- * The license and distribution terms for this file may be
- * found in the file LICENSE in this distribution or at
- * http://www.rtems.com/license/LICENSE.
- *
- * Modifications for PPC405GP by Dennis Ehlin
- */
-
-#include <rtems.h>
-#include <rtems/clockdrv.h>
-#include <rtems/libio.h>
-#include <stdlib.h> /* for atexit() */
-#include <rtems/bspIo.h>
-#include <rtems/powerpc/powerpc.h>
-
-/*
- * check, which exception handling code is present
- */
-
-#include <bsp.h>
-
-#include <bsp/vectors.h>
-#include <bsp/irq_supp.h>
-
-volatile uint32_t Clock_driver_ticks;
-static uint32_t pit_value, tick_time;
-static bool auto_restart;
-
-void Clock_exit( void );
-
-/*
- * These are set by clock driver during its init
- */
-
-rtems_device_major_number rtems_clock_major = ~0;
-rtems_device_minor_number rtems_clock_minor;
-
-static inline uint32_t get_itimer(void)
-{
- register uint32_t rc;
-
- asm volatile ("mfspr %0, 0x10c" : "=r" ((rc))); /* 405GP TBL */
-
- return rc;
-}
-
-/*
- * ISR Handler
- */
-
-int Clock_isr(BSP_Exception_frame *f, unsigned int vector)
-{
- uint32_t clicks_til_next_interrupt;
-#if defined(BSP_PPC403_CLOCK_ISR_IRQ_LEVEL)
- uint32_t l_orig = _ISR_Get_level();
-#endif
- if (!auto_restart)
- {
- uint32_t itimer_value;
- /*
- * setup for next interrupt; making sure the new value is reasonably
- * in the future.... in case we lost out on an interrupt somehow
- */
-
- itimer_value = get_itimer();
- tick_time += pit_value;
-
- /*
- * how far away is next interrupt *really*
- * It may be a long time; this subtraction works even if
- * Clock_clicks_interrupt < Clock_clicks_low_order via
- * the miracle of unsigned math.
- */
- clicks_til_next_interrupt = tick_time - itimer_value;
-
- /*
- * If it is too soon then bump it up.
- * This should only happen if CPU_HPPA_CLICKS_PER_TICK is too small.
- * But setting it low is useful for debug, so...
- */
-
- if (clicks_til_next_interrupt < 400)
- {
- tick_time = itimer_value + 1000;
- clicks_til_next_interrupt = 1000;
- /* XXX: count these! this should be rare */
- }
-
- /*
- * If it is too late, that means we missed the interrupt somehow.
- * Rather than wait 35-50s for a wrap, we just fudge it here.
- */
-
- if (clicks_til_next_interrupt > pit_value)
- {
- tick_time = itimer_value + 1000;
- clicks_til_next_interrupt = 1000;
- /* XXX: count these! this should never happen :-) */
- }
-
- asm volatile ("mtspr 0x3db, %0" :: "r"
- (clicks_til_next_interrupt)); /* PIT */
- }
-
- /* Clear the Programmable Interrupt Status */
- asm volatile ( "mtspr 0x3d8, %0" :: "r" (0x08000000)); /* TSR */
-
- Clock_driver_ticks++;
-
- rtems_clock_tick();
-
- return 0;
-}
-
-void ClockOff(void)
-{
- register uint32_t tcr;
-
- asm volatile ("mfspr %0, 0x3da" : "=r" ((tcr))); /* TCR */
-
- tcr &= ~ 0x04400000;
-
- asm volatile ("mtspr 0x3da, %0" : "=r" ((tcr)) : "0" ((tcr))); /* TCR */
-}
-
-void ClockOn(void)
-{
- uint32_t iocr;
- register uint32_t tcr;
-
- Clock_driver_ticks = 0;
-
- asm volatile ("mfdcr %0, 0x0b2" : "=r" (iocr)); /*405GP CPC0_CR1 */
- if (bsp_timer_internal_clock) {
- iocr &=~0x800000; /* timer clocked from system clock CETE*/
- }
- else {
- iocr |= 0x800000; /* select external timer clock CETE*/
- }
- asm volatile ("mtdcr 0x0b2, %0" : "=r" (iocr) : "0" (iocr)); /* 405GP CPC0_CR1 */
-
- /*
- * Enable auto restart
- */
-
- auto_restart = true;
-
- pit_value = rtems_configuration_get_microseconds_per_tick() *
- bsp_clicks_per_usec;
-
- /*
- * Set PIT value
- */
-
- asm volatile ("mtspr 0x3db, %0" : : "r" (pit_value)); /* PIT */
-
- /*
- * Set timer to autoreload, bit TCR->ARE = 1 0x0400000
- * Enable PIT interrupt, bit TCR->PIE = 1 0x4000000
- */
- tick_time = get_itimer() + pit_value;
-
- asm volatile ("mfspr %0, 0x3da" : "=r" ((tcr))); /* TCR */
- tcr = (tcr & ~0x04400000) | (auto_restart ? 0x04400000 : 0x04000000);
- asm volatile ("mtspr 0x3da, %0" : "=r" ((tcr)) : "0" ((tcr))); /* TCR */
-}
-
-
-
-void Install_clock(ppc_exc_handler_t clock_isr)
-{
-#ifdef ppc403
- uint32_t pvr;
-#endif /* ppc403 */
-
- Clock_driver_ticks = 0;
-
- /*
- * initialize the interval here
- * First tick is set to right amount of time in the future
- * Future ticks will be incremented over last value set
- * in order to provide consistent clicks in the face of
- * interrupt overhead
- */
-
- ppc_exc_set_handler( BSP_PPC403_CLOCK_HOOK_EXCEPTION, clock_isr );
- ClockOn();
-
- atexit(Clock_exit);
-}
-
-void
-ReInstall_clock(ppc_exc_handler_t clock_isr)
-{
- uint32_t isrlevel = 0;
-
- rtems_interrupt_disable(isrlevel);
-
- ppc_exc_set_handler( BSP_PPC403_CLOCK_HOOK_EXCEPTION, clock_isr );
- ClockOn();
-
- rtems_interrupt_enable(isrlevel);
-}
-
-
-/*
- * Called via atexit()
- * Remove the clock interrupt handler by setting handler to NULL
- *
- * This will not work on the 405GP because
- * when bit's are set in TCR they can only be unset by a reset
- */
-
-void Clock_exit(void)
-{
- ClockOff();
- ppc_exc_set_handler( BSP_PPC403_CLOCK_HOOK_EXCEPTION, 0 );
-}
-
-rtems_device_driver Clock_initialize(
- rtems_device_major_number major,
- rtems_device_minor_number minor,
- void *pargp
-)
-{
- Install_clock( Clock_isr );
-
- /*
- * make major/minor avail to others such as shared memory driver
- */
-
- rtems_clock_major = major;
- rtems_clock_minor = minor;
-
- return RTEMS_SUCCESSFUL;
-}
diff --git a/c/src/lib/libbsp/powerpc/virtex4/include/mmu.h b/c/src/lib/libbsp/powerpc/virtex4/include/mmu.h
new file mode 100644
index 0000000..3524f23
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/include/mmu.h
@@ -0,0 +1,270 @@
+#ifndef RTEMS_VIRTEX4_MMU_H
+#define RTEMS_VIRTEX4_MMU_H
+/**
+ * @file
+ *
+ * @ingroup Virtex4MMU
+ *
+ * @brief Routines to manipulate the PPC 405 MMU.
+ */
+/* $Id$ */
+/*
+ * Authorship
+ * ----------
+ * This software was created by
+ * Till Straumann <strauman at slac.stanford.edu>, 2005-2007,
+ * Stanford Linear Accelerator Center, Stanford University.
+ * and was transcribed for the PPC 405 by
+ * R. Claus <claus at slac.stanford.edu>, 2012,
+ * Stanford Linear Accelerator Center, Stanford University,
+ *
+ * Acknowledgement of sponsorship
+ * ------------------------------
+ * This software was produced by
+ * the Stanford Linear Accelerator Center, Stanford University,
+ * under Contract DE-AC03-76SFO0515 with the Department of Energy.
+ *
+ * Government disclaimer of liability
+ * ----------------------------------
+ * Neither the United States nor the United States Department of Energy,
+ * nor any of their employees, makes any warranty, express or implied, or
+ * assumes any legal liability or responsibility for the accuracy,
+ * completeness, or usefulness of any data, apparatus, product, or process
+ * disclosed, or represents that its use would not infringe privately owned
+ * rights.
+ *
+ * Stanford disclaimer of liability
+ * --------------------------------
+ * Stanford University makes no representations or warranties, express or
+ * implied, nor assumes any liability for the use of this software.
+ *
+ * Stanford disclaimer of copyright
+ * --------------------------------
+ * Stanford University, owner of the copyright, hereby disclaims its
+ * copyright and all other rights in this software. Hence, anyone may
+ * freely use it for any purpose without restriction.
+ *
+ * Maintenance of notices
+ * ----------------------
+ * In the interest of clarity regarding the origin and status of this
+ * SLAC software, this and all the preceding Stanford University notices
+ * are to remain affixed to any copy or derivative of this software made
+ * or distributed by the recipient and are to be affixed to any copy of
+ * software made or distributed by the recipient that contains a copy or
+ * derivative of this software.
+ *
+ * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
+ */
+
+#include <rtems.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup Virtex4MMU Virtex 4 - MMU Support
+ *
+ * @ingroup Virtex4
+ *
+ * @brief MMU support.
+ *
+ * @{
+ */
+
+/* Some routines require or return an index 'key'.
+ */
+typedef int bsp_tlb_idx_t;
+
+/* Cached copy of one TLB entry, kept so that the user cannot create
+ * conflicting (overlapping) entries; public for informational purposes.
+ * NOTE(review): bit-field order is implementation-defined - confirm layout.
+ */
+typedef struct {
+ struct {
+ uint32_t pad:24;
+ uint32_t tid:8; /**< Translation ID */
+ } id;
+ struct {
+ uint32_t epn:22; /**< Effective page number */
+ uint32_t size:3; /**< Page size */
+ uint32_t v:1; /**< Valid */
+ uint32_t att:2; /**< Little-endian, User-defined */
+ uint32_t pad:4;
+ } hi; /**< High word */
+ struct {
+ uint32_t rpn:22; /**< Real page number */
+ uint32_t perm:6; /**< Execute enable, Write-enable, Zone select */
+ uint32_t wimg:4; /**< Write-through, Caching inhibited, Mem coherent, Guarded */
+ } lo; /**< Low word */
+} bsp_tlb_entry_t;
+
+#define NTLBS 64
+
+extern bsp_tlb_entry_t* bsp_mmu_cache;
+
+
+// These constants will have to be shifted right by 20 bits before
+// being inserted into the high word of the TLB.
+
+#define MMU_M_SIZE_1K (0x00000000U)
+#define MMU_M_SIZE_4K (0x08000000U)
+#define MMU_M_SIZE_16K (0x10000000U)
+#define MMU_M_SIZE_64K (0x18000000U)
+#define MMU_M_SIZE_256K (0x20000000U)
+#define MMU_M_SIZE_1M (0x28000000U)
+#define MMU_M_SIZE_4M (0x30000000U)
+#define MMU_M_SIZE_16M (0x38000000U)
+#define MMU_M_SIZE_MIN (MMU_M_SIZE_1K)
+#define MMU_M_SIZE_MAX (MMU_M_SIZE_16M)
+#define MMU_M_SIZE (0x38000000U)
+#define MMU_V_SIZE (27)
+
+#define MMU_M_ATTR_LITTLE_ENDIAN (0x02000000U)
+#define MMU_M_ATTR_USER0 (0x01000000U)
+#define MMU_M_ATTR (0x03000000U)
+#define MMU_V_ATTR (24)
+
+// These constants have the same bit positions they'll occupy
+// in the low word of the TLB.
+
+#define MMU_M_PERM_EXEC (0x00000200U)
+#define MMU_M_PERM_DATA_WRITE (0x00000100U)
+#define MMU_M_PERM_ZONE_SELECT (0x000000f0U)
+#define MMU_M_PERM (0x000003f0U)
+#define MMU_V_PERM (4)
+
+#define MMU_M_PROP_WRITE_THROUGH (0x00000008U)
+#define MMU_M_PROP_UNCACHED (0x00000004U)
+#define MMU_M_PROP_MEM_COHERENT (0x00000002U)
+#define MMU_M_PROP_GUARDED (0x00000001U)
+#define MMU_M_PROP (0x0000000fU)
+#define MMU_V_PROP (0)
+
+
+/*
+ * Dump (cleartext) content info from cached TLB entries
+ * to a file (stdout if f==NULL).
+ */
+void
+bsp_mmu_dump_cache(FILE *f);
+
+/* Read a TLB entry from the hardware and store the settings in the
+ * bsp_mmu_cache[] structure.
+ *
+ * The routine can perform this operation quietly or
+ * print information to a file.
+ *
+ * 'key': TLB entry index.
+ * 'quiet': perform operation silently (no info printed) if nonzero.
+ * 'f': open FILE where to print information. May be NULL, in
+ * which case 'stdout' is used.
+ *
+ * RETURNS:
+ * 0: success; TLB entry is VALID
+ * +1: success but TLB entry is INVALID
+ * < 0: error (-1: invalid argument)
+ * (-2: driver not initialized)
+ */
+int
+bsp_mmu_update(bsp_tlb_idx_t key, bool quiet, FILE *f);
+
+/* Initialize cache. Should be done only once although this is not enforced.
+ *
+ * RETURNS: zero on success, nonzero on error; in this case the driver will
+ * refuse to change TLB entries (other than disabling them).
+ */
+int
+bsp_mmu_initialize(void);
+
+/* Find first free TLB entry by examining all entries' valid bit. The first
+ * entry without the valid bit set is returned.
+ *
+ * RETURNS: A free TLB entry number. -1 if no entry can be found.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find_first_free(void);
+
+/* Write a TLB entry (can also be used to disable an entry).
+ *
+ * The routine checks against the cached data in bsp_mmu_cache[]
+ * to prevent the user from generating overlapping entries.
+ *
+ * 'idx': TLB entry # to manipulate
+ * 'ea': Effective address (must be page aligned)
+ * 'pa': Physical address (must be page aligned)
+ * 'sz': Page size selector; page size is 1024 * 2^(2*sz) bytes.
+ * 'sz' may also be one of the following:
+ * - page size in bytes ( >= 1024 ); the selector
+ * value is then computed by this routine.
+ * However, 'sz' must be a valid page size
+ * or -1 will be returned.
+ * - a value < 0 to invalidate/disable the
+ * TLB entry.
+ * 'flgs': Page's little-endian & user-defined flags, permissions and attributes
+ * 'tid': Translation ID
+ *
+ * RETURNS: 0 on success, nonzero on error:
+ *
+ * >0: requested mapping would overlap with
+ * existing mapping in another entry. Return
+ * value gives conflicting entry + 1; i.e.,
+ * if a value of 4 is returned then the request
+ * conflicts with existing mapping in entry 3.
+ * -1: invalid argument
+ * -3: driver not initialized (or initialization failed).
+ * <0: other error
+ */
+bsp_tlb_idx_t
+bsp_mmu_write(bsp_tlb_idx_t idx, uint32_t ea, uint32_t pa, uint sz,
+ uint32_t flgs, uint32_t tid);
+
+/* Check if a ea/tid/sz mapping overlaps with an existing entry.
+ *
+ * 'ea': The Effective Address to match against
+ * 'sz': The 'logarithmic' size selector; the page size
+ * is 1024*2^(2*sz).
+ * 'tid': The TID to match against
+ *
+ * RETURNS:
+ * >= 0: index of TLB entry that already provides a mapping
+ * which overlaps within the ea range.
+ * -1: SUCCESS (no conflicting entry found)
+ * <=-2: ERROR (invalid input)
+ */
+bsp_tlb_idx_t
+bsp_mmu_match(uint32_t ea, int sz, uint32_t tid);
+
+/* Find TLB index that maps 'ea/tid' combination
+ *
+ * 'ea': Effective address to match against
+ * 'tid': The TID to match against
+ *
+ * RETURNS: index 'key'; i.e., the index number.
+ *
+ * On error (no mapping) -1 is returned.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find(uint32_t ea, uint32_t tid);
+
+/* Mark TLB entry as invalid ('disabled').
+ *
+ * 'key': TLB entry index.
+ *
+ * RETURNS: zero on success, nonzero on error (TLB unchanged).
+ *
+ * NOTE: If a TLB entry is disabled the associated
+ * entry in bsp_mmu_cache[] is also marked as disabled.
+ */
+int
+bsp_mmu_invalidate(bsp_tlb_idx_t key);
+
+/** @} */
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
diff --git a/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c b/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c
index 74aaebb..ffcfe4c 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c
+++ b/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c
@@ -26,8 +26,6 @@
#include <rtems/powerpc/powerpc.h>
#include <bsp/vectors.h>
-uint32_t* IRQ_Counter = (uint32_t*)0x1500;
-
static rtems_irq_connect_data rtemsIrqTbl[BSP_IRQ_NUMBER];
rtems_irq_connect_data *BSP_rtems_irq_tbl;
rtems_irq_global_settings* BSP_rtems_irq_config;
diff --git a/c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S b/c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S
new file mode 100644
index 0000000..2ecbdcc
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S
@@ -0,0 +1,143 @@
+/*----------------------------------------------------------------------------+
+| COPYRIGHT I B M CORPORATION 2007
+| LICENSED MATERIAL - PROGRAM PROPERTY OF I B M
+| US Government Users Restricted Rights - Use, duplication or
+| disclosure restricted by GSA ADP Schedule Contract with
+| IBM Corp.
++----------------------------------------------------------------------------*/
+/*----------------------------------------------------------------------------+
+| PPC440 Example Code
+| Author: Paul Gramann
+| Component: none
+| File: fastcopy.S
+| Purpose: Fast memory copy for PPC405 and PPC440 cores
+| Changes:
+| Date: Comment:
+| ----- --------
+| 29-Nov-07 Created pag
++----------------------------------------------------------------------------*/
+/*----------------------------------------------------------------------------+
+| void fastcopy(dst, src, line_count);
+|
+| Inputs:
+| r3: destination address
+| r4: source address
+| r5: line count
+|
+| Notes:
+| dst and src address are assumed to be cache-line aligned.
+| line_count is assumed to be at least 3 cache-lines.
+| function needs to be in I-cache in order to not interfere with D-cache
+| access pattern.
+|
+| Algorithm:
+| touch (dcbt) 2 lines into the cache from source
+| zero (dcbz) 2 lines in cache for destination
+| repeat until 2 lines left to copy {
+| touch (dcbt) another line into the cache from source
+| load a source line from cache into 8 registers
+| zero (dcbz) another line in the cache for destination
+| store 8 registers to a destination cache line
+| }
+| copy last 2 cache lines of data from source to destination.
+|
+| During this algorithm the touches and loads are performed two lines ahead
+| of the data being copied.
+|
+| Register usage:
+| r0: scratch and copy
+| r1: stack frame pointer - not used
+| r2: small data area pointer - not used
+| r3: destination address
+| r4: source address
+| r5: line count and copy
+| r6: dcbz/dcbt offset and icbt address
+| r7-r12: copy and timing
++----------------------------------------------------------------------------*/
+
+#include <rtems/asm.h>
+
+ // Global Entry Point Definitions
+ .section .text,"ax"
+
+ .global fastcopy
+
+fastcopy:
+// touch this code into the cache
+mflr r0 // save lr
+bl 4 // get our code address in lr
+mflr r6 // in r6
+mtlr r0 // restore lr
+addi r7,0,0 // initialize r7 to 0
+addi r0,0,0x10 // r0 = 0x10, the number of lines to touch
+mtctr r0 // initialize ctr to 0x10 lines
+itouch_loop:
+icbt r6,r7 // touch line into i-cache
+addi r6,r6,0x20 // move to next line
+bdnz itouch_loop // repeat
+
+// initialize r6 for use in dcbt and dcbz's.
+// also adjust the data pointers to work with load/store update
+addi r6,0,4 // r6 = 4, cancels the -4 bias applied to r4/r3 below
+subf r4,r6,r4 // r4 = src - 4, so the first lwzu 4(r4) reads the first word
+subf r3,r6,r3 // r3 = dst - 4, so the first stwu 4(r3) writes the first word
+
+// pre-touch and pre-zero two lines of the source and destination
+// leaving r6 offset pointing to next untouched line
+// Note that the order must be dcbt, dcbt, dcbz, dcbz
+dcbt r4,r6 // touch source line 0
+addi r6,r6,0x20 // r6 = 0x24
+dcbt r4,r6 // touch source line 1
+addi r6,r6,-0x20 // r6 = 4 again
+dcbz r3,r6 // zero destination line 0
+addi r6,r6,0x20 // r6 = 0x24
+dcbz r3,r6 // zero destination line 1
+addi r6,r6,0x20 // r6 = 0x44, two lines ahead of the copy position
+
+addi r5,r5,-2 // subtract 2 from count
+mtctr r5 // and put in ctr
+
+cache_copy_loop:
+dcbt r4,r6 // touch 2 lines ahead
+lwzu r0,4(r4) // load 8 registers from cache
+lwzu r5,4(r4)
+lwzu r7,4(r4)
+lwzu r8,4(r4)
+lwzu r9,4(r4)
+lwzu r10,4(r4)
+lwzu r11,4(r4)
+lwzu r12,4(r4)
+dcbz r3,r6 // zero 2 lines ahead
+stwu r0,4(r3) // store 8 registers to cache
+stwu r5,4(r3)
+stwu r7,4(r3)
+stwu r8,4(r3)
+stwu r9,4(r3)
+stwu r10,4(r3)
+stwu r11,4(r3)
+stwu r12,4(r3)
+bdnz cache_copy_loop // one line per iteration, line_count - 2 iterations
+
+// Copy last two lines of data without cache ops.
+addi r0,0,2
+mtctr r0 // set ctr to 2
+final_copy_loop:
+lwzu r0,4(r4) // load 8 regs
+lwzu r5,4(r4)
+lwzu r7,4(r4)
+lwzu r8,4(r4)
+lwzu r9,4(r4)
+lwzu r10,4(r4)
+lwzu r11,4(r4)
+lwzu r12,4(r4)
+stwu r0,4(r3) // store 8 regs
+stwu r5,4(r3)
+stwu r7,4(r3)
+stwu r8,4(r3)
+stwu r9,4(r3)
+stwu r10,4(r3)
+stwu r11,4(r3)
+stwu r12,4(r3)
+bdnz final_copy_loop
+addi r3,r3,4 // undo the -4 bias: r3 = dst + 32 * line_count
+blr // all done - return
diff --git a/c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c b/c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c
new file mode 100644
index 0000000..a987195
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c
@@ -0,0 +1,225 @@
+/*!@file memcpy.c
+*
+* @brief PowerPC optimized memcpy routines
+*
+* @author S. Maldonado -- REG - (smaldona at slac.stanford.edu)
+*
+* @date Sept 24, 2012 -- Created
+*
+* $Revision: $
+*
+* @verbatim Copyright 2012
+* by
+* The Board of Trustees of the
+* Leland Stanford Junior University.
+* All rights reserved.
+* @endverbatim
+*/
+
+#include <rtems.h>
+#include <rtems/powerpc/powerpc.h> /* For PPC_CACHE_ALIGNMENT */
+
+/* Nonzero if either X or Y is not aligned on a "long" boundary. */
+#define UNALIGNED(X, Y) \
+  (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1)))
+
+/* How many bytes are copied each iteration of the 4X unrolled loop. */
+#define BIGBLOCKSIZE (sizeof (long) << 2)
+
+/* How many bytes are copied each iteration of the word copy loop. */
+#define LITTLEBLOCKSIZE (sizeof (long))
+
+/* Threshold for punting to the byte copier: nonzero if LEN is below it. */
+#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
+
+/* Nonzero if either X or Y is not aligned on a cache line boundary. */
+#define CACHE_UNALIGNED(X, Y) \
+  (((long)(X) & (PPC_CACHE_ALIGNMENT - 1)) | ((long)(Y) & (PPC_CACHE_ALIGNMENT - 1)))
+
+/* Nonzero if LEN is below the fastcopy threshold of 16 cache lines. */
+#define MIN_BLOCKS(LEN) ((LEN) < (PPC_CACHE_ALIGNMENT*16))
+
+/* IBM fastcopy assembly routine */
+extern int *fastcopy(void *dst, const void *src, size_t count);
+
+void *memcpyppc32(void *dst, const void *src, size_t size);
+
+void *memcpynewlib(char *dst0, const char *src0, size_t len0);
+
+/*
+ * RTEMS drop-in replacement for the C library memcpy().
+ *
+ * Copies that span at least 16 cache lines and whose source and destination
+ * are both cache-line aligned go through the IBM fastcopy() routine; the
+ * tail, and every other request, is handled by word/byte loops or by
+ * memcpyppc32().  Returns the ORIGINAL dst, as ISO C requires of memcpy().
+ */
+void *memcpy(void *dst, const void *src, size_t len)
+{
+  uint8_t *d = dst;
+  const uint8_t *s = src;
+  size_t bulk;
+
+  /* Small or cache-unaligned request: memcpyppc32() copes with both. */
+  if (MIN_BLOCKS(len) || CACHE_UNALIGNED(src, dst))
+  {
+    /* Its return value is deliberately ignored; dst is what we report. */
+    (void)memcpyppc32(dst, src, len);
+    return dst;
+  }
+
+  /* fastcopy() takes a count of whole cache lines, not bytes. */
+  bulk = len / PPC_CACHE_ALIGNMENT;
+  fastcopy(dst, src, bulk);
+
+  /* Step past the bytes fastcopy() has already moved. */
+  bulk *= PPC_CACHE_ALIGNMENT;
+  s += bulk;
+  d += bulk;
+  len -= bulk;
+
+  /* Residual whole words; both pointers are still cache-line aligned. */
+  while (len >= LITTLEBLOCKSIZE)
+  {
+    *(uint32_t *)d = *(const uint32_t *)s;
+    s += LITTLEBLOCKSIZE;
+    d += LITTLEBLOCKSIZE;
+    len -= LITTLEBLOCKSIZE;
+  }
+
+  /* Residual bytes (fewer than one word). */
+  while (len--)
+    *d++ = *s++;
+  return dst;
+}
+
+/*
+ * A memcpy routine optimized for ppc32 which handles small data blocks,
+ * destination address alignment, and cache line copying.  Returns the
+ * original dst, like memcpy().  Adapted from code examples provided in a
+ * 2004 posting at http://www.powerdeveloper.org/forums/viewtopic.php?t=1426
+ */
+void *memcpyppc32(void *dst, const void *src, size_t size)
+{
+  uint8_t *d = dst;
+  const uint8_t *s = src;
+  uint32_t i;
+
+  /* Fewer than 4 bytes: nothing to align, go straight to the byte loop. */
+  if (size < 4) goto memcpy_bytes;
+
+  /*
+   * Align the destination to 16, then 32 bits.  The source may stay
+   * misaligned.  NOTE(review): presumably relies on the core handling
+   * misaligned halfword/word loads in hardware - confirm for the target.
+   */
+  if ((uintptr_t)d & 1)
+  {
+    *d++ = *s++;
+    size--;
+  }
+
+  /* align dest to 32 bits */
+  if ((uintptr_t)d & 2)
+  {
+    *(uint16_t *)d = *(const uint16_t *)s;
+    s += 2;
+    d += 2;
+    size -= 2;
+  }
+
+  /* cache line copy, min 4 lines */
+  if (size >= 128)
+  {
+    uint32_t *dst32;
+    const uint32_t *src32;
+
+    /* align dest to a cache line (32 bytes), one word at a time */
+    for (; (uintptr_t)d & 31; s += 4, d += 4, size -= 4)
+      *(uint32_t *)d = *(const uint32_t *)s;
+
+    src32 = (const uint32_t *)s;
+    dst32 = (uint32_t *)d;
+
+    /* copy whole cache lines */
+    for (i = size / (8 * sizeof(uint32_t)); i; i--)
+    {
+      /* dcbz zeroes the line; "memory" keeps the stores below after it. */
+      asm volatile ("dcbz 0,%0" : : "r" (&dst32[0]) : "memory");
+      /* dcbt here doesn't improve performance */
+      dst32[0] = src32[0];
+      dst32[1] = src32[1];
+      dst32[2] = src32[2];
+      dst32[3] = src32[3];
+      dst32[4] = src32[4];
+      dst32[5] = src32[5];
+      dst32[6] = src32[6];
+      dst32[7] = src32[7];
+
+      src32 += 8;
+      dst32 += 8;
+    }
+    size &= 8 * sizeof(uint32_t) - 1;
+    s = (const uint8_t *)src32;
+    d = (uint8_t *)dst32;
+  }
+
+  /* copy residual words */
+  for (i = size / sizeof(uint32_t); i; i--, s += 4, d += 4)
+    *(uint32_t *)d = *(const uint32_t *)s;
+  size &= sizeof(uint32_t) - 1;
+
+  /* copy residual bytes */
+memcpy_bytes:
+  while (size--)
+    *d++ = *s++;
+
+  return dst;
+}
+
+/*
+ * Reference copy of newlib-1.16.0's generic memcpy; returns dst0 as memcpy does.
+ */
+void *memcpynewlib(char *dst0, const char *src0, size_t len0) {
+  char *dst = dst0;
+  const char *src = src0;
+  long *aligned_dst;
+  const long *aligned_src;
+  size_t len = len0;
+
+  /*
+   * Use the word loops only when the block is big enough and both SRC and
+   * DST are long-aligned; otherwise fall through to the byte copy loop.
+   */
+  if (!TOO_SMALL(len) && !UNALIGNED (src, dst))
+  {
+    aligned_dst = (long*)dst;
+    aligned_src = (const long*)src;
+
+    /* Copy 4X long words at a time if possible. */
+    while (len >= BIGBLOCKSIZE)
+    {
+      *aligned_dst++ = *aligned_src++;
+      *aligned_dst++ = *aligned_src++;
+      *aligned_dst++ = *aligned_src++;
+      *aligned_dst++ = *aligned_src++;
+      len -= BIGBLOCKSIZE;
+    }
+
+    /* Copy one long word at a time if possible. */
+    while (len >= LITTLEBLOCKSIZE)
+    {
+      *aligned_dst++ = *aligned_src++;
+      len -= LITTLEBLOCKSIZE;
+    }
+
+    dst = (char*)aligned_dst;
+    src = (const char*)aligned_src;
+  }
+
+  /* Pick up any residual bytes */
+  while (len--)
+    *dst++ = *src++;
+
+  /* Report the start of the destination, not the advanced cursor. */
+  return dst0;
+}
diff --git a/c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c b/c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c
new file mode 100644
index 0000000..c294a9a
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c
@@ -0,0 +1,568 @@
+/**
+ * @file
+ *
+ * @ingroup Virtex4MMU
+ *
+ * @brief Implementation of routines to manipulate the PPC 405 mmu.
+ *
+ * Since this is a real-time OS we want to stay away from
+ * software TLB replacement.
+ */
+/* $Id$ */
+/*
+ * Authorship
+ * ----------
+ * This software was created by
+ * Till Straumann <strauman at slac.stanford.edu>, 2005-2007,
+ * Stanford Linear Accelerator Center, Stanford University.
+ * and was transcribed for the PPC 405 by
+ * R. Claus <claus at slac.stanford.edu>, 2012,
+ * Stanford Linear Accelerator Center, Stanford University,
+ *
+ * Acknowledgement of sponsorship
+ * ------------------------------
+ * This software was produced by
+ * the Stanford Linear Accelerator Center, Stanford University,
+ * under Contract DE-AC03-76SFO0515 with the Department of Energy.
+ *
+ * Government disclaimer of liability
+ * ----------------------------------
+ * Neither the United States nor the United States Department of Energy,
+ * nor any of their employees, makes any warranty, express or implied, or
+ * assumes any legal liability or responsibility for the accuracy,
+ * completeness, or usefulness of any data, apparatus, product, or process
+ * disclosed, or represents that its use would not infringe privately owned
+ * rights.
+ *
+ * Stanford disclaimer of liability
+ * --------------------------------
+ * Stanford University makes no representations or warranties, express or
+ * implied, nor assumes any liability for the use of this software.
+ *
+ * Stanford disclaimer of copyright
+ * --------------------------------
+ * Stanford University, owner of the copyright, hereby disclaims its
+ * copyright and all other rights in this software. Hence, anyone may
+ * freely use it for any purpose without restriction.
+ *
+ * Maintenance of notices
+ * ----------------------
+ * In the interest of clarity regarding the origin and status of this
+ * SLAC software, this and all the preceding Stanford University notices
+ * are to remain affixed to any copy or derivative of this software made
+ * or distributed by the recipient and are to be affixed to any copy of
+ * software made or distributed by the recipient that contains a copy or
+ * derivative of this software.
+ *
+ * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
+ */
+
+/* 405 MSR definitions; note that there are *substantial* differences
+ * compared to classic powerpc; in particular, IS/DS are *different*
+ * from IR/DR.
+ *
+ * Also: To disable/enable all external interrupts, CE and EE must both be
+ * controlled.
+ */
+#include <rtems.h>
+#include <rtems/bspIo.h>
+#include <rtems/powerpc/powerpc.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <bsp/mmu.h>
+
+
+#ifdef DEBUG
+#define STATIC
+#else
+#define STATIC static
+#endif
+
+
+bsp_tlb_entry_t* bsp_mmu_cache = 0;
+
+
+/*
+ * printf-style output helper for this file.
+ *
+ * These routines are likely to run during early initialization, before
+ * stdio is usable.  The text is therefore routed through vprintk() unless
+ * a stream 'f' was supplied and _impure_ptr->__sdidinit is set, in which
+ * case vfprintf(f, ...) is used.
+ *
+ * 'f':   stream to print to; may be NULL to force printk output.
+ * 'fmt': printf-style format string followed by its arguments.
+ */
+static void
+myprintf(FILE *f, char *fmt, ...)
+{
+  va_list args;
+  int     stdio_ready;
+
+  /* Short-circuit keeps the flag from being examined when no stream is given */
+  stdio_ready = (f && _impure_ptr->__sdidinit);
+
+  va_start(args, fmt);
+  if (stdio_ready) {
+    vfprintf(f,fmt,args);
+  } else {
+    /* Might be called at an early stage when stdio is not yet initialized. */
+    vprintk(fmt,args);
+  }
+  va_end(args);
+}
+
+
+/*
+ * Print all valid entries held in bsp_mmu_cache[].
+ *
+ * For every cached entry whose valid bit is set, two lines are printed to
+ * 'f': the effective-address range with TID and EU0 attribute, then the
+ * physical-address range with permissions and WIMG bits.  Nothing is read
+ * from the hardware here; only the cached copy is examined.
+ *
+ * If the cache has not been set up yet, a message is printed (to stderr,
+ * not 'f') and the routine returns.
+ */
+void
+bsp_mmu_dump_cache(FILE *f)
+{
+  bsp_tlb_idx_t n;
+
+  if ( bsp_mmu_cache == 0 ) {
+    myprintf(stderr,"MMU TLB cache not initialized\n");
+    return;
+  }
+
+  for ( n = 0; n < NTLBS; n++ ) {
+    bsp_tlb_entry_t *e = &bsp_mmu_cache[n];
+
+    /* Only entries marked valid are worth showing */
+    if ( e->hi.v ) {
+      myprintf(f, "#%2i: EA 0x%08x .. 0x%08x, TID 0x%03x, EU0 0x%01x\n",
+               n,
+               e->hi.epn << 10,
+               (e->hi.epn << 10) + (1024<<(2*e->hi.size))-1,
+               e->id.tid,
+               e->hi.att);
+      myprintf(f, " PA 0x%08"PRIx32" .. 0x%08"PRIx32", PERM 0x%03x, WIMG 0x%02x\n",
+               e->lo.rpn << 10,
+               (e->lo.rpn << 10) + (1024<<(2*e->hi.size))-1,
+               e->lo.perm,
+               e->lo.wimg);
+    }
+  }
+}
+
+/*
+ * Read hardware TLB entry 'key' into '*tlb'.
+ *
+ * Three words are stored through 'tlb':
+ *   offset 0: the current PID register value (mfpid)
+ *   offset 4: the entry's high word (tlbrehi)
+ *   offset 8: the entry's low word (tlbrelo)
+ *
+ * The asm writes memory that is not named by any output operand, so it
+ * carries a "memory" clobber; without it the compiler is free to assume
+ * '*tlb' is unchanged by this call once the function is inlined.
+ *
+ * NOTE(review): on the PPC405 tlbrehi is, as far as I recall, documented to
+ * load PID with the TID of the entry being read.  If so, the value saved at
+ * offset 0 is the PID from *before* the read rather than this entry's TID,
+ * and PID is left modified on return -- confirm against the 405 manual.
+ *
+ * Callers in this file invoke this with interrupts disabled.
+ */
+static void
+fetch(bsp_tlb_idx_t key, bsp_tlb_entry_t* tlb)
+{
+  register uint32_t tmp;
+  __asm__ volatile ("mfpid   %[tmp]           \n\t"
+                    "stw     %[tmp],0(%[tlb]) \n\t"
+                    "tlbrehi %[tmp],%[key]    \n\t"
+                    "stw     %[tmp],4(%[tlb]) \n\t"
+                    "tlbrelo %[tmp],%[key]    \n\t"
+                    "stw     %[tmp],8(%[tlb]) \n\t"
+                    "sync                     \n\t"
+                    : [tmp]"=&r"(tmp)
+                    : [key]"r"(key),
+                      [tlb]"b"(tlb)   /* "b": base register, never r0 */
+                    : "memory"        /* stores through 'tlb' */
+                   );
+}
+
+
+/*
+ * Write '*tlb' into hardware TLB entry 'key'.
+ *
+ * Three words are loaded through 'tlb':
+ *   offset 0: moved into the PID register (mtpid)
+ *   offset 4: written as the entry's high word (tlbwehi)
+ *   offset 8: written as the entry's low word (tlbwelo)
+ *
+ * PID is not restored afterwards.  No synchronisation instruction is issued
+ * here; callers follow up with commit().
+ *
+ * The asm reads memory that is not named by any input operand, so it
+ * carries a "memory" clobber; without it the compiler may treat the
+ * caller's preceding assignments to '*tlb' as dead stores and drop them.
+ */
+static void
+store(bsp_tlb_idx_t key, bsp_tlb_entry_t* tlb)
+{
+  register uint32_t tmp;
+  __asm__ volatile ("lwz     %[tmp],0(%[tlb]) \n\t"
+                    "mtpid   %[tmp]           \n\t"
+                    "lwz     %[tmp],4(%[tlb]) \n\t"
+                    "tlbwehi %[tmp],%[key]    \n\t"
+                    "lwz     %[tmp],8(%[tlb]) \n\t"
+                    "tlbwelo %[tmp],%[key]    \n\t"
+                    : [tmp]"=&r"(tmp)
+                    : [tlb]"b"(tlb),  /* "b": base register, never r0 */
+                      [key]"r"(key)
+                    : "memory"        /* loads through 'tlb' */
+                   );
+}
+
+
+/*
+ * Issue an 'isync' instruction.  In this file it is called right after
+ * store() has rewritten a TLB entry, before interrupts are re-enabled.
+ */
+static void
+commit(void)
+{
+ __asm__ volatile("isync \n\t");
+}
+
+
+/*
+ * Read a TLB entry from the hardware and store the current settings in the
+ * bsp_mmu_cache[] structure.
+ *
+ * The routine can perform this operation quietly or print a description of
+ * the entry.
+ *
+ * 'key':   which TLB entry to access (0 .. NTLBS-1).
+ * 'quiet': perform operation silently (no info printed) if true.
+ * 'f':     open FILE where to print information.  May be NULL, in which
+ *          case the text is emitted through printk instead.
+ *
+ * RETURNS:
+ *    0: success; TLB entry is VALID
+ *   +1: success but TLB entry is INVALID
+ *  < 0: error (-1: invalid argument)
+ *             (-2: driver not initialized)
+ */
+int
+bsp_mmu_update(bsp_tlb_idx_t key, bool quiet, FILE *f)
+{
+  rtems_interrupt_level level;
+  bsp_tlb_entry_t      *entry;
+
+  if ( key < 0 || key > NTLBS-1 )
+    return -1;
+
+  if ( bsp_mmu_cache == 0 )
+    return -2;
+
+  entry = &bsp_mmu_cache[key];
+
+  /* Refresh the cached copy straight from the hardware */
+  rtems_interrupt_disable(level);
+  fetch(key, entry);
+  rtems_interrupt_enable(level);
+
+  /* Disabled entry: optionally report it, then signal "invalid" */
+  if ( !entry->hi.v ) {
+    if ( !quiet ) {
+      myprintf(f,
+               "TLB Entry # %2d <OFF> (size 0x%x = 0x%xb)\n",
+               key, entry->hi.size, (1024<<(2*entry->hi.size))
+              );
+    }
+    return 1;
+  }
+
+  if ( quiet )
+    return 0;
+
+  /*
+   * Layout of the report:
+   *
+   * "TLB Entry # 0 spans EA range 0x00000000 - 0x00000000
+   * "Mapping: VA [TID 0x00 / EPN 0x00000] -> RPN 0x00000"
+   * "Size: TSIZE 0x0 (4^sz KB = 000000 KB = 0x00000000 B)
+   * "Attributes: PERM 0x00 (ex/wr/zsel) WIMG 0x00 EU0 0x0"
+   */
+  myprintf(f,
+           "TLB Entry # %2d spans EA range 0x%08x - 0x%08x\n",
+           key,
+           (entry->hi.epn << 10),
+           (entry->hi.epn << 10) + (1024<<(2*entry->hi.size)) - 1
+          );
+  myprintf(f,
+           "Mapping: VA [TID 0x%02x / EPN 0x%05x] -> RPN 0x%05"PRIx32"\n",
+           entry->id.tid, entry->hi.epn, entry->lo.rpn
+          );
+  myprintf(f,
+           "Size: TSIZE 0x%x (4^sz KB = %6d KB = 0x%08x B)\n",
+           entry->hi.size, (1<<(2*entry->hi.size)), (1024<<(2*entry->hi.size))
+          );
+  myprintf(f,
+           "Attributes: PERM 0x%02x (ex/wr/zsel) WIMG 0x%02x EU0 0x%01x\n",
+           entry->lo.perm, entry->lo.wimg, entry->hi.att
+          );
+
+  return 0;
+}
+
+/* Initialize cache. Should be done only once although this is not enforced.
+ *
+ * Every hardware TLB entry is read, with interrupts disabled, into a static
+ * array which is then published through 'bsp_mmu_cache'.
+ *
+ * RETURNS: zero on success, nonzero on error; in this case the driver will
+ *          refuse to change TLB entries (other than disabling them).
+ *          The current implementation has no failure path and always
+ *          returns zero.
+ */
+int
+bsp_mmu_initialize()
+{
+  static bsp_tlb_entry_t mmu_cache[NTLBS]; /* Should malloc if it's not too early */
+  rtems_interrupt_level  level;
+  bsp_tlb_idx_t          n;
+
+  rtems_interrupt_disable(level);
+  for (n = 0; n < NTLBS; n++) {
+    fetch(n, &mmu_cache[n]);
+  }
+  rtems_interrupt_enable(level);
+
+  /* Publish only after the whole table has been filled in */
+  bsp_mmu_cache = mmu_cache;
+  return 0;
+}
+
+/* Find first free TLB entry by examining all entries' valid bit. The first
+ * entry without the valid bit set is returned.
+ *
+ * The high word of each entry is read directly from the hardware (tlbrehi)
+ * into a local scratch entry; bsp_mmu_cache[] is not consulted.
+ *
+ * RETURNS: A free TLB entry number. -1 if no entry can be found.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find_first_free()
+{
+  bsp_tlb_entry_t scratch;
+  bsp_tlb_idx_t   n;
+
+  for (n = 0; n < NTLBS; n++) {
+    register uint32_t word;
+
+    __asm__ volatile ("tlbrehi %[tmp],%[idx]    \n\t"
+                      "stw     %[tmp],4(%[tlb]) \n\t" /* scratch.hi */
+                      "sync                     \n\t"
+                      : [tmp]"=&r"(word)
+                      : [idx]"r"(n),
+                        [tlb]"b"(&scratch)
+                      : "memory"
+                     );
+
+    if (!(scratch.hi.v))
+      return n;
+  }
+
+  /* Every entry has its valid bit set */
+  return -1;
+}
+
+/*
+ * Write TLB entry (can also be used to disable an entry).
+ *
+ * The routine checks against the cached data in
+ * bsp_mmu_cache[] to prevent the user from generating
+ * overlapping entries.
+ *
+ * 'idx':  TLB entry # to manipulate
+ * 'ea':   Effective address (must be page aligned)
+ * 'pa':   Physical address (must be page aligned)
+ * 'sz':   Page size selector (0..7); page size is
+ *         1024 * 2^(2*sz) bytes.
+ *         'sz' may also be one of the following:
+ *          - page size in bytes ( >= 1024 ); the selector
+ *            value is then computed by this routine.
+ *            However, 'sz' must be a valid page size
+ *            or -1 will be returned.
+ *          - a value < 0 (e.g. -1) to invalidate/disable the
+ *            TLB entry.  The parameter type is unsigned, so
+ *            the routine reinterprets it as signed to detect
+ *            this request; any value with the top bit set is
+ *            therefore treated as "disable".
+ * 'flgs': Page's little-endian & user-defined flags, permissions and attributes
+ * 'tid':  Translation ID
+ *
+ * RETURNS: 0 on success, nonzero on error:
+ *
+ *         >0: requested mapping would overlap with
+ *             existing mapping in other entry. Return
+ *             value gives conflicting entry + 1; i.e.,
+ *             if a value of 4 is returned then the request
+ *             conflicts with existing mapping in entry 3.
+ *         -1: invalid argument
+ *         -3: driver not initialized (or initialization failed).
+ *         <0: other error
+ */
+bsp_tlb_idx_t
+bsp_mmu_write(bsp_tlb_idx_t idx, uint32_t ea, uint32_t pa, uint sz,
+              uint32_t flgs, uint32_t tid)
+{
+  bsp_tlb_entry_t       tlb;
+  uint32_t              msk;
+  bsp_tlb_idx_t         lkup;
+  rtems_interrupt_level lvl;
+  /* 'sz' is unsigned, so "sz >= 0" can never be false; recover the sign of
+   * the caller's value once, up front, and use this flag from here on. */
+  const bool            enable = ((int)sz >= 0);
+
+  if ( !enable ) {
+    /* The size of a disabled entry is irrelevant; use selector 0 so that
+     * every shift below stays well defined. */
+    sz = 0;
+  } else if ( sz >= 1024 ) {
+    /* Assume they literally specify a size */
+    msk = sz;
+    sz  = 0;
+    while ( msk != (1024u<<(2*sz)) ) {
+      if ( ++sz > 7 ) {
+        return -1;
+      }
+    }
+    /* OK, acceptable */
+  } else if ( sz > 7 ) {
+    /* Reject before any shift by 2*sz is evaluated */
+    myprintf(stderr,"Invalid size selector %u (valid selectors are 0..7)\n", sz);
+    return -1;
+  }
+
+  if ( enable && !bsp_mmu_cache ) {
+    myprintf(stderr,"MMU driver not initialized; refusing to enable any entry\n");
+    return -3;
+  }
+
+  msk = sz > 0 ? (1024u<<(2*sz)) - 1 : 0;
+
+  if ( (ea & msk) || (pa & msk) ) {
+    myprintf(stderr,"Misaligned EA (%08x) or PA (%08x) (mask is %08x)\n", ea, pa, msk);
+    return -1;
+  }
+
+  if ( idx < 0 || idx > NTLBS-1 )
+    return -1;
+
+  if ( enable ) {
+    lkup = bsp_mmu_match(ea, sz, tid);
+
+    if ( lkup < -1 ) {
+      /* some error */
+      return lkup;
+    }
+    /* A hit on the entry being rewritten is not a conflict */
+    if ( (lkup >= 0) && (lkup != idx) && (bsp_mmu_cache[lkup].hi.v != 0) ) {
+      myprintf(stderr,"TLB #%i overlaps with requested mapping\n", lkup);
+      bsp_mmu_update( lkup, false, stderr);
+      return lkup+1;
+    }
+  }
+
+  /* OK to proceed */
+  tlb.id.tid  = tid;
+  tlb.hi.v    = enable;
+  tlb.hi.size = sz;
+  tlb.hi.epn  = (ea & (0xfffffc00 << (sz + sz))) >> 10;
+  tlb.lo.rpn  = (pa & (0xfffffc00 << (sz + sz))) >> 10;
+  tlb.hi.att  = (flgs & MMU_M_ATTR) >> MMU_V_ATTR;
+  tlb.lo.perm = (flgs & MMU_M_PERM) >> MMU_V_PERM;
+  tlb.lo.wimg = (flgs & MMU_M_PROP) >> MMU_V_PROP;
+
+  rtems_interrupt_disable(lvl);
+
+  store(idx, &tlb);
+
+  commit();
+
+  rtems_interrupt_enable(lvl);
+
+  /* update cache (a no-op returning -2 if the cache does not exist) */
+  bsp_mmu_update(idx, true, 0);
+
+  return 0;
+}
+
+/*
+ * Check if a ea/sz/tid mapping overlaps with an existing entry.
+ *
+ * Only the cached copy in bsp_mmu_cache[] is examined.  A cached entry is
+ * a candidate when it is valid and its TID is either zero or equal to
+ * 'tid'.
+ *
+ * 'ea':  The Effective Address to match against
+ * 'sz':  The 'logarithmic' size selector (0..7); the page size
+ *        is 1024*2^(2*sz).
+ * 'tid': The TID to match against
+ *
+ * RETURNS:
+ *   >= 0: index of the TLB entry that already provides a mapping
+ *         which overlaps within the ea range.
+ *     -1: SUCCESS (no conflicting entry found)
+ *   <=-2: ERROR (-2: misaligned 'ea', -3: cache not initialized,
+ *                -4: 'sz' out of range)
+ */
+bsp_tlb_idx_t
+bsp_mmu_match(uint32_t ea, int sz, uint32_t tid)
+{
+  bsp_tlb_idx_t    n;
+  bsp_tlb_entry_t *e;
+  uint32_t         first, extent;
+
+  if ( sz < 0 || sz > 7 )
+    return -4;
+
+  /* cache not initialized */
+  if ( !bsp_mmu_cache )
+    return -3;
+
+  /* From here on 'sz' holds the page size in bytes */
+  sz = (1024<<(2*sz));
+
+  /* misaligned ea */
+  if ( ea & (sz-1) )
+    return -2;
+
+  for ( n = 0; n < NTLBS; n++ ) {
+    e = &bsp_mmu_cache[n];
+
+    if ( e->hi.v && ( !e->id.tid || e->id.tid == tid ) ) {
+      /* TID matches a valid entry: compute its last offset and start */
+      extent = (1024<<(2*e->hi.size)) - 1;
+      first  = e->hi.epn << 10;
+
+      /* Closed intervals [ea, ea+sz-1] and [first, first+extent] intersect */
+      if ( ea <= first + extent && ea + sz -1 >= first )
+        return n;
+    }
+  }
+
+  return -1;
+}
+
+/* Find TLB index that maps 'ea/tid' combination
+ *
+ * The hardware is searched with 'tlbsx.' while PID is temporarily set to
+ * 'tid'; the previous PID is restored before returning.  Interrupts are
+ * disabled for the duration of the search.
+ *
+ * 'ea':  Effective address to match against
+ * 'tid': The TID to match against
+ *
+ * RETURNS: index of the TLB entry providing the mapping.
+ *
+ *          On error (no mapping) -1 is returned.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find(uint32_t ea, uint32_t tid)
+{
+  rtems_interrupt_level  lvl;
+  register uint32_t      pid;
+  register bsp_tlb_idx_t idx;
+  register int           failure; /* holds the CR image; despite the name,
+                                   * the tested bit being SET means "found" */
+
+  rtems_interrupt_disable(lvl);
+
+  /*
+   * 'pid' is written by the very first instruction, before 'tid' and 'ea'
+   * have been consumed, so it must be an earlyclobber ("=&r") operand;
+   * otherwise the compiler may place it in the same register as an input.
+   */
+  __asm__ volatile ("mfpid  %[pid]          \n\t" /* Save PID */
+                    "mtpid  %[tid]          \n\t"
+                    "tlbsx. %[idx],0,%[ea]  \n\t" /* Failure changes the index reg randomly. */
+                    "mfcr   %[failure]      \n\t"
+                    "mtpid  %[pid]          \n\t" /* Restore PID */
+                    : [pid]"=&r"(pid),
+                      [idx]"=&r"(idx),
+                      [failure]"=&r"(failure)
+                    : [tid]"r"(tid),
+                      [ea]"r"(ea)
+                    : "cc"
+                   );
+
+  rtems_interrupt_enable(lvl);
+
+  /* 0x20000000 is tested as the "match found" flag in the CR image
+   * (presumably CR0[EQ] as set by tlbsx. -- confirm against the manual). */
+  return (failure & 0x20000000) ? idx : -1;
+}
+
+/* Mark TLB entry as invalid ('disabled').
+ *
+ * 'key': TLB entry (index).
+ *
+ * RETURNS: zero on success, nonzero on error (TLB unchanged):
+ *           -1: 'key' out of range
+ *           -2: translation is enabled but no mapping for address 0 exists
+ *           -3: 'key' is the entry that maps address 0
+ *
+ * NOTE:    If a TLB entry is disabled the associated
+ *          entry in bsp_mmu_cache[] is also
+ *          marked as disabled.  The cache is only touched if it has been
+ *          set up (bsp_mmu_cache non-NULL), so entries can be disabled
+ *          before the driver is initialized.
+ */
+int
+bsp_mmu_invalidate(bsp_tlb_idx_t key)
+{
+  bsp_tlb_idx_t         k0;
+  rtems_interrupt_level lvl;
+  bsp_tlb_entry_t       tlb;
+  uint32_t              msr;
+
+  /* Minimal guard against bad key */
+  if ( key < 0 || key > NTLBS-1 )
+    return -1;
+
+  _CPU_MSR_GET(msr);
+
+  /* While address translation is enabled... */
+  if (msr & (PPC_MSR_IR | PPC_MSR_DR))
+  {
+    /* Must not invalidate page 0 which holds vectors, text etc... */
+    k0 = bsp_mmu_find(0, 0);
+    if ( -1 == k0 ) {
+      myprintf(stderr,"No mapping for address 0 found\n");
+      return -2;
+    }
+
+    /* NOTE: we assume PID is ignored */
+    if ( k0 == key ) {
+      myprintf(stderr,"Cannot invalidate page holding address 0 (always needed)\n");
+      return -3;
+    }
+  }
+
+  rtems_interrupt_disable(lvl);
+
+  /* Read-modify-write so that only the valid bit of the entry changes */
+  fetch(key, &tlb);
+
+  /* Invalidate old entries */
+  tlb.hi.v = 0;
+
+  store(key, &tlb);
+
+  commit();
+
+  /* update cache -- but only if there is one; dereferencing a NULL
+   * bsp_mmu_cache here would fault before bsp_mmu_initialize() has run */
+  if ( bsp_mmu_cache )
+    bsp_mmu_cache[ key ].hi.v = tlb.hi.v;
+
+  rtems_interrupt_enable(lvl);
+
+  return 0;
+}
diff --git a/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c b/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c
index 2aab2c2..53fd92e 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c
+++ b/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c
@@ -54,13 +54,8 @@
* Modifications for PPC405GP by Dennis Ehlin
* Modifications for Virtex4 by Richard Claus <claus at slac.stanford.edu>
*/
-
-#include <string.h>
-#include <fcntl.h>
-
-#include <bsp.h>
-#include <bsp/irq.h>
-#include <bsp/vectors.h>
+#include <rtems.h>
+#include <rtems/config.h>
#include <rtems/bspIo.h>
#include <rtems/libio.h>
#include <rtems/libcsupport.h>
@@ -68,6 +63,14 @@
#include <libcpu/cpuIdent.h>
#include <libcpu/spr.h>
+#include <bsp.h>
+#include <bsp/vectors.h>
+#include <bsp/bootcard.h>
+#include <bsp/irq.h>
+
+#include <string.h>
+#include <fcntl.h>
+
#define DO_DOWN_ALIGN(x,a) ((x) & ~((a)-1))
#define DO_UP_ALIGN(x,a) DO_DOWN_ALIGN(((x) + (a) - 1 ),a)
@@ -76,13 +79,6 @@
#define CPU_UP_ALIGN(x) DO_UP_ALIGN(x, CPU_ALIGNMENT)
-/* Expected by clock.c */
-uint32_t bsp_clicks_per_usec;
-bool bsp_timer_internal_clock; /* true, when timer runs with CPU clk */
-uint32_t bsp_timer_least_valid;
-uint32_t bsp_timer_average_overhead;
-
-
/* Defined in linkcmds linker script */
LINKER_SYMBOL(RamBase);
LINKER_SYMBOL(RamSize);
@@ -96,35 +92,24 @@ LINKER_SYMBOL(WorkAreaBase);
LINKER_SYMBOL(MsgAreaBase);
LINKER_SYMBOL(MsgAreaSize);
LINKER_SYMBOL(__phy_ram_end);
+LINKER_SYMBOL(bsp_exc_vector_base);
+
+
+/* Expected by clock.c */
+uint32_t bsp_clicks_per_usec;
+bool bsp_timer_internal_clock; /* true, when timer runs with CPU clk */
+uint32_t bsp_timer_least_valid;
+uint32_t bsp_timer_average_overhead;
/*
* Provide weak aliases so that RTEMS distribution builds
*/
static void _noopfun(void) {}
-static void _bsp_start(void)
-{
- uintptr_t intrStackStart = CPU_UP_ALIGN((uint32_t)__bsp_ram_start);
- uintptr_t intrStackSize = rtems_configuration_get_interrupt_stack_size();
-
- /*
- * Initialize default raw exception handlers.
- *
- * This BSP does not assume anything about firmware possibly loaded in the
- * FPGA, so the external interrupt should not be enabled in order to avoid
- * spurious interrupts.
- */
- ppc_exc_initialize(PPC_INTERRUPT_DISABLE_MASK_DEFAULT & ~MSR_EE,
- intrStackStart,
- intrStackSize);
-
- /* Install our own set of exception vectors */
- BSP_rtems_irq_mngt_init(0);
-}
void app_bsp_start(void)
-__attribute__(( weak, alias("_bsp_start") ));
+__attribute__(( weak, alias("_noopfun") ));
void app_bsp_pretasking_hook(void)
__attribute__(( weak, alias("_noopfun") ));
@@ -147,20 +132,31 @@ static void __bsp_outchar_to_memory(char c)
void BSP_ask_for_reset(void)
{
printk("\nSystem stopped, issue RESET");
+
for(;;);
}
void BSP_panic(char *s)
{
- printk("\n%s PANIC %s\n", _RTEMS_version, s);
+ rtems_interrupt_level level;
+
+ rtems_interrupt_disable(level);
+
+ printk("\n%s PANIC %s\n", rtems_get_version_string(), s);
+
BSP_ask_for_reset();
}
void _BSP_Fatal_error(unsigned int v)
{
- printk("\n%s FATAL ERROR %x\n", _RTEMS_version, v);
+ rtems_interrupt_level level;
+
+ rtems_interrupt_disable(level);
+
+ printk("\n%s FATAL ERROR %x\n", rtems_get_version_string(), v);
+
BSP_ask_for_reset();
}
@@ -176,13 +172,14 @@ void bsp_start(void)
{
uintptr_t intrStackStart;
uintptr_t intrStackSize;
+
ppc_cpu_id_t myCpu;
ppc_cpu_revision_t myCpuRevision;
/* Set the character output function; The application may override this */
BSP_output_char = __bsp_outchar_to_memory;
- printk("\nWelcome to RTEMS %s\n", _RTEMS_version );
+ printk("RTEMS %s\n", rtems_get_version_string());
/*
* Get CPU identification dynamically. Note that the get_ppc_cpu_type()
@@ -208,6 +205,15 @@ void bsp_start(void)
*/
intrStackStart = CPU_UP_ALIGN((uint32_t)__bsp_ram_start);
intrStackSize = rtems_configuration_get_interrupt_stack_size();
+
+ sc = ppc_exc_initialize(PPC_INTERRUPT_DISABLE_MASK_DEFAULT,
+ intrStackStart,
+ intrStackSize);
+ if (sc != RTEMS_SUCCESSFUL) {
+ BSP_panic("Cannot initialize exceptions");
+ }
+
+ /* Let the user know what parameters we were compiled with */
printk(" Base/Start End Size\n"
"RAM: 0x%08x 0x%x\n"
"RTEMS: 0x%08x\n"
@@ -224,6 +230,11 @@ void bsp_start(void)
(uint32_t)MsgAreaBase, (uint32_t)MsgAreaSize,
(uint32_t)__phy_ram_end);
+ /*
+ * Initialize RTEMS IRQ system
+ */
+ BSP_rtems_irq_mngt_init(0);
+
/* Continue with application-specific initialization */
app_bsp_start();
}
diff --git a/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds b/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds
index cff5767..edd9f07 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds
+++ b/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds
@@ -20,16 +20,17 @@ HeapSize = DEFINED(HeapSize) ? HeapSize : 0; /* 0=Use def */
MEMORY
{
- VECTORS : ORIGIN = 0x00000000, LENGTH = 8K
- RAM : ORIGIN = 0x00002000, LENGTH = 128M - 8K
+ VECTORS : ORIGIN = 0x00000000, LENGTH = 12K
+ RAM : ORIGIN = 0x00003000, LENGTH = 128M - 12K
}
SECTIONS
{
- __exeentry = download_entry;
- __exestart = 0x100;
- .vectors __exestart : { *(.vectors) } > VECTORS
+ bsp_exc_vector_base = 0x100;
+ __exeentry = download_entry;
+ __exestart = bsp_exc_vector_base;
+ .vectors bsp_exc_vector_base : { *(.vectors) } > VECTORS
/* Read-only sections, merged into text segment: */
.interp : { *(.interp) } > RAM
@@ -227,7 +228,7 @@ _SDA_BASE_ = __SDATA_START__ + 0x8000;
__exeend = ALIGN(4);
__rtems_end = .;
- . = ALIGN(0x10); /* Align to a cache-line boundary */
+ . = ALIGN(0x20); /* Align to a cache-line boundary */
PROVIDE(__bsp_ram_start = .);
/* Interrupt stack: aligned on a cache-line boundary */
@@ -235,12 +236,12 @@ _SDA_BASE_ = __SDATA_START__ + 0x8000;
__intrStack = .;
/* Main stack lives here */
- _stack = ALIGN(0x10); /* Align to a cache-line boundary */
+ _stack = ALIGN(0x20); /* Align to a cache-line boundary */
. += StackSize;
__stack_base = .; /* Initial stack builds downwards */
/* RTEMS workspace: size specified by application */
- WorkAreaBase = ALIGN(0x10); /* Align to a cache-line boundary */
+ WorkAreaBase = ALIGN(0x20); /* Align to a cache-line boundary */
/* The heap comes after the work space */
@@ -249,7 +250,7 @@ _SDA_BASE_ = __SDATA_START__ + 0x8000;
/* Message area for capturing early printk output */
/* Placed here to be easily findable with a debugger */
- MsgAreaBase = __bsp_ram_end;
+ MsgAreaBase = .;
. += MsgAreaSize;
__phy_ram_end = .; /* True end of physical memory */
diff --git a/c/src/lib/libbsp/powerpc/virtex4/startup/start.S b/c/src/lib/libbsp/powerpc/virtex4/startup/start.S
index c3053fb..55d5318 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/startup/start.S
+++ b/c/src/lib/libbsp/powerpc/virtex4/startup/start.S
@@ -166,15 +166,15 @@ startupDow:
* Tell the processor where the exception vector table will be.
*------------------------------------------------------------------*/
.extern SYM(__vectors)
- lis r0, __vectors at h /* set EVPR exc. vector prefix */
- mtspr evpr,r0
+ lis r2, __vectors at h /* set EVPR exc. vector prefix */
+ mtevpr r2
/*-------------------------------------------------------------------
* Set up the debug register to freeze timers on debug events.
*------------------------------------------------------------------*/
- mfdbcr0 r0
- ori r0,r0,0x0001
- mtdbcr0 r0
+ mfdbcr0 r2
+ ori r2,r2,0x0001
+ mtdbcr0 r2
isync
/* Select whether APU, Wait Enable, interrupts/exceptions and address
@@ -182,14 +182,20 @@ startupDow:
lis r0,0x00000000 at h /* SRR1 value */
mtsrr1 r0 /* Potentially: 0x80000000 >> 6 is APU */
+ /* Configure timer facilities */
+ mttbl r0 /* Clear Timebase to prevent Fixed Interval.. */
+ mttbu r0 /* ..timer and Watchdog Timer exceptions */
+ mtpit r0 /* Programmable interval timer */
+ li r2,-1 /* -1 to clear TSR */
+ mttsr r2 /* Timer status register */
+
/* Clear out stale values in certain registers to avoid confusion */
+ mtcrf 0xff,r0 /* Need for simulation */
+ mtctr r0 /* Counter register */
mtxer r0 /* Fixed-point exception register */
mtesr r0 /* Exception syndrome register */
mtdear r0 /* Data exception address register */
mtmcsr r0 /* Machine check syndrome register */
- mtpit r0 /* Programmable interval timer */
- li r0,-1 /* -1 to clear TSR */
- mttsr r0 /* Timer status register */
/* Invalidate the data cache */
li r2,0 /* Start address */
--
1.7.1
More information about the devel
mailing list