[PATCH 05/15] Various updates and improvements

Ric Claus claus at slac.stanford.edu
Tue Nov 27 03:26:43 UTC 2012


---
 c/src/lib/libbsp/powerpc/virtex4/Makefile.am       |   15 +-
 c/src/lib/libbsp/powerpc/virtex4/clock/clock.c     |  265 ---------
 c/src/lib/libbsp/powerpc/virtex4/include/mmu.h     |  270 ++++++++++
 c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c    |    2 -
 c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S   |  143 +++++
 c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c     |  225 ++++++++
 c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c         |  568 ++++++++++++++++++++
 .../lib/libbsp/powerpc/virtex4/startup/bspstart.c  |   85 ++--
 c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds  |   19 +-
 c/src/lib/libbsp/powerpc/virtex4/startup/start.S   |   22 +-
 10 files changed, 1288 insertions(+), 326 deletions(-)
 delete mode 100644 c/src/lib/libbsp/powerpc/virtex4/clock/clock.c
 create mode 100644 c/src/lib/libbsp/powerpc/virtex4/include/mmu.h
 create mode 100644 c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S
 create mode 100644 c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c
 create mode 100644 c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c

diff --git a/c/src/lib/libbsp/powerpc/virtex4/Makefile.am b/c/src/lib/libbsp/powerpc/virtex4/Makefile.am
index adce297..eb8204b 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/Makefile.am
+++ b/c/src/lib/libbsp/powerpc/virtex4/Makefile.am
@@ -41,10 +41,6 @@ libbsp_a_SOURCES = startup/bspclean.c \
 # start
 libbsp_a_SOURCES += startup/start.S
 
-# clock & timer
-libbsp_a_SOURCES += ../../../libcpu/@RTEMS_CPU@/ppc403/clock/clock.c
-libbsp_a_SOURCES += ../../../libcpu/@RTEMS_CPU@/ppc403/timer/timer.c
-
 # console
 libbsp_a_SOURCES += startup/dummy_console.c \
                     ../../shared/dummy_printk_support.c
@@ -53,14 +49,23 @@ libbsp_a_SOURCES += startup/dummy_console.c \
 include_bsp_HEADERS += include/irq.h
 libbsp_a_SOURCES += irq/irq_init.c
 
+# mmu
+include_bsp_HEADERS += include/mmu.h
+libbsp_a_SOURCES += mmu/mmu.c
+
 #vectors
 include_bsp_HEADERS += ../../../libcpu/@RTEMS_CPU@/@exceptions@/bspsupport/vectors.h
 include_bsp_HEADERS += ../../../libcpu/@RTEMS_CPU@/@exceptions@/bspsupport/irq_supp.h
 
+# libc
+libbsp_a_SOURCES += libc/memcpy.c libc/fastcopy.S
+
 libbsp_a_LIBADD = ../../../libcpu/@RTEMS_CPU@/@exceptions@/rtems-cpu.rel \
                   ../../../libcpu/@RTEMS_CPU@/@exceptions@/exc_bspsupport.rel \
                   ../../../libcpu/@RTEMS_CPU@/shared/cache.rel \
-                  ../../../libcpu/@RTEMS_CPU@/shared/cpuIdent.rel
+                  ../../../libcpu/@RTEMS_CPU@/shared/cpuIdent.rel \
+                  ../../../libcpu/@RTEMS_CPU@/ppc403/clock.rel \
+                  ../../../libcpu/@RTEMS_CPU@/ppc403/timer.rel
 
 EXTRA_DIST = times
 
diff --git a/c/src/lib/libbsp/powerpc/virtex4/clock/clock.c b/c/src/lib/libbsp/powerpc/virtex4/clock/clock.c
deleted file mode 100644
index 7617720..0000000
--- a/c/src/lib/libbsp/powerpc/virtex4/clock/clock.c
+++ /dev/null
@@ -1,265 +0,0 @@
-/*  clock.c
- *
- *  This routine initializes the interval timer on the
- *  PowerPC 403 CPU.  The tick frequency is specified by the bsp.
- *
- *  Author: Andrew Bray <andy at i-cubed.co.uk>
- *
- *  COPYRIGHT (c) 1995 by i-cubed ltd.
- *
- *  To anyone who acknowledges that this file is provided "AS IS"
- *  without any express or implied warranty:
- *      permission to use, copy, modify, and distribute this file
- *      for any purpose is hereby granted without fee, provided that
- *      the above copyright notice and this notice appears in all
- *      copies, and that the name of i-cubed limited not be used in
- *      advertising or publicity pertaining to distribution of the
- *      software without specific, written prior permission.
- *      i-cubed limited makes no representations about the suitability
- *      of this software for any purpose.
- *
- *  Derived from c/src/lib/libcpu/hppa1.1/clock/clock.c:
- *
- *  Modifications for deriving timer clock from cpu system clock by
- *              Thomas Doerfler <td at imd.m.isar.de>
- *  for these modifications:
- *  COPYRIGHT (c) 1997 by IMD, Puchheim, Germany.
- *
- *  COPYRIGHT (c) 1989-2007.
- *  On-Line Applications Research Corporation (OAR).
- *
- *  The license and distribution terms for this file may be
- *  found in the file LICENSE in this distribution or at
- *  http://www.rtems.com/license/LICENSE.
- *
- *  Modifications for PPC405GP by Dennis Ehlin
- */
-
-#include <rtems.h>
-#include <rtems/clockdrv.h>
-#include <rtems/libio.h>
-#include <stdlib.h>                     /* for atexit() */
-#include <rtems/bspIo.h>
-#include <rtems/powerpc/powerpc.h>
-
-/*
- * check, which exception handling code is present
- */
-
-#include <bsp.h>
-
-#include <bsp/vectors.h>
-#include <bsp/irq_supp.h>
-
-volatile uint32_t   Clock_driver_ticks;
-static uint32_t   pit_value, tick_time;
-static bool auto_restart;
-
-void Clock_exit( void );
-
-/*
- * These are set by clock driver during its init
- */
-
-rtems_device_major_number rtems_clock_major = ~0;
-rtems_device_minor_number rtems_clock_minor;
-
-static inline uint32_t   get_itimer(void)
-{
-    register uint32_t   rc;
-
-    asm volatile ("mfspr %0, 0x10c" : "=r" ((rc))); /* 405GP TBL */
-
-    return rc;
-}
-
-/*
- *  ISR Handler
- */
-
-int Clock_isr(BSP_Exception_frame *f, unsigned int vector)
-{
-    uint32_t   clicks_til_next_interrupt;
-#if defined(BSP_PPC403_CLOCK_ISR_IRQ_LEVEL)
-    uint32_t   l_orig = _ISR_Get_level();
-#endif
-    if (!auto_restart)
-    {
-      uint32_t   itimer_value;
-      /*
-       * setup for next interrupt; making sure the new value is reasonably
-       * in the future.... in case we lost out on an interrupt somehow
-       */
-
-      itimer_value = get_itimer();
-      tick_time += pit_value;
-
-      /*
-       * how far away is next interrupt *really*
-       * It may be a long time; this subtraction works even if
-       * Clock_clicks_interrupt < Clock_clicks_low_order via
-       * the miracle of unsigned math.
-       */
-      clicks_til_next_interrupt = tick_time - itimer_value;
-
-      /*
-       * If it is too soon then bump it up.
-       * This should only happen if CPU_HPPA_CLICKS_PER_TICK is too small.
-       * But setting it low is useful for debug, so...
-       */
-
-      if (clicks_til_next_interrupt < 400)
-      {
-        tick_time = itimer_value + 1000;
-        clicks_til_next_interrupt = 1000;
-        /* XXX: count these! this should be rare */
-      }
-
-      /*
-       * If it is too late, that means we missed the interrupt somehow.
-       * Rather than wait 35-50s for a wrap, we just fudge it here.
-       */
-
-      if (clicks_til_next_interrupt > pit_value)
-      {
-        tick_time = itimer_value + 1000;
-        clicks_til_next_interrupt = 1000;
-        /* XXX: count these! this should never happen :-) */
-      }
-
-      asm volatile ("mtspr 0x3db, %0" :: "r"
-                         (clicks_til_next_interrupt)); /* PIT */
-    }
-
-    /* Clear the Programmable Interrupt Status */
-    asm volatile ( "mtspr 0x3d8, %0" :: "r" (0x08000000)); /* TSR */
-
-    Clock_driver_ticks++;
-
-    rtems_clock_tick();
-
-    return 0;
-}
-
-void ClockOff(void)
-{
-    register uint32_t   tcr;
-
-    asm volatile ("mfspr %0, 0x3da" : "=r" ((tcr))); /* TCR */
-
-    tcr &= ~ 0x04400000;
-
-    asm volatile ("mtspr 0x3da, %0" : "=r" ((tcr)) : "0" ((tcr))); /* TCR */
-}
-
-void ClockOn(void)
-{
-    uint32_t   iocr;
-    register uint32_t   tcr;
-
-    Clock_driver_ticks = 0;
-
-    asm volatile ("mfdcr %0, 0x0b2" : "=r" (iocr));  /*405GP CPC0_CR1 */
-    if (bsp_timer_internal_clock) {
-      iocr &=~0x800000;               /* timer clocked from system clock CETE*/
-    }
-    else {
-      iocr |= 0x800000;               /* select external timer clock CETE*/
-    }
-    asm volatile ("mtdcr 0x0b2, %0" : "=r" (iocr) : "0" (iocr)); /* 405GP CPC0_CR1 */
-
-     /*
-      * Enable auto restart
-      */
-
-    auto_restart = true;
-
-    pit_value = rtems_configuration_get_microseconds_per_tick() *
-      bsp_clicks_per_usec;
-
-     /*
-      * Set PIT value
-      */
-
-    asm volatile ("mtspr 0x3db, %0" : : "r" (pit_value)); /* PIT */
-
-     /*
-      * Set timer to autoreload, bit TCR->ARE = 1  0x0400000
-      * Enable PIT interrupt, bit TCR->PIE = 1     0x4000000
-      */
-    tick_time = get_itimer() + pit_value;
-
-    asm volatile ("mfspr %0, 0x3da" : "=r" ((tcr)));               /* TCR */
-    tcr = (tcr & ~0x04400000) | (auto_restart ? 0x04400000 : 0x04000000);
-    asm volatile ("mtspr 0x3da, %0" : "=r" ((tcr)) : "0" ((tcr))); /* TCR */
-}
-
-
-
-void Install_clock(ppc_exc_handler_t clock_isr)
-{
-#ifdef ppc403
-   uint32_t   pvr;
-#endif /* ppc403 */
-
-   Clock_driver_ticks = 0;
-
-   /*
-    * initialize the interval here
-    * First tick is set to right amount of time in the future
-    * Future ticks will be incremented over last value set
-    * in order to provide consistent clicks in the face of
-    * interrupt overhead
-    */
-
-   ppc_exc_set_handler( BSP_PPC403_CLOCK_HOOK_EXCEPTION, clock_isr );
-   ClockOn();
-
-   atexit(Clock_exit);
-}
-
-void
-ReInstall_clock(ppc_exc_handler_t clock_isr)
-{
-  uint32_t   isrlevel = 0;
-
-  rtems_interrupt_disable(isrlevel);
-
-   ppc_exc_set_handler( BSP_PPC403_CLOCK_HOOK_EXCEPTION, clock_isr );
-   ClockOn();
-
-  rtems_interrupt_enable(isrlevel);
-}
-
-
-/*
- * Called via atexit()
- * Remove the clock interrupt handler by setting handler to NULL
- *
- * This will not work on the 405GP because
- * when bit's are set in TCR they can only be unset by a reset
- */
-
-void Clock_exit(void)
-{
-  ClockOff();
-  ppc_exc_set_handler( BSP_PPC403_CLOCK_HOOK_EXCEPTION, 0 );
-}
-
-rtems_device_driver Clock_initialize(
-  rtems_device_major_number major,
-  rtems_device_minor_number minor,
-  void *pargp
-)
-{
-  Install_clock( Clock_isr );
-
-  /*
-   * make major/minor avail to others such as shared memory driver
-   */
-
-  rtems_clock_major = major;
-  rtems_clock_minor = minor;
-
-  return RTEMS_SUCCESSFUL;
-}
diff --git a/c/src/lib/libbsp/powerpc/virtex4/include/mmu.h b/c/src/lib/libbsp/powerpc/virtex4/include/mmu.h
new file mode 100644
index 0000000..3524f23
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/include/mmu.h
@@ -0,0 +1,270 @@
+#ifndef RTEMS_VIRTEX4_MMU_H
+#define RTEMS_VIRTEX4_MMU_H
+/**
+ * @file
+ *
+ * @ingroup Virtex4MMU
+ *
+ * @brief Routines to manipulate the PPC 405 MMU.
+ */
+/* $Id$ */
+/*
+ * Authorship
+ * ----------
+ * This software was created by
+ *     Till Straumann <strauman at slac.stanford.edu>, 2005-2007,
+ *       Stanford Linear Accelerator Center, Stanford University.
+ * and was transcribed for the PPC 405 by
+ *     R. Claus <claus at slac.stanford.edu>, 2012,
+ *       Stanford Linear Accelerator Center, Stanford University,
+ *
+ * Acknowledgement of sponsorship
+ * ------------------------------
+ * This software was produced by
+ *     the Stanford Linear Accelerator Center, Stanford University,
+ *     under Contract DE-AC03-76SFO0515 with the Department of Energy.
+ *
+ * Government disclaimer of liability
+ * ----------------------------------
+ * Neither the United States nor the United States Department of Energy,
+ * nor any of their employees, makes any warranty, express or implied, or
+ * assumes any legal liability or responsibility for the accuracy,
+ * completeness, or usefulness of any data, apparatus, product, or process
+ * disclosed, or represents that its use would not infringe privately owned
+ * rights.
+ *
+ * Stanford disclaimer of liability
+ * --------------------------------
+ * Stanford University makes no representations or warranties, express or
+ * implied, nor assumes any liability for the use of this software.
+ *
+ * Stanford disclaimer of copyright
+ * --------------------------------
+ * Stanford University, owner of the copyright, hereby disclaims its
+ * copyright and all other rights in this software.  Hence, anyone may
+ * freely use it for any purpose without restriction.
+ *
+ * Maintenance of notices
+ * ----------------------
+ * In the interest of clarity regarding the origin and status of this
+ * SLAC software, this and all the preceding Stanford University notices
+ * are to remain affixed to any copy or derivative of this software made
+ * or distributed by the recipient and are to be affixed to any copy of
+ * software made or distributed by the recipient that contains a copy or
+ * derivative of this software.
+ *
+ * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
+ */
+
+#include <rtems.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup Virtex4MMU Virtex 4 - MMU Support
+ *
+ * @ingroup Virtex4
+ *
+ * @brief MMU support.
+ *
+ * @{
+ */
+
+/* Some routines require or return an index 'key'.
+ */
+typedef int bsp_tlb_idx_t;
+
+/* Cache the relevant TLB entries so that we can make sure the user cannot
+ * create conflicting (overlapping) entries. Keep them public for informational
+ * purposes.
+ */
+typedef struct {
+  struct {
+    uint32_t pad:24;            /**< Padding */
+    uint32_t tid:8;             /**< Translation ID */
+  }        id;                  /**< Translation ID word */
+  struct {
+    uint32_t epn:22;            /**< Effective page number */
+    uint32_t size:3;            /**< Page size */
+    uint32_t v:1;               /**< Valid */
+    uint32_t att:2;             /**< Little-endian, User-defined */
+    uint32_t pad:4;             /**< Padding */
+  }        hi;                  /**< High word */
+  struct {
+    uint32_t rpn:22;            /**< Real page number */
+    uint32_t perm:6;            /**< Execute enable, Write-enable, Zone select */
+    uint32_t wimg:4;            /**< Write-through, Caching inhibited, Mem coherent, Guarded */
+  }        lo;                  /**< Low word */
+} bsp_tlb_entry_t;
+
+#define NTLBS  64
+
+extern bsp_tlb_entry_t* bsp_mmu_cache;
+
+
+// These constants will have to be shifted right by 20 bits before
+// being inserted into the high word of the TLB entry.
+
+#define MMU_M_SIZE_1K               (0x00000000U)
+#define MMU_M_SIZE_4K               (0x08000000U)
+#define MMU_M_SIZE_16K              (0x10000000U)
+#define MMU_M_SIZE_64K              (0x18000000U)
+#define MMU_M_SIZE_256K             (0x20000000U)
+#define MMU_M_SIZE_1M               (0x28000000U)
+#define MMU_M_SIZE_4M               (0x30000000U)
+#define MMU_M_SIZE_16M              (0x38000000U)
+#define MMU_M_SIZE_MIN              (MMU_M_SIZE_1K)
+#define MMU_M_SIZE_MAX              (MMU_M_SIZE_16M)
+#define MMU_M_SIZE                  (0x38000000U)
+#define MMU_V_SIZE                  (27)
+
+#define MMU_M_ATTR_LITTLE_ENDIAN    (0x02000000U)
+#define MMU_M_ATTR_USER0            (0x01000000U)
+#define MMU_M_ATTR                  (0x03000000U)
+#define MMU_V_ATTR                  (24)
+
+// These constants have the same bit positions they'll occupy
+// in the low word of the TLB entry.
+
+#define MMU_M_PERM_EXEC             (0x00000200U)
+#define MMU_M_PERM_DATA_WRITE       (0x00000100U)
+#define MMU_M_PERM_ZONE_SELECT      (0x000000f0U)
+#define MMU_M_PERM                  (0x000003f0U)
+#define MMU_V_PERM                  (4)
+
+#define MMU_M_PROP_WRITE_THROUGH    (0x00000008U)
+#define MMU_M_PROP_UNCACHED         (0x00000004U)
+#define MMU_M_PROP_MEM_COHERENT     (0x00000002U)
+#define MMU_M_PROP_GUARDED          (0x00000001U)
+#define MMU_M_PROP                  (0x0000000fU)
+#define MMU_V_PROP                  (0)
+
+
+/*
+ * Dump (cleartext) content info from cached TLB entries
+ * to a file (stdout if f==NULL).
+ */
+void
+bsp_mmu_dump_cache(FILE *f);
+
+/* Read a TLB entry from the hardware and store the settings in the
+ * bsp_mmu_cache[] structure.
+ *
+ * The routine can perform this operation quietly or
+ * print information to a file.
+ *
+ *   'key': TLB entry index.
+ * 'quiet': perform operation silently (no info printed) if nonzero.
+ *     'f': open FILE where to print information. May be NULL, in
+ *          which case 'stdout' is used.
+ *
+ * RETURNS:
+ *       0: success; TLB entry is VALID
+ *      +1: success but TLB entry is INVALID
+ *     < 0: error (-1: invalid argument)
+ *                (-2: driver not initialized)
+ */
+int
+bsp_mmu_update(bsp_tlb_idx_t key, bool quiet, FILE *f);
+
+/* Initialize cache.  Should be done only once although this is not enforced.
+ *
+ * RETURNS: zero on success, nonzero on error; in this case the driver will
+ *          refuse to change TLB entries (other than disabling them).
+ */
+int
+bsp_mmu_initialize(void);
+
+/* Find first free TLB entry by examining all entries' valid bit.  The first
+ * entry without the valid bit set is returned.
+ *
+ * RETURNS: A free TLB entry number.  -1 if no entry can be found.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find_first_free(void);
+
+/* Write a TLB entry (can also be used to disable an entry).
+ *
+ * The routine checks against the cached data in bsp_mmu_cache[]
+ * to prevent the user from generating overlapping entries.
+ *
+ *   'idx': TLB entry # to manipulate
+ *    'ea': Effective address (must be page aligned)
+ *    'pa': Physical  address (must be page aligned)
+ *    'sz': Page size selector; page size is 1024 * 2^(2*sz) bytes.
+ *          'sz' may also be one of the following:
+ *          - page size in bytes ( >= 1024 ); the selector
+ *            value is then computed by this routine.
+ *            However, 'sz' must be a valid page size
+ *            or -1 will be returned.
+ *          - a value < 0 to invalidate/disable the
+ *            TLB entry.
+ *  'flgs': Page's little-endian & user-defined flags, permissions and attributes
+ *   'tid': Translation ID
+ *
+ * RETURNS: 0 on success, nonzero on error:
+ *
+ *         >0: requested mapping would overlap with
+ *             existing mapping in another entry. Return
+ *             value gives conflicting entry + 1; i.e.,
+ *             if a value of 4 is returned then the request
+ *             conflicts with existing mapping in entry 3.
+ *         -1: invalid argument
+ *         -3: driver not initialized (or initialization failed).
+ *         <0: other error
+ */
+bsp_tlb_idx_t
+bsp_mmu_write(bsp_tlb_idx_t idx, uint32_t ea, uint32_t pa, uint sz,
+              uint32_t flgs, uint32_t tid);
+
+/* Check if a ea/tid/sz mapping overlaps with an existing entry.
+ *
+ *    'ea': The Effective Address to match against
+ *    'sz': The 'logarithmic' size selector; the page size
+ *          is 1024*2^(2*sz).
+ *   'tid': The TID to match against
+ *
+ * RETURNS:
+ *     >= 0: index of TLB entry that already provides a mapping
+ *           which overlaps within the ea range.
+ *       -1: SUCCESS (no conflicting entry found)
+ *     <=-2: ERROR (invalid input)
+ */
+bsp_tlb_idx_t
+bsp_mmu_match(uint32_t ea, int sz, uint32_t tid);
+
+/* Find TLB index that maps 'ea/tid' combination
+ *
+ *    'ea': Effective address to match against
+ *   'tid': The TID to match against
+ *
+ * RETURNS: index 'key'; i.e., the index number.
+ *
+ *          On error (no mapping) -1 is returned.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find(uint32_t ea, uint32_t tid);
+
+/* Mark TLB entry as invalid ('disabled').
+ *
+ * 'key': TLB entry index.
+ *
+ * RETURNS: zero on success, nonzero on error (TLB unchanged).
+ *
+ * NOTE:  If a TLB entry is disabled the associated
+ *        entry in bsp_mmu_cache[] is also marked as disabled.
+ */
+int
+bsp_mmu_invalidate(bsp_tlb_idx_t key);
+
+/** @} */
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
diff --git a/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c b/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c
index 74aaebb..ffcfe4c 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c
+++ b/c/src/lib/libbsp/powerpc/virtex4/irq/irq_init.c
@@ -26,8 +26,6 @@
 #include <rtems/powerpc/powerpc.h>
 #include <bsp/vectors.h>
 
-uint32_t* IRQ_Counter = (uint32_t*)0x1500;
-
 static rtems_irq_connect_data rtemsIrqTbl[BSP_IRQ_NUMBER];
 rtems_irq_connect_data *BSP_rtems_irq_tbl;
 rtems_irq_global_settings* BSP_rtems_irq_config;
diff --git a/c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S b/c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S
new file mode 100644
index 0000000..2ecbdcc
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/libc/fastcopy.S
@@ -0,0 +1,143 @@
+/*----------------------------------------------------------------------------+
+| COPYRIGHT I B M CORPORATION 2007
+| LICENSED MATERIAL - PROGRAM PROPERTY OF I B M
+| US Government Users Restricted Rights - Use, duplication or
+| disclosure restricted by GSA ADP Schedule Contract with
+| IBM Corp.
++----------------------------------------------------------------------------*/
+/*----------------------------------------------------------------------------+
+| PPC440 Example Code
+| Author: Paul Gramann
+| Component: none
+| File: fastcopy.S
+| Purpose: Fast memory copy for PPC405 and PPC440 cores
+| Changes:
+| Date: Comment:
+| ----- --------
+| 29-Nov-07 Created pag
++----------------------------------------------------------------------------*/
+/*----------------------------------------------------------------------------+
+| void fastcopy(dst, src, line_count);
+|
+| Inputs:
+| r3: destination address
+| r4: source address
+| r5: line count
+|
+| Notes:
+| dst and src address are assumed to be cache-line aligned.
+| line_count is assumed to be at least 3 cache-lines.
+| function needs to be in I-cache in order to not interfere with D-cache
+| access pattern.
+|
+| Algorithm:
+| touch (dcbt) 2 lines into the cache from source
+| zero (dcbz) 2 lines in cache for destination
+| repeat until 2 lines left to copy {
+| touch (dcbt) another line into the cache from source
+| load a source line from cache into 8 registers
+| zero (dcbz) another line in the cache for destination
+| store 8 registers to a destination cache line
+| }
+| copy last 2 cache lines of data from source to destination.
+|
+| During this algorithm the touches and loads are performed two lines ahead
+| of the data being copied.
+|
+| Register usage:
+| r0: scratch and copy
+| r1: stack frame pointer - not used
+| r2: small data area pointer - not used
+| r3: destination address
+| r4: source address
+| r5: line count and copy
+| r6: dcbz/dcbt offset and icbt address
+| r7-r12: copy and timing
++----------------------------------------------------------------------------*/
+
+#include <rtems/asm.h>
+
+        // Global Entry Point Definitions
+        .section .text,"ax"
+
+        .global fastcopy
+        
+fastcopy:
+// Prefetch this routine's own code into the instruction cache
+mflr r0          // save the caller's return address in r0
+bl 4             // lr <- address of the next instruction; NOTE(review): confirm the assembler treats '4' as a relative offset
+mflr r6          // r6 = address of this code
+mtlr r0          // restore the caller's return address
+addi r7,0,0      // r7 = 0 (index operand for icbt)
+addi r0,0,0x10   // r0 = 0x10
+mtctr r0         // ctr = 0x10 lines to touch
+itouch_loop:
+icbt r6,r7       // touch the line at r6 into the i-cache
+addi r6,r6,0x20  // advance r6 by one 0x20-byte line
+bdnz itouch_loop // repeat until ctr reaches 0
+
+// Bias both data pointers by -4 so that lwzu/stwu with displacement 4
+// start at the first word; r6 = 4 cancels the bias for dcbt/dcbz
+addi r6,0,4      // r6 = 4
+subf r4,r6,r4    // src -= 4
+subf r3,r6,r3    // dst -= 4
+
+// pre-touch and pre-zero two lines of the source and destination
+// leaving r6 offset pointing to next untouched line
+// Note that the order must be dcbt, dcbt, dcbz, dcbz
+dcbt r4,r6       // touch source line 0
+addi r6,r6,0x20  // r6 -> line 1
+dcbt r4,r6       // touch source line 1
+addi r6,r6,-0x20 // r6 -> back to line 0
+dcbz r3,r6       // zero destination line 0
+addi r6,r6,0x20  // r6 -> line 1
+dcbz r3,r6       // zero destination line 1
+addi r6,r6,0x20  // r6 -> line 2, the first untouched line
+
+addi r5,r5,-2 // the last 2 lines are copied separately below (count is assumed to be >= 3)
+mtctr r5      // ctr = number of lines copied by the main loop
+
+cache_copy_loop:
+dcbt r4,r6    // touch the source line 2 lines ahead
+lwzu r0,4(r4) // load one line (8 words) from the source, advancing r4
+lwzu r5,4(r4)
+lwzu r7,4(r4)
+lwzu r8,4(r4)
+lwzu r9,4(r4)
+lwzu r10,4(r4)
+lwzu r11,4(r4)
+lwzu r12,4(r4)
+dcbz r3,r6    // zero the destination line 2 lines ahead
+stwu r0,4(r3) // store the 8 words to the destination, advancing r3
+stwu r5,4(r3)
+stwu r7,4(r3)
+stwu r8,4(r3)
+stwu r9,4(r3)
+stwu r10,4(r3)
+stwu r11,4(r3)
+stwu r12,4(r3)
+bdnz cache_copy_loop
+
+// Copy the last two lines; they were already touched/zeroed above, so no cache ops.
+addi r0,0,2
+mtctr r0      // ctr = 2 remaining lines
+final_copy_loop:
+lwzu r0,4(r4) // load 8 words
+lwzu r5,4(r4)
+lwzu r7,4(r4)
+lwzu r8,4(r4)
+lwzu r9,4(r4)
+lwzu r10,4(r4)
+lwzu r11,4(r4)
+lwzu r12,4(r4)
+stwu r0,4(r3) // store 8 words
+stwu r5,4(r3)
+stwu r7,4(r3)
+stwu r8,4(r3)
+stwu r9,4(r3)
+stwu r10,4(r3)
+stwu r11,4(r3)
+stwu r12,4(r3)
+bdnz final_copy_loop
+addi r3,r3,4    // undo the -4 bias: r3 = first byte past the copied data
+blr             // return to the caller
diff --git a/c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c b/c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c
new file mode 100644
index 0000000..a987195
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/libc/memcpy.c
@@ -0,0 +1,225 @@
+/*!@file     memcpy.c
+*
+* @brief     PowerPC optimized memcpy routines
+*
+* @author    S. Maldonado -- REG - (smaldona at slac.stanford.edu)
+*
+* @date      Sept 24, 2012 -- Created
+*
+* $Revision: $
+*
+* @verbatim                    Copyright 2012
+*                                     by
+*                        The Board of Trustees of the
+*                      Leland Stanford Junior University.
+*                             All rights reserved.
+* @endverbatim
+*/
+
+#include <rtems.h>
+#include <rtems/powerpc/powerpc.h>  /* For PPC_CACHE_ALIGNMENT */
+
+/* Nonzero if either X or Y is not aligned on a "long" boundary.  */
+#define UNALIGNED(X, Y) \
+  (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1)))
+
+/* How many bytes are copied each iteration of the 4X unrolled loop.  */
+#define BIGBLOCKSIZE    (sizeof (long) << 2)
+
+/* How many bytes are copied each iteration of the word copy loop.  */
+#define LITTLEBLOCKSIZE (sizeof (long))
+
+/* Threshold for punting to the byte copier: nonzero if LEN is too small.  */
+#define TOO_SMALL(LEN)  ((LEN) < BIGBLOCKSIZE)
+
+/* Nonzero if either X or Y is not aligned on a cache line boundary.  */
+#define CACHE_UNALIGNED(X, Y) \
+  (((long)(X) & (PPC_CACHE_ALIGNMENT - 1)) | ((long)(Y) & (PPC_CACHE_ALIGNMENT - 1)))
+
+/* Threshold for fastcopy: nonzero if LEN is less than 16 cache lines.  */
+#define MIN_BLOCKS(LEN)  ((LEN) < (PPC_CACHE_ALIGNMENT*16))
+
+/* IBM fastcopy assembly routine */
+extern int *fastcopy(void *dst, const void *src, size_t count);
+
+void *memcpyppc32(void *dst, const void *src, size_t size);
+
+void *memcpynewlib(char *dst0, const char *src0, size_t len0);
+
+/*
+ * RTEMS drop-in replacement for memcpy().
+ * Blocks of at least 16 cache lines whose source and destination are both
+ * cache-line aligned are copied with the IBM fastcopy routine; everything
+ * else is handed to memcpyppc32().
+ *
+ * Returns the original 'dst', as the C standard requires of memcpy().
+ */
+void *memcpy(void *dst, const void *src, size_t len)
+{
+  uint8_t       *d = dst;
+  const uint8_t *s = src;
+  size_t         bulk;
+
+  /* Too short or not cache-line aligned: use the general-purpose copier. */
+  if (MIN_BLOCKS(len) || CACHE_UNALIGNED(src, dst))
+  {
+    /* Its return value is ignored so this function always returns 'dst'. */
+    memcpyppc32(dst, src, len);
+    return dst;
+  }
+
+  /* fastcopy takes a count of whole cache lines, not bytes */
+  fastcopy(dst, src, len / PPC_CACHE_ALIGNMENT);
+
+  /* Step over the bytes fastcopy handled */
+  bulk = (len / PPC_CACHE_ALIGNMENT) * PPC_CACHE_ALIGNMENT;
+  d   += bulk;
+  s   += bulk;
+  len -= bulk;
+
+  /* Pick up any residual words */
+  while (len >= LITTLEBLOCKSIZE)
+  {
+    *(uint32_t *)d = *(const uint32_t *)s;
+    d   += LITTLEBLOCKSIZE;
+    s   += LITTLEBLOCKSIZE;
+    len -= LITTLEBLOCKSIZE;
+  }
+
+  /* Pick up any residual bytes */
+  while (len--)
+    *d++ = *s++;
+
+  return dst;
+}
+
+/*
+ * A memcpy routine optimized for ppc32 which handles small data blocks,
+ * destination address alignment, and cache line copying.
+ * Adapted from code examples provided in a 2004 posting at
+ * http://www.powerdeveloper.org/forums/viewtopic.php?t=1426
+ * Returns the original 'dst', matching the memcpy() contract.
+ */
+void *memcpyppc32(void *dst, const void *src, size_t size)
+{
+  uint8_t        *d = dst;
+  const uint8_t  *s = src;
+  uint32_t       *dst32;
+  const uint32_t *src32;
+  uint32_t        i;
+
+  if (size >= 4)
+  {
+    /* align dest to 16 bits */
+    if ((uintptr_t)d & 1)
+    {
+      *d++ = *s++;
+      size--;
+    }
+
+    /* align dest to 32 bits */
+    if ((uintptr_t)d & 2)
+    {
+      *(uint16_t *)d = *(const uint16_t *)s;
+      s    += 2;
+      d    += 2;
+      size -= 2;
+    }
+
+    /* cache line copy, min 4 lines */
+    if (size >= 128)
+    {
+      /* align to cache line, 32 bytes */
+      while ((uintptr_t)d & 31)
+      {
+        *(uint32_t *)d = *(const uint32_t *)s;
+        s    += 4;
+        d    += 4;
+        size -= 4;
+      }
+
+      src32 = (const uint32_t *)s;
+      dst32 = (uint32_t *)d;
+      /* copy cache lines */
+      for (i = size / (8 * sizeof(uint32_t)); i; i--)
+      {
+        /* zero the destination line first; dcbt on the source didn't help */
+        asm volatile ("dcbz 0,%0" : : "r" (&dst32[0]));
+        dst32[0] = src32[0];
+        dst32[1] = src32[1];
+        dst32[2] = src32[2];
+        dst32[3] = src32[3];
+        dst32[4] = src32[4];
+        dst32[5] = src32[5];
+        dst32[6] = src32[6];
+        dst32[7] = src32[7];
+        src32 += 8;
+        dst32 += 8;
+      }
+      size &= 8 * sizeof(uint32_t) - 1;
+      s = (const uint8_t *)src32;
+      d = (uint8_t *)dst32;
+    }
+
+    /* copy residual words */
+    for (i = size / sizeof(uint32_t); i; i--)
+    {
+      *(uint32_t *)d = *(const uint32_t *)s;
+      s += 4;
+      d += 4;
+    }
+    size &= sizeof(uint32_t) - 1;
+  }
+
+  /* copy residual bytes */
+  while (size--)
+    *d++ = *s++;
+  /* the caller gets the start of the destination, not the end */
+  return dst;
+}
+
+/*
+ * Reference copy of newlib-1.16.0's default memcpy; returns 'dst0' like memcpy().
+ */
+void *memcpynewlib(char *dst0, const char *src0, size_t len0)  {
+  char *dst = dst0;
+  const char *src = src0;
+  long *aligned_dst;
+  const long *aligned_src;
+  size_t len = len0;
+
+  /*
+   * If the size is small, or either SRC or DST is unaligned,
+   * then punt into the byte copy loop.  This should be rare.
+   */
+  if (!TOO_SMALL(len) && !UNALIGNED (src, dst))
+    {
+      aligned_dst = (long *)dst;
+      aligned_src = (const long *)src;
+
+      /* Copy 4X long words at a time if possible.  */
+      while (len >= BIGBLOCKSIZE)
+        {
+          *aligned_dst++ = *aligned_src++;
+          *aligned_dst++ = *aligned_src++;
+          *aligned_dst++ = *aligned_src++;
+          *aligned_dst++ = *aligned_src++;
+          len -= BIGBLOCKSIZE;
+        }
+
+      /* Copy one long word at a time if possible.  */
+      while (len >= LITTLEBLOCKSIZE)
+        {
+          *aligned_dst++ = *aligned_src++;
+          len -= LITTLEBLOCKSIZE;
+        }
+
+      dst = (char *)aligned_dst;
+      src = (const char *)aligned_src;
+    }
+
+  /* Pick up any residual bytes  */
+  while (len--)
+    *dst++ = *src++;
+
+  /* Return the start of the destination, not the advanced cursor. */
+  return dst0;
+}
diff --git a/c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c b/c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c
new file mode 100644
index 0000000..c294a9a
--- /dev/null
+++ b/c/src/lib/libbsp/powerpc/virtex4/mmu/mmu.c
@@ -0,0 +1,568 @@
+/**
+ * @file
+ *
+ * @ingroup Virtex4MMU
+ *
+ * @brief Implementation of routines to manipulate the PPC 405 mmu.
+ *
+ *        Since this is a real-time OS we want to stay away from
+ *        software TLB replacement.
+ */
+/* $Id$ */
+/*
+ * Authorship
+ * ----------
+ * This software was created by
+ *     Till Straumann <strauman at slac.stanford.edu>, 2005-2007,
+ * 	   Stanford Linear Accelerator Center, Stanford University.
+ * and was transcribed for the PPC 405 by
+ *     R. Claus <claus at slac.stanford.edu>, 2012,
+ *       Stanford Linear Accelerator Center, Stanford University,
+ *
+ * Acknowledgement of sponsorship
+ * ------------------------------
+ * This software was produced by
+ *     the Stanford Linear Accelerator Center, Stanford University,
+ * 	   under Contract DE-AC03-76SFO0515 with the Department of Energy.
+ *
+ * Government disclaimer of liability
+ * ----------------------------------
+ * Neither the United States nor the United States Department of Energy,
+ * nor any of their employees, makes any warranty, express or implied, or
+ * assumes any legal liability or responsibility for the accuracy,
+ * completeness, or usefulness of any data, apparatus, product, or process
+ * disclosed, or represents that its use would not infringe privately owned
+ * rights.
+ *
+ * Stanford disclaimer of liability
+ * --------------------------------
+ * Stanford University makes no representations or warranties, express or
+ * implied, nor assumes any liability for the use of this software.
+ *
+ * Stanford disclaimer of copyright
+ * --------------------------------
+ * Stanford University, owner of the copyright, hereby disclaims its
+ * copyright and all other rights in this software.  Hence, anyone may
+ * freely use it for any purpose without restriction.
+ *
+ * Maintenance of notices
+ * ----------------------
+ * In the interest of clarity regarding the origin and status of this
+ * SLAC software, this and all the preceding Stanford University notices
+ * are to remain affixed to any copy or derivative of this software made
+ * or distributed by the recipient and are to be affixed to any copy of
+ * software made or distributed by the recipient that contains a copy or
+ * derivative of this software.
+ *
+ * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
+ */
+
+/* 405 MSR definitions; note that there are *substantial* differences
+ * compared to classic powerpc; in particular, IS/DS are *different*
+ * from IR/DR.
+ *
+ * Also: To disable/enable all external interrupts, CE and EE must both be
+ *       controlled.
+ */
+#include <rtems.h>
+#include <rtems/bspIo.h>
+#include <rtems/powerpc/powerpc.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <bsp/mmu.h>
+
+
+#ifdef DEBUG
+#define STATIC          /* DEBUG builds: expands to nothing */
+#else
+#define STATIC static   /* otherwise: expands to 'static' */
+#endif
+
+/* Software copy of the TLB; stays NULL until bsp_mmu_initialize() sets it. */
+bsp_tlb_entry_t* bsp_mmu_cache = 0;
+
+
+/* printf-style helper: formats to 'f' through vfprintf() once newlib's
+ * stdio is initialized and 'f' is non-NULL; otherwise falls back to
+ * vprintk(), because these routines may run during early initialization.
+ */
+static void
+myprintf(FILE *f, char *fmt, ...)
+{
+  va_list args;
+  int     stdio_ready = f && _impure_ptr->__sdidinit;
+  va_start(args, fmt);
+  if (stdio_ready) {
+    vfprintf(f,fmt,args);
+  } else {
+    /* Either no stream was given or stdio is not up yet. */
+    vprintk(fmt,args);
+  }
+  va_end(args);
+}
+
+
+/* Print every valid entry of the bsp_mmu_cache[] snapshot to 'f'. */
+void
+bsp_mmu_dump_cache(FILE *f)
+{
+  bsp_tlb_idx_t slot;
+  if ( !bsp_mmu_cache ) {
+    myprintf(stderr,"MMU TLB cache not initialized\n");
+    return;
+  }
+  for ( slot=0; slot<NTLBS; slot++ ) {
+    bsp_tlb_entry_t *entry = &bsp_mmu_cache[slot];
+    if ( entry->hi.v ) {
+      /* Line 1: effective-address range, TID and the EU0 attribute field */
+      myprintf(f, "#%2i: EA 0x%08x .. 0x%08x, TID  0x%03x, EU0  0x%01x\n",
+               slot, entry->hi.epn << 10,
+               (entry->hi.epn << 10) + (1024<<(2*entry->hi.size))-1,
+               entry->id.tid, entry->hi.att);
+      /* Line 2: physical-address range, permission and WIMG fields */
+      myprintf(f, "     PA 0x%08"PRIx32" .. 0x%08"PRIx32", PERM 0x%03x, WIMG 0x%02x\n",
+               entry->lo.rpn << 10,
+               (entry->lo.rpn << 10) + (1024<<(2*entry->hi.size))-1,
+               entry->lo.perm, entry->lo.wimg);
+    }
+  }
+}
+
+/* Copy PID and hardware TLB entry 'key' into the 3-word record at 'tlb'. */
+static void
+fetch(bsp_tlb_idx_t key, bsp_tlb_entry_t* tlb)
+{
+  register uint32_t tmp;
+  __asm__ volatile ("mfpid   %[tmp]           \n\t" /* word 0 <- PID */
+                    "stw     %[tmp],0(%[tlb]) \n\t"
+                    "tlbrehi %[tmp],%[key]    \n\t" /* word 1 <- TLBHI; NOTE(review): 405 tlbrehi may also load PID - confirm */
+                    "stw     %[tmp],4(%[tlb]) \n\t"
+                    "tlbrelo %[tmp],%[key]    \n\t" /* word 2 <- TLBLO */
+                    "stw     %[tmp],8(%[tlb]) \n\t"
+                    "sync                     \n\t"
+                    : [tmp]"=&r"(tmp)
+                    : [key]"r"(key), [tlb]"b"(tlb)
+                    : "memory"); /* the stw's write *tlb; GCC must not cache it */
+}
+
+
+/* Load PID and hardware TLB entry 'key' from the 3-word record at 'tlb'. */
+static void
+store(bsp_tlb_idx_t key, bsp_tlb_entry_t* tlb)
+{
+  register uint32_t tmp;
+  __asm__ volatile ("lwz     %[tmp],0(%[tlb]) \n\t" /* PID   <- word 0 */
+                    "mtpid   %[tmp]           \n\t"
+                    "lwz     %[tmp],4(%[tlb]) \n\t" /* TLBHI <- word 1 */
+                    "tlbwehi %[tmp],%[key]    \n\t"
+                    "lwz     %[tmp],8(%[tlb]) \n\t" /* TLBLO <- word 2 */
+                    "tlbwelo %[tmp],%[key]    \n\t"
+                    : [tmp]"=&r"(tmp)
+                    : [tlb]"b"(tlb), [key]"r"(key)
+                    : "memory"); /* the lwz's read *tlb; pending stores must land first */
+}
+
+/* Issue an isync; both callers invoke this immediately after store(). */
+static void
+commit(void)
+{
+  __asm__ volatile("isync           \n\t");
+}
+
+
+/*
+ * Re-read one TLB entry from the hardware into its bsp_mmu_cache[] slot
+ * and optionally print a description of it.
+ *
+ *   'key': which TLB entry to access (0 .. NTLBS-1).
+ * 'quiet': perform operation silently (no info printed)
+ *          if true.
+ *     'f': open FILE where to print information.  If it is
+ *          NULL, or stdio is not initialized yet, the text
+ *          is emitted through printk instead.
+ *
+ * RETURNS:
+ *       0: success; TLB entry is VALID
+ *      +1: success but TLB entry is INVALID
+ *      -1: 'key' is not a valid TLB index
+ *      -2: driver not initialized (bsp_mmu_cache is NULL)
+ */
+int
+bsp_mmu_update(bsp_tlb_idx_t key, bool quiet, FILE *f)
+{
+  rtems_interrupt_level  level;
+  bsp_tlb_entry_t*       entry;
+  bsp_tlb_idx_t          slot = key;
+
+  if ( slot < 0 || slot > NTLBS-1 )
+    return -1;
+
+  if (!bsp_mmu_cache)
+    return -2;
+
+  entry = &bsp_mmu_cache[slot];
+
+  /* Read the entry with interrupts masked */
+  rtems_interrupt_disable(level);
+  fetch(slot, entry);
+  rtems_interrupt_enable(level);
+
+  if ( !entry->hi.v ) {
+    /* Entry is switched off; report only its size field */
+    if ( !quiet ) {
+      myprintf(f,
+               "TLB Entry # %2d <OFF> (size 0x%x = 0x%xb)\n",
+               slot, entry->hi.size, (1024<<(2*entry->hi.size))
+               );
+    }
+    return 1;
+  }
+
+  if ( quiet )
+    return 0;
+
+  /* Layout of the report printed for a valid entry:
+   *
+   *  TLB Entry #  0 spans EA range     0x00000000 - 0x00000000
+   *  Mapping:     VA     [TID 0x00 / EPN 0x00000] -> RPN 0x00000
+   *  Size:        TSIZE 0x0  (4^sz KB = 000000 KB = 0x00000000 B)
+   *  Attributes:  PERM  0x00 (ex/wr/zsel) WIMG 0x00 EU0 0x0
+   */
+  myprintf(f,
+           "TLB Entry # %2d spans EA range     0x%08x - 0x%08x\n",
+           slot,
+           (entry->hi.epn << 10),
+           (entry->hi.epn << 10) + (1024<<(2*entry->hi.size)) - 1
+           );
+
+  myprintf(f,
+           "Mapping:     VA     [TID 0x%02x / EPN 0x%05x] -> RPN 0x%05"PRIx32"\n",
+           entry->id.tid, entry->hi.epn, entry->lo.rpn
+           );
+
+  myprintf(f,
+           "Size:        TSIZE 0x%x  (4^sz KB = %6d KB = 0x%08x B)\n",
+           entry->hi.size, (1<<(2*entry->hi.size)), (1024<<(2*entry->hi.size))
+           );
+  myprintf(f,
+           "Attributes:  PERM  0x%02x (ex/wr/zsel) WIMG 0x%02x EU0 0x%01x\n",
+           entry->lo.perm, entry->lo.wimg, entry->hi.att
+           );
+
+  return 0;
+}
+
+/* Snapshot all NTLBS hardware TLB entries into a static array and publish
+ * that array as bsp_mmu_cache.  Should be done only once although this is
+ * not enforced; a repeated call re-reads the hardware into the same array.
+ *
+ * RETURNS: always zero in this implementation.
+ */
+int
+bsp_mmu_initialize()
+{
+  /* Should malloc if it's not too early */
+  static bsp_tlb_entry_t snapshot[NTLBS];
+  rtems_interrupt_level  level;
+  bsp_tlb_idx_t          slot;
+
+  /* Read out the whole TLB with interrupts masked */
+  rtems_interrupt_disable(level);
+  for (slot = 0; slot < NTLBS; slot++)
+    fetch(slot, &snapshot[slot]);
+  rtems_interrupt_enable(level);
+
+  bsp_mmu_cache = snapshot;
+  return 0;
+}
+
+/* Scan the hardware TLB (not the cache) from entry 0 upwards and report the
+ * first entry whose valid bit is clear.
+ *
+ * RETURNS: A free TLB entry number.  -1 if no entry can be found.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find_first_free()
+{
+  bsp_tlb_entry_t probe;
+  bsp_tlb_idx_t   slot;
+
+  for (slot = 0; slot < NTLBS; slot++) {
+    register uint32_t word;
+    __asm__ volatile ("tlbrehi %[word],%[slot]    \n\t"
+                      "stw     %[word],4(%[rec])  \n\t" /* probe.hi */
+                      "sync                       \n\t"
+                      : [word]"=&r"(word)
+                      : [slot]"r"(slot),
+                        [rec]"b"(&probe)
+                      : "memory"
+                      );
+    if (!probe.hi.v)
+      return slot;   /* first entry that is not in use */
+  }
+  return -1;         /* every entry is valid */
+}
+
+/*
+ * Write TLB entry (can also be used to disable an entry).
+ *
+ * The routine checks against the cached data in
+ * bsp_mmu_cache[] to prevent the user from generating
+ * overlapping entries.
+ *
+ *   'idx': TLB entry # to manipulate
+ *    'ea': Effective address (must be page aligned)
+ *    'pa': Physical  address (must be page aligned)
+ *    'sz': Page size selector (0..7); page size is
+ *          1024 * 2^(2*sz) bytes.
+ *          'sz' may also be one of the following:
+ *          - page size in bytes ( >= 1024 ); the selector
+ *            value is then computed by this routine.
+ *            However, 'sz' must be a valid page size
+ *            or -1 will be returned.
+ *          - a value < 0 to invalidate/disable the
+ *            TLB entry.  Because the parameter is unsigned it
+ *            is reinterpreted as signed inside the routine.
+ *  'flgs': Page's little-endian & user-defined flags, permissions and attributes
+ *   'tid': Translation ID
+ *
+ * RETURNS: 0 on success, nonzero on error:
+ *
+ *         >0: requested mapping would overlap with
+ *             existing mapping in other entry. Return
+ *             value gives conflicting entry + 1; i.e.,
+ *             if a value of 4 is returned then the request
+ *             conflicts with existing mapping in entry 3.
+ *         -1: invalid argument
+ *         -3: driver not initialized (or initialization failed).
+ *         <0: other error
+ */
+bsp_tlb_idx_t
+bsp_mmu_write(bsp_tlb_idx_t idx, uint32_t ea, uint32_t pa, uint sz,
+              uint32_t flgs, uint32_t tid)
+{
+  bsp_tlb_entry_t       tlb;
+  uint32_t              msk;
+  bsp_tlb_idx_t         lkup;
+  rtems_interrupt_level lvl;
+  int                   sel = (int)sz; /* signed view: 'sz' itself is never < 0 */
+  bool                  enable;
+
+  if ( sel >= 1024 ) {
+    /* Assume they literally specify a size */
+    for ( sel = 0; sz != (1024u<<(2*sel)); ) {
+      if ( ++sel > 7 ) {
+        return -1;
+      }
+    }
+    /* OK, acceptable */
+  }
+
+  /* Reject bad selectors before they are used as a shift count */
+  if ( sel > 7 ) {
+    myprintf(stderr,"Invalid size selector %d (must be 0..7)\n", sel);
+    return -1;
+  }
+
+  enable = sel >= 0;
+  msk    = enable ? (1024u<<(2*sel)) - 1 : 0;
+
+  if ( !bsp_mmu_cache && enable ) {
+    myprintf(stderr,"MMU driver not initialized; refusing to enable any entry\n");
+    return -3;
+  }
+
+  if ( (ea & msk) || (pa & msk) ) {
+    myprintf(stderr,"Misaligned EA (%08x) or PA (%08x) (mask is %08x)\n", ea, pa, msk);
+    return -1;
+  }
+
+  if ( idx < 0 || idx > NTLBS-1 )
+    return -1;
+
+  if ( enable ) {
+    lkup = bsp_mmu_match(ea, sel, tid);
+    if ( lkup < -1 ) {
+      /* some error */
+      return lkup;
+    }
+    if ( (lkup >= 0) && (lkup != idx) && (bsp_mmu_cache[lkup].hi.v != 0) ) {
+      myprintf(stderr,"TLB #%i overlaps with requested mapping\n", lkup);
+      bsp_mmu_update( lkup, false, stderr);
+      return lkup+1;
+    }
+  } else {
+    sel = 0;   /* a disabled entry is written with size selector 0 */
+  }
+
+  /* OK to proceed */
+  tlb.id.tid  = tid;
+  tlb.hi.v    = enable;
+  tlb.hi.size = sel;
+  tlb.hi.epn  = (ea & (0xfffffc00 << (sel + sel))) >> 10;
+  tlb.lo.rpn  = (pa & (0xfffffc00 << (sel + sel))) >> 10;
+  tlb.hi.att  = (flgs & MMU_M_ATTR) >> MMU_V_ATTR;
+  tlb.lo.perm = (flgs & MMU_M_PERM) >> MMU_V_PERM;
+  tlb.lo.wimg = (flgs & MMU_M_PROP) >> MMU_V_PROP;
+
+  rtems_interrupt_disable(lvl);
+  store(idx, &tlb);
+  commit();
+  rtems_interrupt_enable(lvl);
+
+  /* update cache */
+  bsp_mmu_update(idx, true, 0);
+  return 0;
+}
+
+/*
+ * Check if a ea/sz/tid mapping overlaps with an existing entry
+ * recorded in bsp_mmu_cache[].
+ *
+ *    'ea': The Effective Address to match against
+ *    'sz': The 'logarithmic' size selector (0..7); the page
+ *          size is 1024*2^(2*sz).
+ *   'tid': The TID to match against; cached entries whose
+ *          TID is zero are compared regardless of 'tid'.
+ *
+ * RETURNS:
+ *     >= 0: index of the first cached entry whose range overlaps
+ *       -1: SUCCESS (no conflicting entry found)
+ *       -2: 'ea' is not aligned to the page size
+ *       -3: cache not initialized
+ *       -4: 'sz' out of range
+ */
+bsp_tlb_idx_t
+bsp_mmu_match(uint32_t ea, int sz, uint32_t tid)
+{
+  bsp_tlb_entry_t* entry;
+  bsp_tlb_idx_t    slot;
+  uint32_t         req_end, ent_start, ent_end;
+
+  if ( sz < 0 || sz > 7 )
+    return -4;
+
+  if ( !bsp_mmu_cache )
+    return -3;
+
+  /* Offset of the last byte of the requested page == alignment mask */
+  req_end = (1024<<(2*sz)) - 1;
+  if ( ea & req_end )
+    return -2;
+  req_end += ea;
+
+  for ( slot=0; slot<NTLBS; slot++ ) {
+    entry = &bsp_mmu_cache[slot];
+    /* Only valid entries with TID 0 or the requested TID can conflict */
+    if ( !entry->hi.v || (entry->id.tid && entry->id.tid != tid) )
+      continue;
+
+    ent_start = entry->hi.epn << 10;
+    ent_end   = ent_start + ((1024<<(2*entry->hi.size)) - 1);
+
+    /* Disjoint if one range begins after the other one ends */
+    if ( ea > ent_end || req_end < ent_start )
+      continue;
+    return slot;
+  }
+  return -1;
+}
+
+/* Find TLB index that maps 'ea/tid' combination by letting the
+ * hardware search for it (tlbsx. with PID temporarily set to 'tid').
+ *
+ *    'ea': Effective address to match against
+ *   'tid': The TID to match against
+ *
+ * RETURNS: index of the TLB entry providing the mapping.
+ *
+ *          On error (no mapping) -1 is returned.
+ */
+bsp_tlb_idx_t
+bsp_mmu_find(uint32_t ea, uint32_t tid)
+{
+  rtems_interrupt_level  lvl;
+  register uint32_t      pid;
+  register bsp_tlb_idx_t idx;
+  register int           failure;
+
+  rtems_interrupt_disable(lvl);
+  /* Outputs are written before all inputs are consumed: early-clobber each */
+  __asm__ volatile ("mfpid  %[pid]         \n\t" /* Save PID */
+                    "mtpid  %[tid]         \n\t"
+                    "tlbsx. %[idx],0,%[ea] \n\t" /* Failure changes the index reg randomly. */
+                    "mfcr   %[failure]     \n\t"
+                    "mtpid  %[pid]         \n\t" /* Restore PID */
+                    : [pid]"=&r"(pid),           /* '&': must not share a reg with tid/ea */
+                      [idx]"=&r"(idx),
+                      [failure]"=&r"(failure)
+                    : [tid]"r"(tid),
+                      [ea]"r"(ea)
+                    : "cc"
+                    );
+
+  rtems_interrupt_enable(lvl);
+
+  return (failure & 0x20000000) ? idx : -1; /* CR0[EQ] set: tlbsx. found a match */
+}
+
+/* Mark TLB entry as invalid ('disabled').
+ *
+ * 'key': TLB entry (index).
+ *
+ * RETURNS: zero on success, nonzero on error (TLB unchanged):
+ *          -1: 'key' out of range
+ *          -2: translation is on but no entry maps address 0
+ *          -3: 'key' is the entry mapping address 0
+ *
+ * NOTE:  If a TLB entry is disabled the associated
+ *        entry in bsp_mmu_cache[] is also marked as
+ *        disabled, provided the cache has been initialized.
+ */
+int
+bsp_mmu_invalidate(bsp_tlb_idx_t key)
+{
+  bsp_tlb_idx_t         k0;
+  rtems_interrupt_level lvl;
+  bsp_tlb_entry_t       tlb;
+  uint32_t              msr;
+
+  /* Minimal guard against bad key */
+  if ( key < 0 || key > NTLBS-1 )
+    return -1;
+
+  _CPU_MSR_GET(msr);
+
+  /* While address translation is enabled... */
+  if (msr & (PPC_MSR_IR | PPC_MSR_DR))
+  {
+    /* Must not invalidate page 0 which holds vectors, text etc...  */
+    k0 = bsp_mmu_find(0, 0);
+    if ( -1 == k0 ) {
+      myprintf(stderr,"No mapping for address 0 found\n");
+      return -2;
+    }
+
+    /* NOTE: we assume PID is ignored */
+    if ( k0 == key ) {
+      myprintf(stderr,"Cannot invalidate page holding address 0 (always needed)\n");
+      return -3;
+    }
+  }
+
+  rtems_interrupt_disable(lvl);
+  /* Read-modify-write: clear only the valid bit of the hardware entry */
+  fetch(key, &tlb);
+  tlb.hi.v = 0;
+  store(key, &tlb);
+  commit();
+
+  /* update cache; it is still NULL if bsp_mmu_initialize() has not run */
+  if ( bsp_mmu_cache )
+    bsp_mmu_cache[ key ].hi.v = tlb.hi.v;
+
+  rtems_interrupt_enable(lvl);
+
+  return 0;
+}
diff --git a/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c b/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c
index 2aab2c2..53fd92e 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c
+++ b/c/src/lib/libbsp/powerpc/virtex4/startup/bspstart.c
@@ -54,13 +54,8 @@
  *  Modifications for PPC405GP by Dennis Ehlin
  *  Modifications for Virtex4 by Richard Claus <claus at slac.stanford.edu>
  */
-
-#include <string.h>
-#include <fcntl.h>
-
-#include <bsp.h>
-#include <bsp/irq.h>
-#include <bsp/vectors.h>
+#include <rtems.h>
+#include <rtems/config.h>
 #include <rtems/bspIo.h>
 #include <rtems/libio.h>
 #include <rtems/libcsupport.h>
@@ -68,6 +63,14 @@
 #include <libcpu/cpuIdent.h>
 #include <libcpu/spr.h>
 
+#include <bsp.h>
+#include <bsp/vectors.h>
+#include <bsp/bootcard.h>
+#include <bsp/irq.h>
+
+#include <string.h>
+#include <fcntl.h>
+
 #define DO_DOWN_ALIGN(x,a) ((x) & ~((a)-1))
 
 #define DO_UP_ALIGN(x,a)   DO_DOWN_ALIGN(((x) + (a) - 1 ),a)
@@ -76,13 +79,6 @@
 #define CPU_UP_ALIGN(x)    DO_UP_ALIGN(x, CPU_ALIGNMENT)
 
 
-/* Expected by clock.c */
-uint32_t    bsp_clicks_per_usec;
-bool        bsp_timer_internal_clock;   /* true, when timer runs with CPU clk */
-uint32_t    bsp_timer_least_valid;
-uint32_t    bsp_timer_average_overhead;
-
-
 /* Defined in linkcmds linker script */
 LINKER_SYMBOL(RamBase);
 LINKER_SYMBOL(RamSize);
@@ -96,35 +92,24 @@ LINKER_SYMBOL(WorkAreaBase);
 LINKER_SYMBOL(MsgAreaBase);
 LINKER_SYMBOL(MsgAreaSize);
 LINKER_SYMBOL(__phy_ram_end);
+LINKER_SYMBOL(bsp_exc_vector_base);
+
+
+/* Expected by clock.c */
+uint32_t    bsp_clicks_per_usec;
+bool        bsp_timer_internal_clock;   /* true, when timer runs with CPU clk */
+uint32_t    bsp_timer_least_valid;
+uint32_t    bsp_timer_average_overhead;
 
 
 /*
  * Provide weak aliases so that RTEMS distribution builds
  */
 static void _noopfun(void) {}
-static void _bsp_start(void)
-{
-  uintptr_t         intrStackStart = CPU_UP_ALIGN((uint32_t)__bsp_ram_start);
-  uintptr_t         intrStackSize  = rtems_configuration_get_interrupt_stack_size();
-
-  /*
-   * Initialize default raw exception handlers.
-   *
-   * This BSP does not assume anything about firmware possibly loaded in the
-   * FPGA, so the external interrupt should not be enabled in order to avoid
-   * spurious interrupts.
-   */
-  ppc_exc_initialize(PPC_INTERRUPT_DISABLE_MASK_DEFAULT & ~MSR_EE,
-                     intrStackStart,
-                     intrStackSize);
-
-  /* Install our own set of exception vectors */
-  BSP_rtems_irq_mngt_init(0);
-}
 
 
 void app_bsp_start(void)
-__attribute__(( weak, alias("_bsp_start") ));
+__attribute__(( weak, alias("_noopfun") ));
 
 void app_bsp_pretasking_hook(void)
 __attribute__(( weak, alias("_noopfun") ));
@@ -147,20 +132,31 @@ static void __bsp_outchar_to_memory(char c)
 void BSP_ask_for_reset(void)
 {
   printk("\nSystem stopped, issue RESET");
+
   for(;;);
 }
 
 
 void BSP_panic(char *s)
 {
-  printk("\n%s PANIC %s\n", _RTEMS_version, s);
+  rtems_interrupt_level level;
+
+  rtems_interrupt_disable(level);
+
+  printk("\n%s PANIC %s\n", rtems_get_version_string(), s);
+
   BSP_ask_for_reset();
 }
 
 
 void _BSP_Fatal_error(unsigned int v)
 {
-  printk("\n%s FATAL ERROR %x\n", _RTEMS_version, v);
+  rtems_interrupt_level level;
+
+  rtems_interrupt_disable(level);
+
+  printk("\n%s FATAL ERROR %x\n", rtems_get_version_string(), v);
+
   BSP_ask_for_reset();
 }
 
@@ -176,13 +172,14 @@ void bsp_start(void)
 {
   uintptr_t          intrStackStart;
   uintptr_t          intrStackSize;
+  rtems_status_code  sc;            /* result of ppc_exc_initialize() */
   ppc_cpu_id_t       myCpu;
   ppc_cpu_revision_t myCpuRevision;
 
   /* Set the character output function;  The application may override this */
   BSP_output_char = __bsp_outchar_to_memory;
 
-  printk("\nWelcome to RTEMS %s\n", _RTEMS_version );
+  printk("RTEMS %s\n", rtems_get_version_string());
 
   /*
    * Get CPU identification dynamically. Note that the get_ppc_cpu_type()
@@ -208,6 +205,15 @@ void bsp_start(void)
    */
   intrStackStart = CPU_UP_ALIGN((uint32_t)__bsp_ram_start);
   intrStackSize  = rtems_configuration_get_interrupt_stack_size();
+
+  sc = ppc_exc_initialize(PPC_INTERRUPT_DISABLE_MASK_DEFAULT,
+                          intrStackStart,
+                          intrStackSize);
+  if (sc != RTEMS_SUCCESSFUL) {
+    BSP_panic("Cannot initialize exceptions");
+  }
+
+  /* Let the user know what parameters we were compiled with */
   printk("                  Base/Start     End         Size\n"
          "RAM:              0x%08x              0x%x\n"
          "RTEMS:                        0x%08x\n"
@@ -224,6 +230,11 @@ void bsp_start(void)
          (uint32_t)MsgAreaBase,                           (uint32_t)MsgAreaSize,
          (uint32_t)__phy_ram_end);
 
+  /*
+   * Initialize RTEMS IRQ system
+   */
+  BSP_rtems_irq_mngt_init(0);
+
   /* Continue with application-specific initialization */
   app_bsp_start();
 }
diff --git a/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds b/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds
index cff5767..edd9f07 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds
+++ b/c/src/lib/libbsp/powerpc/virtex4/startup/linkcmds
@@ -20,16 +20,17 @@ HeapSize          = DEFINED(HeapSize)      ? HeapSize      : 0; /* 0=Use def */
 
 MEMORY
 {
-  VECTORS         : ORIGIN = 0x00000000, LENGTH = 8K
-  RAM             : ORIGIN = 0x00002000, LENGTH = 128M - 8K
+  VECTORS         : ORIGIN = 0x00000000, LENGTH = 12K
+  RAM             : ORIGIN = 0x00003000, LENGTH = 128M - 12K
 }
 
 
 SECTIONS
 {
-  __exeentry      = download_entry;
-  __exestart      = 0x100;
-  .vectors __exestart : { *(.vectors)                           } > VECTORS
+  bsp_exc_vector_base = 0x100;
+  __exeentry          = download_entry;
+  __exestart          = bsp_exc_vector_base;
+  .vectors bsp_exc_vector_base : { *(.vectors)                  } > VECTORS
 
   /* Read-only sections, merged into text segment: */
   .interp         : { *(.interp)                                } > RAM
@@ -227,7 +228,7 @@ _SDA_BASE_ = __SDATA_START__ + 0x8000;
 
   __exeend        = ALIGN(4);
   __rtems_end     = .;
-  .               = ALIGN(0x10);        /* Align to a cache-line boundary */
+  .               = ALIGN(0x20);        /* Align to a cache-line boundary */
   PROVIDE(__bsp_ram_start = .);
 
   /* Interrupt stack: aligned on a cache-line boundary */
@@ -235,12 +236,12 @@ _SDA_BASE_ = __SDATA_START__ + 0x8000;
   __intrStack     = .;
 
   /* Main stack lives here */
-  _stack          = ALIGN(0x10); 	/* Align to a cache-line boundary */
+  _stack          = ALIGN(0x20); 	/* Align to a cache-line boundary */
   .              += StackSize;
   __stack_base    = .;                  /* Initial stack builds downwards */
 
   /* RTEMS workspace: size specified by application */
-  WorkAreaBase    = ALIGN(0x10); 	/* Align to a cache-line boundary */
+  WorkAreaBase    = ALIGN(0x20); 	/* Align to a cache-line boundary */
 
   /* The heap comes after the work space */
 
@@ -249,7 +250,7 @@ _SDA_BASE_ = __SDATA_START__ + 0x8000;
 
   /* Message area for capturing early printk output */
   /* Placed here to be easily findable with a debugger */
-  MsgAreaBase     = __bsp_ram_end;
+  MsgAreaBase     = .;
   .              += MsgAreaSize;
 
   __phy_ram_end   = .;                  /* True end of physical memory */
diff --git a/c/src/lib/libbsp/powerpc/virtex4/startup/start.S b/c/src/lib/libbsp/powerpc/virtex4/startup/start.S
index c3053fb..55d5318 100644
--- a/c/src/lib/libbsp/powerpc/virtex4/startup/start.S
+++ b/c/src/lib/libbsp/powerpc/virtex4/startup/start.S
@@ -166,15 +166,15 @@ startupDow:
         * Tell the processor where the exception vector table will be.
         *------------------------------------------------------------------*/
         .extern SYM(__vectors)
-        lis     r0, __vectors@h    /* set EVPR exc. vector prefix */
-        mtspr   evpr,r0
+        lis     r2, __vectors@h    /* set EVPR exc. vector prefix */
+        mtevpr  r2
 
        /*-------------------------------------------------------------------
         * Set up the debug register to freeze timers on debug events.
         *------------------------------------------------------------------*/
-        mfdbcr0 r0
-        ori     r0,r0,0x0001
-        mtdbcr0 r0
+        mfdbcr0 r2
+        ori     r2,r2,0x0001
+        mtdbcr0 r2
         isync
 
         /* Select whether APU, Wait Enable, interrupts/exceptions and address
@@ -182,14 +182,20 @@ startupDow:
         lis     r0,0x00000000@h   /* SRR1 value */
         mtsrr1  r0                /* Potentially: 0x80000000 >> 6 is APU */
 
+        /* Configure timer facilities */
+        mttbl   r0                /* Clear Timebase to prevent Fixed Interval.. */
+        mttbu   r0                /* ..timer and Watchdog Timer exceptions */
+        mtpit   r0                /* Programmable interval timer */
+        li      r2,-1             /* -1 to clear TSR */
+        mttsr   r2                /* Timer status register */
+
         /* Clear out stale values in certain registers to avoid confusion */
+        mtcrf   0xff,r0           /* Need for simulation */
+        mtctr   r0                /* Counter register */
         mtxer   r0                /* Fixed-point exception register */
         mtesr   r0                /* Exception syndrome register */
         mtdear  r0                /* Data exception address register */
         mtmcsr  r0                /* Machine check syndrome register */
-        mtpit   r0                /* Programmable interval timer */
-        li      r0,-1             /* -1 to clear TSR */
-        mttsr   r0                /* Timer status register */
 
         /* Invalidate the data cache */
         li      r2,0              /* Start address */
-- 
1.7.1




More information about the devel mailing list