[PATCH 3/5] or1k: Avoid excessive ISR toggle in cache manager

Martin Erik Werner martinerikwerner.aac at gmail.com
Fri Nov 25 18:21:42 UTC 2016


Previously _ISR_Local_{disable,enable}() was executed twice for each
cache line operation, and since operations over the entire cache were
implemented by calling the single-line operations in a loop, this made
those operations rather costly.

Fix the double toggle by calling _OR1K_mtspr() directly, and by
removing the corresponding, now-unused _CPU_OR1K_Cache_* functions.

Fix the entire-cache operations by moving the ISR toggle outside of
the loop, and by calling _OR1K_mtspr() directly instead of going
through the single-line operations.
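
To illustrate, the entire-cache operations change roughly as in the
simplified sketch below (only the data cache flush is shown, and the
loop start value is elided here just as in the hunks further down):

  /* Before: interrupts were toggled around every single line */
  for (addr = ...; addr > 0; addr -= CPU_DATA_CACHE_ALIGNMENT) {
    _ISR_Local_disable (level);
    _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) addr);
    _ISR_Local_enable (level);
  }

  /* After: interrupts are disabled once around the whole loop */
  _ISR_Local_disable (level);
  for (addr = ...; addr > 0; addr -= CPU_DATA_CACHE_ALIGNMENT) {
    _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) addr);
  }
  _ISR_Local_enable (level);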

Also implement range functions, since otherwise the cache manager falls
back on looping over the single-line operations.
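
As a worked example (assuming a hypothetical 32-byte data cache line):
flushing 8 bytes starting at 0x1005 yields final_address = 0x100c and
rounds d_addr down to 0x1000, so a single DCBFR write covers the
range, while flushing 8 bytes starting at 0x101c spans two lines
(0x1000 and 0x1020) and issues two writes, all within one
interrupt-disabled section.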
---
 c/src/lib/libcpu/or1k/shared/cache/cache.c  | 161 ++++++++++++++++++++--------
 c/src/lib/libcpu/or1k/shared/cache/cache_.h |   6 ++
 2 files changed, 123 insertions(+), 44 deletions(-)

diff --git a/c/src/lib/libcpu/or1k/shared/cache/cache.c b/c/src/lib/libcpu/or1k/shared/cache/cache.c
index 5d7053f..e1b2b1d 100644
--- a/c/src/lib/libcpu/or1k/shared/cache/cache.c
+++ b/c/src/lib/libcpu/or1k/shared/cache/cache.c
@@ -1,7 +1,8 @@
 /*
- * COPYRIGHT (c) 2014 ÅAC Microtec AB <www.aacmicrotec.com>
+ * COPYRIGHT (c) 2014, 2016 ÅAC Microtec AB <www.aacmicrotec.com>
  * Contributor(s):
  *  Karol Gugala <kgugala at antmicro.com>
+ *  Martin Werner <martin.werner at aacmicrotec.com>
  *
  * COPYRIGHT (c) 2014 Hesham ALMatary <heshamelmatary at gmail.com>
  *
@@ -83,28 +84,6 @@ static inline void _CPU_OR1K_Cache_data_block_prefetch(const void *d_addr)
   _ISR_Local_enable(level);
 }
 
-static inline void _CPU_OR1K_Cache_data_block_flush(const void *d_addr)
-{
-  ISR_Level level;
-
-  _ISR_Local_disable (level);
-
-  _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) d_addr);
-
-  _ISR_Local_enable(level);
-}
-
-static inline void _CPU_OR1K_Cache_data_block_invalidate(const void *d_addr)
-{
-  ISR_Level level;
-
-  _ISR_Local_disable (level);
-
-  _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) d_addr);
-
-  _ISR_Local_enable(level);
-}
-
 static inline void _CPU_OR1K_Cache_data_block_writeback(const void *d_addr)
 {
   ISR_Level level;
@@ -139,18 +118,6 @@ static inline void _CPU_OR1K_Cache_instruction_block_prefetch
   _ISR_Local_enable(level);
 }
 
-static inline void _CPU_OR1K_Cache_instruction_block_invalidate
-(const void *d_addr)
-{
-  ISR_Level level;
-
-  _ISR_Local_disable (level);
-
-  _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) d_addr);
-
-  _ISR_Local_enable(level);
-}
-
 static inline void _CPU_OR1K_Cache_instruction_block_lock
 (const void *d_addr)
 {
@@ -171,7 +138,7 @@ void _CPU_cache_flush_1_data_line(const void *d_addr)
 
   _ISR_Local_disable (level);
 
-  _CPU_OR1K_Cache_data_block_flush(d_addr);
+  _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) d_addr);
 
   //__asm__ volatile("l.csync");
 
@@ -184,7 +151,7 @@ void _CPU_cache_invalidate_1_data_line(const void *d_addr)
 
   _ISR_Local_disable (level);
 
-  _CPU_OR1K_Cache_data_block_invalidate(d_addr);
+  _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) d_addr);
 
   _ISR_Local_enable(level);
 }
@@ -205,7 +172,7 @@ void _CPU_cache_invalidate_1_instruction_line(const void *d_addr)
 
   _ISR_Local_disable (level);
 
-  _CPU_OR1K_Cache_instruction_block_invalidate(d_addr);
+  _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) d_addr);
 
   _ISR_Local_enable(level);
 }
@@ -222,7 +189,10 @@ void _CPU_cache_unfreeze_instruction(void)
 
 void _CPU_cache_flush_entire_data(void)
 {
-  int addr;
+  size_t addr;
+  ISR_Level level;
+
+  _ISR_Local_disable (level);
 
   /* We have only 0 level cache so we do not need to invalidate others */
   for (
@@ -230,13 +200,18 @@ void _CPU_cache_flush_entire_data(void)
       addr > 0;
       addr -= CPU_DATA_CACHE_ALIGNMENT
   ) {
-    _CPU_OR1K_Cache_data_block_flush((uintptr_t) addr);
+    _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) addr);
   }
+
+  _ISR_Local_enable (level);
 }
 
 void _CPU_cache_invalidate_entire_data(void)
 {
-  int addr;
+  size_t addr;
+  ISR_Level level;
+
+  _ISR_Local_disable (level);
 
   /* We have only 0 level cache so we do not need to invalidate others */
   for (
@@ -244,13 +219,18 @@ void _CPU_cache_invalidate_entire_data(void)
       addr > 0;
       addr -= CPU_DATA_CACHE_ALIGNMENT
   ) {
-    _CPU_cache_invalidate_1_data_line((uintptr_t) addr);
+    _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) addr);
   }
+
+  _ISR_Local_enable (level);
 }
 
 void _CPU_cache_invalidate_entire_instruction(void)
 {
-  int addr;
+  size_t addr;
+  ISR_Level level;
+
+  _ISR_Local_disable (level);
 
   /* We have only 0 level cache so we do not need to invalidate others */
   for (
@@ -258,7 +238,7 @@ void _CPU_cache_invalidate_entire_instruction(void)
       addr > 0;
       addr -= CPU_INSTRUCTION_CACHE_ALIGNMENT
   ) {
-    _CPU_cache_invalidate_1_instruction_line((uintptr_t) addr);
+    _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) addr);
   }
 
   /* Flush instructions out of instruction buffer */
@@ -267,6 +247,99 @@ void _CPU_cache_invalidate_entire_instruction(void)
   __asm__ volatile("l.nop");
   __asm__ volatile("l.nop");
   __asm__ volatile("l.nop");
+
+  _ISR_Local_enable (level);
+}
+
+/*
+ * The range functions are copied almost verbatim from the generic
+ * implementations in c/src/lib/libcpu/shared/src/cache_manager.c. The main
+ * modification here is avoiding repeated off/on toggling of the ISR for each
+ * cache line operation.
+ */
+
+void _CPU_cache_flush_data_range(const void *d_addr, size_t n_bytes)
+{
+  const void * final_address;
+  ISR_Level level;
+
+ /*
+  * Set d_addr to the beginning of the cache line; final_address indicates
+  * the last address_t which needs to be pushed. Increment d_addr and push
+  * the resulting line until final_address is passed.
+  */
+
+  if( n_bytes == 0 )
+    /* Do nothing if number of bytes to flush is zero */
+    return;
+
+  final_address = (void *)((size_t)d_addr + n_bytes - 1);
+  d_addr = (void *)((size_t)d_addr & ~(CPU_DATA_CACHE_ALIGNMENT - 1));
+
+  _ISR_Local_disable (level);
+
+  while( d_addr <= final_address )  {
+    _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) d_addr);
+    d_addr = (void *)((size_t)d_addr + CPU_DATA_CACHE_ALIGNMENT);
+  }
+
+  _ISR_Local_enable (level);
+}
+
+void _CPU_cache_invalidate_data_range(const void *d_addr, size_t n_bytes)
+{
+  const void * final_address;
+  ISR_Level level;
+
+ /*
+  * Set d_addr to the beginning of the cache line; final_address indicates
+  * the last address_t which needs to be pushed. Increment d_addr and push
+  * the resulting line until final_address is passed.
+  */
+
+  if( n_bytes == 0 )
+    /* Do nothing if number of bytes to invalidate is zero */
+    return;
+
+  final_address = (void *)((size_t)d_addr + n_bytes - 1);
+  d_addr = (void *)((size_t)d_addr & ~(CPU_DATA_CACHE_ALIGNMENT - 1));
+
+  _ISR_Local_disable (level);
+
+  while( d_addr <= final_address )  {
+    _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) d_addr);
+    d_addr = (void *)((size_t)d_addr + CPU_DATA_CACHE_ALIGNMENT);
+  }
+
+  _ISR_Local_enable (level);
+}
+
+void _CPU_cache_invalidate_instruction_range(const void *i_addr, size_t n_bytes)
+{
+  const void * final_address;
+  ISR_Level level;
+
+ /*
+  * Set i_addr to the beginning of the cache line; final_address indicates
+  * the last address_t which needs to be pushed. Increment i_addr and push
+  * the resulting line until final_address is passed.
+  */
+
+  if( n_bytes == 0 )
+    /* Do nothing if number of bytes to invalidate is zero */
+    return;
+
+  final_address = (void *)((size_t)i_addr + n_bytes - 1);
+  i_addr = (void *)((size_t)i_addr & ~(CPU_INSTRUCTION_CACHE_ALIGNMENT - 1));
+
+  _ISR_Local_disable (level);
+
+  while( i_addr <= final_address )  {
+    _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) i_addr);
+    i_addr = (void *)((size_t)i_addr + CPU_INSTRUCTION_CACHE_ALIGNMENT);
+  }
+
+  _ISR_Local_enable (level);
 }
 
 void _CPU_cache_enable_data(void)
diff --git a/c/src/lib/libcpu/or1k/shared/cache/cache_.h b/c/src/lib/libcpu/or1k/shared/cache/cache_.h
index 0ea939f..8d96595 100644
--- a/c/src/lib/libcpu/or1k/shared/cache/cache_.h
+++ b/c/src/lib/libcpu/or1k/shared/cache/cache_.h
@@ -8,5 +8,11 @@
 #include <bsp/cache_.h>
 #include <libcpu/cache.h>
 
+#define CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS 1
+
+void _CPU_cache_flush_data_range(const void *d_addr, size_t n_bytes);
+void _CPU_cache_invalidate_data_range(const void *d_addr, size_t n_bytes);
+void _CPU_cache_invalidate_instruction_range(const void *i_addr, size_t n_bytes);
+
 #endif
 /* end of include file */
-- 
2.1.4


