[PATCH 3/5] or1k: Avoid excessive ISR toggle in cache manager
Martin Erik Werner
martinerikwerner.aac at gmail.com
Fri Nov 25 18:21:42 UTC 2016
Previously _ISR_Local_{disable,enable}() was executed twice for each
cache line operation, and since operations over the entire cache were
implemented by calling the single-line operations in a loop, this made
those operations rather costly.
Fix the double-toggle by calling _OR1K_mtspr() directly, and removing
the now-unused corresponding _CPU_OR1K_Cache_* functions.
Fix the entire-cache operations by moving the ISR toggle outside of
the loop, and by calling _OR1K_mtspr() directly instead of the
single-line operations.
Also implement range functions, since otherwise the cache manager falls
back on looping over the single-line operations.
---
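A minimal sketch of the before/after pattern this patch applies. The
macros and helpers below are compilable stand-ins for the real
RTEMS/or1k primitives used in the diff (_ISR_Local_disable() /
_ISR_Local_enable(), _OR1K_mtspr(), CPU_DATA_CACHE_SIZE,
CPU_DATA_CACHE_ALIGNMENT, CPU_OR1K_SPR_DCBFR), so it only illustrates
the control flow and touches no hardware.

#include <stdint.h>
#include <stddef.h>

#define CACHE_SIZE      8192  /* stand-in for CPU_DATA_CACHE_SIZE */
#define CACHE_ALIGNMENT 32    /* stand-in for CPU_DATA_CACHE_ALIGNMENT */
#define SPR_DCBFR       0     /* stand-in for CPU_OR1K_SPR_DCBFR */

void spr_write(uint32_t spr, uintptr_t val) { (void)spr; (void)val; } /* _OR1K_mtspr() */
void irq_disable(uint32_t *level) { *level = 0; }                     /* _ISR_Local_disable() */
void irq_enable(uint32_t level) { (void)level; }                      /* _ISR_Local_enable() */

/* Old shape: each line operation masked and unmasked interrupts itself,
 * and the entire-cache operation called it once per cache line, so the
 * mask/unmask pair ran for every single line. */
void flush_one_line_old(uintptr_t addr)
{
  uint32_t level;
  irq_disable(&level);
  spr_write(SPR_DCBFR, addr);
  irq_enable(level);
}

/* New shape: mask once, issue the SPR write directly for every line,
 * unmask once after the loop. */
void flush_entire_data_new(void)
{
  size_t addr;
  uint32_t level;

  irq_disable(&level);
  for (addr = CACHE_SIZE; addr > 0; addr -= CACHE_ALIGNMENT) {
    spr_write(SPR_DCBFR, (uintptr_t) addr);
  }
  irq_enable(level);
}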
c/src/lib/libcpu/or1k/shared/cache/cache.c | 161 ++++++++++++++++++++--------
c/src/lib/libcpu/or1k/shared/cache/cache_.h | 6 ++
2 files changed, 123 insertions(+), 44 deletions(-)
diff --git a/c/src/lib/libcpu/or1k/shared/cache/cache.c b/c/src/lib/libcpu/or1k/shared/cache/cache.c
index 5d7053f..e1b2b1d 100644
--- a/c/src/lib/libcpu/or1k/shared/cache/cache.c
+++ b/c/src/lib/libcpu/or1k/shared/cache/cache.c
@@ -1,7 +1,8 @@
/*
- * COPYRIGHT (c) 2014 ÅAC Microtec AB <www.aacmicrotec.com>
+ * COPYRIGHT (c) 2014, 2016 ÅAC Microtec AB <www.aacmicrotec.com>
* Contributor(s):
* Karol Gugala <kgugala at antmicro.com>
+ * Martin Werner <martin.werner at aacmicrotec.com>
*
* COPYRIGHT (c) 2014 Hesham ALMatary <heshamelmatary at gmail.com>
*
@@ -83,28 +84,6 @@ static inline void _CPU_OR1K_Cache_data_block_prefetch(const void *d_addr)
_ISR_Local_enable(level);
}
-static inline void _CPU_OR1K_Cache_data_block_flush(const void *d_addr)
-{
- ISR_Level level;
-
- _ISR_Local_disable (level);
-
- _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) d_addr);
-
- _ISR_Local_enable(level);
-}
-
-static inline void _CPU_OR1K_Cache_data_block_invalidate(const void *d_addr)
-{
- ISR_Level level;
-
- _ISR_Local_disable (level);
-
- _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) d_addr);
-
- _ISR_Local_enable(level);
-}
-
static inline void _CPU_OR1K_Cache_data_block_writeback(const void *d_addr)
{
ISR_Level level;
@@ -139,18 +118,6 @@ static inline void _CPU_OR1K_Cache_instruction_block_prefetch
_ISR_Local_enable(level);
}
-static inline void _CPU_OR1K_Cache_instruction_block_invalidate
-(const void *d_addr)
-{
- ISR_Level level;
-
- _ISR_Local_disable (level);
-
- _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) d_addr);
-
- _ISR_Local_enable(level);
-}
-
static inline void _CPU_OR1K_Cache_instruction_block_lock
(const void *d_addr)
{
@@ -171,7 +138,7 @@ void _CPU_cache_flush_1_data_line(const void *d_addr)
_ISR_Local_disable (level);
- _CPU_OR1K_Cache_data_block_flush(d_addr);
+ _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) d_addr);
//__asm__ volatile("l.csync");
@@ -184,7 +151,7 @@ void _CPU_cache_invalidate_1_data_line(const void *d_addr)
_ISR_Local_disable (level);
- _CPU_OR1K_Cache_data_block_invalidate(d_addr);
+ _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) d_addr);
_ISR_Local_enable(level);
}
@@ -205,7 +172,7 @@ void _CPU_cache_invalidate_1_instruction_line(const void *d_addr)
_ISR_Local_disable (level);
- _CPU_OR1K_Cache_instruction_block_invalidate(d_addr);
+ _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) d_addr);
_ISR_Local_enable(level);
}
@@ -222,7 +189,10 @@ void _CPU_cache_unfreeze_instruction(void)
void _CPU_cache_flush_entire_data(void)
{
- int addr;
+ size_t addr;
+ ISR_Level level;
+
+ _ISR_Local_disable (level);
/* We have only 0 level cache so we do not need to invalidate others */
for (
@@ -230,13 +200,18 @@ void _CPU_cache_flush_entire_data(void)
addr > 0;
addr -= CPU_DATA_CACHE_ALIGNMENT
) {
- _CPU_OR1K_Cache_data_block_flush((uintptr_t) addr);
+ _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) addr);
}
+
+ _ISR_Local_enable (level);
}
void _CPU_cache_invalidate_entire_data(void)
{
- int addr;
+ size_t addr;
+ ISR_Level level;
+
+ _ISR_Local_disable (level);
/* We have only 0 level cache so we do not need to invalidate others */
for (
@@ -244,13 +219,18 @@ void _CPU_cache_invalidate_entire_data(void)
addr > 0;
addr -= CPU_DATA_CACHE_ALIGNMENT
) {
- _CPU_cache_invalidate_1_data_line((uintptr_t) addr);
+ _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) addr);
}
+
+ _ISR_Local_enable (level);
}
void _CPU_cache_invalidate_entire_instruction(void)
{
- int addr;
+ size_t addr;
+ ISR_Level level;
+
+ _ISR_Local_disable (level);
/* We have only 0 level cache so we do not need to invalidate others */
for (
@@ -258,7 +238,7 @@ void _CPU_cache_invalidate_entire_instruction(void)
addr > 0;
addr -= CPU_INSTRUCTION_CACHE_ALIGNMENT
) {
- _CPU_cache_invalidate_1_instruction_line((uintptr_t) addr);
+ _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) addr);
}
/* Flush instructions out of instruction buffer */
@@ -267,6 +247,99 @@ void _CPU_cache_invalidate_entire_instruction(void)
__asm__ volatile("l.nop");
__asm__ volatile("l.nop");
__asm__ volatile("l.nop");
+
+ _ISR_Local_enable (level);
+}
+
+/*
+ * The range functions are copied almost verbatim from the generic
+ * implementations in c/src/lib/libcpu/shared/src/cache_manager.c. The main
+ * modification here is avoiding repeated off/on toggling of the ISR for each
+ * cache line operation.
+ */
+
+void _CPU_cache_flush_data_range(const void *d_addr, size_t n_bytes)
+{
+ const void * final_address;
+ ISR_Level level;
+
+ /*
+ * Set d_addr to the beginning of the cache line; final_address indicates
+ * the last address_t which needs to be pushed. Increment d_addr and push
+ * the resulting line until final_address is passed.
+ */
+
+ if( n_bytes == 0 )
+ /* Do nothing if number of bytes to flush is zero */
+ return;
+
+ final_address = (void *)((size_t)d_addr + n_bytes - 1);
+ d_addr = (void *)((size_t)d_addr & ~(CPU_DATA_CACHE_ALIGNMENT - 1));
+
+ _ISR_Local_disable (level);
+
+ while( d_addr <= final_address ) {
+ _OR1K_mtspr(CPU_OR1K_SPR_DCBFR, (uintptr_t) d_addr);
+ d_addr = (void *)((size_t)d_addr + CPU_DATA_CACHE_ALIGNMENT);
+ }
+
+ _ISR_Local_enable (level);
+}
+
+void _CPU_cache_invalidate_data_range(const void *d_addr, size_t n_bytes)
+{
+ const void * final_address;
+ ISR_Level level;
+
+ /*
+ * Set d_addr to the beginning of the cache line; final_address indicates
+ * the last address_t which needs to be pushed. Increment d_addr and push
+ * the resulting line until final_address is passed.
+ */
+
+ if( n_bytes == 0 )
+ /* Do nothing if number of bytes to invalidate is zero */
+ return;
+
+ final_address = (void *)((size_t)d_addr + n_bytes - 1);
+ d_addr = (void *)((size_t)d_addr & ~(CPU_DATA_CACHE_ALIGNMENT - 1));
+
+ _ISR_Local_disable (level);
+
+ while( d_addr <= final_address ) {
+ _OR1K_mtspr(CPU_OR1K_SPR_DCBIR, (uintptr_t) d_addr);
+ d_addr = (void *)((size_t)d_addr + CPU_DATA_CACHE_ALIGNMENT);
+ }
+
+ _ISR_Local_enable (level);
+}
+
+void _CPU_cache_invalidate_instruction_range(const void *i_addr, size_t n_bytes)
+{
+ const void * final_address;
+ ISR_Level level;
+
+ /*
+ * Set i_addr to the beginning of the cache line; final_address indicates
+ * the last address_t which needs to be pushed. Increment i_addr and push
+ * the resulting line until final_address is passed.
+ */
+
+ if( n_bytes == 0 )
+ /* Do nothing if number of bytes to invalidate is zero */
+ return;
+
+ final_address = (void *)((size_t)i_addr + n_bytes - 1);
+ i_addr = (void *)((size_t)i_addr & ~(CPU_INSTRUCTION_CACHE_ALIGNMENT - 1));
+
+ _ISR_Local_disable (level);
+
+ while( i_addr <= final_address ) {
+ _OR1K_mtspr(CPU_OR1K_SPR_ICBIR, (uintptr_t) i_addr);
+ i_addr = (void *)((size_t)i_addr + CPU_INSTRUCTION_CACHE_ALIGNMENT);
+ }
+
+ _ISR_Local_enable (level);
}
void _CPU_cache_enable_data(void)
diff --git a/c/src/lib/libcpu/or1k/shared/cache/cache_.h b/c/src/lib/libcpu/or1k/shared/cache/cache_.h
index 0ea939f..8d96595 100644
--- a/c/src/lib/libcpu/or1k/shared/cache/cache_.h
+++ b/c/src/lib/libcpu/or1k/shared/cache/cache_.h
@@ -8,5 +8,11 @@
#include <bsp/cache_.h>
#include <libcpu/cache.h>
+#define CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS 1
+
+void _CPU_cache_flush_data_range(const void *d_addr, size_t n_bytes);
+void _CPU_cache_invalidate_data_range(const void *d_addr, size_t n_bytes);
+void _CPU_cache_invalidate_instruction_range(const void *i_addr, size_t n_bytes);
+
#endif
/* end of include file */
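The CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS define added above is
what steers the generic cache manager to these range functions instead
of its single-line fallback loop. A compilable toy illustration of that
compile-time dispatch (all names other than the
CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS macro are hypothetical, not
the actual cache_manager.c code):

#include <stddef.h>

#define CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS 1
#define LINE_SIZE 32  /* hypothetical cache line size */

void cpu_flush_one_line(const void *addr) { (void)addr; }                  /* per-line op */
void cpu_flush_range(const void *addr, size_t n) { (void)addr; (void)n; }  /* range op */

void cache_flush_multiple_lines(const void *addr, size_t n_bytes)
{
  if (n_bytes == 0)
    return;

#if defined(CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS)
  /* The port provides a range function: one call, and the port can keep
   * interrupts masked just once around its internal loop. */
  cpu_flush_range(addr, n_bytes);
#else
  /* Fallback: iterate the single-line operation, which masks and unmasks
   * interrupts for every line it touches. */
  {
    const char *p = (const char *)((size_t)addr & ~(size_t)(LINE_SIZE - 1));
    const char *end = (const char *)addr + n_bytes;
    for (; p < end; p += LINE_SIZE)
      cpu_flush_one_line(p);
  }
#endif
}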
--
2.1.4