[PATCH-v3 3/7] score: Add SMP support to the cache manager

Daniel Cederman cederman at gaisler.com
Mon Jul 14 15:40:20 UTC 2014


Add functions that allow the user to specify which cores should perform
the cache operation. An SMP message is sent to all the specified cores
and the caller waits until all of them have acknowledged that they have
performed the requested cache operation. If
CPU_CACHE_NO_INSTRUCTION_CACHE_SNOOPING is defined, the instruction
cache invalidation functions perform the operation on all cores using
the message-based mechanism described above.
---
 c/src/lib/libbsp/sparc/leon3/include/cache_.h |    2 +
 c/src/lib/libcpu/shared/src/cache_manager.c   |  258 ++++++++++++++++++++++++-
 cpukit/rtems/include/rtems/rtems/cache.h      |   82 ++++++++
 cpukit/score/include/rtems/score/smpimpl.h    |   19 ++
 4 files changed, 355 insertions(+), 6 deletions(-)

diff --git a/c/src/lib/libbsp/sparc/leon3/include/cache_.h b/c/src/lib/libbsp/sparc/leon3/include/cache_.h
index 70c1e2c..63790c1 100644
--- a/c/src/lib/libbsp/sparc/leon3/include/cache_.h
+++ b/c/src/lib/libbsp/sparc/leon3/include/cache_.h
@@ -26,6 +26,8 @@ extern "C" {
 
 #define CPU_CACHE_SUPPORT_PROVIDES_CACHE_SIZE_FUNCTIONS
 
+#define CPU_CACHE_NO_INSTRUCTION_CACHE_SNOOPING
+
 #define CPU_INSTRUCTION_CACHE_ALIGNMENT 64
 
 #define CPU_DATA_CACHE_ALIGNMENT 64
diff --git a/c/src/lib/libcpu/shared/src/cache_manager.c b/c/src/lib/libcpu/shared/src/cache_manager.c
index 420a013..43ba497 100644
--- a/c/src/lib/libcpu/shared/src/cache_manager.c
+++ b/c/src/lib/libcpu/shared/src/cache_manager.c
@@ -37,6 +37,214 @@
 
 #include <rtems.h>
 #include "cache_.h"
+#include <rtems/score/smpimpl.h>
+#include <rtems/score/smplock.h>
+#include <rtems/score/chainimpl.h>
+#include <rtems/score/sysstate.h>
+
+#if defined( RTEMS_SMP )
+
+typedef void (*Cache_manager_Function_ptr)(const void *d_addr, size_t n_bytes);
+
+typedef struct {
+  Chain_Node Node;
+  Cache_manager_Function_ptr func;
+  const void *addr;
+  size_t size;
+  cpu_set_t *recipients;
+  size_t setsize;
+  Atomic_Ulong done;
+} Cache_manager_SMP_node;
+
+typedef struct {
+  SMP_lock_Control Lock;
+  Chain_Control List;
+} Cache_manager_SMP_control;
+
+static Cache_manager_SMP_control _Cache_manager_SMP_control = {
+  .Lock = SMP_LOCK_INITIALIZER("cachemgr"),
+  .List = CHAIN_INITIALIZER_EMPTY(_Cache_manager_SMP_control.List)
+};
+
+void
+_SMP_Cache_manager_message_handler(void)
+{
+  SMP_lock_Context lock_context;
+  Cache_manager_SMP_node *node;
+  Cache_manager_SMP_node *next;
+  uint32_t cpu_self_idx;
+
+  _SMP_lock_ISR_disable_and_acquire( &_Cache_manager_SMP_control.Lock,
+      &lock_context );
+  cpu_self_idx = _SMP_Get_current_processor();
+
+  node = (Cache_manager_SMP_node*)_Chain_First( 
+      &_Cache_manager_SMP_control.List );
+  while ( !_Chain_Is_tail( &_Cache_manager_SMP_control.List, &node->Node ) ) {
+    next = (Cache_manager_SMP_node*)_Chain_Next( &node->Node );
+    if ( CPU_ISSET_S( cpu_self_idx, node->setsize, node->recipients ) ) {
+      CPU_CLR_S( cpu_self_idx, node->setsize, node->recipients );
+
+      node->func( node->addr, node->size );
+
+      if ( CPU_COUNT_S( node->setsize, node->recipients ) == 0 ) {
+        _Chain_Extract_unprotected( &node->Node );
+        _Atomic_Store_ulong( &node->done, 1, ATOMIC_ORDER_RELEASE );
+      }
+    }
+    node = next;
+  }
+
+  _SMP_lock_Release_and_ISR_enable( &_Cache_manager_SMP_control.Lock,
+      &lock_context );
+}
+
+#if defined(CPU_DATA_CACHE_ALIGNMENT) || \
+    (defined(CPU_INSTRUCTION_CACHE_ALIGNMENT) && \
+    defined(CPU_CACHE_NO_INSTRUCTION_CACHE_SNOOPING))
+
+static void
+_Cache_manager_Process_cache_messages( void )
+{
+  unsigned long message;
+  Per_CPU_Control *cpu_self;
+  ISR_Level isr_level;
+
+  _ISR_Disable_without_giant( isr_level );
+
+  cpu_self = _Per_CPU_Get();
+
+  message = _Atomic_Load_ulong( &cpu_self->message, ATOMIC_ORDER_RELAXED );
+
+  if ( message & SMP_MESSAGE_CACHE_MANAGER ) {
+    if ( _Atomic_Compare_exchange_ulong( &cpu_self->message, &message,
+        message & ~SMP_MESSAGE_CACHE_MANAGER, ATOMIC_ORDER_RELAXED,
+        ATOMIC_ORDER_RELAXED ) ) {
+      _SMP_Cache_manager_message_handler();
+    }
+  }
+
+  _ISR_Enable_without_giant( isr_level );
+}
+
+/*
+ * We cannot make this function static because we need to access it
+ * from the test program.
+ */
+void
+_Cache_manager_Send_smp_msg(
+    const size_t setsize,
+    const cpu_set_t *set,
+    Cache_manager_Function_ptr func,
+    const void * addr,
+    size_t size
+  );
+
+void
+_Cache_manager_Send_smp_msg(
+    const size_t setsize,
+    const cpu_set_t *set,
+    Cache_manager_Function_ptr func,
+    const void * addr,
+    size_t size
+  )
+{
+  uint32_t i;
+  Cache_manager_SMP_node node;
+  size_t set_size = CPU_ALLOC_SIZE( _SMP_Get_processor_count() );
+  char cpu_set_copy[set_size];
+  SMP_lock_Context lock_context;
+
+  if ( ! _System_state_Is_up( _System_state_Get() ) ) {
+    func( addr, size );
+    return;
+  }
+
+  memset( cpu_set_copy, 0, set_size );
+  if ( set == NULL ) {
+    for ( i = 0; i < _SMP_Get_processor_count(); ++i )
+      CPU_SET_S( i, set_size, (cpu_set_t *)cpu_set_copy );
+  } else {
+    for ( i = 0; i < _SMP_Get_processor_count(); ++i )
+      if ( CPU_ISSET_S( i, set_size, set ) )
+        CPU_SET_S( i, set_size, (cpu_set_t *)cpu_set_copy );
+  }
+
+  node.func = func;
+  node.addr = addr;
+  node.size = size;
+  node.setsize = set_size;
+  node.recipients = (cpu_set_t *)cpu_set_copy;
+  _Atomic_Store_ulong( &node.done, 0, ATOMIC_ORDER_RELAXED );
+
+
+  _SMP_lock_ISR_disable_and_acquire( &_Cache_manager_SMP_control.Lock,
+      &lock_context );
+  _Chain_Prepend_unprotected( &_Cache_manager_SMP_control.List, &node.Node );
+  _SMP_lock_Release_and_ISR_enable( &_Cache_manager_SMP_control.Lock, 
+      &lock_context );
+
+  _SMP_Send_message_multicast( set_size, node.recipients,
+      SMP_MESSAGE_CACHE_MANAGER );
+
+  _Cache_manager_Process_cache_messages();
+
+  while ( !_Atomic_Load_ulong( &node.done, ATOMIC_ORDER_ACQUIRE ) );
+}
+#endif
+
+void
+rtems_cache_flush_multiple_data_lines_processor_set(
+  const void *addr,
+  size_t size,
+  const size_t setsize,
+  const cpu_set_t *set
+)
+{
+#if defined(CPU_DATA_CACHE_ALIGNMENT)
+  _Cache_manager_Send_smp_msg( setsize, set,
+      rtems_cache_flush_multiple_data_lines, addr, size );
+#endif
+}
+
+void
+rtems_cache_invalidate_multiple_data_lines_processor_set(
+  const void *addr,
+  size_t size,
+  const size_t setsize,
+  const cpu_set_t *set
+)
+{
+#if defined(CPU_DATA_CACHE_ALIGNMENT)
+  _Cache_manager_Send_smp_msg( setsize, set,
+      rtems_cache_invalidate_multiple_data_lines, addr, size );
+#endif
+}
+
+void
+rtems_cache_flush_entire_data_processor_set(
+  const size_t setsize,
+  const cpu_set_t *set
+)
+{
+#if defined(CPU_DATA_CACHE_ALIGNMENT)
+  _Cache_manager_Send_smp_msg( setsize, set,
+      (Cache_manager_Function_ptr)rtems_cache_flush_entire_data, 0, 0 );
+#endif
+}
+
+void
+rtems_cache_invalidate_entire_data_processor_set(
+  const size_t setsize,
+  const cpu_set_t *set
+)
+{
+#if defined(CPU_DATA_CACHE_ALIGNMENT)
+  _Cache_manager_Send_smp_msg( setsize, set,
+      (Cache_manager_Function_ptr)rtems_cache_invalidate_entire_data, 0, 0 );
+#endif
+}
+#endif
 
 /*
  * THESE FUNCTIONS ONLY HAVE BODIES IF WE HAVE A DATA CACHE
@@ -219,18 +427,21 @@ rtems_cache_disable_data( void )
  * THESE FUNCTIONS ONLY HAVE BODIES IF WE HAVE AN INSTRUCTION CACHE
  */
 
+
+
 /*
  * This function is responsible for performing an instruction cache
  * invalidate. It must determine how many cache lines need to be invalidated
  * and then perform the invalidations.
  */
-void
-rtems_cache_invalidate_multiple_instruction_lines( const void * i_addr, size_t n_bytes )
+
+#if !defined(CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS)
+static void
+_invalidate_multiple_instruction_lines_no_range_functions(
+  const void * i_addr,
+  size_t n_bytes
+)
 {
-#if defined(CPU_INSTRUCTION_CACHE_ALIGNMENT)
-#if defined(CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS)
-  _CPU_cache_invalidate_instruction_range( i_addr, n_bytes );
-#else
   const void * final_address;
 
  /*
@@ -249,6 +460,35 @@ rtems_cache_invalidate_multiple_instruction_lines( const void * i_addr, size_t n
     _CPU_cache_invalidate_1_instruction_line( i_addr );
     i_addr = (void *)((size_t)i_addr + CPU_INSTRUCTION_CACHE_ALIGNMENT);
   }
+}
+#endif
+
+void
+rtems_cache_invalidate_multiple_instruction_lines(
+  const void * i_addr,
+  size_t n_bytes
+)
+{
+#if defined(CPU_INSTRUCTION_CACHE_ALIGNMENT)
+#if defined(CPU_CACHE_SUPPORT_PROVIDES_RANGE_FUNCTIONS)
+
+#if defined(RTEMS_SMP) && defined(CPU_CACHE_NO_INSTRUCTION_CACHE_SNOOPING)
+  _Cache_manager_Send_smp_msg( 0, 0, _CPU_cache_invalidate_instruction_range,
+      i_addr, n_bytes );
+#else
+  _CPU_cache_invalidate_instruction_range( i_addr, n_bytes );
+#endif
+
+#else
+
+#if defined(RTEMS_SMP) && defined(CPU_CACHE_NO_INSTRUCTION_CACHE_SNOOPING)
+  _Cache_manager_Send_smp_msg( 0, 0,
+      _invalidate_multiple_instruction_lines_no_range_functions, i_addr,
+      n_bytes );
+#else
+  _invalidate_multiple_instruction_lines_no_range_functions( i_addr, n_bytes );
+#endif
+
 #endif
 #endif
 }
@@ -266,8 +506,14 @@ rtems_cache_invalidate_entire_instruction( void )
   * Call the CPU-specific routine
   */
 
+#if defined(RTEMS_SMP) && defined(CPU_CACHE_NO_INSTRUCTION_CACHE_SNOOPING)
+  _Cache_manager_Send_smp_msg( 0, 0,
+      (Cache_manager_Function_ptr)_CPU_cache_invalidate_entire_instruction,
+      0, 0 );
+#else
  _CPU_cache_invalidate_entire_instruction();
 #endif
+#endif
 }
 
 
diff --git a/cpukit/rtems/include/rtems/rtems/cache.h b/cpukit/rtems/include/rtems/rtems/cache.h
index 05f6612..ce399c6 100644
--- a/cpukit/rtems/include/rtems/rtems/cache.h
+++ b/cpukit/rtems/include/rtems/rtems/cache.h
@@ -113,6 +113,9 @@ void rtems_cache_invalidate_multiple_data_lines(
  *
  * The cache lines covering the area are marked as invalid.  A later
  * instruction fetch from the area will result in a load from memory.
+ * In SMP configurations, on processors without instruction cache snooping,
+ * this operation invalidates the instruction cache lines on all processors.
+ * It should not be called from interrupt context in that case.
  *
  * @param[in] addr The start address of the area to invalidate.
  * @param[in] size The size in bytes of the area to invalidate.
@@ -188,6 +191,85 @@ void rtems_cache_disable_instruction( void );
  */
 void *rtems_cache_aligned_malloc ( size_t nbytes );
 
+#if defined( RTEMS_SMP )
+
+/**
+ * @brief Flushes multiple data cache lines for a set of processors
+ *
+ * Dirty cache lines covering the area are transferred to memory.
+ * Depending on the cache implementation this may mark the lines as invalid.
+ *
+ * This operation should not be called from interrupt context.
+ *
+ * @param[in] addr The start address of the area to flush.
+ * @param[in] size The size in bytes of the area to flush.
+ * @param[in] setsize The size of the processor set.
+ * @param[in] set The target processor set.
+ */
+void rtems_cache_flush_multiple_data_lines_processor_set(
+  const void *addr,
+  size_t size,
+  const size_t setsize,
+  const cpu_set_t *set
+);
+
+/**
+ * @brief Invalidates multiple data cache lines for a set of processors
+ *
+ * The cache lines covering the area are marked as invalid.  A later read
+ * access in the area will load the data from memory.
+ *
+ * In case the area is not aligned on cache line boundaries, then this
+ * operation may destroy unrelated data.
+ *
+ * This operation should not be called from interrupt context.
+ *
+ * @param[in] addr The start address of the area to invalidate.
+ * @param[in] size The size in bytes of the area to invalidate.
+ * @param[in] setsize The size of the processor set.
+ * @param[in] set The target processor set.
+ */
+void rtems_cache_invalidate_multiple_data_lines_processor_set(
+  const void *addr,
+  size_t size,
+  const size_t setsize,
+  const cpu_set_t *set
+);
+
+/**
+ * @brief Flushes the entire data cache for a set of processors
+ *
+ * This operation should not be called from interrupt context.
+ *
+ * @see rtems_cache_flush_multiple_data_lines().
+ *
+ * @param[in] setsize The size of the processor set.
+ * @param[in] set The target processor set.
+ */
+void rtems_cache_flush_entire_data_processor_set(
+  const size_t setsize,
+  const cpu_set_t *set
+);
+
+/**
+ * @brief Invalidates the entire data cache for a set of processors
+ *
+ * This function is responsible for performing a data cache
+ * invalidation.  It invalidates the entire data cache on each
+ * processor in the set.
+ *
+ * This operation should not be called from interrupt context.
+ *
+ * @param[in] setsize The size of the processor set.
+ * @param[in] set The target processor set.
+ */
+void rtems_cache_invalidate_entire_data_processor_set(
+  const size_t setsize,
+  const cpu_set_t *set
+);
+
+#endif
+
 /**@}*/
 
 #ifdef __cplusplus
diff --git a/cpukit/score/include/rtems/score/smpimpl.h b/cpukit/score/include/rtems/score/smpimpl.h
index cbc6428..dca8a6b 100644
--- a/cpukit/score/include/rtems/score/smpimpl.h
+++ b/cpukit/score/include/rtems/score/smpimpl.h
@@ -21,6 +21,7 @@
 #include <rtems/score/smp.h>
 #include <rtems/score/percpu.h>
 #include <rtems/fatal.h>
+#include <rtems/rtems/cache.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -51,6 +52,13 @@ extern "C" {
 #define SMP_MESSAGE_TEST 0x2UL
 
 /**
+ * @brief SMP message to request a cache manager invocation.
+ *
+ * @see _SMP_Send_message().
+ */
+#define SMP_MESSAGE_CACHE_MANAGER 0x4UL
+
+/**
  * @brief SMP fatal codes.
  */
 typedef enum {
@@ -127,6 +135,12 @@ static inline void _SMP_Set_test_message_handler(
 }
 
 /**
+ * @brief Handles cache invalidation/flush requests from a remote processor.
+ *
+ */
+void _SMP_Cache_manager_message_handler( void );
+
+/**
  * @brief Interrupt handler for inter-processor interrupts.
  */
 static inline void _SMP_Inter_processor_interrupt_handler( void )
@@ -148,6 +162,11 @@ static inline void _SMP_Inter_processor_interrupt_handler( void )
     if ( ( message & SMP_MESSAGE_TEST ) != 0 ) {
       ( *_SMP_Test_message_handler )( cpu_self );
     }
+
+    if ( ( message & SMP_MESSAGE_CACHE_MANAGER ) != 0 ) {
+      _SMP_Cache_manager_message_handler();
+    }
+
   }
 }
 
-- 
1.7.9.5



