[PATCH 05/11] score: Add per-CPU profiling

Sebastian Huber sebastian.huber at embedded-brains.de
Tue Mar 11 08:28:40 UTC 2014


On 2014-03-10 17:13, Gedare Bloom wrote:
> Please add a little more to the commit message, especially since this
> also adds some of the ISR profiling support. Unless you did not mean
> to add the ISR stuff yet. Some of it looks unused to me.

Ok.

>
> On Mon, Mar 10, 2014 at 9:28 AM, Sebastian Huber
> <sebastian.huber at embedded-brains.de> wrote:
>> ---
>>   cpukit/sapi/src/profilingiterate.c                |   61 +++++++++++
>>   cpukit/score/Makefile.am                          |    2 +
>>   cpukit/score/include/rtems/score/percpu.h         |   74 ++++++++++++++
>>   cpukit/score/include/rtems/score/profiling.h      |  113 +++++++++++++++++++++
>>   cpukit/score/include/rtems/score/threaddispatch.h |   37 ++++++--
>>   cpukit/score/preinstall.am                        |    4 +
>>   cpukit/score/src/profilingisrentryexit.c          |   45 ++++++++
>>   cpukit/score/src/threaddispatch.c                 |    2 +
>>   cpukit/score/src/threaddispatchdisablelevel.c     |    3 +
>>   cpukit/score/src/threadhandler.c                  |    1 +
>>   cpukit/score/src/threadstartmultitasking.c        |    1 +
>>   11 files changed, 335 insertions(+), 8 deletions(-)
>>   create mode 100644 cpukit/score/include/rtems/score/profiling.h
>>   create mode 100644 cpukit/score/src/profilingisrentryexit.c
>>
>> diff --git a/cpukit/sapi/src/profilingiterate.c b/cpukit/sapi/src/profilingiterate.c
>> index e528932..28c06a4 100644
>> --- a/cpukit/sapi/src/profilingiterate.c
>> +++ b/cpukit/sapi/src/profilingiterate.c
>> @@ -17,10 +17,71 @@
>>   #endif
>>
>>   #include <rtems/profiling.h>
>> +#include <rtems/counter.h>
>> +#include <rtems.h>
>> +
>> +#include <string.h>
>> +
>> +static void per_cpu_stats_iterate(
>> +  rtems_profiling_visitor visitor,
>> +  void *visitor_arg,
>> +  rtems_profiling_data *data
>> +)
>> +{
>> +#ifdef RTEMS_PROFILING
>> +  uint32_t n = rtems_smp_get_processor_count();
>> +  uint32_t i;
>> +
>> +  memset(data, 0, sizeof(*data));
>> +  data->header.type = RTEMS_PROFILING_PER_CPU;
>> +  for (i = 0; i < n; ++i) {
>> +    const Per_CPU_Control *per_cpu = _Per_CPU_Get_by_index(i);
>> +    const Per_CPU_Stats *stats = &per_cpu->Stats;
>> +    rtems_profiling_per_cpu *per_cpu_data = &data->per_cpu;
>> +
>> +    per_cpu_data->processor_index = i;
>> +
>> +    per_cpu_data->max_thread_dispatch_disabled_time =
>> +      rtems_counter_ticks_to_nanoseconds(
>> +        stats->max_thread_dispatch_disabled_time
>> +      );
>> +
>> +    per_cpu_data->max_interrupt_time =
>> +      rtems_counter_ticks_to_nanoseconds(stats->max_interrupt_time);
>> +
>> +    per_cpu_data->max_interrupt_delay =
>> +      rtems_counter_ticks_to_nanoseconds(stats->max_interrupt_delay);
>> +
>> +    per_cpu_data->thread_dispatch_disabled_count =
>> +      stats->thread_dispatch_disabled_count;
>> +
>> +    per_cpu_data->total_thread_dispatch_disabled_time =
>> +      rtems_counter_ticks_to_nanoseconds(
>> +        stats->total_thread_dispatch_disabled_time
>> +      );
>> +
>> +    per_cpu_data->interrupt_count = stats->interrupt_count;
>> +
>> +    per_cpu_data->total_interrupt_time =
>> +      rtems_counter_ticks_to_nanoseconds(
>> +        stats->total_interrupt_time
>> +      );
>> +
>> +    (*visitor)(visitor_arg, data);
>> +  }
>> +#else
>> +  (void) visitor;
>> +  (void) visitor_arg;
>> +  (void) data;
>> +#endif
>> +}
>>
>>   void rtems_profiling_iterate(
>>     rtems_profiling_visitor visitor,
>>     void *visitor_arg
>>   )
>>   {
>> +  rtems_profiling_data data;
>> +
>> +  per_cpu_stats_iterate(visitor, visitor_arg, &data);
>>   }
>> diff --git a/cpukit/score/Makefile.am b/cpukit/score/Makefile.am
>> index 0dc21b2..f3f53a9 100644
>> --- a/cpukit/score/Makefile.am
>> +++ b/cpukit/score/Makefile.am
>> @@ -45,6 +45,7 @@ include_rtems_score_HEADERS += include/rtems/score/percpu.h
>>   include_rtems_score_HEADERS += include/rtems/score/priority.h
>>   include_rtems_score_HEADERS += include/rtems/score/prioritybitmap.h
>>   include_rtems_score_HEADERS += include/rtems/score/prioritybitmapimpl.h
>> +include_rtems_score_HEADERS += include/rtems/score/profiling.h
>>   include_rtems_score_HEADERS += include/rtems/score/rbtree.h
>>   include_rtems_score_HEADERS += include/rtems/score/rbtreeimpl.h
>>   include_rtems_score_HEADERS += include/rtems/score/scheduler.h
>> @@ -331,6 +332,7 @@ libscore_a_SOURCES += src/apiext.c src/chain.c src/chainappend.c \
>>       src/chainnodecount.c \
>>       src/assertthreaddispatchingrepressed.c \
>>       src/interr.c src/isr.c src/wkspace.c src/wkstringduplicate.c
>> +libscore_a_SOURCES += src/profilingisrentryexit.c
>>
>>   EXTRA_DIST = src/Unlimited.txt
>>
>> diff --git a/cpukit/score/include/rtems/score/percpu.h b/cpukit/score/include/rtems/score/percpu.h
>> index 1abbee5..e8f3370 100644
>> --- a/cpukit/score/include/rtems/score/percpu.h
>> +++ b/cpukit/score/include/rtems/score/percpu.h
>> @@ -155,6 +155,78 @@ typedef enum {
>>   #endif /* defined( RTEMS_SMP ) */
>>
>>   /**
>> + * @brief Per-CPU statistics.
>> + */
>> +typedef struct {
>> +#if defined( RTEMS_PROFILING )
>> +  /**
>> +   * @brief The thread dispatch disabled begin instant in CPU counter ticks.
>> +   *
>> +   * This value is used to measure the time of disabled thread dispatching.
>> +   */
>> +  CPU_Counter_ticks thread_dispatch_disabled_instant;
>> +
>> +  /**
>> +   * @brief The maximum time of disabled thread dispatching in CPU counter
>> +   * ticks.
>> +   */
>> +  CPU_Counter_ticks max_thread_dispatch_disabled_time;
>> +
>> +  /**
>> +   * @brief The maximum time spent to process a single sequence of nested
>> +   * interrupts in CPU counter ticks.
>> +   *
>> +   * This is the time interval between the change of the interrupt nest level
>> +   * from zero to one and the change back from one to zero.
>> +   */
>> +  CPU_Counter_ticks max_interrupt_time;
>> +
>> +  /**
>> +   * @brief The maximum interrupt delay in CPU counter ticks if supported by
>> +   * the hardware.
>> +   */
>> +  CPU_Counter_ticks max_interrupt_delay;
>> +
>> +  /**
>> +   * @brief Count of times when the thread dispatch disable level changes from
>> +   * zero to one in thread context.
>> +   *
>> +   * This value may overflow.
>> +   */
>> +  uint64_t thread_dispatch_disabled_count;
>> +
>> +  /**
>> +   * @brief Total time of disabled thread dispatching in CPU counter ticks.
>> +   *
>> +   * The average time of disabled thread dispatching is the total time of
>> +   * disabled thread dispatching divided by the thread dispatch disabled
>> +   * count.
>> +   *
>> +   * This value may overflow.
>> +   */
>> +  uint64_t total_thread_dispatch_disabled_time;
>> +
>> +  /**
>> +   * @brief Count of times when the interrupt nest level changes from zero to
>> +   * one.
>> +   *
>> +   * This value may overflow.
>> +   */
>> +  uint64_t interrupt_count;
>> +
>> +  /**
>> +   * @brief Total time of interrupt processing in CPU counter ticks.
>> +   *
>> +   * The average time of interrupt processing is the total time of interrupt
>> +   * processing divided by the interrupt count.
>> +   *
>> +   * This value may overflow.
>> +   */
>> +  uint64_t total_interrupt_time;
>> +#endif /* defined( RTEMS_PROFILING ) */
>> +} Per_CPU_Stats;
>> +
>> +/**
>>    *  @brief Per CPU Core Structure
>>    *
>>    *  This structure is used to hold per core state information.
>> @@ -236,6 +308,8 @@ typedef struct {
>>        */
>>       Per_CPU_State state;
>>     #endif
>> +
>> +  Per_CPU_Stats Stats;
>>   } Per_CPU_Control;
>>
>>   #if defined( RTEMS_SMP )
>> diff --git a/cpukit/score/include/rtems/score/profiling.h b/cpukit/score/include/rtems/score/profiling.h
>> new file mode 100644
>> index 0000000..468c124
>> --- /dev/null
>> +++ b/cpukit/score/include/rtems/score/profiling.h
>> @@ -0,0 +1,113 @@
>> +/**
>> + * @file
>> + *
>> + * @ingroup ScoreProfiling
>> + *
>> + * @brief Profiling Support API
>> + */
>> +
>> +/*
>> + * Copyright (c) 2014 embedded brains GmbH.  All rights reserved.
>> + *
>> + *  embedded brains GmbH
>> + *  Dornierstr. 4
>> + *  82178 Puchheim
>> + *  Germany
>> + *  <rtems at embedded-brains.de>
>> + *
>> + * The license and distribution terms for this file may be
>> + * found in the file LICENSE in this distribution or at
>> + * http://www.rtems.com/license/LICENSE.
>> + */
>> +
>> +#ifndef _RTEMS_SCORE_PROFILING
>> +#define _RTEMS_SCORE_PROFILING
>> +
>> +#include <rtems/score/percpu.h>
>> +
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif /* __cplusplus */
>> +
>> +/**
>> + * @defgroup ScoreProfiling Profiling Support
>> + *
>> + * @brief Profiling support.
>> + *
>> + * @{
>> + */
>> +
>> +static inline void _Profiling_Thread_dispatch_disable(
>> +  Per_CPU_Control *per_cpu,
>> +  uint32_t previous_thread_dispatch_disable_level
>> +)
>> +{
>> +#if defined( RTEMS_PROFILING )
>> +  if ( previous_thread_dispatch_disable_level == 0 ) {
>> +    Per_CPU_Stats *stats = &per_cpu->Stats;
>> +
>> +    stats->thread_dispatch_disabled_instant = _CPU_Counter_read();
>> +    ++stats->thread_dispatch_disabled_count;
>> +  }
>> +#else
> It would be cleaner to choose to make these CPP checks either in these
> functions or around their call points, but not both.

The problem is that in two places you need additional ISR disable/enable.

>
> You could also provide a section of empty CPP macros in case there is
> no RTEMS_PROFILING.

Ok, I will change it to empty CPP macros, although I doubt that the compiler has 
any problem optimizing empty inline functions away.

>
>> +  (void) per_cpu;
>> +  (void) previous_thread_dispatch_disable_level;
>> +#endif
>> +}
>> +
>> +static inline void _Profiling_Thread_dispatch_enable(
>> +  Per_CPU_Control *per_cpu,
>> +  uint32_t new_thread_dispatch_disable_level
>> +)
>> +{
>> +#if defined( RTEMS_PROFILING )
>> +  if ( new_thread_dispatch_disable_level == 0 ) {
>> +    Per_CPU_Stats *stats = &per_cpu->Stats;
>> +    CPU_Counter_ticks now = _CPU_Counter_read();
>> +    CPU_Counter_ticks delta = _CPU_Counter_difference(
>> +      now,
>> +      stats->thread_dispatch_disabled_instant
>> +    );
>> +
>> +    stats->total_thread_dispatch_disabled_time += delta;
>> +
>> +    if ( stats->max_thread_dispatch_disabled_time < delta ) {
>> +      stats->max_thread_dispatch_disabled_time = delta;
>> +    }
>> +  }
>> +#else
>> +  (void) per_cpu;
>> +  (void) new_thread_dispatch_disable_level;
>> +#endif
>> +}
>> +
>> +static inline void _Profiling_Update_max_interrupt_delay(
>> +  Per_CPU_Control *per_cpu,
>> +  CPU_Counter_ticks interrupt_delay
>> +)
>> +{
>> +#if defined( RTEMS_PROFILING )
>> +  Per_CPU_Stats *stats = &per_cpu->Stats;
>> +
>> +  if ( stats->max_interrupt_delay < interrupt_delay ) {
>> +    stats->max_interrupt_delay = interrupt_delay;
>> +  }
>> +#else
>> +  (void) per_cpu;
>> +  (void) interrupt_delay;
>> +#endif
>> +}
>> +
>> +void _Profiling_Outer_most_interrupt_entry_and_exit(
>> +  Per_CPU_Control *per_cpu,
>> +  CPU_Counter_ticks interrupt_entry_instant,
>> +  CPU_Counter_ticks interrupt_exit_instant
>> +);
> I don't like this name, but I have no better one yet until I see where
> it is used. Perhaps _Profiling_Nested_isr? I liked the above functions
> that were named after the function they profile, although here it is
> less clear since the entire function does not get profiled I would
> guess.
[...]

There is no function they profile.  Unfortunately, this function must be called 
from assembly code; see later patches for ARM, PowerPC and SPARC.

-- 
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax     : +49 89 189 47 41-09
E-Mail  : sebastian.huber at embedded-brains.de
PGP     : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



More information about the devel mailing list