[PATCH 4/4] score: Ensure stack alignment requirement

Gedare Bloom gedare at rtems.org
Wed Mar 3 16:04:27 UTC 2021


On Wed, Mar 3, 2021 at 1:40 AM Sebastian Huber
<sebastian.huber at embedded-brains.de> wrote:
>
> Make sure we meet the stack aligment requirement also for CPU ports with
> CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT.
> ---
>  cpukit/include/rtems/score/context.h   |  3 +--
>  cpukit/include/rtems/score/stackimpl.h |  9 +++++++++
>  cpukit/score/src/threadinitialize.c    |  9 +++++++++
>  cpukit/score/src/tlsallocsize.c        | 14 +++++++++++---
>  4 files changed, 30 insertions(+), 5 deletions(-)
>
> diff --git a/cpukit/include/rtems/score/context.h b/cpukit/include/rtems/score/context.h
> index 46e04e9600..b65c15e73b 100644
> --- a/cpukit/include/rtems/score/context.h
> +++ b/cpukit/include/rtems/score/context.h
> @@ -49,8 +49,7 @@ extern "C" {
>   */
>  #if ( CPU_HARDWARE_FP == TRUE ) || ( CPU_SOFTWARE_FP == TRUE )
>    #define CONTEXT_FP_SIZE \
> -    ( ( CPU_CONTEXT_FP_SIZE + CPU_HEAP_ALIGNMENT - 1 ) \
> -      & ~( CPU_HEAP_ALIGNMENT - 1 ) )
> +    RTEMS_ALIGN_UP( CPU_CONTEXT_FP_SIZE, CPU_STACK_ALIGNMENT )
>  #else
>    #define CONTEXT_FP_SIZE 0
>  #endif
> diff --git a/cpukit/include/rtems/score/stackimpl.h b/cpukit/include/rtems/score/stackimpl.h
> index 43b7c8151e..4ff56eea93 100644
> --- a/cpukit/include/rtems/score/stackimpl.h
> +++ b/cpukit/include/rtems/score/stackimpl.h
> @@ -149,6 +149,15 @@ RTEMS_INLINE_ROUTINE size_t _Stack_Extend_size(
>
>    stack_size += extra_size;
>
> +#if CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT
> +  /*
> +   * If the heap allocator does not meet the stack alignment requirement, then
> +   * we have to do the stack alignment manually in _Thread_Initialize() and
> +   * need to allocate extra space for this.
> +   */
> +  stack_size += CPU_STACK_ALIGNMENT - 1;
> +#endif
> +
>    if ( stack_size < extra_size ) {
>      /*
>       * In case of an unsigned integer overflow, saturate at the maximum value.
> diff --git a/cpukit/score/src/threadinitialize.c b/cpukit/score/src/threadinitialize.c
> index 3a331ed269..a672c02911 100644
> --- a/cpukit/score/src/threadinitialize.c
> +++ b/cpukit/score/src/threadinitialize.c
> @@ -107,6 +107,9 @@ static bool _Thread_Try_initialize(
>    size_t                   i;
>    char                    *stack_begin;
>    char                    *stack_end;
> +#if CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT
> +  uintptr_t                stack_align;
> +#endif
>    Scheduler_Node          *scheduler_node;
>  #if defined(RTEMS_SMP)
>    Scheduler_Node          *scheduler_node_for_index;
> @@ -131,6 +134,12 @@ static bool _Thread_Try_initialize(
>    stack_begin = config->stack_area;
>    stack_end = stack_begin + config->stack_size;
>
> +#if CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT
> +  stack_align = CPU_STACK_ALIGNMENT;
> +  stack_begin = (char *) RTEMS_ALIGN_UP( (uintptr_t) stack_begin, stack_align );
> +  stack_end = (char *) RTEMS_ALIGN_DOWN( (uintptr_t) stack_end, stack_align );

Why align down here? Why align the end at all? Is there a requirement
for the stack area to be a multiple of the stack alignment? I guess we
can lose some bytes and return less than config->stack_size as a
result of this?

Shouldn't we better do...
#if CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT
> +  stack_align = CPU_STACK_ALIGNMENT;
> +  stack_begin = (char *) RTEMS_ALIGN_UP( (uintptr_t) config->stack_area, stack_align );
#else
stack_begin = config->stack_area;
#endif
stack_end = stack_begin + config->stack_size;

So we always return the requested stack size?

> +#endif
> +
>    /* Allocate floating-point context in stack area */
>  #if ( CPU_HARDWARE_FP == TRUE ) || ( CPU_SOFTWARE_FP == TRUE )
>    if ( config->is_fp ) {
> diff --git a/cpukit/score/src/tlsallocsize.c b/cpukit/score/src/tlsallocsize.c
> index e7854c677a..cba5bdcfba 100644
> --- a/cpukit/score/src/tlsallocsize.c
> +++ b/cpukit/score/src/tlsallocsize.c
> @@ -59,14 +59,16 @@ uintptr_t _TLS_Get_allocation_size( void )
>    allocation_size = _TLS_Allocation_size;
>
>    if ( allocation_size == 0 ) {
> -    allocation_size = _TLS_Heap_align_up( size );
> -    alignment = _TLS_Heap_align_up( (uintptr_t) _TLS_Alignment );
> +    uintptr_t stack_alignment;
> +
> +    stack_alignment = CPU_STACK_ALIGNMENT;
> +    alignment = RTEMS_ALIGN_UP( (uintptr_t) _TLS_Alignment, stack_alignment );
>
>      /*
>       * The stack allocator does not support aligned allocations.  Allocate
>       * enough to do the alignment manually.
>       */
> -    if ( alignment > CPU_HEAP_ALIGNMENT ) {
> +    if ( alignment > stack_alignment ) {
>        allocation_size += alignment;
>      }
>
> @@ -76,6 +78,12 @@ uintptr_t _TLS_Get_allocation_size( void )
>      allocation_size += sizeof(TLS_Dynamic_thread_vector);
>  #endif
>
> +    /*
> +     * The TLS area is allocated in the thread storage area.  Each allocation
> +     * shall meet the stack alignment requirement.
> +     */
> +    allocation_size = RTEMS_ALIGN_UP( allocation_size, stack_alignment );
> +
>      if ( _Thread_Maximum_TLS_size != 0 ) {
>        if ( allocation_size <= _Thread_Maximum_TLS_size ) {
>          allocation_size = _Thread_Maximum_TLS_size;
> --
> 2.26.2
>
> _______________________________________________
> devel mailing list
> devel at rtems.org
> http://lists.rtems.org/mailman/listinfo/devel


More information about the devel mailing list