[PATCH v2] score: Ensure stack alignment requirement

Gedare Bloom gedare at rtems.org
Thu Mar 4 15:50:34 UTC 2021


It looks fine to me. I agree with the general perspective that a user
can't explicitly control their stack usage down to the last byte, so
my complaints are not real problems.

On Thu, Mar 4, 2021 at 2:17 AM Sebastian Huber
<sebastian.huber at embedded-brains.de> wrote:
>
> Make sure that a user-provided stack size is the minimum size allocated
> for the stack.
>
> Make sure the stack alignment requirement is also met for CPU ports
> with CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT.
> ---
>  cpukit/include/rtems/score/context.h   |  3 +--
>  cpukit/include/rtems/score/stackimpl.h | 24 ++++++++++++++++++---
>  cpukit/include/rtems/score/tls.h       | 16 +++++++-------
>  cpukit/score/src/threadinitialize.c    |  5 +++++
>  cpukit/score/src/tlsallocsize.c        | 30 ++++++++++++++++----------
>  5 files changed, 54 insertions(+), 24 deletions(-)
>
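
A quick illustration for readers following along: the patch leans on the
RTEMS_ALIGN_UP()/RTEMS_ALIGN_DOWN() helpers, which round a value to a
power-of-two boundary. A minimal standalone sketch of the semantics I read
them to have (local stand-in macros, not the real definitions from
<rtems/score/basedefs.h>):

    #include <assert.h>
    #include <stdint.h>

    /* Stand-ins; the alignment must be a power of two */
    #define ALIGN_UP( value, alignment ) \
      ( ( ( value ) + ( alignment ) - 1 ) & ~( (uintptr_t) ( alignment ) - 1 ) )
    #define ALIGN_DOWN( value, alignment ) \
      ( ( value ) & ~( (uintptr_t) ( alignment ) - 1 ) )

    int main( void )
    {
      assert( ALIGN_UP( 100, 16 ) == 112 );  /* rounds up to the boundary */
      assert( ALIGN_UP( 112, 16 ) == 112 );  /* already aligned: unchanged */
      assert( ALIGN_DOWN( 100, 16 ) == 96 ); /* rounds down to the boundary */
      return 0;
    }
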
> diff --git a/cpukit/include/rtems/score/context.h b/cpukit/include/rtems/score/context.h
> index 46e04e9600..b65c15e73b 100644
> --- a/cpukit/include/rtems/score/context.h
> +++ b/cpukit/include/rtems/score/context.h
> @@ -49,8 +49,7 @@ extern "C" {
>   */
>  #if ( CPU_HARDWARE_FP == TRUE ) || ( CPU_SOFTWARE_FP == TRUE )
>    #define CONTEXT_FP_SIZE \
> -    ( ( CPU_CONTEXT_FP_SIZE + CPU_HEAP_ALIGNMENT - 1 ) \
> -      & ~( CPU_HEAP_ALIGNMENT - 1 ) )
> +    RTEMS_ALIGN_UP( CPU_CONTEXT_FP_SIZE, CPU_STACK_ALIGNMENT )
>  #else
>    #define CONTEXT_FP_SIZE 0
>  #endif
> diff --git a/cpukit/include/rtems/score/stackimpl.h b/cpukit/include/rtems/score/stackimpl.h
> index 43b7c8151e..c15206002c 100644
> --- a/cpukit/include/rtems/score/stackimpl.h
> +++ b/cpukit/include/rtems/score/stackimpl.h
> @@ -135,6 +135,7 @@ RTEMS_INLINE_ROUTINE size_t _Stack_Extend_size(
>  )
>  {
>    size_t extra_size;
> +  size_t alignment_overhead;
>
>    extra_size = _TLS_Get_allocation_size();
>
> @@ -147,15 +148,32 @@ RTEMS_INLINE_ROUTINE size_t _Stack_Extend_size(
>    (void) is_fp;
>  #endif
>
> -  stack_size += extra_size;
> +  /*
> +   * In order to make sure that a user-provided stack size is the minimum which
> +   * can be allocated for the stack, we have to align it up to the next stack
> +   * boundary.
> +   */
> +  alignment_overhead = CPU_STACK_ALIGNMENT - 1;
> +
> +#if CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT
> +  /*
> +   * If the heap allocator does not meet the stack alignment requirement, then
> +   * we have to do the stack alignment manually in _Thread_Initialize() and
> +   * need to allocate extra space for this.
> +   */
> +  alignment_overhead += CPU_STACK_ALIGNMENT - CPU_HEAP_ALIGNMENT;
> +#endif
>
> -  if ( stack_size < extra_size ) {
> +  if ( stack_size > SIZE_MAX - extra_size - alignment_overhead ) {
>      /*
>       * In case of an unsigned integer overflow, saturate at the maximum value.
>       */
> -    stack_size = SIZE_MAX;
> +    return SIZE_MAX;
>    }
>
> +  stack_size += extra_size;
> +  stack_size = RTEMS_ALIGN_UP( stack_size, CPU_STACK_ALIGNMENT );
> +
>    return stack_size;
>  }
>
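
To spell out the new _Stack_Extend_size() arithmetic with concrete numbers,
here is a standalone sketch (hypothetical alignment values; the real ones
come from the CPU port):

    #include <stdint.h>
    #include <stdio.h>

    #define CPU_STACK_ALIGNMENT 16
    #define CPU_HEAP_ALIGNMENT  8
    #define ALIGN_UP( v, a ) ( ( ( v ) + ( a ) - 1 ) & ~( (size_t) ( a ) - 1 ) )

    static size_t extend_size( size_t stack_size, size_t extra_size )
    {
      size_t alignment_overhead = CPU_STACK_ALIGNMENT - 1;

    #if CPU_STACK_ALIGNMENT > CPU_HEAP_ALIGNMENT
      /* Extra room to align the stack begin manually in _Thread_Initialize() */
      alignment_overhead += CPU_STACK_ALIGNMENT - CPU_HEAP_ALIGNMENT;
    #endif

      /* Check for overflow before adding, saturating at the maximum */
      if ( stack_size > SIZE_MAX - extra_size - alignment_overhead ) {
        return SIZE_MAX;
      }

      /* Round up so the user-provided size is a lower bound */
      return ALIGN_UP( stack_size + extra_size, CPU_STACK_ALIGNMENT );
    }

    int main( void )
    {
      /* A 4000-byte request with 100 bytes of TLS/FP extras yields 4112 */
      printf( "%zu\n", extend_size( 4000, 100 ) );
      return 0;
    }
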
> diff --git a/cpukit/include/rtems/score/tls.h b/cpukit/include/rtems/score/tls.h
> index a32b7164b5..7725a003ca 100644
> --- a/cpukit/include/rtems/score/tls.h
> +++ b/cpukit/include/rtems/score/tls.h
> @@ -122,17 +122,17 @@ static inline uintptr_t _TLS_Get_size( void )
>  }
>
>  /**
> - * @brief Returns the value aligned up to the heap alignment.
> + * @brief Returns the value aligned up to the stack alignment.
>   *
>   * @param val The value to align.
>   *
> - * @return The value aligned to the heap alignment.
> + * @return The value aligned to the stack alignment.
>   */
> -static inline uintptr_t _TLS_Heap_align_up( uintptr_t val )
> +static inline uintptr_t _TLS_Align_up( uintptr_t val )
>  {
> -  uintptr_t msk = CPU_HEAP_ALIGNMENT - 1;
> +  uintptr_t alignment = CPU_STACK_ALIGNMENT;
>
> -  return (val + msk) & ~msk;
> +  return RTEMS_ALIGN_UP( val, alignment );
>  }
>
>  /**
> @@ -229,7 +229,7 @@ static inline void *_TLS_TCB_at_area_begin_initialize( void *tls_area )
>    void *tls_block = (char *) tls_area
>      + _TLS_Get_thread_control_block_area_size( (uintptr_t) _TLS_Alignment );
>    TLS_Thread_control_block *tcb = tls_area;
> -  uintptr_t aligned_size = _TLS_Heap_align_up( (uintptr_t) _TLS_Size );
> +  uintptr_t aligned_size = _TLS_Align_up( (uintptr_t) _TLS_Size );
>    TLS_Dynamic_thread_vector *dtv = (TLS_Dynamic_thread_vector *)
>      ((char *) tls_block + aligned_size);
>
> @@ -253,7 +253,7 @@ static inline void *_TLS_TCB_before_TLS_block_initialize( void *tls_area )
>      + _TLS_Get_thread_control_block_area_size( (uintptr_t) _TLS_Alignment );
>    TLS_Thread_control_block *tcb = (TLS_Thread_control_block *)
>      ((char *) tls_block - sizeof(*tcb));
> -  uintptr_t aligned_size = _TLS_Heap_align_up( (uintptr_t) _TLS_Size );
> +  uintptr_t aligned_size = _TLS_Align_up( (uintptr_t) _TLS_Size );
>    TLS_Dynamic_thread_vector *dtv = (TLS_Dynamic_thread_vector *)
>      ((char *) tls_block + aligned_size);
>
> @@ -276,7 +276,7 @@ static inline void *_TLS_TCB_after_TLS_block_initialize( void *tls_area )
>    uintptr_t size = (uintptr_t) _TLS_Size;
>    uintptr_t tls_align = (uintptr_t) _TLS_Alignment;
>    uintptr_t tls_mask = tls_align - 1;
> -  uintptr_t heap_align = _TLS_Heap_align_up( tls_align );
> +  uintptr_t heap_align = _TLS_Align_up( tls_align );
>    uintptr_t heap_mask = heap_align - 1;
>    TLS_Thread_control_block *tcb = (TLS_Thread_control_block *)
>      ((char *) tls_area + ((size + heap_mask) & ~heap_mask));
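
Worth noting: the rename also moves the rounding boundary, which matters on
ports where the two alignments differ. A tiny check with hypothetical values
(CPU_HEAP_ALIGNMENT == 8, CPU_STACK_ALIGNMENT == 16):

    #include <assert.h>
    #include <stdint.h>

    #define ALIGN_UP( v, a ) ( ( ( v ) + ( a ) - 1 ) & ~( (uintptr_t) ( a ) - 1 ) )

    int main( void )
    {
      uintptr_t tls_size = 200;

      /* The old _TLS_Heap_align_up() rounded to the heap boundary ... */
      assert( ALIGN_UP( tls_size, 8 ) == 200 );
      /* ... the new _TLS_Align_up() rounds to the stack boundary */
      assert( ALIGN_UP( tls_size, 16 ) == 208 );
      return 0;
    }
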
> diff --git a/cpukit/score/src/threadinitialize.c b/cpukit/score/src/threadinitialize.c
> index 3a331ed269..f11e35dcf3 100644
> --- a/cpukit/score/src/threadinitialize.c
> +++ b/cpukit/score/src/threadinitialize.c
> @@ -107,6 +107,7 @@ static bool _Thread_Try_initialize(
>    size_t                   i;
>    char                    *stack_begin;
>    char                    *stack_end;
> +  uintptr_t                stack_align;
>    Scheduler_Node          *scheduler_node;
>  #if defined(RTEMS_SMP)
>    Scheduler_Node          *scheduler_node_for_index;
> @@ -128,8 +129,12 @@ static bool _Thread_Try_initialize(
>        (char *) the_thread + add_on->source_offset;
>    }
>
> +  /* Set up the properly aligned stack area begin and end */
>    stack_begin = config->stack_area;
>    stack_end = stack_begin + config->stack_size;
> +  stack_align = CPU_STACK_ALIGNMENT;
> +  stack_begin = (char *) RTEMS_ALIGN_UP( (uintptr_t) stack_begin, stack_align );
> +  stack_end = (char *) RTEMS_ALIGN_DOWN( (uintptr_t) stack_end, stack_align );
>
>    /* Allocate floating-point context in stack area */
>  #if ( CPU_HARDWARE_FP == TRUE ) || ( CPU_SOFTWARE_FP == TRUE )
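
And the manual alignment itself: the begin address is rounded up and the end
rounded down, so a misaligned stack area loses at most the slack reserved in
_Stack_Extend_size(). A standalone sketch with a made-up, deliberately
misaligned area:

    #include <assert.h>
    #include <stdint.h>

    #define CPU_STACK_ALIGNMENT 16
    #define ALIGN_UP( v, a )   ( ( ( v ) + ( a ) - 1 ) & ~( (uintptr_t) ( a ) - 1 ) )
    #define ALIGN_DOWN( v, a ) ( ( v ) & ~( (uintptr_t) ( a ) - 1 ) )

    int main( void )
    {
      /* Hypothetical allocation: starts 8 bytes past a stack boundary */
      uintptr_t area_begin = 0x20000008;
      uintptr_t area_end = area_begin + 4096;

      uintptr_t stack_begin = ALIGN_UP( area_begin, CPU_STACK_ALIGNMENT );
      uintptr_t stack_end = ALIGN_DOWN( area_end, CPU_STACK_ALIGNMENT );

      assert( stack_begin == 0x20000010 );
      assert( stack_end == 0x20001000 );
      /* 16 bytes lost to alignment, covered by the reserved overhead */
      assert( stack_end - stack_begin == 4080 );
      return 0;
    }
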
> diff --git a/cpukit/score/src/tlsallocsize.c b/cpukit/score/src/tlsallocsize.c
> index e7854c677a..d761f3b6cf 100644
> --- a/cpukit/score/src/tlsallocsize.c
> +++ b/cpukit/score/src/tlsallocsize.c
> @@ -48,7 +48,6 @@ uintptr_t _TLS_Get_allocation_size( void )
>  {
>    uintptr_t size;
>    uintptr_t allocation_size;
> -  uintptr_t alignment;
>
>    size = _TLS_Get_size();
>
> @@ -59,25 +58,34 @@ uintptr_t _TLS_Get_allocation_size( void )
>    allocation_size = _TLS_Allocation_size;
>
>    if ( allocation_size == 0 ) {
> -    allocation_size = _TLS_Heap_align_up( size );
> -    alignment = _TLS_Heap_align_up( (uintptr_t) _TLS_Alignment );
> +    uintptr_t alignment;
> +
> +    alignment = _TLS_Align_up( (uintptr_t) _TLS_Alignment );
> +
> +    allocation_size = size;
> +    allocation_size += _TLS_Get_thread_control_block_area_size( alignment );
> +#ifndef __i386__
> +    allocation_size += sizeof( TLS_Dynamic_thread_vector );
> +#endif
> +
> +    /*
> +     * The TLS area is allocated in the thread storage area.  Each allocation
> +     * shall meet the stack alignment requirement.
> +     */
> +    allocation_size = _TLS_Align_up( allocation_size );
>
>      /*
>       * The stack allocator does not support aligned allocations.  Allocate
>       * enough to do the alignment manually.
>       */
> -    if ( alignment > CPU_HEAP_ALIGNMENT ) {
> -      allocation_size += alignment;
> +    if ( alignment > CPU_STACK_ALIGNMENT ) {
> +      _Assert( alignment % CPU_STACK_ALIGNMENT == 0 );
> +      allocation_size += alignment - CPU_STACK_ALIGNMENT;
>      }
>
> -    allocation_size += _TLS_Get_thread_control_block_area_size( alignment );
> -
> -#ifndef __i386__
> -    allocation_size += sizeof(TLS_Dynamic_thread_vector);
> -#endif
> -
>      if ( _Thread_Maximum_TLS_size != 0 ) {
>        if ( allocation_size <= _Thread_Maximum_TLS_size ) {
> +        _Assert( _Thread_Maximum_TLS_size % CPU_STACK_ALIGNMENT == 0 );
>          allocation_size = _Thread_Maximum_TLS_size;
>        } else {
>          _Internal_error( INTERNAL_ERROR_TOO_LARGE_TLS_SIZE );
> --
> 2.26.2
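
For completeness, here is how I read the reworked _TLS_Get_allocation_size()
path, with made-up numbers (TLS size 200, TLS alignment 64,
CPU_STACK_ALIGNMENT 16, and hypothetical 16-byte TCB and 8-byte DTV sizes;
the real values come from the linker and the score headers):

    #include <stdint.h>
    #include <stdio.h>

    #define CPU_STACK_ALIGNMENT 16
    #define ALIGN_UP( v, a ) ( ( ( v ) + ( a ) - 1 ) & ~( (uintptr_t) ( a ) - 1 ) )

    int main( void )
    {
      /* Hypothetical linker- and port-provided values */
      uintptr_t tls_size = 200;
      uintptr_t tls_alignment = 64;
      uintptr_t tcb_area_size = 16; /* TLS_Thread_control_block, assumed */
      uintptr_t dtv_size = 8;       /* TLS_Dynamic_thread_vector, assumed */

      uintptr_t alignment = ALIGN_UP( tls_alignment, CPU_STACK_ALIGNMENT );
      uintptr_t allocation_size = tls_size + tcb_area_size + dtv_size;

      /* Each allocation from the thread storage area shall meet the
       * stack alignment requirement */
      allocation_size = ALIGN_UP( allocation_size, CPU_STACK_ALIGNMENT );

      /* The stack allocator cannot align beyond CPU_STACK_ALIGNMENT, so
       * reserve space to align the TLS block manually */
      if ( alignment > CPU_STACK_ALIGNMENT ) {
        allocation_size += alignment - CPU_STACK_ALIGNMENT;
      }

      printf( "%lu\n", (unsigned long) allocation_size ); /* 224 + 48 = 272 */
      return 0;
    }
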
>
> _______________________________________________
> devel mailing list
> devel at rtems.org
> http://lists.rtems.org/mailman/listinfo/devel

