Potential SIS or RTEMS/libbsd problem

Sebastian Huber sebastian.huber at embedded-brains.de
Wed May 22 07:49:18 UTC 2019


On 22/05/2019 09:39, Jiri Gaisler wrote:
> On 5/22/19 8:03 AM, Sebastian Huber wrote:
>> Hello,
>>
>> in the libbsd there is a test for the Epoch Based Reclamation:
>>
>> https://git.rtems.org/rtems-libbsd/tree/testsuite/epoch01/test_main.c
>>
>> When I run this test using the leon3 BSP on real hardware (150MHz NGMP FP) the test completes successfully.
>>
>> If I run the test on the SIS, it is stuck at some point (using "-m 1" works):
>>
>> sparc-rtems5-sis -leon3 -nouartrx -r -tlim 200 s -m 2 build/sparc-rtems5-leon3-everything/epoch01.exe
>>
>>
> This test needs a shorter time-slice in the simulator to succeed (-d option). The more CPUs there are, the fewer clocks per slice are needed. Through trial and error, these values seem to work:
>
> 2 CPUs: -m 2 -d 25
>
> 3 CPUs: -m 3 -d 10
>
> 4 CPUs will not work, even if -d 1 is set. This is most likely a simulator problem, I will try to find time to look at it in more detail. A quick trace shows that all CPUs are stuck in a loop checking for a lock or similar:
>
> $ ./sis -leon3 ~/epoch01.exe -m 4 -d 1
>
>   SIS - SPARC/RISCV instruction simulator 2.14,  copyright Jiri Gaisler 2019
>   Bug-reports to jiri at gaisler.se
>
>   LEON3 emulation enabled, 4 cpus online, delta 1 clocks
>
>   Loaded /home/jiri/epoch01.exe, entry 0x40000000
> cpu0> run
> Waking CPU 1
> Waking CPU 2
> Waking CPU 3
> *** LIBBSD EPOCH 1 TEST ***
> nexus0: <RTEMS Nexus device>
> <TestEpoch01>
>    <EnterExit activeWorker="1">
>      <Counter worker="0">74357</Counter>
>    </EnterExit>
>    <EnterExit activeWorker="2">
>      <Counter worker="0">74464</Counter>
>      <Counter worker="1">59621</Counter>
>    </EnterExit>
>    <EnterExit activeWorker="3">
>      <Counter worker="0">74353</Counter>
>      <Counter worker="1">59529</Counter>
>      <Counter worker="2">74710</Counter>
>    </EnterExit>
>    <EnterExit activeWorker="4">
>      <Counter worker="0">74221</Counter>
>      <Counter worker="1">66362</Counter>
>      <Counter worker="2">74605</Counter>
>      <Counter worker="3">74605</Counter>
>    </EnterExit>
>    <EnterListOpExit activeWorker="1">
>      <Counter worker="0">53231</Counter>
>      <Removals worker="0">82</Removals>
>    </EnterListOpExit>
>    <EnterListOpExit activeWorker="2">
>      <Counter worker="0">51659</Counter>
>      <Counter worker="1">51862</Counter>
>      <Removals worker="0">42</Removals>
>      <Removals worker="1">42</Removals>
>    </EnterListOpExit>
>    <EnterListOpExit activeWorker="3">
>      <Counter worker="0">50492</Counter>
>      <Counter worker="1">46310</Counter>
>      <Counter worker="2">51516</Counter>
>      <Removals worker="0">25</Removals>
>      <Removals worker="1">24</Removals>
>      <Removals worker="2">24</Removals>
>    </EnterListOpExit>
>    <EnterListOpExit activeWorker="4">
>      <Counter worker="0">46105</Counter>
>      <Counter worker="1">41697</Counter>
>      <Counter worker="2">46499</Counter>
>      <Counter worker="3">46515</Counter>
>      <Removals worker="0">19</Removals>
>      <Removals worker="1">18</Removals>
>      <Removals worker="2">18</Removals>
>      <Removals worker="3">18</Removals>
>    </EnterListOpExit>
>    <EnterExitPreempt activeWorker="1">
>      <Counter worker="0">29273</Counter>
>    </EnterExitPreempt>
>    <EnterExitPreempt activeWorker="2">
>      <Counter worker="0">29262</Counter>
>      <Counter worker="1">37024</Counter>
>    </EnterExitPreempt>
>    <EnterExitPreempt activeWorker="3">
>      <Counter worker="0">32622</Counter>
>      <Counter worker="1">36973</Counter>
>      <Counter worker="2">36973</Counter>
>    </EnterExitPreempt>
>    <EnterExitPreempt activeWorker="4">
>      <Counter worker="0">29126</Counter>
>      <Counter worker="1">36917</Counter>
>      <Counter worker="2">36917</Counter>
>      <Counter worker="3">36918</Counter>
>    </EnterExitPreempt>
>    <EnterListOpExitPreempt activeWorker="1">
>      <Counter worker="0">27539</Counter>
>      <Removals worker="0">61</Removals>
>    </EnterListOpExitPreempt>
> Interrupt!
>   Stopped at time 1090141103 (21802.822 ms)
> cpu0> tra 20
> cpu 1  1090141103  40001840:  81e80000   restore
> cpu 2  1090141103  401cc2f4:  82102000   mov  0, %g1
> cpu 3  1090141103  401cc2f4:  82102000   mov  0, %g1
> cpu 0  1090141104  40001798:  7fffffea   call  0x40001740
> cpu 1  1090141104  40001844:  81c3e008   retl
> cpu 2  1090141104  401cc2e8:  c200c000   ld  [%g3], %g1
> cpu 3  1090141104  401cc2e8:  c200c000   ld  [%g3], %g1
> cpu 0  1090141105  4000179c:  01000000   nop
> cpu 0  1090141106  40001740:  9de3bfa0   save  %sp, -96, %sp
> cpu 1  1090141106  40001848:  01000000   nop
> cpu 2  1090141106  401cc2ec:  80a08001   cmp  %g2, %g1
> cpu 3  1090141106  401cc2ec:  80a08001   cmp  %g2, %g1
> cpu 0  1090141107  40001744:  01000000   nop
> cpu 1  1090141107  400018a0:  01000000   nop
> cpu 0  1090141108  40001748:  81e80000   restore
> cpu 1  1090141108  400018a4:  81e80000   restore
> cpu 2  1090141108  401cc2f0:  12bffffe   bne  0x401cc2e8
> cpu 3  1090141108  401cc2f0:  12bffffe   bne  0x401cc2e8
> cpu 0  1090141109  4000174c:  81c3e008   retl
> cpu 1  1090141109  400018a8:  81c3e008   retl
> cpu 2  1090141109  401cc2f4:  82102000   mov  0, %g1
> cpu 3  1090141109  401cc2f4:  82102000   mov  0, %g1
> cpu 2  1090141110  401cc2e8:  c200c000   ld  [%g3], %g1
> cpu 3  1090141110  401cc2e8:  c200c000   ld  [%g3], %g1
> cpu 0  1090141111  40001750:  01000000   nop
> cpu 1  1090141111  400018ac:  01000000   nop
> cpu 0  1090141112  400017a0:  c207a044   ld  [%fp + 0x44], %g1
> cpu 1  1090141112  40002484:  c207bff0   ld  [%fp - 0x10], %g1
> cpu 2  1090141112  401cc2ec:  80a08001   cmp  %g2, %g1
> cpu 3  1090141112  401cc2ec:  80a08001   cmp  %g2, %g1
> cpu 0  1090141114  400017a4:  c407a048   ld  [%fp + 0x48], %g2
> cpu 1  1090141114  40002488:  90100001   mov  %g1, %o0
> cpu 2  1090141114  401cc2f0:  12bffffe   bne  0x401cc2e8
> cpu 3  1090141114  401cc2f0:  12bffffe   bne  0x401cc2e8
> cpu 2  1090141115  401cc2f4:  82102000   mov  0, %g1
> cpu 3  1090141115  401cc2f4:  82102000   mov  0, %g1
> cpu 0  1090141116  400017a8:  c4204000   st  %g2, [%g1]
> cpu 1  1090141116  4000248c:  7ffffcb2   call  0x40001754
> cpu 2  1090141116  401cc2e8:  c200c000   ld  [%g3], %g1
> cpu 3  1090141116  401cc2e8:  c200c000   ld  [%g3], %g1
> cpu 1  1090141117  40002490:  01000000   nop
> cpu 1  1090141118  40001754:  9de3bf98   save  %sp, -104, %sp
> cpu 2  1090141118  401cc2ec:  80a08001   cmp  %g2, %g1
> cpu 3  1090141118  401cc2ec:  80a08001   cmp  %g2, %g1
> cpu 0  1090141119  400017ac:  7fffffe5   call  0x40001740
> cpu 1  1090141119  40001758:  f027a044   st  %i0, [%fp + 0x44]
> cpu 0  1090141120  400017b0:  01000000   nop
> cpu 2  1090141120  401cc2f0:  12bffffe   bne  0x401cc2e8
> cpu 3  1090141120  401cc2f0:  12bffffe   bne  0x401cc2e8
> cpu 0  1090141121  40001740:  9de3bfa0   save  %sp, -96, %sp
> cpu 2  1090141121  401cc2f4:  82102000   mov  0, %g1
> cpu 3  1090141121  401cc2f4:  82102000   mov  0, %g1
> cpu 0  1090141122  40001744:  01000000   nop
> cpu 1  1090141122  4000175c:  7ffffff9   call  0x40001740
> cpu 2  1090141122  401cc2e8:  c200c000   ld  [%g3], %g1
> cpu 3  1090141122  401cc2e8:  c200c000   ld  [%g3], %g1
>
>   Stopped at time 1090141123 (21802.822 ms)
> cpu0>
>

(gdb) disas 0x40001740
Dump of assembler code for function ck_pr_barrier:
    0x40001740 <+0>:     save  %sp, -96, %sp
    0x40001744 <+4>:     nop
    0x40001748 <+8>:     restore
    0x4000174c <+12>:    retl
    0x40001750 <+16>:    nop
End of assembler dump.
(gdb) disas 0x401cc2e8
Dump of assembler code for function _SMP_barrier_Wait:
    0x401cc2b0 <+0>:     ld  [ %o1 ], %g2
    0x401cc2b4 <+4>:     xnor  %g0, %g2, %g2
    0x401cc2b8 <+8>:     st  %g2, [ %o1 ]
    0x401cc2bc <+12>:    ld  [ %o0 ], %g1
    0x401cc2c0 <+16>:    mov  %g1, %g3
    0x401cc2c4 <+20>:    add  %g1, 1, %g4
    0x401cc2c8 <+24>:    casa  [ %o0 ] (10), %g1, %g4
    0x401cc2cc <+28>:    cmp  %g4, %g3
    0x401cc2d0 <+32>:    bne  0x401cc2c0 <_SMP_barrier_Wait+16>
    0x401cc2d4 <+36>:    mov  %g4, %g1
    0x401cc2d8 <+40>:    add  %g3, 1, %g1
    0x401cc2dc <+44>:    cmp  %g1, %o2
    0x401cc2e0 <+48>:    be  0x401cc300 <_SMP_barrier_Wait+80>
    0x401cc2e4 <+52>:    add  %o0, 4, %g3
    0x401cc2e8 <+56>:    ld  [ %g3 ], %g1
    0x401cc2ec <+60>:    cmp  %g2, %g1
    0x401cc2f0 <+64>:    bne  0x401cc2e8 <_SMP_barrier_Wait+56>
    0x401cc2f4 <+68>:    clr  %g1
    0x401cc2f8 <+72>:    retl
    0x401cc2fc <+76>:    and  %g1, 1, %o0
    0x401cc300 <+80>:    clr  [ %o0 ]
    0x401cc304 <+84>:    mov  1, %g1
    0x401cc308 <+88>:    st  %g2, [ %o0 + 4 ]
    0x401cc30c <+92>:    retl
    0x401cc310 <+96>:    and  %g1, 1, %o0
End of assembler dump.
(gdb) disas 0x40001754
Dump of assembler code for function ck_pr_md_load_ptr:
    0x40001754 <+0>:     save  %sp, -104, %sp
    0x40001758 <+4>:     st  %i0, [ %fp + 0x44 ]
    0x4000175c <+8>:     call  0x40001740 <ck_pr_barrier>
    0x40001760 <+12>:    nop
    0x40001764 <+16>:    ld  [ %fp + 0x44 ], %g1
    0x40001768 <+20>:    ld  [ %g1 ], %g1
    0x4000176c <+24>:    st  %g1, [ %fp + -4 ]
    0x40001770 <+28>:    call  0x40001740 <ck_pr_barrier>
    0x40001774 <+32>:    nop
    0x40001778 <+36>:    ld  [ %fp + -4 ], %g1
    0x4000177c <+40>:    mov  %g1, %i0
    0x40001780 <+44>:    restore
    0x40001784 <+48>:    retl
    0x40001788 <+52>:    nop
End of assembler dump.

It seems cpu 2 and 3 are in _SMP_barrier_Wait(). The cpu 0 and 1 still 
do some stuff in the EBR algorithm (ck_* functions). Maybe the algorithm 
works only if some random timing fluctuations occur.

-- 
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax     : +49 89 189 47 41-09
E-Mail  : sebastian.huber at embedded-brains.de
PGP     : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.




More information about the devel mailing list