[PATCH rtems-libbsd 1/2] racoon/session: Honor file descriptor maximum

Chris Johns chrisj at rtems.org
Fri Mar 19 01:11:56 UTC 2021


On 3/3/21 7:41 pm, Christian MAUDERER wrote:
> Hello Chris,
> 
> Am 03.03.21 um 02:17 schrieb Chris Johns:
>> On 2/3/21 7:26 pm, Christian MAUDERER wrote:
>>> Hello Chris,
>>>
>>> Am 02.03.21 um 01:03 schrieb Chris Johns:
>>>> On 1/3/21 7:24 pm, Christian MAUDERER wrote:
>>>>> Hello Chris,
>>>>>
>>>>> thanks for the review.
>>>>>
>>>>> Am 26.02.21 um 19:04 schrieb Chris Johns:
>>>>>> On 26/2/21 2:01 am, Christian Mauderer wrote:
>>>>>>> Dynamically allocate a big enough file descriptor set for select(). A
>>>>>>> better solution would be to use kqueue() instead of select().
>>>>>>> ---
>>>>>>>     .../racoon/rtems-bsd-racoon-session-data.h    |  6 +--
>>>>>>>     ipsec-tools/src/racoon/session.c              | 40 +++++++++++++++++++
>>>>>>>     2 files changed, 43 insertions(+), 3 deletions(-)
>>>>>>>
>>>>>>> diff --git a/ipsec-tools/src/racoon/rtems-bsd-racoon-session-data.h
>>>>>>> b/ipsec-tools/src/racoon/rtems-bsd-racoon-session-data.h
>>>>>>> index b869a1518..196107a35 100644
>>>>>>> --- a/ipsec-tools/src/racoon/rtems-bsd-racoon-session-data.h
>>>>>>> +++ b/ipsec-tools/src/racoon/rtems-bsd-racoon-session-data.h
>>>>>>> @@ -2,11 +2,11 @@
>>>>>>>     #include <rtems/linkersets.h>
>>>>>>>     #include "rtems-bsd-racoon-data.h"
>>>>>>>     /* session.c */
>>>>>>> -RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static fd_set active_mask);
>>>>>>> -RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static fd_set preset_mask);
>>>>>>> +RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static _types_fd_set
>>>>>>> *allocated_active_mask);
>>>>>>> +RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static _types_fd_set
>>>>>>> *allocated_preset_mask);
>>>>>>>     RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static int nfds);
>>>>>>>     RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static int signals[]);
>>>>>>>     RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static sig_atomic_t volatile
>>>>>>> volatile sigreq[]);
>>>>>>> -RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static struct fd_monitor
>>>>>>> fd_monitors[]);
>>>>>>> +RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static struct fd_monitor
>>>>>>> *allocated_fd_monitors);
>>>>>>>     RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static struct
>>>>>>> fd_monitor_list
>>>>>>> fd_monitor_tree[]);
>>>>>>>     RTEMS_LINKER_RWSET_CONTENT(bsd_prog_racoon, static struct sched
>>>>>>> scflushsa);
>>>>>>> diff --git a/ipsec-tools/src/racoon/session.c
>>>>>>> b/ipsec-tools/src/racoon/session.c
>>>>>>> index 65124c15e..90120c761 100644
>>>>>>> --- a/ipsec-tools/src/racoon/session.c
>>>>>>> +++ b/ipsec-tools/src/racoon/session.c
>>>>>>> @@ -65,6 +65,10 @@
>>>>>>>     #include <sys/stat.h>
>>>>>>>     #include <paths.h>
>>>>>>>     #include <err.h>
>>>>>>> +#ifdef __rtems__
>>>>>>> +#include <sys/param.h>
>>>>>>> +#include <rtems/libio_.h>
>>>>>>> +#endif /* __rtems__ */
>>>>>>>       #include <netinet/in.h>
>>>>>>>     #include <resolv.h>
>>>>>>> @@ -123,8 +127,16 @@ static void check_sigreq __P((void));
>>>>>>>     static void check_flushsa __P((void));
>>>>>>>     static int close_sockets __P((void));
>>>>>>>     +#ifndef __rtems__
>>>>>>>     static fd_set preset_mask, active_mask;
>>>>>>>     static struct fd_monitor fd_monitors[FD_SETSIZE];
>>>>>>> +#else /* __rtems__ */
>>>>>>> +static fd_set *allocated_preset_mask, *allocated_active_mask;
>>>>>>> +static struct fd_monitor *allocated_fd_monitors;
>>>>>>> +#define preset_mask (*allocated_preset_mask)
>>>>>>> +#define active_mask (*allocated_active_mask)
>>>>>>> +#define fd_monitors (allocated_fd_monitors)
>>>>>>> +#endif /* __rtems__ */
>>>>>>>     static TAILQ_HEAD(fd_monitor_list, fd_monitor)
>>>>>>> fd_monitor_tree[NUM_PRIORITIES];
>>>>>>>     static int nfds = 0;
>>>>>>>     @@ -134,7 +146,11 @@ static struct sched scflushsa =
>>>>>>> SCHED_INITIALIZER();
>>>>>>>     void
>>>>>>>     monitor_fd(int fd, int (*callback)(void *, int), void *ctx, int
>>>>>>> priority)
>>>>>>>     {
>>>>>>> +#ifndef __rtems__
>>>>>>>         if (fd < 0 || fd >= FD_SETSIZE) {
>>>>>>> +#else /* __rtems__ */
>>>>>>> +    if (fd < 0 || fd >= rtems_libio_number_iops) {
>>>>>>> +#endif /* __rtems__ */
>>>>>>>             plog(LLV_ERROR, LOCATION, NULL, "fd_set overrun");
>>>>>>>             exit(1);
>>>>>>>         }
>>>>>>> @@ -158,7 +174,11 @@ monitor_fd(int fd, int (*callback)(void *, int), void
>>>>>>> *ctx, int priority)
>>>>>>>     void
>>>>>>>     unmonitor_fd(int fd)
>>>>>>>     {
>>>>>>> +#ifndef __rtems__
>>>>>>>         if (fd < 0 || fd >= FD_SETSIZE) {
>>>>>>> +#else /* __rtems__ */
>>>>>>> +    if (fd < 0 || fd >= rtems_libio_number_iops) {
>>>>>>> +#endif /* __rtems__ */
>>>>>>>             plog(LLV_ERROR, LOCATION, NULL, "fd_set overrun");
>>>>>>>             exit(1);
>>>>>>>         }
>>>>>>> @@ -186,7 +206,22 @@ session(void)
>>>>>>>         struct fd_monitor *fdm;
>>>>>>>           nfds = 0;
>>>>>>> +#ifndef __rtems__
>>>>>>>         FD_ZERO(&preset_mask);
>>>>>>> +#else /* __rtems__ */
>>>>>>> +    allocated_preset_mask = calloc(sizeof(fd_set),
>>>>>>> +        howmany(rtems_libio_number_iops, sizeof(fd_set) * 8));
>>>>>>
>>>>>> Does `maxfiles` work here?
>>>>>>
>>>>>
>>>>> To be honest: I'm not sure.
>>>>>
>>>>> According to the comment in file.h:
>>>>>
>>>>> extern int maxfiles; /* kernel limit on number of open files */
>>>>>
>>>>
>>>> Yes.
>>>>
>>>>> Sounds like it _can_ be the same as the maximum file number but doesn't
>>>>> have to.
>>>>
>>>> I think we need to have them be the same value.
>>>>
>>>>> I didn't find where we implement it. It's declared as an extern int
>>>>> maxfiles but
>>>>> I didn't find any definition. I found it only in libbsd in
>>>>> freebsd/sys/kern/uipc_socket.c where it is defined like follows:
>>>>>
>>>>> #define maxfiles rtems_libio_number_iops
>>>>
>>>> Ah OK. I knew it had been assigned somewhere and yes it looks like it is local
>>>> to that file.
>>>>
>>>>>
>>>>> So question is: Where and how is maxfiles defined?
>>>>>
>>>>
>>>> I have provided a value in the rtemsbsd init file as part of the set og globals
>>>> we need to maintained.
>>>
>>> Somehow I missed that. Where can I find it?
>>
>> Again sorry, I was in a rush and I was not clear. I have add this in my new
>> changes for NFSv4 support and this is what I sorted out.
>>
> 
> Ah, OK. In that case I'll keep the rtems_libio_number_iops at the moment if it
> is OK for you.

Yes that is fine.

> 
>>>
>>>>
>>>>>>> +    if (allocated_preset_mask == NULL)
>>>>>>> +        errx(1, "failed to allocate preset_mask");
>>>>>>> +    allocated_active_mask = calloc(sizeof(fd_set),
>>>>>>> +        howmany(rtems_libio_number_iops, sizeof(fd_set) * 8));
>>>>>>> +    if (allocated_active_mask == NULL)
>>>>>>> +        errx(1, "failed to allocate active_mask");
>>>>>>> +    allocated_fd_monitors = calloc(
>>>>>>> +        rtems_libio_number_iops, sizeof(struct fd_monitor));
>>>>>>> +    if (allocated_fd_monitors == NULL)
>>>>>>> +        errx(1, "failed to allocate fd_monitors");
>>>>>>
>>>>>> At the core of this issue is the rotating fd allocation that we have in
>>>>>> libio. A
>>>>>> report from a FreeBSD machine I have is:
>>>>>>
>>>>>> $ sysctl -a | grep maxfiles
>>>>>> kern.maxfiles: 1037243
>>>>>> kern.maxfilesperproc: 933516
>>>>>>
>>>>>> I doubt a select process in FreeBSD needs a select array with that many
>>>>>> bits. I
>>>>>> have added similar code else where but I am wondering if this is really
>>>>>> what we
>>>>>> want to do. A side effect for us is the stack usage is not bounded and that
>>>>>> is a
>>>>>> problem. I think our newlib based max setting is too small.
>>>>>
>>>>> I think we have to distinguish between FreeBSD kernel space and user space.
>>>>> If I
>>>>> have seen it correctly, FreeBSD uses kqueues or maybe sometimes poll
>>>>> instead of
>>>>> select in kernel space most of the time. That avoids the problem with big file
>>>>> numbers.
>>>>
>>>> Yes kqueue is nice but we need to support existing code and this is a primary
>>>> role libbsd needs to perform. The lack is signals in libbsd is another
>>>> source of
>>>> issue.
>>>>
>>>
>>> I didn't want to say that we should not support select. I only wanted to say
>>> that FreeBSD most likely just avoids that problem in kernel space so that they
>>> don't have to support a select with 1037243 files. In user space they limit a
>>> select to 1024 files.
>>
>> Yes they do. My suggestion of us using 1024 is FreeBSD applications are fine
>> with this then they should be portable to RTEMS and libbsd and the need for us
>> to patch code is removed.
>>
> 
> 1024 for a select is quite much for an embedded system. It means that we will
> need 128 bytes of stack for each select call in the future. It's a quite big
> change.

The select call in LibBSD double this internally.

> 
> Another possibility would be 256 (32 bytes) like some other *BSD do.
> 
> Maybe we should start a new discussion thread about the right FD_SETSIZE so that
> more developers get involved?
> 
>>>>> Default for FreeBSD seems to be a FD_SETSIZE of 1024. See the Notes section in
>>>>> the select man page:
>>>>>
>>>>> https://www.freebsd.org/cgi/man.cgi?query=select&sektion=2&manpath=freebsd-release-ports#end
>>>>>
>>>>>
>>>>>
>>>>
>>>> Yes.
>>>>
>>>>> I think select is used in user space applications most of the time. I would
>>>>> guess that FreeBSD has some file number mapping between kernel and user space
>>>>> that would allow every application to open 1024 files.
>>>>
>>>> Yes. There are other checks and arrays in the kernel's select that handle the
>>>> size of 1024 or smaller. We should align ourselves with FreeBSD. A select call
>>>> uses a lot of stack.
>>>>
>>>
>>> I have no problem if we align more with FreeBSD. But I'm not sure how much
>>> effort that would be. And we have to be careful not to increase the memory usage
>>> for small targets too much.
>>
>> We need to find a path between that sort of space optimization and the
>> functionality libbsd brings. The select call is costly in terms of stack size as
>> the kern_select call has:
>>
>>          /*
>>           * The magic 2048 here is chosen to be just enough for FD_SETSIZE
>>           * infds with the new FD_SETSIZE of 1024, and more than enough for
>>           * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
>>           * of 256.
>>           */
>>          fd_mask s_selbits[howmany(2048, NFDBITS)];
>>
>> I do not know if this means we need to be more concerned about the size of less.
>>
> 
> Hm. They could have used FD_SETSIZE instead of the magic value with a comment...

It might pay to check the surrounding code a litte more before raising something
like that on the FreeBSds hackers list. There are subtle optimisations happening
to make this call perform.

> I think a big problem for RTEMS is that we have shared file numbers for all
> processes. This makes 1024 a lot less compared to starting from 0 for every
> process.

We are a single process so I do not follow. We are no different to other
processes. We have lived with a small file set size for years until libio was
changed to cycle over the fd numbers

>>>>>> FYI I have a major set of changes to libbsd that enables FreeBSD descriptor
>>>>>> support and moves the libio support to specific interfaces.
>>>>>
>>>>> OK. When do you plan to add that and to which branches?
>>>>
>>>> I am looking 6-freebsd-12 and master. Thses changes are a major rework and not
>>>> suitable for 5.
>>>
>>> I assumed that.
>>>
>>>>
>>>>> I would like to add this
>>>>> patch set to 5 sooner or later because it fixes a potential bug in racoon for
>>>>> that branch too.
>>>>
>>>> Sorry I missed this for 5. I will need to think about this on 5. I do not think
>>>> it is a problem but I am not sure about it on 6 and beyond. I would prefer we
>>>> consider a better fix for select that the limited 64 descriptors we currently
>>>> have.
>>>
>>> Maybe we should take a multi step approach:
>>>
>>> Use the workaround like I suggested for 5 and (at the moment) for master. A
>>> similar workaround is already used in quite some other locations in libbsd. So
>>> it's not a new method.
>>>
>>> Start a ticket and a discussion for a better solution.
>>
>> Sounds a good approach. I am fine with the changes because it is what we have to
>> do and this was not meant to halt the progress, rather I wanted to have the
>> discussion and raise the concerns.
>>
> 
> OK. Thanks. Like I said: Maybe we should create a ticket and start an extra
> discussion thread to get more ideas?
> 
>>> I'm not sure yet how that
>>> could look like. Some impressions from other systems:
>>>
>>> - FreeBSD, OpenBSD, NetBSD and DragonFly BSD are using the same method we use
>>> (or most likely the other way round) only with different FD_SETSIZEs:
>>>
>>> https://cgit.freebsd.org/src/tree/sys/sys/select.h
>>> https://github.com/NetBSD/src/blob/trunk/sys/sys/fd_set.h
>>> https://github.com/openbsd/src/blob/master/sys/sys/select.h
>>> https://gitweb.dragonflybsd.org/dragonfly.git/blob/HEAD:/sys/sys/_fd_set.h
>>>
>>> - Linux Kernels nolibc.h used in Tests does the same again (GPL or MIT):
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/include/nolibc/nolibc.h#n2522
>>>
>>>
>>>
>>> - Apple is the same again. I'm not sure about the Apple license therefore no
>>> link here so I don't spoil anyone beneath myself.
>>>
>>> There are others that could be worth a look like Haiku, Musl-Libc, and so on.
>>> But my general impression is that there is no much better solution at least in
>>> the bigger systems except for avoiding select or setting a bigger FD_SETSIZE.
>>> The most common ones seem to be 256 or 1024. But that only moves the problem a
>>> bit further away.
>>
>> Yes select does not offer a great range of solutions. As Sebastian has stated a
>> number of times kqueue does and this should be encouraged. And kqueue by passes
>> the signal issue that seems to acompany select, eg timer signals.
>>
>> I see the central quesiton being the role libbsd plays and I feel it is to aid
>> porting code to RTEMS and easily as possible. New code written for RTEMS should
>> avoid select.
>>
> 
> Agreed: Select is evil ;-)
> 
> Just kidding: It's clear that we have to handle application that use select too
> due to portability reasons. I'm just not sure what a good solution would be. It
> feels like every solution is a bad one: If we increase FD_SETSIZE to typical
> values on desktop systems (256 or 1024), we will need a lot of stack and only
> cover a part of the applications. If we change every location in the libraries
> it increases maintenance effort.

We also keep in mind if an application uses select _and_ signals it will be
broken on LibBSD. PTP2 was like that.

Chris


More information about the devel mailing list