[rtems-libbsd commit] Switch to a version of select that is closer to bsd's version.

Jennifer Averett jennifer at rtems.org
Mon Nov 26 15:55:12 UTC 2012


Module:    rtems-libbsd
Branch:    master
Commit:    0bde19eee050bbdc4511070cf14f48719e400c26
Changeset: http://git.rtems.org/rtems-libbsd/commit/?id=0bde19eee050bbdc4511070cf14f48719e400c26

Author:    Jennifer Averett <jennifer.averett at oarcorp.com>
Date:      Mon Nov 26 09:47:09 2012 -0600

Switch to a version of select that is closer to bsd's version.

---

 Makefile                                           |    5 +-
 freebsd-to-rtems.py                                |   19 +-
 freebsd-userspace/Makefile                         |   14 +-
 freebsd-userspace/commands/sbin/ping/ping.c        |    4 +
 .../lib/libc_r/uthread/uthread_kern.c              | 1157 ++++
 .../lib/libc_r/uthread/uthread_select.c            |  240 +
 freebsd-userspace/rtems/include/pthread_private.h  |  158 +
 freebsd-userspace/rtems/rtems-get_curthread.c      |   72 +
 freebsd-userspace/rtems/rtems-syspoll.c            |   30 +
 freebsd-userspace/rtems/rtems-uthread_init.c       |    8 +
 freebsd/kern/kern_condvar.c                        |  455 ++
 freebsd/kern/kern_descrip.c                        | 6912 ++++++++++++++++++++
 freebsd/kern/kern_mtxpool.c                        |  220 +
 freebsd/kern/kern_subr.c                           |    2 +-
 freebsd/kern/kern_time.c                           |    4 +-
 freebsd/kern/sys_generic.c                         | 1665 +++++
 freebsd/sys/buf.h                                  |  526 ++
 freebsd/sys/mqueue.h                               |   45 +
 freebsd/sys/proc.h                                 |    7 +
 freebsd/sys/tty.h                                  |  217 +
 freebsd/sys/ttydevsw.h                             |  169 +
 freebsd/sys/ttydisc.h                              |   86 +
 freebsd/sys/ttyhook.h                              |  147 +
 freebsd/sys/ttyqueue.h                             |  178 +
 freebsd/sys/user.h                                 |  414 ++
 rtemsbsd/src/rtems-bsd-condvar.c                   |   17 +
 rtemsbsd/src/rtems-bsd-thread.c                    |  110 +-
 27 files changed, 12823 insertions(+), 58 deletions(-)
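
With this change, select() on RTEMS goes through the libc_r-derived path added below: __select() in uthread_select.c converts the fd_sets into a pollfd array, __sys_poll() in rtems-syspoll.c packs the arguments, and the imported kern_poll() from freebsd/kern/sys_generic.c does the actual waiting. Application code reaches it either through the weak reference declared in uthread_select.c or, as ping.c does below, through a "#define select __select" shim. A minimal usage sketch (not part of the commit; the socket descriptor is assumed to come from the usual socket()/bind() setup elsewhere):

	#include <sys/select.h>
	#include <sys/time.h>

	int
	wait_readable(int sock_fd)
	{
		fd_set readfds;
		struct timeval tv = { 5, 0 };	/* five second timeout */

		FD_ZERO(&readfds);
		FD_SET(sock_fd, &readfds);

		/* > 0 if sock_fd is readable, 0 on timeout, -1 on error. */
		return select(sock_fd + 1, &readfds, NULL, NULL, &tv);
	}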

diff --git a/Makefile b/Makefile
index 3ac26da..8a58fd8 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,6 @@ CFLAGS += -I freebsd/$(RTEMS_CPU)/include
 CFLAGS += -I contrib/altq 
 CFLAGS += -I contrib/pf 
 CFLAGS += -I copied/rtemsbsd/$(RTEMS_CPU)/include 
-CFLAGS += -g 
 CFLAGS += -w 
 CFLAGS += -std=gnu99
 CFLAGS += -MT $@ -MD -MP -MF $(basename $@).d
@@ -38,7 +37,6 @@ C_FILES += rtemsbsd/src/rtems-bsd-lock.c
 C_FILES += rtemsbsd/src/rtems-bsd-log.c
 C_FILES += rtemsbsd/src/rtems-bsd-sx.c
 C_FILES += rtemsbsd/src/rtems-bsd-rwlock.c
-C_FILES += rtemsbsd/src/rtems-bsd-generic.c
 C_FILES += rtemsbsd/src/rtems-bsd-page.c
 C_FILES += rtemsbsd/src/rtems-bsd-panic.c
 C_FILES += rtemsbsd/src/rtems-bsd-synch.c
@@ -406,6 +404,9 @@ C_FILES += freebsd/netatalk/ddp_pcb.c
 C_FILES += freebsd/netatalk/ddp_usrreq.c
 C_FILES += freebsd/netatalk/at_proto.c
 C_FILES += freebsd/netatalk/ddp_output.c
+C_FILES += freebsd/kern/sys_generic.c
+C_FILES += freebsd/kern/kern_descrip.c
+C_FILES += freebsd/kern/kern_mtxpool.c
 ifeq ($(RTEMS_CPU), i386)
 C_FILES += freebsd/i386/pci/pci_bus.c
 C_FILES += freebsd/i386/i386/legacy.c
diff --git a/freebsd-to-rtems.py b/freebsd-to-rtems.py
index 9fb8788..81b1b28 100755
--- a/freebsd-to-rtems.py
+++ b/freebsd-to-rtems.py
@@ -659,7 +659,7 @@ rtems.addRTEMSSourceFiles(
 		'src/rtems-bsd-log.c',
 		'src/rtems-bsd-sx.c',
 		'src/rtems-bsd-rwlock.c',
-		'src/rtems-bsd-generic.c',
+		#'src/rtems-bsd-generic.c',
 		'src/rtems-bsd-page.c',
 		'src/rtems-bsd-panic.c',
 		'src/rtems-bsd-synch.c',
@@ -711,13 +711,13 @@ rtems.addEmptyHeaderFiles(
 		'sys/cpuset.h',
 		'sys/exec.h',
 		'sys/fail.h',
-		'sys/limits.h',
+		#'sys/limits.h',
 		'sys/sleepqueue.h',
 		'sys/namei.h',
 		'sys/_pthreadtypes.h',
 		#'sys/resourcevar.h',
 		'sys/sched.h',
-		'sys/select.h',
+		#'sys/select.h',
 		'sys/syscallsubr.h',
 		'sys/sysent.h',
 		'sys/syslimits.h',
@@ -725,7 +725,7 @@ rtems.addEmptyHeaderFiles(
 		'sys/stat.h',
 		#'sys/time.h',
 		'time.h',
-		'sys/timespec.h',
+		#'sys/timespec.h',
 		'sys/_timeval.h',
 		#'sys/vmmeter.h',
 		#'sys/vnode.h',
@@ -1346,6 +1346,14 @@ devNic.addHeaderFiles(
 		'netatalk/ddp_var.h',
 		'netatalk/phase2.h',
 		'sys/mman.h',
+		'sys/buf.h',
+		'sys/mqueue.h',
+		'sys/tty.h',
+		'sys/ttyqueue.h',
+		'sys/ttydisc.h',
+		'sys/ttydevsw.h',
+		'sys/ttyhook.h',
+		'sys/user.h',
 	]
 )
 
@@ -1412,6 +1420,9 @@ devNic.addSourceFiles(
 		'netatalk/ddp_usrreq.c',
 		'netatalk/at_proto.c',
 		'netatalk/ddp_output.c',
+		'kern/sys_generic.c',
+		'kern/kern_descrip.c',
+		'kern/kern_mtxpool.c',
 	]
 )
 
diff --git a/freebsd-userspace/Makefile b/freebsd-userspace/Makefile
index cef8546..18dde49 100644
--- a/freebsd-userspace/Makefile
+++ b/freebsd-userspace/Makefile
@@ -160,6 +160,9 @@ C_FILES += lib/libipsec/ipsec_dump_policy.c
 C_FILES += lib/libipsec/policy_token.c
 C_FILES += lib/libipsec/policy_parse.c
 
+C_FILES += lib/libc_r/uthread/uthread_select.c
+C_FILES += lib/libc_r/uthread/uthread_kern.c
+
 # RTEMS Specific Files
 # C_FILES += rtems/rtems-net-setup.c
 C_FILES += rtems/syslog.c
@@ -171,6 +174,9 @@ C_FILES += rtems/rtems-uthread_main_np.c
 C_FILES += rtems/rtems-uthread_kevent.c
 C_FILES += rtems/rtems-uthread_kqueue.c
 C_FILES += rtems/rtems-shell.c
+C_FILES += rtems/rtems-syspoll.c
+C_FILES += rtems/rtems-uthread_init.c
+C_FILES += rtems/rtems-get_curthread.c
 
 # ping command sources
 C_FILES += commands/sbin/ping/ping.c
@@ -278,14 +284,14 @@ GEN_FILES += commands/sbin/route/keywords.h
 # lib/libc/net
 GEN_FILES += lib/libc/net/nslexer.c
 GEN_FILES += lib/libc/net/nsparser.c
-EXTRA_CLEAN = lib/libc/net/nsparser.i
-EXTRA_CLEAN += lib/libc/net/y.tab.h
+CLEAN_FILES = lib/libc/net/nsparser.i
+CLEAN_FILES += lib/libc/net/y.tab.h
 
 # lib/libipsec
 GEN_FILES += lib/libipsec/policy_token.c
 GEN_FILES += lib/libipsec/policy_parse.c
-EXTRA_CLEAN += lib/libipsec/policy_parse.i
-EXTRA_CLEAN += lib/libipsec/y.tab.h
+CLEAN_FILES += lib/libipsec/policy_parse.i
+CLEAN_FILES += lib/libipsec/y.tab.h
 
 all: $(LIB) 
 
diff --git a/freebsd-userspace/commands/sbin/ping/ping.c b/freebsd-userspace/commands/sbin/ping/ping.c
index 4516253..34d8632 100644
--- a/freebsd-userspace/commands/sbin/ping/ping.c
+++ b/freebsd-userspace/commands/sbin/ping/ping.c
@@ -106,6 +106,10 @@ __FBSDID("$FreeBSD$");
 #include <sysexits.h>
 #include <unistd.h>
 
+#ifdef __rtems__
+#define select  __select
+#endif
+
 #define	INADDR_LEN	((int)sizeof(in_addr_t))
 #define	TIMEVAL_LEN	((int)sizeof(struct tv32))
 #define	MASK_LEN	(ICMP_MASKLEN - ICMP_MINLEN)
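
The one-line shim above is how an individual source file opts into the new path on RTEMS. A sketch of the same pattern for any other application file (illustrative only):

	#ifdef __rtems__
	#define select	__select	/* route select() calls to uthread_select.c */
	#endif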
diff --git a/freebsd-userspace/lib/libc_r/uthread/uthread_kern.c b/freebsd-userspace/lib/libc_r/uthread/uthread_kern.c
new file mode 100644
index 0000000..a8c8720
--- /dev/null
+++ b/freebsd-userspace/lib/libc_r/uthread/uthread_kern.c
@@ -0,0 +1,1157 @@
+/*
+ * Copyright (c) 1995-1998 John Birrell <jb at cimlogic.com.au>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+#include <errno.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#ifdef __rtems__
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/timespec.h>
+#else
+#include <sys/signalvar.h>
+#endif
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#ifndef __rtems__
+#include <sys/syscall.h>
+#endif
+#include <fcntl.h>
+#include <pthread.h>
+#include "pthread_private.h"
+
+#ifdef __rtems__
+#include <rtems.h>
+#endif
+
+/* #define DEBUG_THREAD_KERN */
+#ifdef DEBUG_THREAD_KERN
+#define DBG_MSG		stdout_debug
+#else
+#define DBG_MSG(x...)
+#endif
+
+/* Static function prototype definitions: */
+static void
+thread_kern_poll(int wait_reqd);
+
+static void
+dequeue_signals(void);
+
+static inline void
+thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);
+
+/* Static variables: */
+static int	last_tick = 0;
+static int	called_from_handler = 0;
+
+#ifndef __rtems__
+/*
+ * This is called when a signal handler finishes and wants to
+ * return to a previous frame.
+ */
+void
+_thread_kern_sched_frame(struct pthread_signal_frame *psf)
+{
+	struct pthread	*curthread = _get_curthread();
+
+	/*
+	 * Flag the pthread kernel as executing scheduler code
+	 * to avoid a signal from interrupting this execution and
+	 * corrupting the (soon-to-be) current frame.
+	 */
+	_thread_kern_in_sched = 1;
+
+	/* Restore the signal frame: */
+	_thread_sigframe_restore(curthread, psf);
+
+	/* The signal mask was restored; check for any pending signals: */
+	curthread->check_pending = 1;
+
+	/* Switch to the thread scheduler: */
+	___longjmp(_thread_kern_sched_jb, 1);
+}
+
+
+void
+_thread_kern_sched(ucontext_t *ucp)
+{
+	struct pthread	*curthread = _get_curthread();
+
+	/*
+	 * Flag the pthread kernel as executing scheduler code
+	 * to avoid a scheduler signal from interrupting this
+	 * execution and calling the scheduler again.
+	 */
+	_thread_kern_in_sched = 1;
+
+	/* Check if this function was called from the signal handler: */
+	if (ucp != NULL) {
+		called_from_handler = 1;
+		DBG_MSG("Entering scheduler due to signal\n");
+	}
+
+	/* Save the state of the current thread: */
+	if (_setjmp(curthread->ctx.jb) != 0) {
+		DBG_MSG("Returned from ___longjmp, thread %p\n",
+		    curthread);
+		/*
+		 * This point is reached when a longjmp() is called
+		 * to restore the state of a thread.
+		 *
+		 * This is the normal way out of the scheduler.
+		 */
+		_thread_kern_in_sched = 0;
+
+		if (curthread->sig_defer_count == 0) {
+			if (((curthread->cancelflags &
+			    PTHREAD_AT_CANCEL_POINT) == 0) &&
+			    ((curthread->cancelflags &
+			    PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
+				/*
+				 * Cancellations override signals.
+				 *
+				 * Stick a cancellation point at the
+				 * start of each async-cancellable
+				 * thread's resumption.
+				 *
+				 * We allow threads woken at cancel
+				 * points to do their own checks.
+				 */
+				pthread_testcancel();
+		}
+
+		if (_sched_switch_hook != NULL) {
+			/* Run the installed switch hook: */
+			thread_run_switch_hook(_last_user_thread, curthread);
+		}
+		if (ucp == NULL)
+			return;
+		else {
+			/*
+			 * Set the process signal mask in the context; it
+			 * could have changed by the handler.
+			 */
+			ucp->uc_sigmask = _process_sigmask;
+
+			/* Resume the interrupted thread: */
+			__sys_sigreturn(ucp);
+		}
+	}
+	/* Switch to the thread scheduler: */
+	___longjmp(_thread_kern_sched_jb, 1);
+}
+
+void
+_thread_kern_sched_sig(void)
+{
+	struct pthread	*curthread = _get_curthread();
+
+	curthread->check_pending = 1;
+	_thread_kern_sched(NULL);
+}
+
+
+void
+_thread_kern_scheduler(void)
+{
+	struct timespec	ts;
+	struct timeval	tv;
+	struct pthread	*curthread = _get_curthread();
+	pthread_t	pthread, pthread_h;
+	unsigned int	current_tick;
+	int		add_to_prioq;
+
+	/* If the currently running thread is a user thread, save it: */
+	if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
+		_last_user_thread = curthread;
+
+	if (called_from_handler != 0) {
+		called_from_handler = 0;
+
+		/*
+		 * We were called from a signal handler; restore the process
+		 * signal mask.
+		 */
+		if (__sys_sigprocmask(SIG_SETMASK,
+		    &_process_sigmask, NULL) != 0)
+			PANIC("Unable to restore process mask after signal");
+	}
+
+	/*
+	 * Enter a scheduling loop that finds the next thread that is
+	 * ready to run. This loop completes when there are no more threads
+	 * in the global list or when a thread has its state restored by
+	 * either a sigreturn (if the state was saved as a sigcontext) or a
+	 * longjmp (if the state was saved by a setjmp).
+	 */
+	while (!(TAILQ_EMPTY(&_thread_list))) {
+		/* Get the current time of day: */
+		GET_CURRENT_TOD(tv);
+		TIMEVAL_TO_TIMESPEC(&tv, &ts);
+		current_tick = _sched_ticks;
+
+		/*
+		 * Protect the scheduling queues from access by the signal
+		 * handler.
+		 */
+		_queue_signals = 1;
+		add_to_prioq = 0;
+
+		if (curthread != &_thread_kern_thread) {
+			/*
+			 * This thread no longer needs to yield the CPU.
+			 */
+			curthread->yield_on_sig_undefer = 0;
+	
+			if (curthread->state != PS_RUNNING) {
+				/*
+				 * Save the current time as the time that the
+				 * thread became inactive:
+				 */
+				curthread->last_inactive = (long)current_tick;
+				if (curthread->last_inactive <
+				    curthread->last_active) {
+					/* Account for a rollover: */
+					curthread->last_inactive =+
+					    UINT_MAX + 1;
+				}
+			}
+
+			/*
+			 * Place the currently running thread into the
+			 * appropriate queue(s).
+			 */
+			switch (curthread->state) {
+			case PS_DEAD:
+			case PS_STATE_MAX: /* to silence -Wall */
+			case PS_SUSPENDED:
+				/*
+				 * Dead and suspended threads are not placed
+				 * in any queue:
+				 */
+				break;
+
+			case PS_RUNNING:
+				/*
+				 * Runnable threads can't be placed in the
+				 * priority queue until after waiting threads
+				 * are polled (to preserve round-robin
+				 * scheduling).
+				 */
+				add_to_prioq = 1;
+				break;
+
+			/*
+			 * States which do not depend on file descriptor I/O
+			 * operations or timeouts:
+			 */
+			case PS_DEADLOCK:
+			case PS_FDLR_WAIT:
+			case PS_FDLW_WAIT:
+			case PS_FILE_WAIT:
+			case PS_JOIN:
+			case PS_MUTEX_WAIT:
+			case PS_SIGSUSPEND:
+			case PS_SIGTHREAD:
+			case PS_SIGWAIT:
+			case PS_WAIT_WAIT:
+				/* No timeouts for these states: */
+				curthread->wakeup_time.tv_sec = -1;
+				curthread->wakeup_time.tv_nsec = -1;
+
+				/* Restart the time slice: */
+				curthread->slice_usec = -1;
+
+				/* Insert into the waiting queue: */
+				PTHREAD_WAITQ_INSERT(curthread);
+				break;
+
+			/* States which can timeout: */
+			case PS_COND_WAIT:
+			case PS_SLEEP_WAIT:
+				/* Restart the time slice: */
+				curthread->slice_usec = -1;
+
+				/* Insert into the waiting queue: */
+				PTHREAD_WAITQ_INSERT(curthread);
+				break;
+	
+			/* States that require periodic work: */
+			case PS_SPINBLOCK:
+				/* No timeouts for this state: */
+				curthread->wakeup_time.tv_sec = -1;
+				curthread->wakeup_time.tv_nsec = -1;
+
+				/* Increment spinblock count: */
+				_spinblock_count++;
+
+				/* FALLTHROUGH */
+			case PS_FDR_WAIT:
+			case PS_FDW_WAIT:
+			case PS_POLL_WAIT:
+			case PS_SELECT_WAIT:
+				/* Restart the time slice: */
+				curthread->slice_usec = -1;
+	
+				/* Insert into the waiting queue: */
+				PTHREAD_WAITQ_INSERT(curthread);
+	
+				/* Insert into the work queue: */
+				PTHREAD_WORKQ_INSERT(curthread);
+				break;
+			}
+
+			/*
+			 * Are there pending signals for this thread?
+			 *
+			 * This check has to be performed after the thread
+			 * has been placed in the queue(s) appropriate for
+			 * its state.  The process of adding pending signals
+			 * can change a threads state, which in turn will
+			 * attempt to add or remove the thread from any
+			 * scheduling queue to which it belongs.
+			 */
+			if (curthread->check_pending != 0) {
+				curthread->check_pending = 0;
+				_thread_sig_check_pending(curthread);
+			}
+		}
+
+		/*
+		 * Avoid polling file descriptors if there are none
+		 * waiting:
+		 */
+		if (TAILQ_EMPTY(&_workq) != 0) {
+		}
+		/*
+		 * Poll file descriptors only if a new scheduling signal
+		 * has occurred or if we have no more runnable threads.
+		 */
+		else if (((current_tick = _sched_ticks) != last_tick) ||
+		    ((curthread->state != PS_RUNNING) &&
+		    (PTHREAD_PRIOQ_FIRST() == NULL))) {
+			/* Unprotect the scheduling queues: */
+			_queue_signals = 0;
+
+			/*
+			 * Poll file descriptors to update the state of threads
+			 * waiting on file I/O where data may be available:
+			 */
+			thread_kern_poll(0);
+
+			/* Protect the scheduling queues: */
+			_queue_signals = 1;
+		}
+		last_tick = current_tick;
+
+		/*
+		 * Wake up threads that have timedout.  This has to be
+		 * done after polling in case a thread does a poll or
+		 * select with zero time.
+		 */
+		PTHREAD_WAITQ_SETACTIVE();
+		while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
+		    (pthread->wakeup_time.tv_sec != -1) &&
+		    (((pthread->wakeup_time.tv_sec == 0) &&
+		    (pthread->wakeup_time.tv_nsec == 0)) ||
+		    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
+		    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
+		    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
+			switch (pthread->state) {
+			case PS_POLL_WAIT:
+			case PS_SELECT_WAIT:
+				/* Return zero file descriptors ready: */
+				pthread->data.poll_data->nfds = 0;
+				/* FALLTHROUGH */
+			default:
+				/*
+				 * Remove this thread from the waiting queue
+				 * (and work queue if necessary) and place it
+				 * in the ready queue.
+				 */
+				PTHREAD_WAITQ_CLEARACTIVE();
+				if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
+					PTHREAD_WORKQ_REMOVE(pthread);
+				PTHREAD_NEW_STATE(pthread, PS_RUNNING);
+				PTHREAD_WAITQ_SETACTIVE();
+				break;
+			}
+			/*
+			 * Flag the timeout in the thread structure:
+			 */
+			pthread->timeout = 1;
+		}
+		PTHREAD_WAITQ_CLEARACTIVE();
+
+		/*
+		 * Check to see if the current thread needs to be added
+		 * to the priority queue:
+		 */
+		if (add_to_prioq != 0) {
+			/*
+			 * Save the current time as the time that the
+			 * thread became inactive:
+			 */
+			current_tick = _sched_ticks;
+			curthread->last_inactive = (long)current_tick;
+			if (curthread->last_inactive <
+			    curthread->last_active) {
+				/* Account for a rollover: */
+				curthread->last_inactive =+ UINT_MAX + 1;
+			}
+
+			if ((curthread->slice_usec != -1) &&
+		 	   (curthread->attr.sched_policy != SCHED_FIFO)) {
+				/*
+				 * Accumulate the number of microseconds for
+				 * which the current thread has run:
+				 */
+				curthread->slice_usec +=
+				    (curthread->last_inactive -
+				    curthread->last_active) *
+				    (long)_clock_res_usec;
+				/* Check for time quantum exceeded: */
+				if (curthread->slice_usec > TIMESLICE_USEC)
+					curthread->slice_usec = -1;
+			}
+
+			if (curthread->slice_usec == -1) {
+				/*
+				 * The thread exceeded its time
+				 * quantum or it yielded the CPU;
+				 * place it at the tail of the
+				 * queue for its priority.
+				 */
+				PTHREAD_PRIOQ_INSERT_TAIL(curthread);
+			} else {
+				/*
+				 * The thread hasn't exceeded its
+				 * interval.  Place it at the head
+				 * of the queue for its priority.
+				 */
+				PTHREAD_PRIOQ_INSERT_HEAD(curthread);
+			}
+		}
+
+		/*
+		 * Get the highest priority thread in the ready queue.
+		 */
+		pthread_h = PTHREAD_PRIOQ_FIRST();
+
+		/* Check if there are no threads ready to run: */
+		if (pthread_h == NULL) {
+			/*
+			 * Lock the pthread kernel by changing the pointer to
+			 * the running thread to point to the global kernel
+			 * thread structure:
+			 */
+			_set_curthread(&_thread_kern_thread);
+			curthread = &_thread_kern_thread;
+
+			DBG_MSG("No runnable threads, using kernel thread %p\n",
+			    curthread);
+
+			/* Unprotect the scheduling queues: */
+			_queue_signals = 0;
+
+			/*
+			 * There are no threads ready to run, so wait until
+			 * something happens that changes this condition:
+			 */
+			thread_kern_poll(1);
+
+			/*
+			 * This process' usage will likely be very small
+			 * while waiting in a poll.  Since the scheduling
+			 * clock is based on the profiling timer, it is
+			 * unlikely that the profiling timer will fire
+			 * and update the time of day.  To account for this,
+			 * get the time of day after polling with a timeout.
+			 */
+			gettimeofday((struct timeval *) &_sched_tod, NULL);
+			
+			/* Check once more for a runnable thread: */
+			_queue_signals = 1;
+			pthread_h = PTHREAD_PRIOQ_FIRST();
+			_queue_signals = 0;
+		}
+
+		if (pthread_h != NULL) {
+			/* Remove the thread from the ready queue: */
+			PTHREAD_PRIOQ_REMOVE(pthread_h);
+
+			/* Unprotect the scheduling queues: */
+			_queue_signals = 0;
+
+			/*
+			 * Check for signals queued while the scheduling
+			 * queues were protected:
+			 */
+			while (_sigq_check_reqd != 0) {
+				/* Clear before handling queued signals: */
+				_sigq_check_reqd = 0;
+
+				/* Protect the scheduling queues again: */
+				_queue_signals = 1;
+
+				dequeue_signals();
+
+				/*
+				 * Check for a higher priority thread that
+				 * became runnable due to signal handling.
+				 */
+				if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
+				    (pthread->active_priority > pthread_h->active_priority)) {
+					/* Remove the thread from the ready queue: */
+					PTHREAD_PRIOQ_REMOVE(pthread);
+
+					/*
+					 * Insert the lower priority thread
+					 * at the head of its priority list:
+					 */
+					PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);
+
+					/* There's a new thread in town: */
+					pthread_h = pthread;
+				}
+
+				/* Unprotect the scheduling queues: */
+				_queue_signals = 0;
+			}
+
+			/* Make the selected thread the current thread: */
+			_set_curthread(pthread_h);
+			curthread = pthread_h;
+
+			/*
+			 * Save the current time as the time that the thread
+			 * became active:
+			 */
+			current_tick = _sched_ticks;
+			curthread->last_active = (long) current_tick;
+
+			/*
+			 * Check if this thread is running for the first time
+			 * or running again after using its full time slice
+			 * allocation:
+			 */
+			if (curthread->slice_usec == -1) {
+				/* Reset the accumulated time slice period: */
+				curthread->slice_usec = 0;
+			}
+
+			/*
+			 * If we had a context switch, run any
+			 * installed switch hooks.
+			 */
+			if ((_sched_switch_hook != NULL) &&
+			    (_last_user_thread != curthread)) {
+				thread_run_switch_hook(_last_user_thread,
+				    curthread);
+			}
+			/*
+			 * Continue the thread at its current frame:
+			 */
+#if NOT_YET
+			_setcontext(&curthread->ctx.uc);
+#else
+			___longjmp(curthread->ctx.jb, 1);
+#endif
+			/* This point should not be reached. */
+			PANIC("Thread has returned from sigreturn or longjmp");
+		}
+	}
+
+	/* There are no more threads, so exit this process: */
+	exit(0);
+}
+#endif /* __rtems__ */
+
+void
+_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
+{
+#ifdef __rtems__
+	rtems_task_wake_after(RTEMS_YIELD_PROCESSOR);
+#else
+	
+	struct pthread	*curthread = _get_curthread();
+
+	/*
+	 * Flag the pthread kernel as executing scheduler code
+	 * to avoid a scheduler signal from interrupting this
+	 * execution and calling the scheduler again.
+	 */
+	_thread_kern_in_sched = 1;
+
+	/*
+	 * Prevent the signal handler from fiddling with this thread
+	 * before its state is set and is placed into the proper queue.
+	 */
+	_queue_signals = 1;
+
+	/* Change the state of the current thread: */
+	curthread->state = state;
+	curthread->fname = fname;
+	curthread->lineno = lineno;
+
+	/* Schedule the next thread that is ready: */
+	_thread_kern_sched(NULL);
+#endif /* __rtems__ */
+}
+
+#ifndef __rtems__
+void
+_thread_kern_sched_state_unlock(enum pthread_state state,
+    spinlock_t *lock, char *fname, int lineno)
+{
+	struct pthread	*curthread = _get_curthread();
+
+	/*
+	 * Flag the pthread kernel as executing scheduler code
+	 * to avoid a scheduler signal from interrupting this
+	 * execution and calling the scheduler again.
+	 */
+	_thread_kern_in_sched = 1;
+
+	/*
+	 * Prevent the signal handler from fiddling with this thread
+	 * before its state is set and it is placed into the proper
+	 * queue(s).
+	 */
+	_queue_signals = 1;
+
+	/* Change the state of the current thread: */
+	curthread->state = state;
+	curthread->fname = fname;
+	curthread->lineno = lineno;
+
+	_SPINUNLOCK(lock);
+
+	/* Schedule the next thread that is ready: */
+	_thread_kern_sched(NULL);
+}
+
+static void
+thread_kern_poll(int wait_reqd)
+{
+	int             count = 0;
+	int             i, found;
+	int		kern_pipe_added = 0;
+	int             nfds = 0;
+	int		timeout_ms = 0;
+	struct pthread	*pthread;
+	struct timespec ts;
+	struct timeval  tv;
+
+	/* Check if the caller wants to wait: */
+	if (wait_reqd == 0) {
+		timeout_ms = 0;
+	}
+	else {
+		/* Get the current time of day: */
+		GET_CURRENT_TOD(tv);
+		TIMEVAL_TO_TIMESPEC(&tv, &ts);
+
+		_queue_signals = 1;
+		pthread = TAILQ_FIRST(&_waitingq);
+		_queue_signals = 0;
+
+		if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
+			/*
+			 * Either there are no threads in the waiting queue,
+			 * or there are no threads that can timeout.
+			 */
+			timeout_ms = INFTIM;
+		}
+		else if (pthread->wakeup_time.tv_sec - ts.tv_sec > 60000)
+			/* Limit maximum timeout to prevent rollover. */
+			timeout_ms = 60000;
+		else {
+			/*
+			 * Calculate the time left for the next thread to
+			 * timeout:
+			 */
+			timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
+			    1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
+			    1000000);
+			/*
+			 * Don't allow negative timeouts:
+			 */
+			if (timeout_ms < 0)
+				timeout_ms = 0;
+		}
+	}
+			
+	/* Protect the scheduling queues: */
+	_queue_signals = 1;
+
+	/*
+	 * Check to see if the signal queue needs to be walked to look
+	 * for threads awoken by a signal while in the scheduler.
+	 */
+	if (_sigq_check_reqd != 0) {
+		/* Reset flag before handling queued signals: */
+		_sigq_check_reqd = 0;
+
+		dequeue_signals();
+	}
+
+	/*
+	 * Check for a thread that became runnable due to a signal:
+	 */
+	if (PTHREAD_PRIOQ_FIRST() != NULL) {
+		/*
+		 * Since there is at least one runnable thread,
+		 * disable the wait.
+		 */
+		timeout_ms = 0;
+	}
+
+	/*
+	 * Form the poll table:
+	 */
+	nfds = 0;
+	if (timeout_ms != 0) {
+		/* Add the kernel pipe to the poll table: */
+		_thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
+		_thread_pfd_table[nfds].events = POLLRDNORM;
+		_thread_pfd_table[nfds].revents = 0;
+		nfds++;
+		kern_pipe_added = 1;
+	}
+
+	PTHREAD_WAITQ_SETACTIVE();
+	TAILQ_FOREACH(pthread, &_workq, qe) {
+		switch (pthread->state) {
+		case PS_SPINBLOCK:
+			/*
+			 * If the lock is available, let the thread run.
+			 */
+			if (pthread->data.spinlock->access_lock == 0) {
+				PTHREAD_WAITQ_CLEARACTIVE();
+				PTHREAD_WORKQ_REMOVE(pthread);
+				PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+				PTHREAD_WAITQ_SETACTIVE();
+				/* One less thread in a spinblock state: */
+				_spinblock_count--;
+				/*
+				 * Since there is at least one runnable
+				 * thread, disable the wait.
+				 */
+				timeout_ms = 0;
+			}
+			break;
+
+		/* File descriptor read wait: */
+		case PS_FDR_WAIT:
+			/* Limit number of polled files to table size: */
+			if (nfds < _thread_dtablesize) {
+				_thread_pfd_table[nfds].events = POLLRDNORM;
+				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
+				nfds++;
+			}
+			break;
+
+		/* File descriptor write wait: */
+		case PS_FDW_WAIT:
+			/* Limit number of polled files to table size: */
+			if (nfds < _thread_dtablesize) {
+				_thread_pfd_table[nfds].events = POLLWRNORM;
+				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
+				nfds++;
+			}
+			break;
+
+		/* File descriptor poll or select wait: */
+		case PS_POLL_WAIT:
+		case PS_SELECT_WAIT:
+			/* Limit number of polled files to table size: */
+			if (pthread->data.poll_data->nfds + nfds <
+			    _thread_dtablesize) {
+				for (i = 0; i < pthread->data.poll_data->nfds; i++) {
+					_thread_pfd_table[nfds + i].fd =
+					    pthread->data.poll_data->fds[i].fd;
+					_thread_pfd_table[nfds + i].events =
+					    pthread->data.poll_data->fds[i].events;
+				}
+				nfds += pthread->data.poll_data->nfds;
+			}
+			break;
+
+		/* Other states do not depend on file I/O. */
+		default:
+			break;
+		}
+	}
+	PTHREAD_WAITQ_CLEARACTIVE();
+
+	/*
+	 * Wait for a file descriptor to be ready for read, write, or
+	 * an exception, or a timeout to occur:
+	 */
+	count = __sys_poll(_thread_pfd_table, nfds, timeout_ms);
+
+	if (kern_pipe_added != 0)
+		/*
+		 * Remove the pthread kernel pipe file descriptor
+		 * from the pollfd table:
+		 */
+		nfds = 1;
+	else
+		nfds = 0;
+
+	/*
+	 * Check if it is possible that there are bytes in the kernel
+	 * read pipe waiting to be read:
+	 */
+	if (count < 0 || ((kern_pipe_added != 0) &&
+	    (_thread_pfd_table[0].revents & POLLRDNORM))) {
+		/*
+		 * If the kernel read pipe was included in the
+		 * count:
+		 */
+		if (count > 0) {
+			/* Decrement the count of file descriptors: */
+			count--;
+		}
+
+		if (_sigq_check_reqd != 0) {
+			/* Reset flag before handling signals: */
+			_sigq_check_reqd = 0;
+
+			dequeue_signals();
+		}
+	}
+
+	/*
+	 * Check if any file descriptors are ready:
+	 */
+	if (count > 0) {
+		/*
+		 * Enter a loop to look for threads waiting on file
+		 * descriptors that are flagged as available by the
+		 * _poll syscall:
+		 */
+		PTHREAD_WAITQ_SETACTIVE();
+		TAILQ_FOREACH(pthread, &_workq, qe) {
+			switch (pthread->state) {
+			case PS_SPINBLOCK:
+				/*
+				 * If the lock is available, let the thread run.
+				 */
+				if (pthread->data.spinlock->access_lock == 0) {
+					PTHREAD_WAITQ_CLEARACTIVE();
+					PTHREAD_WORKQ_REMOVE(pthread);
+					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+					PTHREAD_WAITQ_SETACTIVE();
+
+					/*
+					 * One less thread in a spinblock state:
+					 */
+					_spinblock_count--;
+				}
+				break;
+
+			/* File descriptor read wait: */
+			case PS_FDR_WAIT:
+				if ((nfds < _thread_dtablesize) &&
+				    (_thread_pfd_table[nfds].revents
+				       & (POLLRDNORM|POLLERR|POLLHUP|POLLNVAL))
+				      != 0) {
+					PTHREAD_WAITQ_CLEARACTIVE();
+					PTHREAD_WORKQ_REMOVE(pthread);
+					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+					PTHREAD_WAITQ_SETACTIVE();
+				}
+				nfds++;
+				break;
+
+			/* File descriptor write wait: */
+			case PS_FDW_WAIT:
+				if ((nfds < _thread_dtablesize) &&
+				    (_thread_pfd_table[nfds].revents
+				       & (POLLWRNORM|POLLERR|POLLHUP|POLLNVAL))
+				      != 0) {
+					PTHREAD_WAITQ_CLEARACTIVE();
+					PTHREAD_WORKQ_REMOVE(pthread);
+					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+					PTHREAD_WAITQ_SETACTIVE();
+				}
+				nfds++;
+				break;
+
+			/* File descriptor poll or select wait: */
+			case PS_POLL_WAIT:
+			case PS_SELECT_WAIT:
+				if (pthread->data.poll_data->nfds + nfds <
+				    _thread_dtablesize) {
+					/*
+					 * Enter a loop looking for I/O
+					 * readiness:
+					 */
+					found = 0;
+					for (i = 0; i < pthread->data.poll_data->nfds; i++) {
+						if (_thread_pfd_table[nfds + i].revents != 0) {
+							pthread->data.poll_data->fds[i].revents =
+							    _thread_pfd_table[nfds + i].revents;
+							found++;
+						}
+					}
+
+					/* Increment before destroying: */
+					nfds += pthread->data.poll_data->nfds;
+
+					if (found != 0) {
+						pthread->data.poll_data->nfds = found;
+						PTHREAD_WAITQ_CLEARACTIVE();
+						PTHREAD_WORKQ_REMOVE(pthread);
+						PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+						PTHREAD_WAITQ_SETACTIVE();
+					}
+				}
+				else
+					nfds += pthread->data.poll_data->nfds;
+				break;
+
+			/* Other states do not depend on file I/O. */
+			default:
+				break;
+			}
+		}
+		PTHREAD_WAITQ_CLEARACTIVE();
+	}
+	else if (_spinblock_count != 0) {
+		/*
+		 * Enter a loop to look for threads waiting on a spinlock
+		 * that is now available.
+		 */
+		PTHREAD_WAITQ_SETACTIVE();
+		TAILQ_FOREACH(pthread, &_workq, qe) {
+			if (pthread->state == PS_SPINBLOCK) {
+				/*
+				 * If the lock is available, let the thread run.
+				 */
+				if (pthread->data.spinlock->access_lock == 0) {
+					PTHREAD_WAITQ_CLEARACTIVE();
+					PTHREAD_WORKQ_REMOVE(pthread);
+					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+					PTHREAD_WAITQ_SETACTIVE();
+
+					/*
+					 * One less thread in a spinblock state:
+					 */
+					_spinblock_count--;
+				}
+			}
+		}
+		PTHREAD_WAITQ_CLEARACTIVE();
+	}
+
+	/* Unprotect the scheduling queues: */
+	_queue_signals = 0;
+
+	while (_sigq_check_reqd != 0) {
+		/* Handle queued signals: */
+		_sigq_check_reqd = 0;
+
+		/* Protect the scheduling queues: */
+		_queue_signals = 1;
+
+		dequeue_signals();
+
+		/* Unprotect the scheduling queues: */
+		_queue_signals = 0;
+	}
+}
+#endif /* __rtems__ */
+
+void
+_thread_kern_set_timeout(const struct timespec * timeout)
+{
+	struct pthread	*curthread = _get_curthread();
+	struct timespec current_time;
+	struct timeval  tv;
+
+	/* Reset the timeout flag for the running thread: */
+	curthread->timeout = 0;
+
+	/* Check if the thread is to wait forever: */
+	if (timeout == NULL) {
+		/*
+		 * Set the wakeup time to something that can be recognised as
+		 * different to an actual time of day:
+		 */
+		curthread->wakeup_time.tv_sec = -1;
+		curthread->wakeup_time.tv_nsec = -1;
+	}
+	/* Check if no waiting is required: */
+	else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
+		/* Set the wake up time to 'immediately': */
+		curthread->wakeup_time.tv_sec = 0;
+		curthread->wakeup_time.tv_nsec = 0;
+	} else {
+		/* Get the current time: */
+		GET_CURRENT_TOD(tv);
+		TIMEVAL_TO_TIMESPEC(&tv, &current_time);
+
+		/* Calculate the time for the current thread to wake up: */
+		curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
+		curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;
+
+		/* Check if the nanosecond field needs to wrap: */
+		if (curthread->wakeup_time.tv_nsec >= 1000000000) {
+			/* Wrap the nanosecond field: */
+			curthread->wakeup_time.tv_sec += 1;
+			curthread->wakeup_time.tv_nsec -= 1000000000;
+		}
+	}
+}
+
+#ifndef __rtems__
+void
+_thread_kern_sig_defer(void)
+{
+	struct pthread	*curthread = _get_curthread();
+
+	/* Allow signal deferral to be recursive. */
+	curthread->sig_defer_count++;
+}
+
+void
+_thread_kern_sig_undefer(void)
+{
+	struct pthread	*curthread = _get_curthread();
+
+	/*
+	 * Perform checks to yield only if we are about to undefer
+	 * signals.
+	 */
+	if (curthread->sig_defer_count > 1) {
+		/* Decrement the signal deferral count. */
+		curthread->sig_defer_count--;
+	}
+	else if (curthread->sig_defer_count == 1) {
+		/* Reenable signals: */
+		curthread->sig_defer_count = 0;
+
+		/*
+		 * Check if there are queued signals:
+		 */
+		if (_sigq_check_reqd != 0)
+			_thread_kern_sched(NULL);
+
+		/*
+		 * Check for asynchronous cancellation before delivering any
+		 * pending signals:
+		 */
+		if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
+		    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
+			pthread_testcancel();
+
+		/*
+		 * If there are pending signals or this thread has
+		 * to yield the CPU, call the kernel scheduler:
+		 *
+		 * XXX - Come back and revisit the pending signal problem
+		 */
+		if ((curthread->yield_on_sig_undefer != 0) ||
+		    SIGNOTEMPTY(curthread->sigpend)) {
+			curthread->yield_on_sig_undefer = 0;
+			_thread_kern_sched(NULL);
+		}
+	}
+}
+
+static void
+dequeue_signals(void)
+{
+	char	bufr[128];
+	int	num;
+
+	/*
+	 * Enter a loop to clear the pthread kernel pipe:
+	 */
+	while (((num = __sys_read(_thread_kern_pipe[0], bufr,
+	    sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
+	}
+	if ((num < 0) && (errno != EAGAIN)) {
+		/*
+		 * The only error we should expect is if there is
+		 * no data to read.
+		 */
+		PANIC("Unable to read from thread kernel pipe");
+	}
+	/* Handle any pending signals: */
+	_thread_sig_handle_pending();
+}
+
+static inline void
+thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
+{
+	pthread_t tid_out = thread_out;
+	pthread_t tid_in = thread_in;
+
+	if ((tid_out != NULL) &&
+	    (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
+		tid_out = NULL;
+	if ((tid_in != NULL) &&
+	    (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
+		tid_in = NULL;
+
+	if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
+		/* Run the scheduler switch hook: */
+		_sched_switch_hook(tid_out, tid_in);
+	}
+}
+
+struct pthread *
+_get_curthread(void)
+{
+	if (_thread_initial == NULL)
+		_thread_init();
+
+	return (_thread_run);
+}
+
+void
+_set_curthread(struct pthread *newthread)
+{
+	_thread_run = newthread;
+}
+#endif /*  __rtems__ */
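
Under __rtems__ only two entry points from this file remain active: _thread_kern_sched_state(), reduced to rtems_task_wake_after(RTEMS_YIELD_PROCESSOR), and _thread_kern_set_timeout(), which turns a relative timeout into an absolute wakeup_time with a nanosecond carry. A standalone sketch of that same deadline computation (illustrative, not the committed code):

	#include <time.h>

	/* Add a relative timeout to the current time, carrying nanosecond
	 * overflow into the seconds field, as _thread_kern_set_timeout() does. */
	static struct timespec
	abs_wakeup(struct timespec now, const struct timespec *rel)
	{
		struct timespec wake = now;

		wake.tv_sec += rel->tv_sec;
		wake.tv_nsec += rel->tv_nsec;
		if (wake.tv_nsec >= 1000000000) {
			wake.tv_sec += 1;
			wake.tv_nsec -= 1000000000;
		}
		return (wake);
	}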
diff --git a/freebsd-userspace/lib/libc_r/uthread/uthread_select.c b/freebsd-userspace/lib/libc_r/uthread/uthread_select.c
new file mode 100644
index 0000000..632f44d
--- /dev/null
+++ b/freebsd-userspace/lib/libc_r/uthread/uthread_select.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 1995-1998 John Birrell <jb at cimlogic.com.au>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#include <unistd.h>
+#include <errno.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <pthread.h>
+#include "pthread_private.h"
+
+__weak_reference(__select, select);
+
+#ifdef __rtems__
+#include <freebsd/sys/timespec.h>
+#define realloc  _bsd_realloc
+#endif
+
+
+int 
+_select(int numfds, fd_set * readfds, fd_set * writefds, fd_set * exceptfds,
+    struct timeval * timeout)
+{
+	struct pthread	*curthread = _get_curthread();
+	struct timespec ts;
+	int             i, ret = 0, f_wait = 1;
+	int		pfd_index, got_events = 0, fd_count = 0;
+	struct pthread_poll_data data;
+
+#ifndef __rtems__     /* XXX - NOT SURE WHAT TO DEFINE _thread_dtablesize TO. */
+	if (numfds > _thread_dtablesize) {
+		numfds = _thread_dtablesize;
+	}
+#endif
+	/* Count the number of file descriptors to be polled: */
+	if (readfds || writefds || exceptfds) {
+		for (i = 0; i < numfds; i++) {
+			if ((readfds && FD_ISSET(i, readfds)) ||
+			    (exceptfds && FD_ISSET(i, exceptfds)) ||
+			    (writefds && FD_ISSET(i, writefds))) {
+				fd_count++;
+			}
+		}
+	}
+
+	/*
+	 * Allocate memory for poll data if it hasn't already been
+	 * allocated or if previously allocated memory is insufficient.
+	 */
+	if ((curthread->poll_data.fds == NULL) ||
+	    (curthread->poll_data.nfds < fd_count)) {
+		data.fds = (struct pollfd *) realloc(curthread->poll_data.fds,
+		    sizeof(struct pollfd) * MAX(128, fd_count));
+		if (data.fds == NULL) {
+			errno = ENOMEM;
+			ret = -1;
+		}
+		else {
+			/*
+			 * Note that the threads poll data always
+			 * indicates what is allocated, not what is
+			 * currently being polled.
+			 */
+			curthread->poll_data.fds = data.fds;
+			curthread->poll_data.nfds = MAX(128, fd_count);
+		}
+	}
+	/* Check if a timeout was specified: */
+	if (timeout) {
+		if (timeout->tv_sec < 0 ||
+			timeout->tv_usec < 0 || timeout->tv_usec >= 1000000) {
+			errno = EINVAL;
+			return (-1);
+		}
+
+		/* Convert the timeval to a timespec: */
+		TIMEVAL_TO_TIMESPEC(timeout, &ts);
+
+		/* Set the wake up time: */
+		_thread_kern_set_timeout(&ts);
+		if (ts.tv_sec == 0 && ts.tv_nsec == 0)
+			f_wait = 0;
+	} else {
+		/* Wait for ever: */
+		_thread_kern_set_timeout(NULL);
+	}
+
+	if (ret == 0) {
+		/* Setup the wait data. */
+		data.fds = curthread->poll_data.fds;
+		data.nfds = fd_count;
+
+		/*
+		 * Setup the array of pollfds.  Optimize this by
+		 * running the loop in reverse and stopping when
+		 * the number of selected file descriptors is reached.
+		 */
+		for (i = numfds - 1, pfd_index = fd_count - 1;
+		    (i >= 0) && (pfd_index >= 0); i--) {
+			data.fds[pfd_index].events = 0;
+			if (readfds && FD_ISSET(i, readfds)) {
+				data.fds[pfd_index].events = POLLRDNORM;
+			}
+			if (exceptfds && FD_ISSET(i, exceptfds)) {
+				data.fds[pfd_index].events |= POLLRDBAND;
+			}
+			if (writefds && FD_ISSET(i, writefds)) {
+				data.fds[pfd_index].events |= POLLWRNORM;
+			}
+			if (data.fds[pfd_index].events != 0) {
+				/*
+				 * Set the file descriptor to be polled and
+				 * clear revents in case of a timeout which
+				 * leaves fds unchanged:
+				 */
+				data.fds[pfd_index].fd = i;
+				data.fds[pfd_index].revents = 0;
+				pfd_index--;
+			}
+		}
+		if (((ret = __sys_poll(data.fds, data.nfds, 0)) == 0) &&
+		   (f_wait != 0)) {
+			curthread->data.poll_data = &data;
+			curthread->interrupted = 0;
+			_thread_kern_sched_state(PS_SELECT_WAIT, __FILE__, __LINE__);
+			if (curthread->interrupted) {
+				errno = EINTR;
+				data.nfds = 0;
+				ret = -1;
+			} else
+				ret = data.nfds;
+		}
+	}
+
+	if (ret >= 0) {
+		numfds = 0;
+		for (i = 0; i < fd_count; i++) {
+			/*
+			 * Check the results of the poll and clear
+			 * this file descriptor from the fdset if
+			 * the requested event wasn't ready.
+			 */
+
+			/*
+			 * First check for invalid descriptor.
+			 * If found, set errno and return -1.
+			 */
+			if (data.fds[i].revents & POLLNVAL) {
+				errno = EBADF;
+				return -1;
+			}
+
+			got_events = 0;
+			if (readfds != NULL) {
+				if (FD_ISSET(data.fds[i].fd, readfds)) {
+					if ((data.fds[i].revents & (POLLIN
+					    | POLLRDNORM | POLLERR
+					    | POLLHUP | POLLNVAL)) != 0)
+						got_events++;
+					else
+						FD_CLR(data.fds[i].fd, readfds);
+				}
+			}
+			if (writefds != NULL) {
+				if (FD_ISSET(data.fds[i].fd, writefds)) {
+					if ((data.fds[i].revents & (POLLOUT
+					    | POLLWRNORM | POLLWRBAND | POLLERR
+					    | POLLHUP | POLLNVAL)) != 0)
+						got_events++;
+					else
+						FD_CLR(data.fds[i].fd,
+						    writefds);
+				}
+			}
+			if (exceptfds != NULL) {
+				if (FD_ISSET(data.fds[i].fd, exceptfds)) {
+					if (data.fds[i].revents & (POLLRDBAND |
+					    POLLPRI))
+						got_events++;
+					else
+						FD_CLR(data.fds[i].fd,
+						    exceptfds);
+				}
+			}
+			if (got_events != 0)
+				numfds+=got_events;
+		}
+		ret = numfds;
+	}
+
+	return (ret);
+}
+
+int 
+__select(int numfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
+	struct timeval *timeout)
+{
+	int ret;
+
+#ifndef __rtems__
+	_thread_enter_cancellation_point();
+#endif
+	ret = _select(numfds, readfds, writefds, exceptfds, timeout);
+#ifndef __rtems__
+	_thread_leave_cancellation_point();
+#endif
+
+	return ret;
+}
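
_select() above folds the three fd_sets into one pollfd array before calling __sys_poll(): read membership requests POLLRDNORM, write membership POLLWRNORM, and exception membership POLLRDBAND, and on the way back a descriptor stays set only if a matching (or error) revents bit came back. A standalone sketch of the request-side mapping for a single descriptor (illustrative only):

	#include <poll.h>
	#include <sys/select.h>

	/* Build the poll events _select() would request for one descriptor. */
	static short
	select_events(int fd, fd_set *r, fd_set *w, fd_set *e)
	{
		short events = 0;

		if (r != NULL && FD_ISSET(fd, r))
			events |= POLLRDNORM;	/* readable */
		if (w != NULL && FD_ISSET(fd, w))
			events |= POLLWRNORM;	/* writable */
		if (e != NULL && FD_ISSET(fd, e))
			events |= POLLRDBAND;	/* exceptional condition */
		return (events);
	}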
diff --git a/freebsd-userspace/rtems/include/pthread_private.h b/freebsd-userspace/rtems/include/pthread_private.h
new file mode 100644
index 0000000..ce5ba6d
--- /dev/null
+++ b/freebsd-userspace/rtems/include/pthread_private.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 1995-1998 John Birrell <jb at cimlogic.com.au>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Private thread definitions for the uthread kernel.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PTHREAD_PRIVATE_H
+#define _PTHREAD_PRIVATE_H
+
+/*
+ * Evaluate the storage class specifier.
+ */
+#ifdef GLOBAL_PTHREAD_PRIVATE
+#define SCLASS
+#else
+#define SCLASS extern
+#endif
+
+/*
+ * Include files.
+ */
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/cdefs.h>
+#include <sched.h>
+#include <spinlock.h>
+#include <pthread_np.h>
+#include <freebsd/sys/malloc.h>
+
+/*
+ * Define a thread-safe macro to get the current time of day
+ * which is updated at regular intervals by the scheduling signal
+ * handler.
+ */
+#define	GET_CURRENT_TOD(tv)				\
+	do {						\
+		tv.tv_sec = _sched_tod.tv_sec;		\
+		tv.tv_usec = _sched_tod.tv_usec;	\
+	} while (tv.tv_sec != _sched_tod.tv_sec)
+
+
+/* 
+ * rtems uses the following structure to allow the method
+ * _thread_kern_sched_state to be called.  This function
+ * is stubbed out to cause a processor yield.
+ */
+
+/*
+ * Thread states.
+ */
+enum pthread_state {
+#if 0
+	PS_RUNNING,
+	PS_SIGTHREAD,
+	PS_MUTEX_WAIT,
+	PS_COND_WAIT,
+	PS_FDLR_WAIT,
+	PS_FDLW_WAIT,
+	PS_FDR_WAIT,
+	PS_FDW_WAIT,
+	PS_FILE_WAIT,
+	PS_POLL_WAIT,
+#endif
+	PS_SELECT_WAIT,
+#if 0
+	PS_SLEEP_WAIT,
+	PS_WAIT_WAIT,
+	PS_SIGSUSPEND,
+	PS_SIGWAIT,
+	PS_SPINBLOCK,
+	PS_JOIN,
+	PS_SUSPENDED,
+	PS_DEAD,
+	PS_DEADLOCK,
+#endif
+	PS_STATE_MAX
+};
+
+struct pthread_poll_data {
+	int	nfds;
+	struct pollfd *fds;
+};
+
+struct pthread_wait_data {
+	struct pthread_poll_data *poll_data;
+};
+
+/*
+ * Thread structure.
+ */
+struct pthread {
+
+	/*
+	 * Time to wake up thread. This is used for sleeping threads and
+	 * for any operation which may time out (such as select).
+	 */
+	struct timespec	wakeup_time;
+
+	/* TRUE if operation has timed out. */
+	int	timeout;
+
+	/* Wait data. */
+	struct pthread_wait_data data;
+
+	/*
+	 * Allocated for converting select into poll.
+	 */
+	struct pthread_poll_data poll_data;
+
+	/*
+	 * Set to TRUE if a blocking operation was
+	 * interrupted by a signal:
+	 */
+	int		interrupted;
+};
+
+/* Time of day at last scheduling timer signal: */
+SCLASS struct timeval volatile	_sched_tod
+#ifdef GLOBAL_PTHREAD_PRIVATE
+= { 0, 0 };
+#else
+;
+#endif
+struct pthread *_get_curthread(void);
+
+#endif  /* !_PTHREAD_PRIVATE_H */
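
GET_CURRENT_TOD() above guards against a torn read of the two-word _sched_tod, which a scheduling-signal handler may update at any moment, by re-copying until tv_sec is unchanged across the copy. The same pattern in standalone form (hypothetical names, illustrative only):

	#include <sys/time.h>

	extern volatile struct timeval example_tod;	/* assumed asynchronous updater */

	/* Copy the time of day so that tv_sec and tv_usec are consistent. */
	static struct timeval
	read_tod_consistent(void)
	{
		struct timeval tv;

		do {
			tv.tv_sec = example_tod.tv_sec;
			tv.tv_usec = example_tod.tv_usec;
		} while (tv.tv_sec != example_tod.tv_sec);
		return (tv);
	}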
diff --git a/freebsd-userspace/rtems/rtems-get_curthread.c b/freebsd-userspace/rtems/rtems-get_curthread.c
new file mode 100644
index 0000000..263cac6
--- /dev/null
+++ b/freebsd-userspace/rtems/rtems-get_curthread.c
@@ -0,0 +1,72 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+#include <freebsd/sys/malloc.h>
+
+#include <pthread.h>
+#include "pthread_private.h"
+
+
+static struct pthread *rtems_bsd_curpthread = NULL;
+
+
+static void rtems_bsd_pthread_descriptor_dtor(void *td)
+{
+	// XXX are there other pieces to clean up?
+	free(td, M_TEMP);
+}
+
+static struct pthread *
+rtems_bsd_pthread_init( rtems_id id )
+{
+	rtems_status_code sc = RTEMS_SUCCESSFUL;
+	unsigned index = 0;
+	struct pthread *td;
+
+	td = _bsd_malloc( sizeof(struct pthread), M_TEMP, M_WAITOK | M_ZERO);
+	if (td == NULL)
+		return NULL;
+
+	td->timeout = 0;
+	td->data.poll_data = NULL;
+	td->poll_data.nfds = 0;
+	td->poll_data.fds = NULL;
+	td->interrupted = 0;
+        rtems_bsd_curpthread = td;
+
+	// Now add the task descriptor as a per-task variable
+	sc = rtems_task_variable_add(
+		id,
+		&rtems_bsd_curpthread,
+		rtems_bsd_pthread_descriptor_dtor
+	);
+	if (sc != RTEMS_SUCCESSFUL) {
+		free(td, M_TEMP);
+		return NULL;
+	}
+
+  	return td;
+}
+
+/*
+ */
+
+struct pthread *
+_get_curthread(void)
+{
+  struct pthread *td;
+
+  /*
+   * If we already have a struct thread associated with this thread,
+   * obtain it. Otherwise, allocate and initialize one.
+   */
+  td = rtems_bsd_curpthread;
+  if ( td == NULL ) {
+    td = rtems_bsd_pthread_init( rtems_task_self() );
+    if ( td == NULL ){
+      panic("_get_curthread: Unable to create pthread\n");
+    }
+  }
+
+  return td;
+}
+
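
_get_curthread() gives each RTEMS task its own lazily allocated struct pthread: the first call in a task allocates the descriptor and registers it as a task variable so the destructor reclaims it when the task is deleted, and later calls return the cached pointer. A sketch of how the select path above consumes it (mirrors the blocking sequence in _select(); assumes the usual libc_r prototypes are visible, illustrative only):

	#include "pthread_private.h"

	/* Block the calling task on its own per-task descriptor. */
	static int
	block_forever(void)
	{
		struct pthread *curthread = _get_curthread();

		curthread->interrupted = 0;		/* clear before blocking */
		_thread_kern_set_timeout(NULL);		/* NULL timeout: no deadline */
		_thread_kern_sched_state(PS_SELECT_WAIT, __FILE__, __LINE__);
		return (curthread->interrupted ? -1 : 0);
	}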
diff --git a/freebsd-userspace/rtems/rtems-syspoll.c b/freebsd-userspace/rtems/rtems-syspoll.c
new file mode 100644
index 0000000..d53d14e
--- /dev/null
+++ b/freebsd-userspace/rtems/rtems-syspoll.c
@@ -0,0 +1,30 @@
+#include <unistd.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <rtems.h>
+#include <rtems/error.h>
+#include <freebsd/sys/poll.h>
+
+struct poll_args {
+        struct pollfd *fds;
+        u_int   nfds;
+        int     timeout;
+};
+
+int kern_poll( struct thread *td, struct poll_args *uap );
+
+
+int 
+__sys_poll(struct pollfd *fds, unsigned nfds, int timeout)
+{
+  struct poll_args uap;
+  struct thread *td = rtems_get_curthread();
+
+  uap.fds = fds;
+  uap.nfds = nfds;
+  uap.timeout = timeout;
+
+  kern_poll(td, &uap);
+
+  return -1;
+}
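
For context, the usual FreeBSD convention for kernel entry points like kern_poll() is to report errors through the return value and to leave the result count in td->td_retval[0]. A sketch of a caller consuming that convention (not the committed code; assumes the imported struct thread carries td_retval, which is not shown in this diff):

	static int
	poll_result(struct thread *td, struct poll_args *uap)
	{
		int error = kern_poll(td, uap);

		if (error != 0) {
			errno = error;
			return (-1);
		}
		return ((int)td->td_retval[0]);	/* number of ready descriptors */
	}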
diff --git a/freebsd-userspace/rtems/rtems-uthread_init.c b/freebsd-userspace/rtems/rtems-uthread_init.c
new file mode 100644
index 0000000..3226ed6
--- /dev/null
+++ b/freebsd-userspace/rtems/rtems-uthread_init.c
@@ -0,0 +1,8 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* Allocate space for global thread variables here: */
+#define GLOBAL_PTHREAD_PRIVATE
+
+#include <freebsd/sys/types.h>
+#include <pthread.h>
+#include "pthread_private.h"
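
Defining GLOBAL_PTHREAD_PRIVATE before the include is what turns the SCLASS declarations in pthread_private.h into actual definitions in this one translation unit; every other file that includes the header sees them as extern. The same idiom in generic form (hypothetical names, illustrative only):

	/* example_private.h */
	#ifdef GLOBAL_EXAMPLE_PRIVATE
	#define EX_SCLASS			/* the single defining translation unit */
	#else
	#define EX_SCLASS extern		/* every other includer */
	#endif

	EX_SCLASS int example_counter		/* defined once, declared elsewhere */
	#ifdef GLOBAL_EXAMPLE_PRIVATE
	= 0
	#endif
	;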
diff --git a/freebsd/kern/kern_condvar.c b/freebsd/kern/kern_condvar.c
new file mode 100644
index 0000000..34ec29c
--- /dev/null
+++ b/freebsd/kern/kern_condvar.c
@@ -0,0 +1,455 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2000 Jake Burkholder <jake at freebsd.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/condvar.h>
+#include <freebsd/sys/sched.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/sleepqueue.h>
+#include <freebsd/sys/resourcevar.h>
+#ifdef KTRACE
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/ktrace.h>
+#endif
+
+/*
+ * Common sanity checks for cv_wait* functions.
+ */
+#define	CV_ASSERT(cvp, lock, td) do {					\
+	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
+	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
+	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
+	KASSERT((lock) != NULL, ("%s: lock NULL", __func__));		\
+} while (0)
+
+/*
+ * Initialize a condition variable.  Must be called before use.
+ */
+void
+cv_init(struct cv *cvp, const char *desc)
+{
+
+	cvp->cv_description = desc;
+	cvp->cv_waiters = 0;
+}
+
+/*
+ * Destroy a condition variable.  The condition variable must be re-initialized
+ * in order to be re-used.
+ */
+void
+cv_destroy(struct cv *cvp)
+{
+#ifdef INVARIANTS
+	struct sleepqueue *sq;
+
+	sleepq_lock(cvp);
+	sq = sleepq_lookup(cvp);
+	sleepq_release(cvp);
+	KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__));
+#endif
+}
+
+/*
+ * Wait on a condition variable.  The current thread is placed on the condition
+ * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
+ * condition variable will resume the thread.  The mutex is released before
+ * sleeping and will be held on return.  It is recommended that the mutex be
+ * held when cv_signal or cv_broadcast are called.
+ */
+void
+_cv_wait(struct cv *cvp, struct lock_object *lock)
+{
+	WITNESS_SAVE_DECL(lock_witness);
+	struct lock_class *class;
+	struct thread *td;
+	int lock_state;
+
+	td = curthread;
+	lock_state = 0;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(1, 0);
+#endif
+	CV_ASSERT(cvp, lock, td);
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+	    "Waiting on \"%s\"", cvp->cv_description);
+	class = LOCK_CLASS(lock);
+
+	if (cold || panicstr) {
+		/*
+		 * During autoconfiguration, just give interrupts
+		 * a chance, then just return.  Don't run any other
+		 * thread or panic below, in case this is the idle
+		 * process and already asleep.
+		 */
+		return;
+	}
+
+	sleepq_lock(cvp);
+
+	cvp->cv_waiters++;
+	if (lock == &Giant.lock_object)
+		mtx_assert(&Giant, MA_OWNED);
+	DROP_GIANT();
+
+	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+	if (lock != &Giant.lock_object) {
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_release(cvp);
+		WITNESS_SAVE(lock, lock_witness);
+		lock_state = class->lc_unlock(lock);
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_lock(cvp);
+	}
+	sleepq_wait(cvp, 0);
+
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(0, 0);
+#endif
+	PICKUP_GIANT();
+	if (lock != &Giant.lock_object) {
+		class->lc_lock(lock, lock_state);
+		WITNESS_RESTORE(lock, lock_witness);
+	}
+}
+
+/*
+ * Wait on a condition variable.  This function differs from cv_wait by
+ * not acquiring the mutex after the condition variable was signaled.
+ */
+void
+_cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
+{
+	struct lock_class *class;
+	struct thread *td;
+
+	td = curthread;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(1, 0);
+#endif
+	CV_ASSERT(cvp, lock, td);
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+	    "Waiting on \"%s\"", cvp->cv_description);
+	KASSERT(lock != &Giant.lock_object,
+	    ("cv_wait_unlock cannot be used with Giant"));
+	class = LOCK_CLASS(lock);
+
+	if (cold || panicstr) {
+		/*
+		 * During autoconfiguration, just give interrupts
+		 * a chance, then just return.  Don't run any other
+		 * thread or panic below, in case this is the idle
+		 * process and already asleep.
+		 */
+		class->lc_unlock(lock);
+		return;
+	}
+
+	sleepq_lock(cvp);
+
+	cvp->cv_waiters++;
+	DROP_GIANT();
+
+	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+	if (class->lc_flags & LC_SLEEPABLE)
+		sleepq_release(cvp);
+	class->lc_unlock(lock);
+	if (class->lc_flags & LC_SLEEPABLE)
+		sleepq_lock(cvp);
+	sleepq_wait(cvp, 0);
+
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(0, 0);
+#endif
+	PICKUP_GIANT();
+}
+
+/*
+ * Wait on a condition variable, allowing interruption by signals.  Return 0 if
+ * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
+ * a signal was caught.  If ERESTART is returned the system call should be
+ * restarted if possible.
+ */
+int
+_cv_wait_sig(struct cv *cvp, struct lock_object *lock)
+{
+	WITNESS_SAVE_DECL(lock_witness);
+	struct lock_class *class;
+	struct thread *td;
+	int lock_state, rval;
+
+	td = curthread;
+	lock_state = 0;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(1, 0);
+#endif
+	CV_ASSERT(cvp, lock, td);
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+	    "Waiting on \"%s\"", cvp->cv_description);
+	class = LOCK_CLASS(lock);
+
+	if (cold || panicstr) {
+		/*
+		 * After a panic, or during autoconfiguration, just give
+		 * interrupts a chance, then just return; don't run any other
+		 * procs or panic below, in case this is the idle process and
+		 * already asleep.
+		 */
+		return (0);
+	}
+
+	sleepq_lock(cvp);
+
+	cvp->cv_waiters++;
+	if (lock == &Giant.lock_object)
+		mtx_assert(&Giant, MA_OWNED);
+	DROP_GIANT();
+
+	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
+	    SLEEPQ_INTERRUPTIBLE, 0);
+	if (lock != &Giant.lock_object) {
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_release(cvp);
+		WITNESS_SAVE(lock, lock_witness);
+		lock_state = class->lc_unlock(lock);
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_lock(cvp);
+	}
+	rval = sleepq_wait_sig(cvp, 0);
+
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(0, 0);
+#endif
+	PICKUP_GIANT();
+	if (lock != &Giant.lock_object) {
+		class->lc_lock(lock, lock_state);
+		WITNESS_RESTORE(lock, lock_witness);
+	}
+
+	return (rval);
+}
+
+/*
+ * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
+ * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
+ * expires.
+ */
+int
+_cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo)
+{
+	WITNESS_SAVE_DECL(lock_witness);
+	struct lock_class *class;
+	struct thread *td;
+	int lock_state, rval;
+
+	td = curthread;
+	lock_state = 0;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(1, 0);
+#endif
+	CV_ASSERT(cvp, lock, td);
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+	    "Waiting on \"%s\"", cvp->cv_description);
+	class = LOCK_CLASS(lock);
+
+	if (cold || panicstr) {
+		/*
+		 * After a panic, or during autoconfiguration, just give
+		 * interrupts a chance, then just return; don't run any other
+		 * thread or panic below, in case this is the idle process and
+		 * already asleep.
+		 */
+		return 0;
+	}
+
+	sleepq_lock(cvp);
+
+	cvp->cv_waiters++;
+	if (lock == &Giant.lock_object)
+		mtx_assert(&Giant, MA_OWNED);
+	DROP_GIANT();
+
+	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+	sleepq_set_timeout(cvp, timo);
+	if (lock != &Giant.lock_object) {
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_release(cvp);
+		WITNESS_SAVE(lock, lock_witness);
+		lock_state = class->lc_unlock(lock);
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_lock(cvp);
+	}
+	rval = sleepq_timedwait(cvp, 0);
+
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(0, 0);
+#endif
+	PICKUP_GIANT();
+	if (lock != &Giant.lock_object) {
+		class->lc_lock(lock, lock_state);
+		WITNESS_RESTORE(lock, lock_witness);
+	}
+
+	return (rval);
+}
+
+/*
+ * Wait on a condition variable for at most timo/hz seconds, allowing
+ * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
+ * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
+ * a signal was caught.
+ */
+int
+_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+{
+	WITNESS_SAVE_DECL(lock_witness);
+	struct lock_class *class;
+	struct thread *td;
+	int lock_state, rval;
+
+	td = curthread;
+	lock_state = 0;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(1, 0);
+#endif
+	CV_ASSERT(cvp, lock, td);
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+	    "Waiting on \"%s\"", cvp->cv_description);
+	class = LOCK_CLASS(lock);
+
+	if (cold || panicstr) {
+		/*
+		 * After a panic, or during autoconfiguration, just give
+		 * interrupts a chance, then just return; don't run any other
+		 * thread or panic below, in case this is the idle process and
+		 * already asleep.
+		 */
+		return 0;
+	}
+
+	sleepq_lock(cvp);
+
+	cvp->cv_waiters++;
+	if (lock == &Giant.lock_object)
+		mtx_assert(&Giant, MA_OWNED);
+	DROP_GIANT();
+
+	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
+	    SLEEPQ_INTERRUPTIBLE, 0);
+	sleepq_set_timeout(cvp, timo);
+	if (lock != &Giant.lock_object) {
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_release(cvp);
+		WITNESS_SAVE(lock, lock_witness);
+		lock_state = class->lc_unlock(lock);
+		if (class->lc_flags & LC_SLEEPABLE)
+			sleepq_lock(cvp);
+	}
+	rval = sleepq_timedwait_sig(cvp, 0);
+
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(0, 0);
+#endif
+	PICKUP_GIANT();
+	if (lock != &Giant.lock_object) {
+		class->lc_lock(lock, lock_state);
+		WITNESS_RESTORE(lock, lock_witness);
+	}
+
+	return (rval);
+}
+
+/*
+ * Signal a condition variable, waking up one waiting thread.  Will also wake
+ * up the swapper if the process is not in memory, so that it can bring the
+ * sleeping process in.  Note that this may also result in additional threads
+ * being made runnable.  Should be called with the same mutex as was passed to
+ * cv_wait held.
+ */
+void
+cv_signal(struct cv *cvp)
+{
+	int wakeup_swapper;
+
+	wakeup_swapper = 0;
+	sleepq_lock(cvp);
+	if (cvp->cv_waiters > 0) {
+		cvp->cv_waiters--;
+		wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0);
+	}
+	sleepq_release(cvp);
+	if (wakeup_swapper)
+		kick_proc0();
+}
+
+/*
+ * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
+ * Should be called with the same mutex as was passed to cv_wait held.
+ */
+void
+cv_broadcastpri(struct cv *cvp, int pri)
+{
+	int wakeup_swapper;
+
+	/*
+	 * XXX sleepq_broadcast pri argument changed from -1 meaning
+	 * no pri to 0 meaning no pri.
+	 */
+	wakeup_swapper = 0;
+	if (pri == -1)
+		pri = 0;
+	sleepq_lock(cvp);
+	if (cvp->cv_waiters > 0) {
+		cvp->cv_waiters = 0;
+		wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0);
+	}
+	sleepq_release(cvp);
+	if (wakeup_swapper)
+		kick_proc0();
+}
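
The condition variable API above is used in the standard FreeBSD kernel
pattern: a mutex protects the shared state, cv_wait() releases it while the
thread sleeps and reacquires it on wakeup, and cv_signal()/cv_broadcast()
wake the waiters (preferably called with the mutex held).  A minimal sketch
of that pattern, using stock FreeBSD header paths -- the example_softc
structure, its fields, and the example_* functions are made-up names for
illustration and are not part of this change set:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

/* Hypothetical shared state: one mutex guarding a counter, one cv. */
struct example_softc {
	struct mtx	ex_mtx;
	struct cv	ex_cv;
	int		ex_pending;
};

static void
example_init(struct example_softc *sc)
{
	mtx_init(&sc->ex_mtx, "example", NULL, MTX_DEF);
	cv_init(&sc->ex_cv, "example cv");
	sc->ex_pending = 0;
}

/* Consumer: sleep until a producer posts work. */
static void
example_take(struct example_softc *sc)
{
	mtx_lock(&sc->ex_mtx);
	while (sc->ex_pending == 0)
		cv_wait(&sc->ex_cv, &sc->ex_mtx); /* drops the mutex while asleep */
	sc->ex_pending--;
	mtx_unlock(&sc->ex_mtx);
}

/* Producer: post work and wake one waiter while holding the mutex. */
static void
example_post(struct example_softc *sc)
{
	mtx_lock(&sc->ex_mtx);
	sc->ex_pending++;
	cv_signal(&sc->ex_cv);
	mtx_unlock(&sc->ex_mtx);
}

cv_broadcast() (a wrapper around cv_broadcastpri()) follows the same pattern
when all waiters must be woken, for example just before tearing the object
down with cv_destroy().
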
diff --git a/freebsd/kern/kern_descrip.c b/freebsd/kern/kern_descrip.c
new file mode 100644
index 0000000..0ba063f
--- /dev/null
+++ b/freebsd/kern/kern_descrip.c
@@ -0,0 +1,6912 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/file.h>
+#include <freebsd/sys/filedesc.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mount.h>
+#include <freebsd/sys/mqueue.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/namei.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/stat.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/syscallsubr.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/sysproto.h>
+#include <freebsd/sys/tty.h>
+#include <freebsd/sys/unistd.h>
+#include <freebsd/sys/user.h>
+#include <freebsd/sys/vnode.h>
+#ifdef KTRACE
+#include <freebsd/sys/ktrace.h>
+#endif
+
+#include <freebsd/security/audit/audit.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/ddb/ddb.h>
+
+static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
+static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
+		     "file desc to leader structures");
+static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
+
+static uma_zone_t file_zone;
+
+
+/* Flags for do_dup() */
+#define DUP_FIXED	0x1	/* Force fixed allocation */
+#define DUP_FCNTL	0x2	/* fcntl()-style errors */
+
+static int do_dup(struct thread *td, int flags, int old, int new,
+    register_t *retval);
+static int	fd_first_free(struct filedesc *, int, int);
+static int	fd_last_used(struct filedesc *, int, int);
+static void	fdgrowtable(struct filedesc *, int);
+static void	fdunused(struct filedesc *fdp, int fd);
+static void	fdused(struct filedesc *fdp, int fd);
+
+/*
+ * A process is initially started out with NDFILE descriptors stored within
+ * this structure, selected to be enough for typical applications based on
+ * the historical limit of 20 open files (and the usage of descriptors by
+ * shells).  If these descriptors are exhausted, a larger descriptor table
+ * may be allocated, up to a process' resource limit; the internal arrays
+ * are then unused.
+ */
+#define NDFILE		20
+#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
+#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
+#define NDSLOT(x)	((x) / NDENTRIES)
+#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
+#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)
+
+/*
+ * Storage required per open file descriptor.
+ */
+#define OFILESIZE (sizeof(struct file *) + sizeof(char))
+
+/*
+ * Storage to hold unused ofiles that need to be reclaimed.
+ */
+struct freetable {
+	struct file	**ft_table;
+	SLIST_ENTRY(freetable) ft_next;
+};
+
+/*
+ * Basic allocation of descriptors:
+ * one of the above, plus arrays for NDFILE descriptors.
+ */
+struct filedesc0 {
+	struct	filedesc fd_fd;
+	/*
+	 * ofiles which need to be reclaimed on free.
+	 */
+	SLIST_HEAD(,freetable) fd_free;
+	/*
+	 * These arrays are used when the number of open files is
+	 * <= NDFILE, and are then pointed to by the pointers above.
+	 */
+	struct	file *fd_dfiles[NDFILE];
+	char	fd_dfileflags[NDFILE];
+	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
+};
+
+/*
+ * Descriptor management.
+ */
+volatile int openfiles;			/* actual number of open files */
+struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
+#ifndef __rtems__
+void	(*mq_fdclose)(struct thread *td, int fd, struct file *fp);
+
+/* A mutex to protect the association between a proc and filedesc. */
+static struct mtx	fdesc_mtx;
+
+/*
+ * Find the first zero bit in the given bitmap, starting at low and not
+ * exceeding size - 1.
+ */
+static int
+fd_first_free(struct filedesc *fdp, int low, int size)
+{
+	NDSLOTTYPE *map = fdp->fd_map;
+	NDSLOTTYPE mask;
+	int off, maxoff;
+
+	if (low >= size)
+		return (low);
+
+	off = NDSLOT(low);
+	if (low % NDENTRIES) {
+		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
+		if ((mask &= ~map[off]) != 0UL)
+			return (off * NDENTRIES + ffsl(mask) - 1);
+		++off;
+	}
+	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
+		if (map[off] != ~0UL)
+			return (off * NDENTRIES + ffsl(~map[off]) - 1);
+	return (size);
+}
+
+/*
+ * Find the highest non-zero bit in the given bitmap, starting at low and
+ * not exceeding size - 1.
+ */
+static int
+fd_last_used(struct filedesc *fdp, int low, int size)
+{
+	NDSLOTTYPE *map = fdp->fd_map;
+	NDSLOTTYPE mask;
+	int off, minoff;
+
+	if (low >= size)
+		return (-1);
+
+	off = NDSLOT(size);
+	if (size % NDENTRIES) {
+		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
+		if ((mask &= map[off]) != 0)
+			return (off * NDENTRIES + flsl(mask) - 1);
+		--off;
+	}
+	for (minoff = NDSLOT(low); off >= minoff; --off)
+		if (map[off] != 0)
+			return (off * NDENTRIES + flsl(map[off]) - 1);
+	return (low - 1);
+}
+
+static int
+fdisused(struct filedesc *fdp, int fd)
+{
+        KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+            ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
+	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
+}
+
+/*
+ * Mark a file descriptor as used.
+ */
+static void
+fdused(struct filedesc *fdp, int fd)
+{
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+	KASSERT(!fdisused(fdp, fd),
+	    ("fd already used"));
+
+	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
+	if (fd > fdp->fd_lastfile)
+		fdp->fd_lastfile = fd;
+	if (fd == fdp->fd_freefile)
+		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
+}
+
+/*
+ * Mark a file descriptor as unused.
+ */
+static void
+fdunused(struct filedesc *fdp, int fd)
+{
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+	KASSERT(fdisused(fdp, fd),
+	    ("fd is already unused"));
+	KASSERT(fdp->fd_ofiles[fd] == NULL,
+	    ("fd is still in use"));
+
+	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
+	if (fd < fdp->fd_freefile)
+		fdp->fd_freefile = fd;
+	if (fd == fdp->fd_lastfile)
+		fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
+}
+
+/*
+ * System calls on descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct getdtablesize_args {
+	int	dummy;
+};
+#endif
+/* ARGSUSED */
+int
+getdtablesize(struct thread *td, struct getdtablesize_args *uap)
+{
+	struct proc *p = td->td_proc;
+
+	PROC_LOCK(p);
+	td->td_retval[0] =
+	    min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+	return (0);
+}
+
+/*
+ * Duplicate a file descriptor to a particular value.
+ *
+ * Note: keep in mind that a potential race condition exists when closing
+ * descriptors from a shared descriptor table (via rfork).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup2_args {
+	u_int	from;
+	u_int	to;
+};
+#endif
+/* ARGSUSED */
+int
+dup2(struct thread *td, struct dup2_args *uap)
+{
+
+	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
+		    td->td_retval));
+}
+
+/*
+ * Duplicate a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup_args {
+	u_int	fd;
+};
+#endif
+/* ARGSUSED */
+int
+dup(struct thread *td, struct dup_args *uap)
+{
+
+	return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
+}
+
+/*
+ * The file control system call.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fcntl_args {
+	int	fd;
+	int	cmd;
+	long	arg;
+};
+#endif
+/* ARGSUSED */
+int
+fcntl(struct thread *td, struct fcntl_args *uap)
+{
+	struct flock fl;
+	struct oflock ofl;
+	intptr_t arg;
+	int error;
+	int cmd;
+
+	error = 0;
+	cmd = uap->cmd;
+	switch (uap->cmd) {
+	case F_OGETLK:
+	case F_OSETLK:
+	case F_OSETLKW:
+		/*
+		 * Convert old flock structure to new.
+		 */
+		error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
+		fl.l_start = ofl.l_start;
+		fl.l_len = ofl.l_len;
+		fl.l_pid = ofl.l_pid;
+		fl.l_type = ofl.l_type;
+		fl.l_whence = ofl.l_whence;
+		fl.l_sysid = 0;
+
+		switch (uap->cmd) {
+		case F_OGETLK:
+		    cmd = F_GETLK;
+		    break;
+		case F_OSETLK:
+		    cmd = F_SETLK;
+		    break;
+		case F_OSETLKW:
+		    cmd = F_SETLKW;
+		    break;
+		}
+		arg = (intptr_t)&fl;
+		break;
+        case F_GETLK:
+        case F_SETLK:
+        case F_SETLKW:
+	case F_SETLK_REMOTE:
+                error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
+                arg = (intptr_t)&fl;
+                break;
+	default:
+		arg = uap->arg;
+		break;
+	}
+	if (error)
+		return (error);
+	error = kern_fcntl(td, uap->fd, cmd, arg);
+	if (error)
+		return (error);
+	if (uap->cmd == F_OGETLK) {
+		ofl.l_start = fl.l_start;
+		ofl.l_len = fl.l_len;
+		ofl.l_pid = fl.l_pid;
+		ofl.l_type = fl.l_type;
+		ofl.l_whence = fl.l_whence;
+		error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
+	} else if (uap->cmd == F_GETLK) {
+		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
+	}
+	return (error);
+}
+
+static inline struct file *
+fdtofp(int fd, struct filedesc *fdp)
+{
+	struct file *fp;
+
+	FILEDESC_LOCK_ASSERT(fdp);
+	if ((unsigned)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (NULL);
+	return (fp);
+}
+
+int
+kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
+{
+	struct filedesc *fdp;
+	struct flock *flp;
+	struct file *fp;
+	struct proc *p;
+	char *pop;
+	struct vnode *vp;
+	int error, flg, tmp;
+	int vfslocked;
+	u_int old, new;
+	uint64_t bsize;
+
+	vfslocked = 0;
+	error = 0;
+	flg = F_POSIX;
+	p = td->td_proc;
+	fdp = p->p_fd;
+
+	switch (cmd) {
+	case F_DUPFD:
+		tmp = arg;
+		error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
+		break;
+
+	case F_DUP2FD:
+		tmp = arg;
+		error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
+		break;
+
+	case F_GETFD:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		pop = &fdp->fd_ofileflags[fd];
+		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+		FILEDESC_SUNLOCK(fdp);
+		break;
+
+	case F_SETFD:
+		FILEDESC_XLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_XUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		pop = &fdp->fd_ofileflags[fd];
+		*pop = (*pop &~ UF_EXCLOSE) |
+		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
+		FILEDESC_XUNLOCK(fdp);
+		break;
+
+	case F_GETFL:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		td->td_retval[0] = OFLAGS(fp->f_flag);
+		FILEDESC_SUNLOCK(fdp);
+		break;
+
+	case F_SETFL:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		do {
+			tmp = flg = fp->f_flag;
+			tmp &= ~FCNTLFLAGS;
+			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
+		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
+		tmp = fp->f_flag & FNONBLOCK;
+		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+		if (error) {
+			fdrop(fp, td);
+			break;
+		}
+		tmp = fp->f_flag & FASYNC;
+		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
+		if (error == 0) {
+			fdrop(fp, td);
+			break;
+		}
+		atomic_clear_int(&fp->f_flag, FNONBLOCK);
+		tmp = 0;
+		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+		fdrop(fp, td);
+		break;
+
+	case F_GETOWN:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
+		if (error == 0)
+			td->td_retval[0] = tmp;
+		fdrop(fp, td);
+		break;
+
+	case F_SETOWN:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		tmp = arg;
+		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
+		fdrop(fp, td);
+		break;
+
+	case F_SETLK_REMOTE:
+		error = priv_check(td, PRIV_NFS_LOCKD);
+		if (error)
+			return (error);
+		flg = F_REMOTE;
+		goto do_setlk;
+
+	case F_SETLKW:
+		flg |= F_WAIT;
+		/* FALLTHROUGH F_SETLK */
+
+	case F_SETLK:
+	do_setlk:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		if (fp->f_type != DTYPE_VNODE) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		flp = (struct flock *)arg;
+		if (flp->l_whence == SEEK_CUR) {
+			if (fp->f_offset < 0 ||
+			    (flp->l_start > 0 &&
+			     fp->f_offset > OFF_MAX - flp->l_start)) {
+				FILEDESC_SUNLOCK(fdp);
+				error = EOVERFLOW;
+				break;
+			}
+			flp->l_start += fp->f_offset;
+		}
+
+		/*
+		 * VOP_ADVLOCK() may block.
+		 */
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		vp = fp->f_vnode;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		switch (flp->l_type) {
+		case F_RDLCK:
+			if ((fp->f_flag & FREAD) == 0) {
+				error = EBADF;
+				break;
+			}
+			PROC_LOCK(p->p_leader);
+			p->p_leader->p_flag |= P_ADVLOCK;
+			PROC_UNLOCK(p->p_leader);
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+			    flp, flg);
+			break;
+		case F_WRLCK:
+			if ((fp->f_flag & FWRITE) == 0) {
+				error = EBADF;
+				break;
+			}
+			PROC_LOCK(p->p_leader);
+			p->p_leader->p_flag |= P_ADVLOCK;
+			PROC_UNLOCK(p->p_leader);
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+			    flp, flg);
+			break;
+		case F_UNLCK:
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
+			    flp, flg);
+			break;
+		case F_UNLCKSYS:
+			/*
+			 * Temporary api for testing remote lock
+			 * infrastructure.
+			 */
+			if (flg != F_REMOTE) {
+				error = EINVAL;
+				break;
+			}
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+			    F_UNLCKSYS, flp, flg);
+			break;
+		default:
+			error = EINVAL;
+			break;
+		}
+		VFS_UNLOCK_GIANT(vfslocked);
+		vfslocked = 0;
+		/* Check for race with close */
+		FILEDESC_SLOCK(fdp);
+		if ((unsigned) fd >= fdp->fd_nfiles ||
+		    fp != fdp->fd_ofiles[fd]) {
+			FILEDESC_SUNLOCK(fdp);
+			flp->l_whence = SEEK_SET;
+			flp->l_start = 0;
+			flp->l_len = 0;
+			flp->l_type = F_UNLCK;
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+					   F_UNLCK, flp, F_POSIX);
+			VFS_UNLOCK_GIANT(vfslocked);
+			vfslocked = 0;
+		} else
+			FILEDESC_SUNLOCK(fdp);
+		fdrop(fp, td);
+		break;
+
+	case F_GETLK:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		if (fp->f_type != DTYPE_VNODE) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		flp = (struct flock *)arg;
+		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
+		    flp->l_type != F_UNLCK) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EINVAL;
+			break;
+		}
+		if (flp->l_whence == SEEK_CUR) {
+			if ((flp->l_start > 0 &&
+			    fp->f_offset > OFF_MAX - flp->l_start) ||
+			    (flp->l_start < 0 &&
+			     fp->f_offset < OFF_MIN - flp->l_start)) {
+				FILEDESC_SUNLOCK(fdp);
+				error = EOVERFLOW;
+				break;
+			}
+			flp->l_start += fp->f_offset;
+		}
+		/*
+		 * VOP_ADVLOCK() may block.
+		 */
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		vp = fp->f_vnode;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
+		    F_POSIX);
+		VFS_UNLOCK_GIANT(vfslocked);
+		vfslocked = 0;
+		fdrop(fp, td);
+		break;
+
+	case F_RDAHEAD:
+		arg = arg ? 128 * 1024: 0;
+		/* FALLTHROUGH */
+	case F_READAHEAD:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		if (fp->f_type != DTYPE_VNODE) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		if (arg != 0) {
+			vp = fp->f_vnode;
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			error = vn_lock(vp, LK_SHARED);
+			if (error != 0)
+				goto readahead_vnlock_fail;
+			bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
+			VOP_UNLOCK(vp, 0);
+			fp->f_seqcount = (arg + bsize - 1) / bsize;
+			do {
+				new = old = fp->f_flag;
+				new |= FRDAHEAD;
+			} while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+readahead_vnlock_fail:
+			VFS_UNLOCK_GIANT(vfslocked);
+			vfslocked = 0;
+		} else {
+			do {
+				new = old = fp->f_flag;
+				new &= ~FRDAHEAD;
+			} while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+		}
+		fdrop(fp, td);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	VFS_UNLOCK_GIANT(vfslocked);
+	return (error);
+}
+
+/*
+ * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
+ */
+static int
+do_dup(struct thread *td, int flags, int old, int new,
+    register_t *retval)
+{
+	struct filedesc *fdp;
+	struct proc *p;
+	struct file *fp;
+	struct file *delfp;
+	int error, holdleaders, maxfd;
+
+	p = td->td_proc;
+	fdp = p->p_fd;
+
+	/*
+	 * Verify we have a valid descriptor to dup from and possibly to
+	 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
+	 * return EINVAL when the new descriptor is out of bounds.
+	 */
+	if (old < 0)
+		return (EBADF);
+	if (new < 0)
+		return (flags & DUP_FCNTL ? EINVAL : EBADF);
+	PROC_LOCK(p);
+	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+	if (new >= maxfd)
+		return (flags & DUP_FCNTL ? EINVAL : EMFILE);
+
+	FILEDESC_XLOCK(fdp);
+	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
+		FILEDESC_XUNLOCK(fdp);
+		return (EBADF);
+	}
+	if (flags & DUP_FIXED && old == new) {
+		*retval = new;
+		FILEDESC_XUNLOCK(fdp);
+		return (0);
+	}
+	fp = fdp->fd_ofiles[old];
+	fhold(fp);
+
+	/*
+	 * If the caller specified a file descriptor, make sure the file
+	 * table is large enough to hold it, and grab it.  Otherwise, just
+	 * allocate a new descriptor the usual way.  Since the filedesc
+	 * lock may be temporarily dropped in the process, we have to look
+	 * out for a race.
+	 */
+	if (flags & DUP_FIXED) {
+		if (new >= fdp->fd_nfiles)
+			fdgrowtable(fdp, new + 1);
+		if (fdp->fd_ofiles[new] == NULL)
+			fdused(fdp, new);
+	} else {
+		if ((error = fdalloc(td, new, &new)) != 0) {
+			FILEDESC_XUNLOCK(fdp);
+			fdrop(fp, td);
+			return (error);
+		}
+	}
+
+	/*
+	 * If the old file changed out from under us then treat it as a
+	 * bad file descriptor.  Userland should do its own locking to
+	 * avoid this case.
+	 */
+	if (fdp->fd_ofiles[old] != fp) {
+		/* we've allocated a descriptor which we won't use */
+		if (fdp->fd_ofiles[new] == NULL)
+			fdunused(fdp, new);
+		FILEDESC_XUNLOCK(fdp);
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	KASSERT(old != new,
+	    ("new fd is same as old"));
+
+	/*
+	 * Save info on the descriptor being overwritten.  We cannot close
+	 * it without introducing an ownership race for the slot, since we
+	 * need to drop the filedesc lock to call closef().
+	 *
+	 * XXX this duplicates parts of close().
+	 */
+	delfp = fdp->fd_ofiles[new];
+	holdleaders = 0;
+	if (delfp != NULL) {
+		if (td->td_proc->p_fdtol != NULL) {
+			/*
+			 * Ask fdfree() to sleep to ensure that all relevant
+			 * process leaders can be traversed in closef().
+			 */
+			fdp->fd_holdleaderscount++;
+			holdleaders = 1;
+		}
+	}
+
+	/*
+	 * Duplicate the source descriptor
+	 */
+	fdp->fd_ofiles[new] = fp;
+	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+	if (new > fdp->fd_lastfile)
+		fdp->fd_lastfile = new;
+	*retval = new;
+
+	/*
+	 * If we dup'd over a valid file, we now own the reference to it
+	 * and must dispose of it using closef() semantics (as if a
+	 * close() were performed on it).
+	 *
+	 * XXX this duplicates parts of close().
+	 */
+	if (delfp != NULL) {
+		knote_fdclose(td, new);
+		if (delfp->f_type == DTYPE_MQUEUE)
+			mq_fdclose(td, new, delfp);
+		FILEDESC_XUNLOCK(fdp);
+		(void) closef(delfp, td);
+		if (holdleaders) {
+			FILEDESC_XLOCK(fdp);
+			fdp->fd_holdleaderscount--;
+			if (fdp->fd_holdleaderscount == 0 &&
+			    fdp->fd_holdleaderswakeup != 0) {
+				fdp->fd_holdleaderswakeup = 0;
+				wakeup(&fdp->fd_holdleaderscount);
+			}
+			FILEDESC_XUNLOCK(fdp);
+		}
+	} else {
+		FILEDESC_XUNLOCK(fdp);
+	}
+	return (0);
+}
+
+/*
+ * If sigio is on the list associated with a process or process group,
+ * disable signalling from the device, remove sigio from the list and
+ * free sigio.
+ */
+void
+funsetown(struct sigio **sigiop)
+{
+	struct sigio *sigio;
+
+	SIGIO_LOCK();
+	sigio = *sigiop;
+	if (sigio == NULL) {
+		SIGIO_UNLOCK();
+		return;
+	}
+	*(sigio->sio_myref) = NULL;
+	if ((sigio)->sio_pgid < 0) {
+		struct pgrp *pg = (sigio)->sio_pgrp;
+		PGRP_LOCK(pg);
+		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
+			     sigio, sio_pgsigio);
+		PGRP_UNLOCK(pg);
+	} else {
+		struct proc *p = (sigio)->sio_proc;
+		PROC_LOCK(p);
+		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
+			     sigio, sio_pgsigio);
+		PROC_UNLOCK(p);
+	}
+	SIGIO_UNLOCK();
+	crfree(sigio->sio_ucred);
+	free(sigio, M_SIGIO);
+}
+
+/*
+ * Free a list of sigio structures.
+ * We only need to lock the SIGIO_LOCK because we have made ourselves
+ * inaccessible to callers of fsetown and therefore do not need to lock
+ * the proc or pgrp struct for the list manipulation.
+ */
+void
+funsetownlst(struct sigiolst *sigiolst)
+{
+	struct proc *p;
+	struct pgrp *pg;
+	struct sigio *sigio;
+
+	sigio = SLIST_FIRST(sigiolst);
+	if (sigio == NULL)
+		return;
+	p = NULL;
+	pg = NULL;
+
+	/*
+	 * Every entry of the list should belong
+	 * to a single proc or pgrp.
+	 */
+	if (sigio->sio_pgid < 0) {
+		pg = sigio->sio_pgrp;
+		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
+	} else /* if (sigio->sio_pgid > 0) */ {
+		p = sigio->sio_proc;
+		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+	}
+
+	SIGIO_LOCK();
+	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
+		*(sigio->sio_myref) = NULL;
+		if (pg != NULL) {
+			KASSERT(sigio->sio_pgid < 0,
+			    ("Proc sigio in pgrp sigio list"));
+			KASSERT(sigio->sio_pgrp == pg,
+			    ("Bogus pgrp in sigio list"));
+			PGRP_LOCK(pg);
+			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
+			    sio_pgsigio);
+			PGRP_UNLOCK(pg);
+		} else /* if (p != NULL) */ {
+			KASSERT(sigio->sio_pgid > 0,
+			    ("Pgrp sigio in proc sigio list"));
+			KASSERT(sigio->sio_proc == p,
+			    ("Bogus proc in sigio list"));
+			PROC_LOCK(p);
+			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
+			    sio_pgsigio);
+			PROC_UNLOCK(p);
+		}
+		SIGIO_UNLOCK();
+		crfree(sigio->sio_ucred);
+		free(sigio, M_SIGIO);
+		SIGIO_LOCK();
+	}
+	SIGIO_UNLOCK();
+}
+
+/*
+ * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
+ *
+ * After permission checking, add a sigio structure to the sigio list for
+ * the process or process group.
+ */
+int
+fsetown(pid_t pgid, struct sigio **sigiop)
+{
+	struct proc *proc;
+	struct pgrp *pgrp;
+	struct sigio *sigio;
+	int ret;
+
+	if (pgid == 0) {
+		funsetown(sigiop);
+		return (0);
+	}
+
+	ret = 0;
+
+	/* Allocate and fill in the new sigio out of locks. */
+	sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
+	sigio->sio_pgid = pgid;
+	sigio->sio_ucred = crhold(curthread->td_ucred);
+	sigio->sio_myref = sigiop;
+
+	sx_slock(&proctree_lock);
+	if (pgid > 0) {
+		proc = pfind(pgid);
+		if (proc == NULL) {
+			ret = ESRCH;
+			goto fail;
+		}
+
+		/*
+		 * Policy - Don't allow a process to FSETOWN a process
+		 * in another session.
+		 *
+		 * Remove this test to allow maximum flexibility or
+		 * restrict FSETOWN to the current process or process
+		 * group for maximum safety.
+		 */
+		PROC_UNLOCK(proc);
+		if (proc->p_session != curthread->td_proc->p_session) {
+			ret = EPERM;
+			goto fail;
+		}
+
+		pgrp = NULL;
+	} else /* if (pgid < 0) */ {
+		pgrp = pgfind(-pgid);
+		if (pgrp == NULL) {
+			ret = ESRCH;
+			goto fail;
+		}
+		PGRP_UNLOCK(pgrp);
+
+		/*
+		 * Policy - Don't allow a process to FSETOWN a process
+		 * in another session.
+		 *
+		 * Remove this test to allow maximum flexibility or
+		 * restrict FSETOWN to the current process or process
+		 * group for maximum safety.
+		 */
+		if (pgrp->pg_session != curthread->td_proc->p_session) {
+			ret = EPERM;
+			goto fail;
+		}
+
+		proc = NULL;
+	}
+	funsetown(sigiop);
+	if (pgid > 0) {
+		PROC_LOCK(proc);
+		/*
+		 * Since funsetownlst() is called without the proctree
+		 * locked, we need to check for P_WEXIT.
+		 * XXX: is ESRCH correct?
+		 */
+		if ((proc->p_flag & P_WEXIT) != 0) {
+			PROC_UNLOCK(proc);
+			ret = ESRCH;
+			goto fail;
+		}
+		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
+		sigio->sio_proc = proc;
+		PROC_UNLOCK(proc);
+	} else {
+		PGRP_LOCK(pgrp);
+		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
+		sigio->sio_pgrp = pgrp;
+		PGRP_UNLOCK(pgrp);
+	}
+	sx_sunlock(&proctree_lock);
+	SIGIO_LOCK();
+	*sigiop = sigio;
+	SIGIO_UNLOCK();
+	return (0);
+
+fail:
+	sx_sunlock(&proctree_lock);
+	crfree(sigio->sio_ucred);
+	free(sigio, M_SIGIO);
+	return (ret);
+}
+
+/*
+ * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
+ */
+pid_t
+fgetown(sigiop)
+	struct sigio **sigiop;
+{
+	pid_t pgid;
+
+	SIGIO_LOCK();
+	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
+	SIGIO_UNLOCK();
+	return (pgid);
+}
+
+/*
+ * Close a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct close_args {
+	int     fd;
+};
+#endif
+/* ARGSUSED */
+int
+close(td, uap)
+	struct thread *td;
+	struct close_args *uap;
+{
+
+	return (kern_close(td, uap->fd));
+}
+
+int
+kern_close(td, fd)
+	struct thread *td;
+	int fd;
+{
+	struct filedesc *fdp;
+	struct file *fp;
+	int error;
+	int holdleaders;
+
+	error = 0;
+	holdleaders = 0;
+	fdp = td->td_proc->p_fd;
+
+	AUDIT_SYSCLOSE(td, fd);
+
+	FILEDESC_XLOCK(fdp);
+	if ((unsigned)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL) {
+		FILEDESC_XUNLOCK(fdp);
+		return (EBADF);
+	}
+	fdp->fd_ofiles[fd] = NULL;
+	fdp->fd_ofileflags[fd] = 0;
+	fdunused(fdp, fd);
+	if (td->td_proc->p_fdtol != NULL) {
+		/*
+		 * Ask fdfree() to sleep to ensure that all relevant
+		 * process leaders can be traversed in closef().
+		 */
+		fdp->fd_holdleaderscount++;
+		holdleaders = 1;
+	}
+
+	/*
+	 * We now hold the fp reference that used to be owned by the
+	 * descriptor array.  We have to unlock the FILEDESC *AFTER*
+	 * knote_fdclose to prevent a race of the fd getting opened, a knote
+	 * added, and deleting a knote for the new fd.
+	 */
+	knote_fdclose(td, fd);
+	if (fp->f_type == DTYPE_MQUEUE)
+		mq_fdclose(td, fd, fp);
+	FILEDESC_XUNLOCK(fdp);
+
+	error = closef(fp, td);
+	if (holdleaders) {
+		FILEDESC_XLOCK(fdp);
+		fdp->fd_holdleaderscount--;
+		if (fdp->fd_holdleaderscount == 0 &&
+		    fdp->fd_holdleaderswakeup != 0) {
+			fdp->fd_holdleaderswakeup = 0;
+			wakeup(&fdp->fd_holdleaderscount);
+		}
+		FILEDESC_XUNLOCK(fdp);
+	}
+	return (error);
+}
+
+/*
+ * Close open file descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct closefrom_args {
+	int	lowfd;
+};
+#endif
+/* ARGSUSED */
+int
+closefrom(struct thread *td, struct closefrom_args *uap)
+{
+	struct filedesc *fdp;
+	int fd;
+
+	fdp = td->td_proc->p_fd;
+	AUDIT_ARG_FD(uap->lowfd);
+
+	/*
+	 * Treat negative starting file descriptor values identically to
+	 * closefrom(0), which closes all files.
+	 */
+	if (uap->lowfd < 0)
+		uap->lowfd = 0;
+	FILEDESC_SLOCK(fdp);
+	for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) {
+		if (fdp->fd_ofiles[fd] != NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			(void)kern_close(td, fd);
+			FILEDESC_SLOCK(fdp);
+		}
+	}
+	FILEDESC_SUNLOCK(fdp);
+	return (0);
+}
+
+#if defined(COMPAT_43)
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct ofstat_args {
+	int	fd;
+	struct	ostat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+ofstat(struct thread *td, struct ofstat_args *uap)
+{
+	struct ostat oub;
+	struct stat ub;
+	int error;
+
+	error = kern_fstat(td, uap->fd, &ub);
+	if (error == 0) {
+		cvtstat(&ub, &oub);
+		error = copyout(&oub, uap->sb, sizeof(oub));
+	}
+	return (error);
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fstat_args {
+	int	fd;
+	struct	stat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+fstat(struct thread *td, struct fstat_args *uap)
+{
+	struct stat ub;
+	int error;
+
+	error = kern_fstat(td, uap->fd, &ub);
+	if (error == 0)
+		error = copyout(&ub, uap->sb, sizeof(ub));
+	return (error);
+}
+
+int
+kern_fstat(struct thread *td, int fd, struct stat *sbp)
+{
+	struct file *fp;
+	int error;
+
+	AUDIT_ARG_FD(fd);
+
+	if ((error = fget(td, fd, &fp)) != 0)
+		return (error);
+
+	AUDIT_ARG_FILE(td->td_proc, fp);
+
+	error = fo_stat(fp, sbp, td->td_ucred, td);
+	fdrop(fp, td);
+#ifdef KTRACE
+	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
+		ktrstat(sbp);
+#endif
+	return (error);
+}
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct nfstat_args {
+	int	fd;
+	struct	nstat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+nfstat(struct thread *td, struct nfstat_args *uap)
+{
+	struct nstat nub;
+	struct stat ub;
+	int error;
+
+	error = kern_fstat(td, uap->fd, &ub);
+	if (error == 0) {
+		cvtnstat(&ub, &nub);
+		error = copyout(&nub, uap->sb, sizeof(nub));
+	}
+	return (error);
+}
+
+/*
+ * Return pathconf information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fpathconf_args {
+	int	fd;
+	int	name;
+};
+#endif
+/* ARGSUSED */
+int
+fpathconf(struct thread *td, struct fpathconf_args *uap)
+{
+	struct file *fp;
+	struct vnode *vp;
+	int error;
+
+	if ((error = fget(td, uap->fd, &fp)) != 0)
+		return (error);
+
+	/* If asynchronous I/O is available, it works for all descriptors. */
+	if (uap->name == _PC_ASYNC_IO) {
+		td->td_retval[0] = async_io_version;
+		goto out;
+	}
+	vp = fp->f_vnode;
+	if (vp != NULL) {
+		int vfslocked;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
+		VOP_UNLOCK(vp, 0);
+		VFS_UNLOCK_GIANT(vfslocked);
+	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
+		if (uap->name != _PC_PIPE_BUF) {
+			error = EINVAL;
+		} else {
+			td->td_retval[0] = PIPE_BUF;
+			error = 0;
+		}
+	} else {
+		error = EOPNOTSUPP;
+	}
+out:
+	fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * Grow the file table to accommodate (at least) nfd descriptors.  This may
+ * block and drop the filedesc lock, but it will reacquire it before
+ * returning.
+ */
+static void
+fdgrowtable(struct filedesc *fdp, int nfd)
+{
+	struct filedesc0 *fdp0;
+	struct freetable *fo;
+	struct file **ntable;
+	struct file **otable;
+	char *nfileflags;
+	int nnfiles, onfiles;
+	NDSLOTTYPE *nmap;
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+
+	KASSERT(fdp->fd_nfiles > 0,
+	    ("zero-length file table"));
+
+	/* compute the size of the new table */
+	onfiles = fdp->fd_nfiles;
+	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
+	if (nnfiles <= onfiles)
+		/* the table is already large enough */
+		return;
+
+	/* allocate a new table and (if required) new bitmaps */
+	FILEDESC_XUNLOCK(fdp);
+	ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
+	    M_FILEDESC, M_ZERO | M_WAITOK);
+	nfileflags = (char *)&ntable[nnfiles];
+	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
+		nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE,
+		    M_FILEDESC, M_ZERO | M_WAITOK);
+	else
+		nmap = NULL;
+	FILEDESC_XLOCK(fdp);
+
+	/*
+	 * We now have new tables ready to go.  Since we dropped the
+	 * filedesc lock to call malloc(), watch out for a race.
+	 */
+	onfiles = fdp->fd_nfiles;
+	if (onfiles >= nnfiles) {
+		/* we lost the race, but that's OK */
+		free(ntable, M_FILEDESC);
+		if (nmap != NULL)
+			free(nmap, M_FILEDESC);
+		return;
+	}
+	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
+	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
+	otable = fdp->fd_ofiles;
+	fdp->fd_ofileflags = nfileflags;
+	fdp->fd_ofiles = ntable;
+	/*
+	 * We must preserve ofiles until the process exits because we can't
+	 * be certain that no threads have references to the old table via
+	 * _fget().
+	 */
+	if (onfiles > NDFILE) {
+		fo = (struct freetable *)&otable[onfiles];
+		fdp0 = (struct filedesc0 *)fdp;
+		fo->ft_table = otable;
+		SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
+	}
+	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
+		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
+		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
+			free(fdp->fd_map, M_FILEDESC);
+		fdp->fd_map = nmap;
+	}
+	fdp->fd_nfiles = nnfiles;
+}
+
+/*
+ * Allocate a file descriptor for the process.
+ */
+int
+fdalloc(struct thread *td, int minfd, int *result)
+{
+	struct proc *p = td->td_proc;
+	struct filedesc *fdp = p->p_fd;
+	int fd = -1, maxfd;
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+
+	if (fdp->fd_freefile > minfd)
+		minfd = fdp->fd_freefile;
+
+	PROC_LOCK(p);
+	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+
+	/*
+	 * Search the bitmap for a free descriptor.  If none is found, try
+	 * to grow the file table.  Keep at it until we either get a file
+	 * descriptor or run into process or system limits; fdgrowtable()
+	 * may drop the filedesc lock, so we're in a race.
+	 */
+	for (;;) {
+		fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
+		if (fd >= maxfd)
+			return (EMFILE);
+		if (fd < fdp->fd_nfiles)
+			break;
+		fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
+	}
+
+	/*
+	 * Perform some sanity checks, then mark the file descriptor as
+	 * used and return it to the caller.
+	 */
+	KASSERT(!fdisused(fdp, fd),
+	    ("fd_first_free() returned non-free descriptor"));
+	KASSERT(fdp->fd_ofiles[fd] == NULL,
+	    ("free descriptor isn't"));
+	fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
+	fdused(fdp, fd);
+	*result = fd;
+	return (0);
+}
+
+/*
+ * Check to see whether n user file descriptors are available to the process
+ * p.
+ */
+int
+fdavail(struct thread *td, int n)
+{
+	struct proc *p = td->td_proc;
+	struct filedesc *fdp = td->td_proc->p_fd;
+	struct file **fpp;
+	int i, lim, last;
+
+	FILEDESC_LOCK_ASSERT(fdp);
+
+	PROC_LOCK(p);
+	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
+		return (1);
+	last = min(fdp->fd_nfiles, lim);
+	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
+		if (*fpp == NULL && --n <= 0)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Create a new open file structure and allocate a file descriptor for the
+ * process that refers to it.  We add one reference to the file for the
+ * descriptor table and one reference for resultfp. This is to prevent us
+ * being preempted and the entry in the descriptor table closed after we
+ * release the FILEDESC lock.
+ */
+int
+falloc(struct thread *td, struct file **resultfp, int *resultfd)
+{
+	struct proc *p = td->td_proc;
+	struct file *fp;
+	int error, i;
+	int maxuserfiles = maxfiles - (maxfiles / 20);
+	static struct timeval lastfail;
+	static int curfail;
+
+	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
+	if ((openfiles >= maxuserfiles &&
+	    priv_check(td, PRIV_MAXFILES) != 0) ||
+	    openfiles >= maxfiles) {
+		if (ppsratecheck(&lastfail, &curfail, 1)) {
+			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
+				td->td_ucred->cr_ruid);
+		}
+		uma_zfree(file_zone, fp);
+		return (ENFILE);
+	}
+	atomic_add_int(&openfiles, 1);
+
+	/*
+	 * If the process has file descriptor zero open, add the new file
+	 * descriptor to the list of open files at that point, otherwise
+	 * put it at the front of the list of open files.
+	 */
+	refcount_init(&fp->f_count, 1);
+	if (resultfp)
+		fhold(fp);
+	fp->f_cred = crhold(td->td_ucred);
+	fp->f_ops = &badfileops;
+	fp->f_data = NULL;
+	fp->f_vnode = NULL;
+	FILEDESC_XLOCK(p->p_fd);
+	if ((error = fdalloc(td, 0, &i))) {
+		FILEDESC_XUNLOCK(p->p_fd);
+
+		fdrop(fp, td);
+		if (resultfp)
+			fdrop(fp, td);
+		return (error);
+	}
+	p->p_fd->fd_ofiles[i] = fp;
+	FILEDESC_XUNLOCK(p->p_fd);
+	if (resultfp)
+		*resultfp = fp;
+	if (resultfd)
+		*resultfd = i;
+	return (0);
+}
+
+/*
+ * Build a new filedesc structure from another.
+ * Copy the current, root, and jail root vnode references.
+ */
+struct filedesc *
+fdinit(struct filedesc *fdp)
+{
+	struct filedesc0 *newfdp;
+
+	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
+	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
+	if (fdp != NULL) {
+		FILEDESC_XLOCK(fdp);
+		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
+		if (newfdp->fd_fd.fd_cdir)
+			VREF(newfdp->fd_fd.fd_cdir);
+		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
+		if (newfdp->fd_fd.fd_rdir)
+			VREF(newfdp->fd_fd.fd_rdir);
+		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
+		if (newfdp->fd_fd.fd_jdir)
+			VREF(newfdp->fd_fd.fd_jdir);
+		FILEDESC_XUNLOCK(fdp);
+	}
+
+	/* Create the file descriptor table. */
+	newfdp->fd_fd.fd_refcnt = 1;
+	newfdp->fd_fd.fd_holdcnt = 1;
+	newfdp->fd_fd.fd_cmask = CMASK;
+	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
+	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
+	newfdp->fd_fd.fd_nfiles = NDFILE;
+	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
+	newfdp->fd_fd.fd_lastfile = -1;
+	return (&newfdp->fd_fd);
+}
+
+static struct filedesc *
+fdhold(struct proc *p)
+{
+	struct filedesc *fdp;
+
+	mtx_lock(&fdesc_mtx);
+	fdp = p->p_fd;
+	if (fdp != NULL)
+		fdp->fd_holdcnt++;
+	mtx_unlock(&fdesc_mtx);
+	return (fdp);
+}
+
+static void
+fddrop(struct filedesc *fdp)
+{
+	struct filedesc0 *fdp0;
+	struct freetable *ft;
+	int i;
+
+	mtx_lock(&fdesc_mtx);
+	i = --fdp->fd_holdcnt;
+	mtx_unlock(&fdesc_mtx);
+	if (i > 0)
+		return;
+
+	FILEDESC_LOCK_DESTROY(fdp);
+	fdp0 = (struct filedesc0 *)fdp;
+	while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
+		SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
+		free(ft->ft_table, M_FILEDESC);
+	}
+	free(fdp, M_FILEDESC);
+}
+
+/*
+ * Share a filedesc structure.
+ */
+struct filedesc *
+fdshare(struct filedesc *fdp)
+{
+
+	FILEDESC_XLOCK(fdp);
+	fdp->fd_refcnt++;
+	FILEDESC_XUNLOCK(fdp);
+	return (fdp);
+}
+
+/*
+ * Unshare a filedesc structure, if necessary by making a copy
+ */
+void
+fdunshare(struct proc *p, struct thread *td)
+{
+
+	FILEDESC_XLOCK(p->p_fd);
+	if (p->p_fd->fd_refcnt > 1) {
+		struct filedesc *tmp;
+
+		FILEDESC_XUNLOCK(p->p_fd);
+		tmp = fdcopy(p->p_fd);
+		fdfree(td);
+		p->p_fd = tmp;
+	} else
+		FILEDESC_XUNLOCK(p->p_fd);
+}
+
+/*
+ * Copy a filedesc structure.  A NULL pointer argument returns a NULL
+ * reference; this is to ease callers, not catch errors.
+ */
+struct filedesc *
+fdcopy(struct filedesc *fdp)
+{
+	struct filedesc *newfdp;
+	int i;
+
+	/* Certain daemons might not have file descriptors. */
+	if (fdp == NULL)
+		return (NULL);
+
+	newfdp = fdinit(fdp);
+	FILEDESC_SLOCK(fdp);
+	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
+		FILEDESC_SUNLOCK(fdp);
+		FILEDESC_XLOCK(newfdp);
+		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
+		FILEDESC_XUNLOCK(newfdp);
+		FILEDESC_SLOCK(fdp);
+	}
+	/* copy everything except kqueue descriptors */
+	newfdp->fd_freefile = -1;
+	for (i = 0; i <= fdp->fd_lastfile; ++i) {
+		if (fdisused(fdp, i) &&
+		    fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE &&
+		    fdp->fd_ofiles[i]->f_ops != &badfileops) {
+			newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
+			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
+			fhold(newfdp->fd_ofiles[i]);
+			newfdp->fd_lastfile = i;
+		} else {
+			if (newfdp->fd_freefile == -1)
+				newfdp->fd_freefile = i;
+		}
+	}
+	newfdp->fd_cmask = fdp->fd_cmask;
+	FILEDESC_SUNLOCK(fdp);
+	FILEDESC_XLOCK(newfdp);
+	for (i = 0; i <= newfdp->fd_lastfile; ++i)
+		if (newfdp->fd_ofiles[i] != NULL)
+			fdused(newfdp, i);
+	if (newfdp->fd_freefile == -1)
+		newfdp->fd_freefile = i;
+	FILEDESC_XUNLOCK(newfdp);
+	return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ */
+void
+fdfree(struct thread *td)
+{
+	struct filedesc *fdp;
+	struct file **fpp;
+	int i, locked;
+	struct filedesc_to_leader *fdtol;
+	struct file *fp;
+	struct vnode *cdir, *jdir, *rdir, *vp;
+	struct flock lf;
+
+	/* Certain daemons might not have file descriptors. */
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return;
+
+	/* Check for special need to clear POSIX style locks */
+	fdtol = td->td_proc->p_fdtol;
+	if (fdtol != NULL) {
+		FILEDESC_XLOCK(fdp);
+		KASSERT(fdtol->fdl_refcount > 0,
+			("filedesc_to_refcount botch: fdl_refcount=%d",
+			 fdtol->fdl_refcount));
+		if (fdtol->fdl_refcount == 1 &&
+		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+			for (i = 0, fpp = fdp->fd_ofiles;
+			     i <= fdp->fd_lastfile;
+			     i++, fpp++) {
+				if (*fpp == NULL ||
+				    (*fpp)->f_type != DTYPE_VNODE)
+					continue;
+				fp = *fpp;
+				fhold(fp);
+				FILEDESC_XUNLOCK(fdp);
+				lf.l_whence = SEEK_SET;
+				lf.l_start = 0;
+				lf.l_len = 0;
+				lf.l_type = F_UNLCK;
+				vp = fp->f_vnode;
+				locked = VFS_LOCK_GIANT(vp->v_mount);
+				(void) VOP_ADVLOCK(vp,
+						   (caddr_t)td->td_proc->
+						   p_leader,
+						   F_UNLCK,
+						   &lf,
+						   F_POSIX);
+				VFS_UNLOCK_GIANT(locked);
+				FILEDESC_XLOCK(fdp);
+				fdrop(fp, td);
+				fpp = fdp->fd_ofiles + i;
+			}
+		}
+	retry:
+		if (fdtol->fdl_refcount == 1) {
+			if (fdp->fd_holdleaderscount > 0 &&
+			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+				/*
+				 * close() or do_dup() has cleared a reference
+				 * in a shared file descriptor table.
+				 */
+				fdp->fd_holdleaderswakeup = 1;
+				sx_sleep(&fdp->fd_holdleaderscount,
+				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
+				goto retry;
+			}
+			if (fdtol->fdl_holdcount > 0) {
+				/*
+				 * Ensure that fdtol->fdl_leader remains
+				 * valid in closef().
+				 */
+				fdtol->fdl_wakeup = 1;
+				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
+				    "fdlhold", 0);
+				goto retry;
+			}
+		}
+		fdtol->fdl_refcount--;
+		if (fdtol->fdl_refcount == 0 &&
+		    fdtol->fdl_holdcount == 0) {
+			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
+			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
+		} else
+			fdtol = NULL;
+		td->td_proc->p_fdtol = NULL;
+		FILEDESC_XUNLOCK(fdp);
+		if (fdtol != NULL)
+			free(fdtol, M_FILEDESC_TO_LEADER);
+	}
+	FILEDESC_XLOCK(fdp);
+	i = --fdp->fd_refcnt;
+	FILEDESC_XUNLOCK(fdp);
+	if (i > 0)
+		return;
+
+	fpp = fdp->fd_ofiles;
+	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
+		if (*fpp) {
+			FILEDESC_XLOCK(fdp);
+			fp = *fpp;
+			*fpp = NULL;
+			FILEDESC_XUNLOCK(fdp);
+			(void) closef(fp, td);
+		}
+	}
+	FILEDESC_XLOCK(fdp);
+
+	/* XXX This should happen earlier. */
+	mtx_lock(&fdesc_mtx);
+	td->td_proc->p_fd = NULL;
+	mtx_unlock(&fdesc_mtx);
+
+	if (fdp->fd_nfiles > NDFILE)
+		free(fdp->fd_ofiles, M_FILEDESC);
+	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
+		free(fdp->fd_map, M_FILEDESC);
+
+	fdp->fd_nfiles = 0;
+
+	cdir = fdp->fd_cdir;
+	fdp->fd_cdir = NULL;
+	rdir = fdp->fd_rdir;
+	fdp->fd_rdir = NULL;
+	jdir = fdp->fd_jdir;
+	fdp->fd_jdir = NULL;
+	FILEDESC_XUNLOCK(fdp);
+
+	if (cdir) {
+		locked = VFS_LOCK_GIANT(cdir->v_mount);
+		vrele(cdir);
+		VFS_UNLOCK_GIANT(locked);
+	}
+	if (rdir) {
+		locked = VFS_LOCK_GIANT(rdir->v_mount);
+		vrele(rdir);
+		VFS_UNLOCK_GIANT(locked);
+	}
+	if (jdir) {
+		locked = VFS_LOCK_GIANT(jdir->v_mount);
+		vrele(jdir);
+		VFS_UNLOCK_GIANT(locked);
+	}
+
+	fddrop(fdp);
+}
+
+/*
+ * For setugid programs, we don't want people to use that setugidness
+ * to generate error messages which write to a file which would otherwise
+ * be off-limits to the process.  We check for filesystems where
+ * the vnode can change out from under us after execve (like [lin]procfs).
+ *
+ * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
+ * sufficient.  We also don't check for setugidness since we know we are.
+ */
+static int
+is_unsafe(struct file *fp)
+{
+	if (fp->f_type == DTYPE_VNODE) {
+		struct vnode *vp = fp->f_vnode;
+
+		if ((vp->v_vflag & VV_PROCDEP) != 0)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Make this setugid thing safe, if at all possible.
+ */
+void
+setugidsafety(struct thread *td)
+{
+	struct filedesc *fdp;
+	int i;
+
+	/* Certain daemons might not have file descriptors. */
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return;
+
+	/*
+	 * Note: fdp->fd_ofiles may be reallocated out from under us while
+	 * we are blocked in a close.  Be careful!
+	 */
+	FILEDESC_XLOCK(fdp);
+	for (i = 0; i <= fdp->fd_lastfile; i++) {
+		if (i > 2)
+			break;
+		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
+			struct file *fp;
+
+			knote_fdclose(td, i);
+			/*
+			 * NULL-out descriptor prior to close to avoid
+			 * a race while close blocks.
+			 */
+			fp = fdp->fd_ofiles[i];
+			fdp->fd_ofiles[i] = NULL;
+			fdp->fd_ofileflags[i] = 0;
+			fdunused(fdp, i);
+			FILEDESC_XUNLOCK(fdp);
+			(void) closef(fp, td);
+			FILEDESC_XLOCK(fdp);
+		}
+	}
+	FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * If a specific file object occupies a specific file descriptor, close the
+ * file descriptor entry and drop a reference on the file object.  This is a
+ * convenience function to handle a subsequent error in a function that calls
+ * falloc() that handles the race that another thread might have closed the
+ * file descriptor out from under the thread creating the file object.
+ */
+void
+fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
+{
+
+	FILEDESC_XLOCK(fdp);
+	if (fdp->fd_ofiles[idx] == fp) {
+		fdp->fd_ofiles[idx] = NULL;
+		fdunused(fdp, idx);
+		FILEDESC_XUNLOCK(fdp);
+		fdrop(fp, td);
+	} else
+		FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * Close any files on exec?
+ */
+void
+fdcloseexec(struct thread *td)
+{
+	struct filedesc *fdp;
+	int i;
+
+	/* Certain daemons might not have file descriptors. */
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return;
+
+	FILEDESC_XLOCK(fdp);
+
+	/*
+	 * We cannot cache fd_ofiles or fd_ofileflags since operations
+	 * may block and rip them out from under us.
+	 */
+	for (i = 0; i <= fdp->fd_lastfile; i++) {
+		if (fdp->fd_ofiles[i] != NULL &&
+		    (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
+		    (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
+			struct file *fp;
+
+			knote_fdclose(td, i);
+			/*
+			 * NULL-out descriptor prior to close to avoid
+			 * a race while close blocks.
+			 */
+			fp = fdp->fd_ofiles[i];
+			fdp->fd_ofiles[i] = NULL;
+			fdp->fd_ofileflags[i] = 0;
+			fdunused(fdp, i);
+			if (fp->f_type == DTYPE_MQUEUE)
+				mq_fdclose(td, i, fp);
+			FILEDESC_XUNLOCK(fdp);
+			(void) closef(fp, td);
+			FILEDESC_XLOCK(fdp);
+		}
+	}
+	FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * It is unsafe for set[ug]id processes to be started with file
+ * descriptors 0..2 closed, as these descriptors are given implicit
+ * significance in the Standard C library.  fdcheckstd() will create a
+ * descriptor referencing /dev/null for each of stdin, stdout, and
+ * stderr that is not already open.
+ */
+int
+fdcheckstd(struct thread *td)
+{
+	struct filedesc *fdp;
+	register_t retval, save;
+	int i, error, devnull;
+
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return (0);
+	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
+	devnull = -1;
+	error = 0;
+	for (i = 0; i < 3; i++) {
+		if (fdp->fd_ofiles[i] != NULL)
+			continue;
+		if (devnull < 0) {
+			save = td->td_retval[0];
+			error = kern_open(td, "/dev/null", UIO_SYSSPACE,
+			    O_RDWR, 0);
+			devnull = td->td_retval[0];
+			KASSERT(devnull == i, ("oof, we didn't get our fd"));
+			td->td_retval[0] = save;
+			if (error)
+				break;
+		} else {
+			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
+			if (error != 0)
+				break;
+		}
+	}
+	return (error);
+}
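
For reference, the guarantee fdcheckstd() provides can be reproduced from
userland: a program that may run set[ug]id can point any closed standard
descriptor at /dev/null before doing real work. A minimal sketch, separate
from the imported code and using only standard POSIX calls (ensure_std_fds
is a made-up helper):

    #include <fcntl.h>
    #include <unistd.h>

    /*
     * Point any of fds 0..2 that is not already open at /dev/null, the
     * userland counterpart of the guarantee fdcheckstd() provides for
     * set[ug]id processes.  Returns 0 on success, -1 on failure.
     */
    static int
    ensure_std_fds(void)
    {
    	int fd, nullfd;

    	for (fd = 0; fd <= 2; fd++) {
    		if (fcntl(fd, F_GETFD) != -1)
    			continue;		/* already open */
    		nullfd = open("/dev/null", O_RDWR);
    		if (nullfd == -1)
    			return (-1);
    		if (nullfd != fd) {
    			if (dup2(nullfd, fd) == -1) {
    				(void) close(nullfd);
    				return (-1);
    			}
    			(void) close(nullfd);
    		}
    	}
    	return (0);
    }
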
+
+/*
+ * Internal form of close.  Decrement reference count on file structure.
+ * Note: td may be NULL when closing a file that was being passed in a
+ * message.
+ *
+ * XXXRW: Giant is not required for the caller, but often will be held; this
+ * makes it moderately likely the Giant will be recursed in the VFS case.
+ */
+int
+closef(struct file *fp, struct thread *td)
+{
+	struct vnode *vp;
+	struct flock lf;
+	struct filedesc_to_leader *fdtol;
+	struct filedesc *fdp;
+
+	/*
+	 * POSIX record locking dictates that any close releases ALL
+	 * locks owned by this process.  This is handled by setting
+	 * a flag in the unlock to free ONLY locks obeying POSIX
+	 * semantics, and not to free BSD-style file locks.
+	 * If the descriptor was in a message, POSIX-style locks
+	 * aren't passed with the descriptor, and the thread pointer
+	 * will be NULL.  Callers should be careful only to pass a
+	 * NULL thread pointer when there really is no owning
+	 * context that might have locks, or the locks will be
+	 * leaked.
+	 */
+	if (fp->f_type == DTYPE_VNODE && td != NULL) {
+		int vfslocked;
+
+		vp = fp->f_vnode;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+			lf.l_whence = SEEK_SET;
+			lf.l_start = 0;
+			lf.l_len = 0;
+			lf.l_type = F_UNLCK;
+			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
+					   F_UNLCK, &lf, F_POSIX);
+		}
+		fdtol = td->td_proc->p_fdtol;
+		if (fdtol != NULL) {
+			/*
+			 * Handle special case where file descriptor table is
+			 * shared between multiple process leaders.
+			 */
+			fdp = td->td_proc->p_fd;
+			FILEDESC_XLOCK(fdp);
+			for (fdtol = fdtol->fdl_next;
+			     fdtol != td->td_proc->p_fdtol;
+			     fdtol = fdtol->fdl_next) {
+				if ((fdtol->fdl_leader->p_flag &
+				     P_ADVLOCK) == 0)
+					continue;
+				fdtol->fdl_holdcount++;
+				FILEDESC_XUNLOCK(fdp);
+				lf.l_whence = SEEK_SET;
+				lf.l_start = 0;
+				lf.l_len = 0;
+				lf.l_type = F_UNLCK;
+				vp = fp->f_vnode;
+				(void) VOP_ADVLOCK(vp,
+						   (caddr_t)fdtol->fdl_leader,
+						   F_UNLCK, &lf, F_POSIX);
+				FILEDESC_XLOCK(fdp);
+				fdtol->fdl_holdcount--;
+				if (fdtol->fdl_holdcount == 0 &&
+				    fdtol->fdl_wakeup != 0) {
+					fdtol->fdl_wakeup = 0;
+					wakeup(fdtol);
+				}
+			}
+			FILEDESC_XUNLOCK(fdp);
+		}
+		VFS_UNLOCK_GIANT(vfslocked);
+	}
+	return (fdrop(fp, td));
+}
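
The rule described in the comment above is visible from userland too: fcntl()
record locks belong to the process, so closing any descriptor for a file
releases them. A small sketch, separate from the imported code, with a
caller-supplied path and error handling omitted for brevity:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    /*
     * Illustrates the POSIX semantics closef() implements: fcntl() record
     * locks belong to the process, so closing ANY descriptor for the file
     * drops them, even a lock taken through a different descriptor.
     * Error handling omitted for brevity.
     */
    static void
    posix_lock_close_demo(const char *path)
    {
    	struct flock fl;
    	int fd1, fd2;

    	fd1 = open(path, O_RDWR);
    	fd2 = open(path, O_RDWR);	/* second descriptor, same file */

    	memset(&fl, 0, sizeof(fl));
    	fl.l_type = F_WRLCK;
    	fl.l_whence = SEEK_SET;
    	fl.l_start = 0;
    	fl.l_len = 0;			/* whole file */
    	(void) fcntl(fd1, F_SETLK, &fl);	/* lock via fd1 */

    	(void) close(fd2);		/* releases the lock taken via fd1 */
    	(void) close(fd1);
    }
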
+
+/*
+ * Initialize the file pointer with the specified properties.
+ *
+ * The ops are set with release semantics to be certain that the flags, type,
+ * and data are visible when ops is.  This is to prevent ops methods from being
+ * called with bad data.
+ */
+void
+finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
+{
+	fp->f_data = data;
+	fp->f_flag = flag;
+	fp->f_type = type;
+	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
+}
+#endif /* __rtems__ */
+
+struct file *
+fget_unlocked(struct filedesc *fdp, int fd)
+{
+	struct file *fp;
+	u_int count;
+
+	if (fd < 0 || fd >= fdp->fd_nfiles)
+		return (NULL);
+	/*
+	 * Fetch the descriptor locklessly.  We avoid fdrop() races by
+	 * never raising a refcount above 0.  To accomplish this we have
+	 * to use a cmpset loop rather than an atomic_add.  The descriptor
+	 * must be re-verified once we acquire a reference to be certain
+	 * that the identity is still correct and we did not lose a race
+	 * due to preemption.
+	 */
+	for (;;) {
+		fp = fdp->fd_ofiles[fd];
+		if (fp == NULL)
+			break;
+		count = fp->f_count;
+		if (count == 0)
+			continue;
+		/*
+		 * Use an acquire barrier to prevent caching of fd_ofiles
+		 * so it is refreshed for verification.
+		 */
+		if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
+			continue;
+		if (fp == fdp->fd_ofiles[fd])
+			break;
+		fdrop(fp, curthread);
+	}
+
+	return (fp);
+}
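
The loop above is a general lockless take-a-reference pattern: never move a
count from 0 to 1, only from n to n+1 for n > 0, then re-check identity after
the increment. A standalone sketch of the same idea with C11 atomics (the
struct obj type and acquire_published name are hypothetical, not the
kernel's):

    #include <stdatomic.h>
    #include <stddef.h>

    struct obj {
    	atomic_uint refs;	/* 0 means the object is being torn down */
    };

    /*
     * Take a reference on the object currently published in *slot, in the
     * spirit of the fget_unlocked() loop above: only bump a non-zero
     * count, then re-check that the slot still holds the same object.
     */
    static struct obj *
    acquire_published(struct obj *_Atomic *slot)
    {
    	struct obj *o;
    	unsigned int count;

    	for (;;) {
    		o = atomic_load(slot);
    		if (o == NULL)
    			return (NULL);
    		count = atomic_load(&o->refs);
    		if (count == 0)
    			continue;	/* object is being freed; retry */
    		if (!atomic_compare_exchange_weak(&o->refs, &count, count + 1))
    			continue;	/* lost the increment race; retry */
    		if (atomic_load(slot) == o)
    			return (o);	/* identity confirmed, reference held */
    		/* Slot changed underneath us: undo the reference and retry. */
    		atomic_fetch_sub(&o->refs, 1);
    	}
    }
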
+
+/*
+ * Extract the file pointer associated with the specified descriptor for the
+ * current user process.
+ *
+ * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
+ * returned.
+ *
+ * If an error occurred, the non-zero error is returned and *fpp is set to
+ * NULL.  Otherwise *fpp is held and set and zero is returned.  Caller is
+ * responsible for fdrop().
+ */
+static __inline int
+_fget(struct thread *td, int fd, struct file **fpp, int flags)
+{
+	struct filedesc *fdp;
+	struct file *fp;
+
+	*fpp = NULL;
+	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
+		return (EBADF);
+	if ((fp = fget_unlocked(fdp, fd)) == NULL)
+		return (EBADF);
+	if (fp->f_ops == &badfileops) {
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	/*
+	 * FREAD and FWRITE failure return EBADF as per POSIX.
+	 *
+	 * Only one flag, or 0, may be specified.
+	 */
+	if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
+	    (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	*fpp = fp;
+	return (0);
+}
+
+int
+fget(struct thread *td, int fd, struct file **fpp)
+{
+
+	return(_fget(td, fd, fpp, 0));
+}
+
+int
+fget_read(struct thread *td, int fd, struct file **fpp)
+{
+
+	return(_fget(td, fd, fpp, FREAD));
+}
+
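The FREAD/FWRITE check in _fget() is what makes, for example, write(2) on a
read-only descriptor fail with EBADF. A userland sketch of that observable
behaviour (illustrative only; path is any readable file):

    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>

    /*
     * Userland-visible effect of the FREAD/FWRITE check in _fget() above:
     * writing to a descriptor that was opened read-only fails with EBADF.
     * 'path' is any readable file; error handling kept minimal.
     */
    static int
    ebadf_on_write_demo(const char *path)
    {
    	char byte = 0;
    	int fd;

    	fd = open(path, O_RDONLY);
    	if (fd == -1)
    		return (-1);
    	if (write(fd, &byte, 1) == -1 && errno == EBADF) {
    		(void) close(fd);
    		return (0);		/* the expected EBADF */
    	}
    	(void) close(fd);
    	return (-1);
    }
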
+#ifndef __rtems__
+int
+fget_write(struct thread *td, int fd, struct file **fpp)
+{
+
+	return(_fget(td, fd, fpp, FWRITE));
+}
+
+/*
+ * Like fget() but loads the underlying vnode, or returns an error if the
+ * descriptor does not represent a vnode.  Note that pipes use vnodes but
+ * never have VM objects.  The returned vnode will be vref()'d.
+ *
+ * XXX: what about the unused flags?
+ */
+static __inline int
+_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
+{
+	struct file *fp;
+	int error;
+
+	*vpp = NULL;
+	if ((error = _fget(td, fd, &fp, flags)) != 0)
+		return (error);
+	if (fp->f_vnode == NULL) {
+		error = EINVAL;
+	} else {
+		*vpp = fp->f_vnode;
+		vref(*vpp);
+	}
+	fdrop(fp, td);
+
+	return (error);
+}
+
+int
+fgetvp(struct thread *td, int fd, struct vnode **vpp)
+{
+
+	return (_fgetvp(td, fd, vpp, 0));
+}
+
+int
+fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
+{
+
+	return (_fgetvp(td, fd, vpp, FREAD));
+}
+
+#ifdef notyet
+int
+fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
+{
+
+	return (_fgetvp(td, fd, vpp, FWRITE));
+}
+#endif
+
+/*
+ * Like fget() but loads the underlying socket, or returns an error if the
+ * descriptor does not represent a socket.
+ *
+ * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
+ * in the future.
+ *
+ * Note: fgetsock() and fputsock() are deprecated, as consumers should rely
+ * on their file descriptor reference to prevent the socket from being free'd
+ * during use.
+ */
+int
+fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
+{
+	struct file *fp;
+	int error;
+
+	*spp = NULL;
+	if (fflagp != NULL)
+		*fflagp = 0;
+	if ((error = _fget(td, fd, &fp, 0)) != 0)
+		return (error);
+	if (fp->f_type != DTYPE_SOCKET) {
+		error = ENOTSOCK;
+	} else {
+		*spp = fp->f_data;
+		if (fflagp)
+			*fflagp = fp->f_flag;
+		SOCK_LOCK(*spp);
+		soref(*spp);
+		SOCK_UNLOCK(*spp);
+	}
+	fdrop(fp, td);
+
+	return (error);
+}
+
+/*
+ * Drop the reference count on the socket and XXX release the SX lock in the
+ * future.  The last reference closes the socket.
+ *
+ * Note: fputsock() is deprecated, see comment for fgetsock().
+ */
+void
+fputsock(struct socket *so)
+{
+
+	ACCEPT_LOCK();
+	SOCK_LOCK(so);
+	sorele(so);
+}
+#endif /* __rtems__ */
+
+/*
+ * Handle the last reference to a file being closed.
+ */
+int
+_fdrop(struct file *fp, struct thread *td)
+{
+#ifdef __rtems__
+  panic("fdrop: RTEMS unsupported");
+
+#else /* __rtems__ */
+	int error;
+
+	error = 0;
+	if (fp->f_count != 0)
+		panic("fdrop: count %d", fp->f_count);
+	if (fp->f_ops != &badfileops)
+		error = fo_close(fp, td);
+	/*
+	 * The f_cdevpriv cannot be assigned non-NULL value while we
+	 * are destroying the file.
+	 */
+	if (fp->f_cdevpriv != NULL)
+		devfs_fpdrop(fp);
+	atomic_subtract_int(&openfiles, 1);
+	crfree(fp->f_cred);
+	uma_zfree(file_zone, fp);
+
+	return (error);
+#endif /* __rtems__ */
+}
+
+#ifndef __rtems__
+/*
+ * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on the entire file
+ * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct flock_args {
+	int	fd;
+	int	how;
+};
+#endif
+/* ARGSUSED */
+int
+flock(struct thread *td, struct flock_args *uap)
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct flock lf;
+	int vfslocked;
+	int error;
+
+	if ((error = fget(td, uap->fd, &fp)) != 0)
+		return (error);
+	if (fp->f_type != DTYPE_VNODE) {
+		fdrop(fp, td);
+		return (EOPNOTSUPP);
+	}
+
+	vp = fp->f_vnode;
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	if (uap->how & LOCK_UN) {
+		lf.l_type = F_UNLCK;
+		atomic_clear_int(&fp->f_flag, FHASLOCK);
+		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+		goto done2;
+	}
+	if (uap->how & LOCK_EX)
+		lf.l_type = F_WRLCK;
+	else if (uap->how & LOCK_SH)
+		lf.l_type = F_RDLCK;
+	else {
+		error = EBADF;
+		goto done2;
+	}
+	atomic_set_int(&fp->f_flag, FHASLOCK);
+	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
+	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
+done2:
+	fdrop(fp, td);
+	VFS_UNLOCK_GIANT(vfslocked);
+	return (error);
+}
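
From userland this syscall backs the flock(2) interface; a short illustrative
sketch of typical use, separate from the imported sources:

    #include <sys/file.h>

    #include <fcntl.h>
    #include <unistd.h>

    /*
     * BSD-style advisory locking as implemented by the flock() syscall
     * above: LOCK_EX/LOCK_SH take the lock, LOCK_NB makes the attempt
     * non-blocking, LOCK_UN releases it.  The lock always covers the
     * whole file.
     */
    static int
    with_exclusive_lock(const char *path)
    {
    	int fd, rv;

    	fd = open(path, O_RDWR);
    	if (fd == -1)
    		return (-1);
    	rv = flock(fd, LOCK_EX | LOCK_NB);	/* fail rather than block */
    	if (rv == 0) {
    		/* ... work on the file while holding the lock ... */
    		(void) flock(fd, LOCK_UN);
    	}
    	(void) close(fd);
    	return (rv);
    }
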
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ */
+int
+dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
+{
+	struct file *wfp;
+	struct file *fp;
+
+	/*
+	 * If the to-be-dup'd fd number is greater than the allowed number
+	 * of file descriptors, or the fd to be dup'd has already been
+	 * closed, then reject.
+	 */
+	FILEDESC_XLOCK(fdp);
+	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
+	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
+		FILEDESC_XUNLOCK(fdp);
+		return (EBADF);
+	}
+
+	/*
+	 * There are two cases of interest here.
+	 *
+	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
+	 *
+	 * For ENXIO steal away the file structure from (dfd) and store it in
+	 * (indx).  (dfd) is effectively closed by this operation.
+	 *
+	 * Any other error code is just returned.
+	 */
+	switch (error) {
+	case ENODEV:
+		/*
+		 * Check that the mode the file is being opened for is a
+		 * subset of the mode of the existing descriptor.
+		 */
+		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
+			FILEDESC_XUNLOCK(fdp);
+			return (EACCES);
+		}
+		fp = fdp->fd_ofiles[indx];
+		fdp->fd_ofiles[indx] = wfp;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		if (fp == NULL)
+			fdused(fdp, indx);
+		fhold(wfp);
+		FILEDESC_XUNLOCK(fdp);
+		if (fp != NULL)
+			/*
+			 * We now own the reference to fp that the ofiles[]
+			 * array used to own.  Release it.
+			 */
+			fdrop(fp, td);
+		return (0);
+
+	case ENXIO:
+		/*
+		 * Steal away the file pointer from dfd and stuff it into indx.
+		 */
+		fp = fdp->fd_ofiles[indx];
+		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+		fdp->fd_ofiles[dfd] = NULL;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		fdp->fd_ofileflags[dfd] = 0;
+		fdunused(fdp, dfd);
+		if (fp == NULL)
+			fdused(fdp, indx);
+		FILEDESC_XUNLOCK(fdp);
+
+		/*
+		 * We now own the reference to fp that the ofiles[] array
+		 * used to own.  Release it.
+		 */
+		if (fp != NULL)
+			fdrop(fp, td);
+		return (0);
+
+	default:
+		FILEDESC_XUNLOCK(fdp);
+		return (error);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Scan all active processes and prisons to see if any of them have a current
+ * or root directory of `olddp'. If so, replace them with the new mount point.
+ */
+void
+mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
+{
+	struct filedesc *fdp;
+	struct prison *pr;
+	struct proc *p;
+	int nrele;
+
+	if (vrefcnt(olddp) == 1)
+		return;
+	nrele = 0;
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		fdp = fdhold(p);
+		if (fdp == NULL)
+			continue;
+		FILEDESC_XLOCK(fdp);
+		if (fdp->fd_cdir == olddp) {
+			vref(newdp);
+			fdp->fd_cdir = newdp;
+			nrele++;
+		}
+		if (fdp->fd_rdir == olddp) {
+			vref(newdp);
+			fdp->fd_rdir = newdp;
+			nrele++;
+		}
+		if (fdp->fd_jdir == olddp) {
+			vref(newdp);
+			fdp->fd_jdir = newdp;
+			nrele++;
+		}
+		FILEDESC_XUNLOCK(fdp);
+		fddrop(fdp);
+	}
+	sx_sunlock(&allproc_lock);
+	if (rootvnode == olddp) {
+		vref(newdp);
+		rootvnode = newdp;
+		nrele++;
+	}
+	mtx_lock(&prison0.pr_mtx);
+	if (prison0.pr_root == olddp) {
+		vref(newdp);
+		prison0.pr_root = newdp;
+		nrele++;
+	}
+	mtx_unlock(&prison0.pr_mtx);
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		mtx_lock(&pr->pr_mtx);
+		if (pr->pr_root == olddp) {
+			vref(newdp);
+			pr->pr_root = newdp;
+			nrele++;
+		}
+		mtx_unlock(&pr->pr_mtx);
+	}
+	sx_sunlock(&allprison_lock);
+	while (nrele--)
+		vrele(olddp);
+}
+
+struct filedesc_to_leader *
+filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
+{
+	struct filedesc_to_leader *fdtol;
+
+	fdtol = malloc(sizeof(struct filedesc_to_leader),
+	       M_FILEDESC_TO_LEADER,
+	       M_WAITOK);
+	fdtol->fdl_refcount = 1;
+	fdtol->fdl_holdcount = 0;
+	fdtol->fdl_wakeup = 0;
+	fdtol->fdl_leader = leader;
+	if (old != NULL) {
+		FILEDESC_XLOCK(fdp);
+		fdtol->fdl_next = old->fdl_next;
+		fdtol->fdl_prev = old;
+		old->fdl_next = fdtol;
+		fdtol->fdl_next->fdl_prev = fdtol;
+		FILEDESC_XUNLOCK(fdp);
+	} else {
+		fdtol->fdl_next = fdtol;
+		fdtol->fdl_prev = fdtol;
+	}
+	return (fdtol);
+}
+
+/*
+ * Get file structures globally.
+ */
+static int
+sysctl_kern_file(SYSCTL_HANDLER_ARGS)
+{
+	struct xfile xf;
+	struct filedesc *fdp;
+	struct file *fp;
+	struct proc *p;
+	int error, n;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	if (req->oldptr == NULL) {
+		n = 0;
+		sx_slock(&allproc_lock);
+		FOREACH_PROC_IN_SYSTEM(p) {
+			if (p->p_state == PRS_NEW)
+				continue;
+			fdp = fdhold(p);
+			if (fdp == NULL)
+				continue;
+			/* overestimates sparse tables. */
+			if (fdp->fd_lastfile > 0)
+				n += fdp->fd_lastfile;
+			fddrop(fdp);
+		}
+		sx_sunlock(&allproc_lock);
+		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
+	}
+	error = 0;
+	bzero(&xf, sizeof(xf));
+	xf.xf_size = sizeof(xf);
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_state == PRS_NEW)
+			continue;
+		PROC_LOCK(p);
+		if (p_cansee(req->td, p) != 0) {
+			PROC_UNLOCK(p);
+			continue;
+		}
+		xf.xf_pid = p->p_pid;
+		xf.xf_uid = p->p_ucred->cr_uid;
+		PROC_UNLOCK(p);
+		fdp = fdhold(p);
+		if (fdp == NULL)
+			continue;
+		FILEDESC_SLOCK(fdp);
+		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
+			if ((fp = fdp->fd_ofiles[n]) == NULL)
+				continue;
+			xf.xf_fd = n;
+			xf.xf_file = fp;
+			xf.xf_data = fp->f_data;
+			xf.xf_vnode = fp->f_vnode;
+			xf.xf_type = fp->f_type;
+			xf.xf_count = fp->f_count;
+			xf.xf_msgcount = 0;
+			xf.xf_offset = fp->f_offset;
+			xf.xf_flag = fp->f_flag;
+			error = SYSCTL_OUT(req, &xf, sizeof(xf));
+			if (error)
+				break;
+		}
+		FILEDESC_SUNLOCK(fdp);
+		fddrop(fdp);
+		if (error)
+			break;
+	}
+	sx_sunlock(&allproc_lock);
+	return (error);
+}
+
+SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
+    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
+
+#ifdef KINFO_OFILE_SIZE
+CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
+#endif
+
+#ifdef COMPAT_FREEBSD7
+static int
+export_vnode_for_osysctl(struct vnode *vp, int type,
+    struct kinfo_ofile *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+	int error;
+	char *fullpath, *freepath;
+	int vfslocked;
+
+	bzero(kif, sizeof(*kif));
+	kif->kf_structsize = sizeof(*kif);
+
+	vref(vp);
+	kif->kf_fd = type;
+	kif->kf_type = KF_TYPE_VNODE;
+	/* This function only handles directories. */
+	if (vp->v_type != VDIR) {
+		vrele(vp);
+		return (ENOTDIR);
+	}
+	kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+	/*
+	 * This is not a true file descriptor, so we set a bogus refcount
+	 * and offset to indicate these fields should be ignored.
+	 */
+	kif->kf_ref_count = -1;
+	kif->kf_offset = -1;
+
+	freepath = NULL;
+	fullpath = "-";
+	FILEDESC_SUNLOCK(fdp);
+	vn_fullpath(curthread, vp, &fullpath, &freepath);
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	vrele(vp);
+	VFS_UNLOCK_GIANT(vfslocked);
+	strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+	if (freepath != NULL)
+		free(freepath, M_TEMP);
+	error = SYSCTL_OUT(req, kif, sizeof(*kif));
+	FILEDESC_SLOCK(fdp);
+	return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
+{
+	char *fullpath, *freepath;
+	struct kinfo_ofile *kif;
+	struct filedesc *fdp;
+	int error, i, *name;
+	struct socket *so;
+	struct vnode *vp;
+	struct file *fp;
+	struct proc *p;
+	struct tty *tp;
+	int vfslocked;
+
+	name = (int *)arg1;
+	if ((p = pfind((pid_t)name[0])) == NULL)
+		return (ESRCH);
+	if ((error = p_candebug(curthread, p))) {
+		PROC_UNLOCK(p);
+		return (error);
+	}
+	fdp = fdhold(p);
+	PROC_UNLOCK(p);
+	if (fdp == NULL)
+		return (ENOENT);
+	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+	FILEDESC_SLOCK(fdp);
+	if (fdp->fd_cdir != NULL)
+		export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+				fdp, req);
+	if (fdp->fd_rdir != NULL)
+		export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+				fdp, req);
+	if (fdp->fd_jdir != NULL)
+		export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+				fdp, req);
+	for (i = 0; i < fdp->fd_nfiles; i++) {
+		if ((fp = fdp->fd_ofiles[i]) == NULL)
+			continue;
+		bzero(kif, sizeof(*kif));
+		kif->kf_structsize = sizeof(*kif);
+		vp = NULL;
+		so = NULL;
+		tp = NULL;
+		kif->kf_fd = i;
+		switch (fp->f_type) {
+		case DTYPE_VNODE:
+			kif->kf_type = KF_TYPE_VNODE;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_SOCKET:
+			kif->kf_type = KF_TYPE_SOCKET;
+			so = fp->f_data;
+			break;
+
+		case DTYPE_PIPE:
+			kif->kf_type = KF_TYPE_PIPE;
+			break;
+
+		case DTYPE_FIFO:
+			kif->kf_type = KF_TYPE_FIFO;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_KQUEUE:
+			kif->kf_type = KF_TYPE_KQUEUE;
+			break;
+
+		case DTYPE_CRYPTO:
+			kif->kf_type = KF_TYPE_CRYPTO;
+			break;
+
+		case DTYPE_MQUEUE:
+			kif->kf_type = KF_TYPE_MQUEUE;
+			break;
+
+		case DTYPE_SHM:
+			kif->kf_type = KF_TYPE_SHM;
+			break;
+
+		case DTYPE_SEM:
+			kif->kf_type = KF_TYPE_SEM;
+			break;
+
+		case DTYPE_PTS:
+			kif->kf_type = KF_TYPE_PTS;
+			tp = fp->f_data;
+			break;
+
+		default:
+			kif->kf_type = KF_TYPE_UNKNOWN;
+			break;
+		}
+		kif->kf_ref_count = fp->f_count;
+		if (fp->f_flag & FREAD)
+			kif->kf_flags |= KF_FLAG_READ;
+		if (fp->f_flag & FWRITE)
+			kif->kf_flags |= KF_FLAG_WRITE;
+		if (fp->f_flag & FAPPEND)
+			kif->kf_flags |= KF_FLAG_APPEND;
+		if (fp->f_flag & FASYNC)
+			kif->kf_flags |= KF_FLAG_ASYNC;
+		if (fp->f_flag & FFSYNC)
+			kif->kf_flags |= KF_FLAG_FSYNC;
+		if (fp->f_flag & FNONBLOCK)
+			kif->kf_flags |= KF_FLAG_NONBLOCK;
+		if (fp->f_flag & O_DIRECT)
+			kif->kf_flags |= KF_FLAG_DIRECT;
+		if (fp->f_flag & FHASLOCK)
+			kif->kf_flags |= KF_FLAG_HASLOCK;
+		kif->kf_offset = fp->f_offset;
+		if (vp != NULL) {
+			vref(vp);
+			switch (vp->v_type) {
+			case VNON:
+				kif->kf_vnode_type = KF_VTYPE_VNON;
+				break;
+			case VREG:
+				kif->kf_vnode_type = KF_VTYPE_VREG;
+				break;
+			case VDIR:
+				kif->kf_vnode_type = KF_VTYPE_VDIR;
+				break;
+			case VBLK:
+				kif->kf_vnode_type = KF_VTYPE_VBLK;
+				break;
+			case VCHR:
+				kif->kf_vnode_type = KF_VTYPE_VCHR;
+				break;
+			case VLNK:
+				kif->kf_vnode_type = KF_VTYPE_VLNK;
+				break;
+			case VSOCK:
+				kif->kf_vnode_type = KF_VTYPE_VSOCK;
+				break;
+			case VFIFO:
+				kif->kf_vnode_type = KF_VTYPE_VFIFO;
+				break;
+			case VBAD:
+				kif->kf_vnode_type = KF_VTYPE_VBAD;
+				break;
+			default:
+				kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+				break;
+			}
+			/*
+			 * It is OK to drop the filedesc lock here as we will
+			 * re-validate and re-evaluate its properties when
+			 * the loop continues.
+			 */
+			freepath = NULL;
+			fullpath = "-";
+			FILEDESC_SUNLOCK(fdp);
+			vn_fullpath(curthread, vp, &fullpath, &freepath);
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			vrele(vp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			strlcpy(kif->kf_path, fullpath,
+			    sizeof(kif->kf_path));
+			if (freepath != NULL)
+				free(freepath, M_TEMP);
+			FILEDESC_SLOCK(fdp);
+		}
+		if (so != NULL) {
+			struct sockaddr *sa;
+
+			if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+				bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+				bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			kif->kf_sock_domain =
+			    so->so_proto->pr_domain->dom_family;
+			kif->kf_sock_type = so->so_type;
+			kif->kf_sock_protocol = so->so_proto->pr_protocol;
+		}
+		if (tp != NULL) {
+			strlcpy(kif->kf_path, tty_devname(tp),
+			    sizeof(kif->kf_path));
+		}
+		error = SYSCTL_OUT(req, kif, sizeof(*kif));
+		if (error)
+			break;
+	}
+	FILEDESC_SUNLOCK(fdp);
+	fddrop(fdp);
+	free(kif, M_TEMP);
+	return (0);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD,
+    sysctl_kern_proc_ofiledesc, "Process ofiledesc entries");
+#endif	/* COMPAT_FREEBSD7 */
+
+#ifdef KINFO_FILE_SIZE
+CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
+#endif
+
+static int
+export_vnode_for_sysctl(struct vnode *vp, int type,
+    struct kinfo_file *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+	int error;
+	char *fullpath, *freepath;
+	int vfslocked;
+
+	bzero(kif, sizeof(*kif));
+
+	vref(vp);
+	kif->kf_fd = type;
+	kif->kf_type = KF_TYPE_VNODE;
+	/* This function only handles directories. */
+	if (vp->v_type != VDIR) {
+		vrele(vp);
+		return (ENOTDIR);
+	}
+	kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+	/*
+	 * This is not a true file descriptor, so we set a bogus refcount
+	 * and offset to indicate these fields should be ignored.
+	 */
+	kif->kf_ref_count = -1;
+	kif->kf_offset = -1;
+
+	freepath = NULL;
+	fullpath = "-";
+	FILEDESC_SUNLOCK(fdp);
+	vn_fullpath(curthread, vp, &fullpath, &freepath);
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	vrele(vp);
+	VFS_UNLOCK_GIANT(vfslocked);
+	strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+	if (freepath != NULL)
+		free(freepath, M_TEMP);
+	/* Pack record size down */
+	kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+	    strlen(kif->kf_path) + 1;
+	kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
+	error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+	FILEDESC_SLOCK(fdp);
+	return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
+{
+	char *fullpath, *freepath;
+	struct kinfo_file *kif;
+	struct filedesc *fdp;
+	int error, i, *name;
+	struct socket *so;
+	struct vnode *vp;
+	struct file *fp;
+	struct proc *p;
+	struct tty *tp;
+	int vfslocked;
+	size_t oldidx;
+
+	name = (int *)arg1;
+	if ((p = pfind((pid_t)name[0])) == NULL)
+		return (ESRCH);
+	if ((error = p_candebug(curthread, p))) {
+		PROC_UNLOCK(p);
+		return (error);
+	}
+	fdp = fdhold(p);
+	PROC_UNLOCK(p);
+	if (fdp == NULL)
+		return (ENOENT);
+	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+	FILEDESC_SLOCK(fdp);
+	if (fdp->fd_cdir != NULL)
+		export_vnode_for_sysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+				fdp, req);
+	if (fdp->fd_rdir != NULL)
+		export_vnode_for_sysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+				fdp, req);
+	if (fdp->fd_jdir != NULL)
+		export_vnode_for_sysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+				fdp, req);
+	for (i = 0; i < fdp->fd_nfiles; i++) {
+		if ((fp = fdp->fd_ofiles[i]) == NULL)
+			continue;
+		bzero(kif, sizeof(*kif));
+		vp = NULL;
+		so = NULL;
+		tp = NULL;
+		kif->kf_fd = i;
+		switch (fp->f_type) {
+		case DTYPE_VNODE:
+			kif->kf_type = KF_TYPE_VNODE;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_SOCKET:
+			kif->kf_type = KF_TYPE_SOCKET;
+			so = fp->f_data;
+			break;
+
+		case DTYPE_PIPE:
+			kif->kf_type = KF_TYPE_PIPE;
+			break;
+
+		case DTYPE_FIFO:
+			kif->kf_type = KF_TYPE_FIFO;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_KQUEUE:
+			kif->kf_type = KF_TYPE_KQUEUE;
+			break;
+
+		case DTYPE_CRYPTO:
+			kif->kf_type = KF_TYPE_CRYPTO;
+			break;
+
+		case DTYPE_MQUEUE:
+			kif->kf_type = KF_TYPE_MQUEUE;
+			break;
+
+		case DTYPE_SHM:
+			kif->kf_type = KF_TYPE_SHM;
+			break;
+
+		case DTYPE_SEM:
+			kif->kf_type = KF_TYPE_SEM;
+			break;
+
+		case DTYPE_PTS:
+			kif->kf_type = KF_TYPE_PTS;
+			tp = fp->f_data;
+			break;
+
+		default:
+			kif->kf_type = KF_TYPE_UNKNOWN;
+			break;
+		}
+		kif->kf_ref_count = fp->f_count;
+		if (fp->f_flag & FREAD)
+			kif->kf_flags |= KF_FLAG_READ;
+		if (fp->f_flag & FWRITE)
+			kif->kf_flags |= KF_FLAG_WRITE;
+		if (fp->f_flag & FAPPEND)
+			kif->kf_flags |= KF_FLAG_APPEND;
+		if (fp->f_flag & FASYNC)
+			kif->kf_flags |= KF_FLAG_ASYNC;
+		if (fp->f_flag & FFSYNC)
+			kif->kf_flags |= KF_FLAG_FSYNC;
+		if (fp->f_flag & FNONBLOCK)
+			kif->kf_flags |= KF_FLAG_NONBLOCK;
+		if (fp->f_flag & O_DIRECT)
+			kif->kf_flags |= KF_FLAG_DIRECT;
+		if (fp->f_flag & FHASLOCK)
+			kif->kf_flags |= KF_FLAG_HASLOCK;
+		kif->kf_offset = fp->f_offset;
+		if (vp != NULL) {
+			vref(vp);
+			switch (vp->v_type) {
+			case VNON:
+				kif->kf_vnode_type = KF_VTYPE_VNON;
+				break;
+			case VREG:
+				kif->kf_vnode_type = KF_VTYPE_VREG;
+				break;
+			case VDIR:
+				kif->kf_vnode_type = KF_VTYPE_VDIR;
+				break;
+			case VBLK:
+				kif->kf_vnode_type = KF_VTYPE_VBLK;
+				break;
+			case VCHR:
+				kif->kf_vnode_type = KF_VTYPE_VCHR;
+				break;
+			case VLNK:
+				kif->kf_vnode_type = KF_VTYPE_VLNK;
+				break;
+			case VSOCK:
+				kif->kf_vnode_type = KF_VTYPE_VSOCK;
+				break;
+			case VFIFO:
+				kif->kf_vnode_type = KF_VTYPE_VFIFO;
+				break;
+			case VBAD:
+				kif->kf_vnode_type = KF_VTYPE_VBAD;
+				break;
+			default:
+				kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+				break;
+			}
+			/*
+			 * It is OK to drop the filedesc lock here as we will
+			 * re-validate and re-evaluate its properties when
+			 * the loop continues.
+			 */
+			freepath = NULL;
+			fullpath = "-";
+			FILEDESC_SUNLOCK(fdp);
+			vn_fullpath(curthread, vp, &fullpath, &freepath);
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			vrele(vp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			strlcpy(kif->kf_path, fullpath,
+			    sizeof(kif->kf_path));
+			if (freepath != NULL)
+				free(freepath, M_TEMP);
+			FILEDESC_SLOCK(fdp);
+		}
+		if (so != NULL) {
+			struct sockaddr *sa;
+
+			if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+				bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+				bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			kif->kf_sock_domain =
+			    so->so_proto->pr_domain->dom_family;
+			kif->kf_sock_type = so->so_type;
+			kif->kf_sock_protocol = so->so_proto->pr_protocol;
+		}
+		if (tp != NULL) {
+			strlcpy(kif->kf_path, tty_devname(tp),
+			    sizeof(kif->kf_path));
+		}
+		/* Pack record size down */
+		kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+		    strlen(kif->kf_path) + 1;
+		kif->kf_structsize = roundup(kif->kf_structsize,
+		    sizeof(uint64_t));
+		oldidx = req->oldidx;
+		error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+		if (error) {
+			if (error == ENOMEM) {
+				/*
+				 * A hack to keep the ABI of the sysctl
+				 * kern.proc.filedesc intact: do not
+				 * account a partially copied kinfo_file
+				 * into oldidx.
+				 */
+				req->oldidx = oldidx;
+				error = 0;
+			}
+			break;
+		}
+	}
+	FILEDESC_SUNLOCK(fdp);
+	fddrop(fdp);
+	free(kif, M_TEMP);
+	return (error);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
+    sysctl_kern_proc_filedesc, "Process filedesc entries");
+
+#ifdef DDB
+/*
+ * For the purposes of debugging, generate a human-readable string for the
+ * file type.
+ */
+static const char *
+file_type_to_name(short type)
+{
+
+	switch (type) {
+	case 0:
+		return ("zero");
+	case DTYPE_VNODE:
+		return ("vnod");
+	case DTYPE_SOCKET:
+		return ("sock");
+	case DTYPE_PIPE:
+		return ("pipe");
+	case DTYPE_FIFO:
+		return ("fifo");
+	case DTYPE_KQUEUE:
+		return ("kque");
+	case DTYPE_CRYPTO:
+		return ("crpt");
+	case DTYPE_MQUEUE:
+		return ("mque");
+	case DTYPE_SHM:
+		return ("shm");
+	case DTYPE_SEM:
+		return ("ksem");
+	default:
+		return ("unkn");
+	}
+}
+
+/*
+ * For the purposes of debugging, identify a process (if any, perhaps one of
+ * many) that references the passed file in its file descriptor array. Return
+ * NULL if none.
+ */
+static struct proc *
+file_to_first_proc(struct file *fp)
+{
+	struct filedesc *fdp;
+	struct proc *p;
+	int n;
+
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_state == PRS_NEW)
+			continue;
+		fdp = p->p_fd;
+		if (fdp == NULL)
+			continue;
+		for (n = 0; n < fdp->fd_nfiles; n++) {
+			if (fp == fdp->fd_ofiles[n])
+				return (p);
+		}
+	}
+	return (NULL);
+}
+
+static void
+db_print_file(struct file *fp, int header)
+{
+	struct proc *p;
+
+	if (header)
+		db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
+		    "File", "Type", "Data", "Flag", "GCFl", "Count",
+		    "MCount", "Vnode", "FPID", "FCmd");
+	p = file_to_first_proc(fp);
+	db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
+	    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
+	    0, fp->f_count, 0, fp->f_vnode,
+	    p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
+}
+
+DB_SHOW_COMMAND(file, db_show_file)
+{
+	struct file *fp;
+
+	if (!have_addr) {
+		db_printf("usage: show file <addr>\n");
+		return;
+	}
+	fp = (struct file *)addr;
+	db_print_file(fp, 1);
+}
+
+DB_SHOW_COMMAND(files, db_show_files)
+{
+	struct filedesc *fdp;
+	struct file *fp;
+	struct proc *p;
+	int header;
+	int n;
+
+	header = 1;
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_state == PRS_NEW)
+			continue;
+		if ((fdp = p->p_fd) == NULL)
+			continue;
+		for (n = 0; n < fdp->fd_nfiles; ++n) {
+			if ((fp = fdp->fd_ofiles[n]) == NULL)
+				continue;
+			db_print_file(fp, header);
+			header = 0;
+		}
+	}
+}
+#endif
+
+SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
+    &maxfilesperproc, 0, "Maximum files allowed open per process");
+
+SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
+    &maxfiles, 0, "Maximum number of files");
+
+SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
+    __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
+
+/* ARGSUSED*/
+static void
+filelistinit(void *dummy)
+{
+
+	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
+	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
+	mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
+}
+SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
+#endif /* __rtems__ */
+
+/*-------------------------------------------------------------------*/
+
+static int
+badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td)
+{
+
+	return (EINVAL);
+}
+
+static int
+badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td)
+{
+
+	return (0);
+}
+
+static int
+badfo_kqfilter(struct file *fp, struct knote *kn)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_close(struct file *fp, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+struct fileops badfileops = {
+	.fo_read = badfo_readwrite,
+	.fo_write = badfo_readwrite,
+	.fo_truncate = badfo_truncate,
+	.fo_ioctl = badfo_ioctl,
+	.fo_poll = badfo_poll,
+	.fo_kqfilter = badfo_kqfilter,
+	.fo_stat = badfo_stat,
+	.fo_close = badfo_close,
+};
+
+#ifndef __rtems__
+/*-------------------------------------------------------------------*/
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process.  Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ *
+ * XXX: we could give this one a cloning event handler if necessary.
+ */
+
+/* ARGSUSED */
+static int
+fdopen(struct cdev *dev, int mode, int type, struct thread *td)
+{
+
+	/*
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/file.h>
+#include <freebsd/sys/filedesc.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mount.h>
+#include <freebsd/sys/mqueue.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/namei.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/stat.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/syscallsubr.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/sysproto.h>
+#include <freebsd/sys/tty.h>
+#include <freebsd/sys/unistd.h>
+#include <freebsd/sys/user.h>
+#include <freebsd/sys/vnode.h>
+#ifdef KTRACE
+#include <freebsd/sys/ktrace.h>
+#endif
+
+#include <freebsd/security/audit/audit.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/ddb/ddb.h>
+
+static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
+static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
+		     "file desc to leader structures");
+static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
+
+static uma_zone_t file_zone;
+
+
+/* Flags for do_dup() */
+#define DUP_FIXED	0x1	/* Force fixed allocation */
+#define DUP_FCNTL	0x2	/* fcntl()-style errors */
+
+static int do_dup(struct thread *td, int flags, int old, int new,
+    register_t *retval);
+static int	fd_first_free(struct filedesc *, int, int);
+static int	fd_last_used(struct filedesc *, int, int);
+static void	fdgrowtable(struct filedesc *, int);
+static void	fdunused(struct filedesc *fdp, int fd);
+static void	fdused(struct filedesc *fdp, int fd);
+
+/*
+ * A process is initially started out with NDFILE descriptors stored within
+ * this structure, selected to be enough for typical applications based on
+ * the historical limit of 20 open files (and the usage of descriptors by
+ * shells).  If these descriptors are exhausted, a larger descriptor table
+ * may be allocated, up to a process' resource limit; the internal arrays
+ * are then unused.
+ */
+#define NDFILE		20
+#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
+#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
+#define NDSLOT(x)	((x) / NDENTRIES)
+#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
+#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)
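
As a worked example, assuming a 64-bit NDSLOTTYPE (so NDENTRIES == 64):
descriptor 70 lives in bitmap word NDSLOT(70) == 1 as bit NDBIT(70) == 1 << 6,
and NDSLOTS(71) == 2 words cover descriptors 0..70. A standalone sketch of the
same arithmetic with renamed macros:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Standalone rendering of the descriptor-bitmap macros above, assuming
     * a 64-bit NDSLOTTYPE; the values differ where NDSLOTTYPE is narrower.
     */
    typedef uint64_t slot_t;
    #define ENTRIES		(sizeof(slot_t) * 8)		/* NDENTRIES */
    #define SLOT(x)		((x) / ENTRIES)			/* NDSLOT */
    #define BIT(x)		((slot_t)1 << ((x) % ENTRIES))	/* NDBIT */
    #define SLOTS(x)	(((x) + ENTRIES - 1) / ENTRIES)	/* NDSLOTS */

    int
    main(void)
    {
    	/* fd 70 lives in bitmap word 1 as bit 6; 71 fds need 2 words. */
    	printf("SLOT(70)=%zu BIT(70)=0x%llx SLOTS(71)=%zu\n",
    	    SLOT(70), (unsigned long long)BIT(70), SLOTS(71));
    	return (0);
    }
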
+
+/*
+ * Storage required per open file descriptor.
+ */
+#define OFILESIZE (sizeof(struct file *) + sizeof(char))
+
+/*
+ * Storage to hold unused ofiles that need to be reclaimed.
+ */
+struct freetable {
+	struct file	**ft_table;
+	SLIST_ENTRY(freetable) ft_next;
+};
+
+/*
+ * Basic allocation of descriptors:
+ * one of the above, plus arrays for NDFILE descriptors.
+ */
+struct filedesc0 {
+	struct	filedesc fd_fd;
+	/*
+	 * ofiles which need to be reclaimed on free.
+	 */
+	SLIST_HEAD(,freetable) fd_free;
+	/*
+	 * These arrays are used when the number of open files is
+	 * <= NDFILE, and are then pointed to by the pointers above.
+	 */
+	struct	file *fd_dfiles[NDFILE];
+	char	fd_dfileflags[NDFILE];
+	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
+};
+
+/*
+ * Descriptor management.
+ */
+volatile int openfiles;			/* actual number of open files */
+struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
+#ifndef __rtems__
+void	(*mq_fdclose)(struct thread *td, int fd, struct file *fp);
+
+/* A mutex to protect the association between a proc and filedesc. */
+static struct mtx	fdesc_mtx;
+
+/*
+ * Find the first zero bit in the given bitmap, starting at low and not
+ * exceeding size - 1.
+ */
+static int
+fd_first_free(struct filedesc *fdp, int low, int size)
+{
+	NDSLOTTYPE *map = fdp->fd_map;
+	NDSLOTTYPE mask;
+	int off, maxoff;
+
+	if (low >= size)
+		return (low);
+
+	off = NDSLOT(low);
+	if (low % NDENTRIES) {
+		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
+		if ((mask &= ~map[off]) != 0UL)
+			return (off * NDENTRIES + ffsl(mask) - 1);
+		++off;
+	}
+	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
+		if (map[off] != ~0UL)
+			return (off * NDENTRIES + ffsl(~map[off]) - 1);
+	return (size);
+}
+
+/*
+ * Find the highest non-zero bit in the given bitmap, starting at low and
+ * not exceeding size - 1.
+ */
+static int
+fd_last_used(struct filedesc *fdp, int low, int size)
+{
+	NDSLOTTYPE *map = fdp->fd_map;
+	NDSLOTTYPE mask;
+	int off, minoff;
+
+	if (low >= size)
+		return (-1);
+
+	off = NDSLOT(size);
+	if (size % NDENTRIES) {
+		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
+		if ((mask &= map[off]) != 0)
+			return (off * NDENTRIES + flsl(mask) - 1);
+		--off;
+	}
+	for (minoff = NDSLOT(low); off >= minoff; --off)
+		if (map[off] != 0)
+			return (off * NDENTRIES + flsl(map[off]) - 1);
+	return (low - 1);
+}
+
+static int
+fdisused(struct filedesc *fdp, int fd)
+{
+        KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+            ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
+	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
+}
+
+/*
+ * Mark a file descriptor as used.
+ */
+static void
+fdused(struct filedesc *fdp, int fd)
+{
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+	KASSERT(!fdisused(fdp, fd),
+	    ("fd already used"));
+
+	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
+	if (fd > fdp->fd_lastfile)
+		fdp->fd_lastfile = fd;
+	if (fd == fdp->fd_freefile)
+		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
+}
+
+/*
+ * Mark a file descriptor as unused.
+ */
+static void
+fdunused(struct filedesc *fdp, int fd)
+{
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+	KASSERT(fdisused(fdp, fd),
+	    ("fd is already unused"));
+	KASSERT(fdp->fd_ofiles[fd] == NULL,
+	    ("fd is still in use"));
+
+	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
+	if (fd < fdp->fd_freefile)
+		fdp->fd_freefile = fd;
+	if (fd == fdp->fd_lastfile)
+		fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
+}
+
+/*
+ * System calls on descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct getdtablesize_args {
+	int	dummy;
+};
+#endif
+/* ARGSUSED */
+int
+getdtablesize(struct thread *td, struct getdtablesize_args *uap)
+{
+	struct proc *p = td->td_proc;
+
+	PROC_LOCK(p);
+	td->td_retval[0] =
+	    min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+	return (0);
+}
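
The limit computed here is the one userland sees via getdtablesize(3) or
getrlimit(2); a small illustrative program, separate from the imported code:

    #include <sys/resource.h>

    #include <stdio.h>
    #include <unistd.h>

    /*
     * The kernel-side getdtablesize() above reports
     * min(RLIMIT_NOFILE, maxfilesperproc); from userland the same limit is
     * visible through getdtablesize(3) or getrlimit(2).
     */
    int
    main(void)
    {
    	struct rlimit rl;

    	if (getrlimit(RLIMIT_NOFILE, &rl) == 0)
    		printf("RLIMIT_NOFILE soft limit: %llu\n",
    		    (unsigned long long)rl.rlim_cur);
    	printf("getdtablesize(): %d\n", getdtablesize());
    	return (0);
    }
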
+
+/*
+ * Duplicate a file descriptor to a particular value.
+ *
+ * Note: keep in mind that a potential race condition exists when closing
+ * descriptors from a shared descriptor table (via rfork).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup2_args {
+	u_int	from;
+	u_int	to;
+};
+#endif
+/* ARGSUSED */
+int
+dup2(struct thread *td, struct dup2_args *uap)
+{
+
+	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
+		    td->td_retval));
+}
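
dup2(2), which this syscall implements via do_dup(), is most commonly used to
redirect a standard descriptor; an illustrative userland sketch
(redirect_stdout is a made-up helper):

    #include <fcntl.h>
    #include <unistd.h>

    /*
     * Typical dup2() use corresponding to the syscall above: point fd 1
     * (stdout) at a log file.  If the target descriptor is already open,
     * dup2() closes it first; duplicating a descriptor onto itself is a
     * successful no-op.
     */
    static int
    redirect_stdout(const char *logpath)
    {
    	int fd;

    	fd = open(logpath, O_WRONLY | O_CREAT | O_APPEND, 0644);
    	if (fd == -1)
    		return (-1);
    	if (dup2(fd, STDOUT_FILENO) == -1) {
    		(void) close(fd);
    		return (-1);
    	}
    	(void) close(fd);		/* fd 1 now refers to the log file */
    	return (0);
    }
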
+
+/*
+ * Duplicate a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup_args {
+	u_int	fd;
+};
+#endif
+/* ARGSUSED */
+int
+dup(struct thread *td, struct dup_args *uap)
+{
+
+	return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
+}
+
+/*
+ * The file control system call.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fcntl_args {
+	int	fd;
+	int	cmd;
+	long	arg;
+};
+#endif
+/* ARGSUSED */
+int
+fcntl(struct thread *td, struct fcntl_args *uap)
+{
+	struct flock fl;
+	struct oflock ofl;
+	intptr_t arg;
+	int error;
+	int cmd;
+
+	error = 0;
+	cmd = uap->cmd;
+	switch (uap->cmd) {
+	case F_OGETLK:
+	case F_OSETLK:
+	case F_OSETLKW:
+		/*
+		 * Convert old flock structure to new.
+		 */
+		error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
+		fl.l_start = ofl.l_start;
+		fl.l_len = ofl.l_len;
+		fl.l_pid = ofl.l_pid;
+		fl.l_type = ofl.l_type;
+		fl.l_whence = ofl.l_whence;
+		fl.l_sysid = 0;
+
+		switch (uap->cmd) {
+		case F_OGETLK:
+		    cmd = F_GETLK;
+		    break;
+		case F_OSETLK:
+		    cmd = F_SETLK;
+		    break;
+		case F_OSETLKW:
+		    cmd = F_SETLKW;
+		    break;
+		}
+		arg = (intptr_t)&fl;
+		break;
+        case F_GETLK:
+        case F_SETLK:
+        case F_SETLKW:
+	case F_SETLK_REMOTE:
+                error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
+                arg = (intptr_t)&fl;
+                break;
+	default:
+		arg = uap->arg;
+		break;
+	}
+	if (error)
+		return (error);
+	error = kern_fcntl(td, uap->fd, cmd, arg);
+	if (error)
+		return (error);
+	if (uap->cmd == F_OGETLK) {
+		ofl.l_start = fl.l_start;
+		ofl.l_len = fl.l_len;
+		ofl.l_pid = fl.l_pid;
+		ofl.l_type = fl.l_type;
+		ofl.l_whence = fl.l_whence;
+		error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
+	} else if (uap->cmd == F_GETLK) {
+		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
+	}
+	return (error);
+}
+
+static inline struct file *
+fdtofp(int fd, struct filedesc *fdp)
+{
+	struct file *fp;
+
+	FILEDESC_LOCK_ASSERT(fdp);
+	if ((unsigned)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (NULL);
+	return (fp);
+}
+
+int
+kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
+{
+	struct filedesc *fdp;
+	struct flock *flp;
+	struct file *fp;
+	struct proc *p;
+	char *pop;
+	struct vnode *vp;
+	int error, flg, tmp;
+	int vfslocked;
+	u_int old, new;
+	uint64_t bsize;
+
+	vfslocked = 0;
+	error = 0;
+	flg = F_POSIX;
+	p = td->td_proc;
+	fdp = p->p_fd;
+
+	switch (cmd) {
+	case F_DUPFD:
+		tmp = arg;
+		error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
+		break;
+
+	case F_DUP2FD:
+		tmp = arg;
+		error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
+		break;
+
+	case F_GETFD:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		pop = &fdp->fd_ofileflags[fd];
+		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+		FILEDESC_SUNLOCK(fdp);
+		break;
+
+	case F_SETFD:
+		FILEDESC_XLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_XUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		pop = &fdp->fd_ofileflags[fd];
+		*pop = (*pop &~ UF_EXCLOSE) |
+		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
+		FILEDESC_XUNLOCK(fdp);
+		break;
+
+	case F_GETFL:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		td->td_retval[0] = OFLAGS(fp->f_flag);
+		FILEDESC_SUNLOCK(fdp);
+		break;
+
+	case F_SETFL:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		do {
+			tmp = flg = fp->f_flag;
+			tmp &= ~FCNTLFLAGS;
+			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
+		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
+		tmp = fp->f_flag & FNONBLOCK;
+		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+		if (error) {
+			fdrop(fp, td);
+			break;
+		}
+		tmp = fp->f_flag & FASYNC;
+		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
+		if (error == 0) {
+			fdrop(fp, td);
+			break;
+		}
+		atomic_clear_int(&fp->f_flag, FNONBLOCK);
+		tmp = 0;
+		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+		fdrop(fp, td);
+		break;
+
+	case F_GETOWN:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
+		if (error == 0)
+			td->td_retval[0] = tmp;
+		fdrop(fp, td);
+		break;
+
+	case F_SETOWN:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		tmp = arg;
+		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
+		fdrop(fp, td);
+		break;
+
+	case F_SETLK_REMOTE:
+		error = priv_check(td, PRIV_NFS_LOCKD);
+		if (error)
+			return (error);
+		flg = F_REMOTE;
+		goto do_setlk;
+
+	case F_SETLKW:
+		flg |= F_WAIT;
+		/* FALLTHROUGH F_SETLK */
+
+	case F_SETLK:
+	do_setlk:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		if (fp->f_type != DTYPE_VNODE) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		flp = (struct flock *)arg;
+		if (flp->l_whence == SEEK_CUR) {
+			if (fp->f_offset < 0 ||
+			    (flp->l_start > 0 &&
+			     fp->f_offset > OFF_MAX - flp->l_start)) {
+				FILEDESC_SUNLOCK(fdp);
+				error = EOVERFLOW;
+				break;
+			}
+			flp->l_start += fp->f_offset;
+		}
+
+		/*
+		 * VOP_ADVLOCK() may block.
+		 */
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		vp = fp->f_vnode;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		switch (flp->l_type) {
+		case F_RDLCK:
+			if ((fp->f_flag & FREAD) == 0) {
+				error = EBADF;
+				break;
+			}
+			PROC_LOCK(p->p_leader);
+			p->p_leader->p_flag |= P_ADVLOCK;
+			PROC_UNLOCK(p->p_leader);
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+			    flp, flg);
+			break;
+		case F_WRLCK:
+			if ((fp->f_flag & FWRITE) == 0) {
+				error = EBADF;
+				break;
+			}
+			PROC_LOCK(p->p_leader);
+			p->p_leader->p_flag |= P_ADVLOCK;
+			PROC_UNLOCK(p->p_leader);
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+			    flp, flg);
+			break;
+		case F_UNLCK:
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
+			    flp, flg);
+			break;
+		case F_UNLCKSYS:
+			/*
+			 * Temporary api for testing remote lock
+			 * infrastructure.
+			 */
+			if (flg != F_REMOTE) {
+				error = EINVAL;
+				break;
+			}
+			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+			    F_UNLCKSYS, flp, flg);
+			break;
+		default:
+			error = EINVAL;
+			break;
+		}
+		VFS_UNLOCK_GIANT(vfslocked);
+		vfslocked = 0;
+		/* Check for race with close */
+		FILEDESC_SLOCK(fdp);
+		if ((unsigned) fd >= fdp->fd_nfiles ||
+		    fp != fdp->fd_ofiles[fd]) {
+			FILEDESC_SUNLOCK(fdp);
+			flp->l_whence = SEEK_SET;
+			flp->l_start = 0;
+			flp->l_len = 0;
+			flp->l_type = F_UNLCK;
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+					   F_UNLCK, flp, F_POSIX);
+			VFS_UNLOCK_GIANT(vfslocked);
+			vfslocked = 0;
+		} else
+			FILEDESC_SUNLOCK(fdp);
+		fdrop(fp, td);
+		break;
+
+	case F_GETLK:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		if (fp->f_type != DTYPE_VNODE) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		flp = (struct flock *)arg;
+		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
+		    flp->l_type != F_UNLCK) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EINVAL;
+			break;
+		}
+		if (flp->l_whence == SEEK_CUR) {
+			if ((flp->l_start > 0 &&
+			    fp->f_offset > OFF_MAX - flp->l_start) ||
+			    (flp->l_start < 0 &&
+			     fp->f_offset < OFF_MIN - flp->l_start)) {
+				FILEDESC_SUNLOCK(fdp);
+				error = EOVERFLOW;
+				break;
+			}
+			flp->l_start += fp->f_offset;
+		}
+		/*
+		 * VOP_ADVLOCK() may block.
+		 */
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		vp = fp->f_vnode;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
+		    F_POSIX);
+		VFS_UNLOCK_GIANT(vfslocked);
+		vfslocked = 0;
+		fdrop(fp, td);
+		break;
+
+	case F_RDAHEAD:
+		arg = arg ? 128 * 1024: 0;
+		/* FALLTHROUGH */
+	case F_READAHEAD:
+		FILEDESC_SLOCK(fdp);
+		if ((fp = fdtofp(fd, fdp)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		if (fp->f_type != DTYPE_VNODE) {
+			FILEDESC_SUNLOCK(fdp);
+			error = EBADF;
+			break;
+		}
+		fhold(fp);
+		FILEDESC_SUNLOCK(fdp);
+		if (arg != 0) {
+			vp = fp->f_vnode;
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			error = vn_lock(vp, LK_SHARED);
+			if (error != 0)
+				goto readahead_vnlock_fail;
+			bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
+			VOP_UNLOCK(vp, 0);
+			fp->f_seqcount = (arg + bsize - 1) / bsize;
+			do {
+				new = old = fp->f_flag;
+				new |= FRDAHEAD;
+			} while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+readahead_vnlock_fail:
+			VFS_UNLOCK_GIANT(vfslocked);
+			vfslocked = 0;
+		} else {
+			do {
+				new = old = fp->f_flag;
+				new &= ~FRDAHEAD;
+			} while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+		}
+		fdrop(fp, td);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	VFS_UNLOCK_GIANT(vfslocked);
+	return (error);
+}
+
+/*
+ * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
+ */
+static int
+do_dup(struct thread *td, int flags, int old, int new,
+    register_t *retval)
+{
+	struct filedesc *fdp;
+	struct proc *p;
+	struct file *fp;
+	struct file *delfp;
+	int error, holdleaders, maxfd;
+
+	p = td->td_proc;
+	fdp = p->p_fd;
+
+	/*
+	 * Verify we have a valid descriptor to dup from and possibly to
+	 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
+	 * return EINVAL when the new descriptor is out of bounds.
+	 */
+	if (old < 0)
+		return (EBADF);
+	if (new < 0)
+		return (flags & DUP_FCNTL ? EINVAL : EBADF);
+	PROC_LOCK(p);
+	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+	if (new >= maxfd)
+		return (flags & DUP_FCNTL ? EINVAL : EMFILE);
+
+	FILEDESC_XLOCK(fdp);
+	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
+		FILEDESC_XUNLOCK(fdp);
+		return (EBADF);
+	}
+	if (flags & DUP_FIXED && old == new) {
+		*retval = new;
+		FILEDESC_XUNLOCK(fdp);
+		return (0);
+	}
+	fp = fdp->fd_ofiles[old];
+	fhold(fp);
+
+	/*
+	 * If the caller specified a file descriptor, make sure the file
+	 * table is large enough to hold it, and grab it.  Otherwise, just
+	 * allocate a new descriptor the usual way.  Since the filedesc
+	 * lock may be temporarily dropped in the process, we have to look
+	 * out for a race.
+	 */
+	if (flags & DUP_FIXED) {
+		if (new >= fdp->fd_nfiles)
+			fdgrowtable(fdp, new + 1);
+		if (fdp->fd_ofiles[new] == NULL)
+			fdused(fdp, new);
+	} else {
+		if ((error = fdalloc(td, new, &new)) != 0) {
+			FILEDESC_XUNLOCK(fdp);
+			fdrop(fp, td);
+			return (error);
+		}
+	}
+
+	/*
+	 * If the old file changed out from under us then treat it as a
+	 * bad file descriptor.  Userland should do its own locking to
+	 * avoid this case.
+	 */
+	if (fdp->fd_ofiles[old] != fp) {
+		/* we've allocated a descriptor which we won't use */
+		if (fdp->fd_ofiles[new] == NULL)
+			fdunused(fdp, new);
+		FILEDESC_XUNLOCK(fdp);
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	KASSERT(old != new,
+	    ("new fd is same as old"));
+
+	/*
+	 * Save info on the descriptor being overwritten.  We cannot close
+	 * it without introducing an ownership race for the slot, since we
+	 * need to drop the filedesc lock to call closef().
+	 *
+	 * XXX this duplicates parts of close().
+	 */
+	delfp = fdp->fd_ofiles[new];
+	holdleaders = 0;
+	if (delfp != NULL) {
+		if (td->td_proc->p_fdtol != NULL) {
+			/*
+			 * Ask fdfree() to sleep to ensure that all relevant
+			 * process leaders can be traversed in closef().
+			 */
+			fdp->fd_holdleaderscount++;
+			holdleaders = 1;
+		}
+	}
+
+	/*
+	 * Duplicate the source descriptor
+	 */
+	fdp->fd_ofiles[new] = fp;
+	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+	if (new > fdp->fd_lastfile)
+		fdp->fd_lastfile = new;
+	*retval = new;
+
+	/*
+	 * If we dup'd over a valid file, we now own the reference to it
+	 * and must dispose of it using closef() semantics (as if a
+	 * close() were performed on it).
+	 *
+	 * XXX this duplicates parts of close().
+	 */
+	if (delfp != NULL) {
+		knote_fdclose(td, new);
+		if (delfp->f_type == DTYPE_MQUEUE)
+			mq_fdclose(td, new, delfp);
+		FILEDESC_XUNLOCK(fdp);
+		(void) closef(delfp, td);
+		if (holdleaders) {
+			FILEDESC_XLOCK(fdp);
+			fdp->fd_holdleaderscount--;
+			if (fdp->fd_holdleaderscount == 0 &&
+			    fdp->fd_holdleaderswakeup != 0) {
+				fdp->fd_holdleaderswakeup = 0;
+				wakeup(&fdp->fd_holdleaderscount);
+			}
+			FILEDESC_XUNLOCK(fdp);
+		}
+	} else {
+		FILEDESC_XUNLOCK(fdp);
+	}
+	return (0);
+}
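
From userland, the DUP_FCNTL path corresponds to fcntl(F_DUPFD), whose
out-of-range error is EINVAL rather than the EBADF/EMFILE reported by dup2(),
exactly as the comment above notes. An illustrative wrapper (dup_at_or_above
is a made-up name):

    #include <fcntl.h>

    /*
     * fcntl(F_DUPFD) duplicates fd onto the lowest free descriptor
     * numbered >= minfd, i.e. the DUP_FCNTL route into do_dup() above.
     * Returns the new descriptor, or -1 with errno set (EINVAL for an
     * out-of-range minfd, where dup2() would report EBADF or EMFILE).
     */
    static int
    dup_at_or_above(int fd, int minfd)
    {
    	return (fcntl(fd, F_DUPFD, minfd));
    }
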
+
+/*
+ * If sigio is on the list associated with a process or process group,
+ * disable signalling from the device, remove sigio from the list and
+ * free sigio.
+ */
+void
+funsetown(struct sigio **sigiop)
+{
+	struct sigio *sigio;
+
+	SIGIO_LOCK();
+	sigio = *sigiop;
+	if (sigio == NULL) {
+		SIGIO_UNLOCK();
+		return;
+	}
+	*(sigio->sio_myref) = NULL;
+	if ((sigio)->sio_pgid < 0) {
+		struct pgrp *pg = (sigio)->sio_pgrp;
+		PGRP_LOCK(pg);
+		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
+			     sigio, sio_pgsigio);
+		PGRP_UNLOCK(pg);
+	} else {
+		struct proc *p = (sigio)->sio_proc;
+		PROC_LOCK(p);
+		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
+			     sigio, sio_pgsigio);
+		PROC_UNLOCK(p);
+	}
+	SIGIO_UNLOCK();
+	crfree(sigio->sio_ucred);
+	free(sigio, M_SIGIO);
+}
+
+/*
+ * Free a list of sigio structures.
+ * We only need to lock the SIGIO_LOCK because we have made ourselves
+ * inaccessible to callers of fsetown and therefore do not need to lock
+ * the proc or pgrp struct for the list manipulation.
+ */
+void
+funsetownlst(struct sigiolst *sigiolst)
+{
+	struct proc *p;
+	struct pgrp *pg;
+	struct sigio *sigio;
+
+	sigio = SLIST_FIRST(sigiolst);
+	if (sigio == NULL)
+		return;
+	p = NULL;
+	pg = NULL;
+
+	/*
+	 * Every entry of the list should belong
+	 * to a single proc or pgrp.
+	 */
+	if (sigio->sio_pgid < 0) {
+		pg = sigio->sio_pgrp;
+		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
+	} else /* if (sigio->sio_pgid > 0) */ {
+		p = sigio->sio_proc;
+		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+	}
+
+	SIGIO_LOCK();
+	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
+		*(sigio->sio_myref) = NULL;
+		if (pg != NULL) {
+			KASSERT(sigio->sio_pgid < 0,
+			    ("Proc sigio in pgrp sigio list"));
+			KASSERT(sigio->sio_pgrp == pg,
+			    ("Bogus pgrp in sigio list"));
+			PGRP_LOCK(pg);
+			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
+			    sio_pgsigio);
+			PGRP_UNLOCK(pg);
+		} else /* if (p != NULL) */ {
+			KASSERT(sigio->sio_pgid > 0,
+			    ("Pgrp sigio in proc sigio list"));
+			KASSERT(sigio->sio_proc == p,
+			    ("Bogus proc in sigio list"));
+			PROC_LOCK(p);
+			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
+			    sio_pgsigio);
+			PROC_UNLOCK(p);
+		}
+		SIGIO_UNLOCK();
+		crfree(sigio->sio_ucred);
+		free(sigio, M_SIGIO);
+		SIGIO_LOCK();
+	}
+	SIGIO_UNLOCK();
+}
+
+/*
+ * This is common code for the FIOSETOWN ioctl, called by fcntl(fd, F_SETOWN, arg).
+ *
+ * After permission checking, add a sigio structure to the sigio list for
+ * the process or process group.
+ */
+int
+fsetown(pid_t pgid, struct sigio **sigiop)
+{
+	struct proc *proc;
+	struct pgrp *pgrp;
+	struct sigio *sigio;
+	int ret;
+
+	if (pgid == 0) {
+		funsetown(sigiop);
+		return (0);
+	}
+
+	ret = 0;
+
+	/* Allocate and fill in the new sigio out of locks. */
+	sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
+	sigio->sio_pgid = pgid;
+	sigio->sio_ucred = crhold(curthread->td_ucred);
+	sigio->sio_myref = sigiop;
+
+	sx_slock(&proctree_lock);
+	if (pgid > 0) {
+		proc = pfind(pgid);
+		if (proc == NULL) {
+			ret = ESRCH;
+			goto fail;
+		}
+
+		/*
+		 * Policy - Don't allow a process to FSETOWN a process
+		 * in another session.
+		 *
+		 * Remove this test to allow maximum flexibility or
+		 * restrict FSETOWN to the current process or process
+		 * group for maximum safety.
+		 */
+		PROC_UNLOCK(proc);
+		if (proc->p_session != curthread->td_proc->p_session) {
+			ret = EPERM;
+			goto fail;
+		}
+
+		pgrp = NULL;
+	} else /* if (pgid < 0) */ {
+		pgrp = pgfind(-pgid);
+		if (pgrp == NULL) {
+			ret = ESRCH;
+			goto fail;
+		}
+		PGRP_UNLOCK(pgrp);
+
+		/*
+		 * Policy - Don't allow a process to FSETOWN a process
+		 * in another session.
+		 *
+		 * Remove this test to allow maximum flexibility or
+		 * restrict FSETOWN to the current process or process
+		 * group for maximum safety.
+		 */
+		if (pgrp->pg_session != curthread->td_proc->p_session) {
+			ret = EPERM;
+			goto fail;
+		}
+
+		proc = NULL;
+	}
+	funsetown(sigiop);
+	if (pgid > 0) {
+		PROC_LOCK(proc);
+		/*
+		 * Since funsetownlst() is called without the proctree
+		 * locked, we need to check for P_WEXIT.
+		 * XXX: is ESRCH correct?
+		 */
+		if ((proc->p_flag & P_WEXIT) != 0) {
+			PROC_UNLOCK(proc);
+			ret = ESRCH;
+			goto fail;
+		}
+		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
+		sigio->sio_proc = proc;
+		PROC_UNLOCK(proc);
+	} else {
+		PGRP_LOCK(pgrp);
+		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
+		sigio->sio_pgrp = pgrp;
+		PGRP_UNLOCK(pgrp);
+	}
+	sx_sunlock(&proctree_lock);
+	SIGIO_LOCK();
+	*sigiop = sigio;
+	SIGIO_UNLOCK();
+	return (0);
+
+fail:
+	sx_sunlock(&proctree_lock);
+	crfree(sigio->sio_ucred);
+	free(sigio, M_SIGIO);
+	return (ret);
+}
+
+/*
+ * This is common code for the FIOGETOWN ioctl, called by fcntl(fd, F_GETOWN, arg).
+ */
+pid_t
+fgetown(sigiop)
+	struct sigio **sigiop;
+{
+	pid_t pgid;
+
+	SIGIO_LOCK();
+	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
+	SIGIO_UNLOCK();
+	return (pgid);
+}
+
+/*
+ * Close a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct close_args {
+	int     fd;
+};
+#endif
+/* ARGSUSED */
+int
+close(td, uap)
+	struct thread *td;
+	struct close_args *uap;
+{
+
+	return (kern_close(td, uap->fd));
+}
+
+int
+kern_close(td, fd)
+	struct thread *td;
+	int fd;
+{
+	struct filedesc *fdp;
+	struct file *fp;
+	int error;
+	int holdleaders;
+
+	error = 0;
+	holdleaders = 0;
+	fdp = td->td_proc->p_fd;
+
+	AUDIT_SYSCLOSE(td, fd);
+
+	FILEDESC_XLOCK(fdp);
+	if ((unsigned)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL) {
+		FILEDESC_XUNLOCK(fdp);
+		return (EBADF);
+	}
+	fdp->fd_ofiles[fd] = NULL;
+	fdp->fd_ofileflags[fd] = 0;
+	fdunused(fdp, fd);
+	if (td->td_proc->p_fdtol != NULL) {
+		/*
+		 * Ask fdfree() to sleep to ensure that all relevant
+		 * process leaders can be traversed in closef().
+		 */
+		fdp->fd_holdleaderscount++;
+		holdleaders = 1;
+	}
+
+	/*
+	 * We now hold the fp reference that used to be owned by the
+	 * descriptor array.  We have to unlock the FILEDESC *AFTER*
+	 * knote_fdclose to prevent a race of the fd getting opened, a knote
+	 * added, and deleting a knote for the new fd.
+	 */
+	knote_fdclose(td, fd);
+	if (fp->f_type == DTYPE_MQUEUE)
+		mq_fdclose(td, fd, fp);
+	FILEDESC_XUNLOCK(fdp);
+
+	error = closef(fp, td);
+	if (holdleaders) {
+		FILEDESC_XLOCK(fdp);
+		fdp->fd_holdleaderscount--;
+		if (fdp->fd_holdleaderscount == 0 &&
+		    fdp->fd_holdleaderswakeup != 0) {
+			fdp->fd_holdleaderswakeup = 0;
+			wakeup(&fdp->fd_holdleaderscount);
+		}
+		FILEDESC_XUNLOCK(fdp);
+	}
+	return (error);
+}
+
+/*
+ * Close open file descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct closefrom_args {
+	int	lowfd;
+};
+#endif
+/* ARGSUSED */
+int
+closefrom(struct thread *td, struct closefrom_args *uap)
+{
+	struct filedesc *fdp;
+	int fd;
+
+	fdp = td->td_proc->p_fd;
+	AUDIT_ARG_FD(uap->lowfd);
+
+	/*
+	 * Treat negative starting file descriptor values identically to
+	 * closefrom(0), which closes all files.
+	 */
+	if (uap->lowfd < 0)
+		uap->lowfd = 0;
+	FILEDESC_SLOCK(fdp);
+	for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) {
+		if (fdp->fd_ofiles[fd] != NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			(void)kern_close(td, fd);
+			FILEDESC_SLOCK(fdp);
+		}
+	}
+	FILEDESC_SUNLOCK(fdp);
+	return (0);
+}
+
+#if defined(COMPAT_43)
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct ofstat_args {
+	int	fd;
+	struct	ostat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+ofstat(struct thread *td, struct ofstat_args *uap)
+{
+	struct ostat oub;
+	struct stat ub;
+	int error;
+
+	error = kern_fstat(td, uap->fd, &ub);
+	if (error == 0) {
+		cvtstat(&ub, &oub);
+		error = copyout(&oub, uap->sb, sizeof(oub));
+	}
+	return (error);
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fstat_args {
+	int	fd;
+	struct	stat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+fstat(struct thread *td, struct fstat_args *uap)
+{
+	struct stat ub;
+	int error;
+
+	error = kern_fstat(td, uap->fd, &ub);
+	if (error == 0)
+		error = copyout(&ub, uap->sb, sizeof(ub));
+	return (error);
+}
+
+int
+kern_fstat(struct thread *td, int fd, struct stat *sbp)
+{
+	struct file *fp;
+	int error;
+
+	AUDIT_ARG_FD(fd);
+
+	if ((error = fget(td, fd, &fp)) != 0)
+		return (error);
+
+	AUDIT_ARG_FILE(td->td_proc, fp);
+
+	error = fo_stat(fp, sbp, td->td_ucred, td);
+	fdrop(fp, td);
+#ifdef KTRACE
+	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
+		ktrstat(sbp);
+#endif
+	return (error);
+}
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct nfstat_args {
+	int	fd;
+	struct	nstat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+nfstat(struct thread *td, struct nfstat_args *uap)
+{
+	struct nstat nub;
+	struct stat ub;
+	int error;
+
+	error = kern_fstat(td, uap->fd, &ub);
+	if (error == 0) {
+		cvtnstat(&ub, &nub);
+		error = copyout(&nub, uap->sb, sizeof(nub));
+	}
+	return (error);
+}
+
+/*
+ * Return pathconf information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fpathconf_args {
+	int	fd;
+	int	name;
+};
+#endif
+/* ARGSUSED */
+int
+fpathconf(struct thread *td, struct fpathconf_args *uap)
+{
+	struct file *fp;
+	struct vnode *vp;
+	int error;
+
+	if ((error = fget(td, uap->fd, &fp)) != 0)
+		return (error);
+
+	/* If asynchronous I/O is available, it works for all descriptors. */
+	if (uap->name == _PC_ASYNC_IO) {
+		td->td_retval[0] = async_io_version;
+		goto out;
+	}
+	vp = fp->f_vnode;
+	if (vp != NULL) {
+		int vfslocked;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
+		VOP_UNLOCK(vp, 0);
+		VFS_UNLOCK_GIANT(vfslocked);
+	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
+		if (uap->name != _PC_PIPE_BUF) {
+			error = EINVAL;
+		} else {
+			td->td_retval[0] = PIPE_BUF;
+			error = 0;
+		}
+	} else {
+		error = EOPNOTSUPP;
+	}
+out:
+	fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * Grow the file table to accommodate (at least) nfd descriptors.  This may
+ * block and drop the filedesc lock, but it will reacquire it before
+ * returning.
+ */
+static void
+fdgrowtable(struct filedesc *fdp, int nfd)
+{
+	struct filedesc0 *fdp0;
+	struct freetable *fo;
+	struct file **ntable;
+	struct file **otable;
+	char *nfileflags;
+	int nnfiles, onfiles;
+	NDSLOTTYPE *nmap;
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+
+	KASSERT(fdp->fd_nfiles > 0,
+	    ("zero-length file table"));
+
+	/* compute the size of the new table */
+	onfiles = fdp->fd_nfiles;
+	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
+	if (nnfiles <= onfiles)
+		/* the table is already large enough */
+		return;
+
+	/* allocate a new table and (if required) new bitmaps */
+	FILEDESC_XUNLOCK(fdp);
+	ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
+	    M_FILEDESC, M_ZERO | M_WAITOK);
+	nfileflags = (char *)&ntable[nnfiles];
+	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
+		nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE,
+		    M_FILEDESC, M_ZERO | M_WAITOK);
+	else
+		nmap = NULL;
+	FILEDESC_XLOCK(fdp);
+
+	/*
+	 * We now have new tables ready to go.  Since we dropped the
+	 * filedesc lock to call malloc(), watch out for a race.
+	 */
+	onfiles = fdp->fd_nfiles;
+	if (onfiles >= nnfiles) {
+		/* we lost the race, but that's OK */
+		free(ntable, M_FILEDESC);
+		if (nmap != NULL)
+			free(nmap, M_FILEDESC);
+		return;
+	}
+	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
+	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
+	otable = fdp->fd_ofiles;
+	fdp->fd_ofileflags = nfileflags;
+	fdp->fd_ofiles = ntable;
+	/*
+	 * We must preserve ofiles until the process exits because we can't
+	 * be certain that no threads have references to the old table via
+	 * _fget().
+	 */
+	if (onfiles > NDFILE) {
+		fo = (struct freetable *)&otable[onfiles];
+		fdp0 = (struct filedesc0 *)fdp;
+		fo->ft_table = otable;
+		SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
+	}
+	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
+		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
+		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
+			free(fdp->fd_map, M_FILEDESC);
+		fdp->fd_map = nmap;
+	}
+	fdp->fd_nfiles = nnfiles;
+}
+
+/*
+ * Allocate a file descriptor for the process.
+ */
+int
+fdalloc(struct thread *td, int minfd, int *result)
+{
+	struct proc *p = td->td_proc;
+	struct filedesc *fdp = p->p_fd;
+	int fd = -1, maxfd;
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+
+	if (fdp->fd_freefile > minfd)
+		minfd = fdp->fd_freefile;
+
+	PROC_LOCK(p);
+	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+
+	/*
+	 * Search the bitmap for a free descriptor.  If none is found, try
+	 * to grow the file table.  Keep at it until we either get a file
+	 * descriptor or run into process or system limits; fdgrowtable()
+	 * may drop the filedesc lock, so we're in a race.
+	 */
+	for (;;) {
+		fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
+		if (fd >= maxfd)
+			return (EMFILE);
+		if (fd < fdp->fd_nfiles)
+			break;
+		fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
+	}
+
+	/*
+	 * Perform some sanity checks, then mark the file descriptor as
+	 * used and return it to the caller.
+	 */
+	KASSERT(!fdisused(fdp, fd),
+	    ("fd_first_free() returned non-free descriptor"));
+	KASSERT(fdp->fd_ofiles[fd] == NULL,
+	    ("free descriptor isn't"));
+	fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
+	fdused(fdp, fd);
+	*result = fd;
+	return (0);
+}
+
+/*
+ * Check to see whether n user file descriptors are available to the process
+ * p.
+ */
+int
+fdavail(struct thread *td, int n)
+{
+	struct proc *p = td->td_proc;
+	struct filedesc *fdp = td->td_proc->p_fd;
+	struct file **fpp;
+	int i, lim, last;
+
+	FILEDESC_LOCK_ASSERT(fdp);
+
+	PROC_LOCK(p);
+	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+	PROC_UNLOCK(p);
+	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
+		return (1);
+	last = min(fdp->fd_nfiles, lim);
+	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
+		if (*fpp == NULL && --n <= 0)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Create a new open file structure and allocate a file descriptor for the
+ * process that refers to it.  We add one reference to the file for the
+ * descriptor table and one reference for resultfp.  This prevents the
+ * entry in the descriptor table from being closed out from under us if we
+ * are preempted after we release the FILEDESC lock.
+ */
+int
+falloc(struct thread *td, struct file **resultfp, int *resultfd)
+{
+	struct proc *p = td->td_proc;
+	struct file *fp;
+	int error, i;
+	int maxuserfiles = maxfiles - (maxfiles / 20);
+	static struct timeval lastfail;
+	static int curfail;
+
+	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
+	if ((openfiles >= maxuserfiles &&
+	    priv_check(td, PRIV_MAXFILES) != 0) ||
+	    openfiles >= maxfiles) {
+		if (ppsratecheck(&lastfail, &curfail, 1)) {
+			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
+				td->td_ucred->cr_ruid);
+		}
+		uma_zfree(file_zone, fp);
+		return (ENFILE);
+	}
+	atomic_add_int(&openfiles, 1);
+
+	/*
+	 * Initialize the new file and take the references the caller
+	 * expects: one for the descriptor table and, if resultfp was
+	 * supplied, one for the caller.
+	 */
+	refcount_init(&fp->f_count, 1);
+	if (resultfp)
+		fhold(fp);
+	fp->f_cred = crhold(td->td_ucred);
+	fp->f_ops = &badfileops;
+	fp->f_data = NULL;
+	fp->f_vnode = NULL;
+	FILEDESC_XLOCK(p->p_fd);
+	if ((error = fdalloc(td, 0, &i))) {
+		FILEDESC_XUNLOCK(p->p_fd);
+
+		fdrop(fp, td);
+		if (resultfp)
+			fdrop(fp, td);
+		return (error);
+	}
+	p->p_fd->fd_ofiles[i] = fp;
+	FILEDESC_XUNLOCK(p->p_fd);
+	if (resultfp)
+		*resultfp = fp;
+	if (resultfd)
+		*resultfd = i;
+	return (0);
+}
+
+/*
+ * Build a new filedesc structure from another.
+ * Copy the current, root, and jail root vnode references.
+ */
+struct filedesc *
+fdinit(struct filedesc *fdp)
+{
+	struct filedesc0 *newfdp;
+
+	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
+	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
+	if (fdp != NULL) {
+		FILEDESC_XLOCK(fdp);
+		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
+		if (newfdp->fd_fd.fd_cdir)
+			VREF(newfdp->fd_fd.fd_cdir);
+		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
+		if (newfdp->fd_fd.fd_rdir)
+			VREF(newfdp->fd_fd.fd_rdir);
+		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
+		if (newfdp->fd_fd.fd_jdir)
+			VREF(newfdp->fd_fd.fd_jdir);
+		FILEDESC_XUNLOCK(fdp);
+	}
+
+	/* Create the file descriptor table. */
+	newfdp->fd_fd.fd_refcnt = 1;
+	newfdp->fd_fd.fd_holdcnt = 1;
+	newfdp->fd_fd.fd_cmask = CMASK;
+	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
+	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
+	newfdp->fd_fd.fd_nfiles = NDFILE;
+	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
+	newfdp->fd_fd.fd_lastfile = -1;
+	return (&newfdp->fd_fd);
+}
+
+static struct filedesc *
+fdhold(struct proc *p)
+{
+	struct filedesc *fdp;
+
+	mtx_lock(&fdesc_mtx);
+	fdp = p->p_fd;
+	if (fdp != NULL)
+		fdp->fd_holdcnt++;
+	mtx_unlock(&fdesc_mtx);
+	return (fdp);
+}
+
+static void
+fddrop(struct filedesc *fdp)
+{
+	struct filedesc0 *fdp0;
+	struct freetable *ft;
+	int i;
+
+	mtx_lock(&fdesc_mtx);
+	i = --fdp->fd_holdcnt;
+	mtx_unlock(&fdesc_mtx);
+	if (i > 0)
+		return;
+
+	FILEDESC_LOCK_DESTROY(fdp);
+	fdp0 = (struct filedesc0 *)fdp;
+	while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
+		SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
+		free(ft->ft_table, M_FILEDESC);
+	}
+	free(fdp, M_FILEDESC);
+}
+
+/*
+ * Share a filedesc structure.
+ */
+struct filedesc *
+fdshare(struct filedesc *fdp)
+{
+
+	FILEDESC_XLOCK(fdp);
+	fdp->fd_refcnt++;
+	FILEDESC_XUNLOCK(fdp);
+	return (fdp);
+}
+
+/*
+ * Unshare a filedesc structure, if necessary by making a copy.
+ */
+void
+fdunshare(struct proc *p, struct thread *td)
+{
+
+	FILEDESC_XLOCK(p->p_fd);
+	if (p->p_fd->fd_refcnt > 1) {
+		struct filedesc *tmp;
+
+		FILEDESC_XUNLOCK(p->p_fd);
+		tmp = fdcopy(p->p_fd);
+		fdfree(td);
+		p->p_fd = tmp;
+	} else
+		FILEDESC_XUNLOCK(p->p_fd);
+}
+
+/*
+ * Copy a filedesc structure.  A NULL pointer argument returns a NULL
+ * reference; this is to ease callers, not to catch errors.
+ */
+struct filedesc *
+fdcopy(struct filedesc *fdp)
+{
+	struct filedesc *newfdp;
+	int i;
+
+	/* Certain daemons might not have file descriptors. */
+	if (fdp == NULL)
+		return (NULL);
+
+	newfdp = fdinit(fdp);
+	FILEDESC_SLOCK(fdp);
+	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
+		FILEDESC_SUNLOCK(fdp);
+		FILEDESC_XLOCK(newfdp);
+		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
+		FILEDESC_XUNLOCK(newfdp);
+		FILEDESC_SLOCK(fdp);
+	}
+	/* copy everything except kqueue descriptors */
+	newfdp->fd_freefile = -1;
+	for (i = 0; i <= fdp->fd_lastfile; ++i) {
+		if (fdisused(fdp, i) &&
+		    fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE &&
+		    fdp->fd_ofiles[i]->f_ops != &badfileops) {
+			newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
+			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
+			fhold(newfdp->fd_ofiles[i]);
+			newfdp->fd_lastfile = i;
+		} else {
+			if (newfdp->fd_freefile == -1)
+				newfdp->fd_freefile = i;
+		}
+	}
+	newfdp->fd_cmask = fdp->fd_cmask;
+	FILEDESC_SUNLOCK(fdp);
+	FILEDESC_XLOCK(newfdp);
+	for (i = 0; i <= newfdp->fd_lastfile; ++i)
+		if (newfdp->fd_ofiles[i] != NULL)
+			fdused(newfdp, i);
+	if (newfdp->fd_freefile == -1)
+		newfdp->fd_freefile = i;
+	FILEDESC_XUNLOCK(newfdp);
+	return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ */
+void
+fdfree(struct thread *td)
+{
+	struct filedesc *fdp;
+	struct file **fpp;
+	int i, locked;
+	struct filedesc_to_leader *fdtol;
+	struct file *fp;
+	struct vnode *cdir, *jdir, *rdir, *vp;
+	struct flock lf;
+
+	/* Certain daemons might not have file descriptors. */
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return;
+
+	/* Check for special need to clear POSIX style locks */
+	fdtol = td->td_proc->p_fdtol;
+	if (fdtol != NULL) {
+		FILEDESC_XLOCK(fdp);
+		KASSERT(fdtol->fdl_refcount > 0,
+			("filedesc_to_refcount botch: fdl_refcount=%d",
+			 fdtol->fdl_refcount));
+		if (fdtol->fdl_refcount == 1 &&
+		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+			for (i = 0, fpp = fdp->fd_ofiles;
+			     i <= fdp->fd_lastfile;
+			     i++, fpp++) {
+				if (*fpp == NULL ||
+				    (*fpp)->f_type != DTYPE_VNODE)
+					continue;
+				fp = *fpp;
+				fhold(fp);
+				FILEDESC_XUNLOCK(fdp);
+				lf.l_whence = SEEK_SET;
+				lf.l_start = 0;
+				lf.l_len = 0;
+				lf.l_type = F_UNLCK;
+				vp = fp->f_vnode;
+				locked = VFS_LOCK_GIANT(vp->v_mount);
+				(void) VOP_ADVLOCK(vp,
+						   (caddr_t)td->td_proc->
+						   p_leader,
+						   F_UNLCK,
+						   &lf,
+						   F_POSIX);
+				VFS_UNLOCK_GIANT(locked);
+				FILEDESC_XLOCK(fdp);
+				fdrop(fp, td);
+				fpp = fdp->fd_ofiles + i;
+			}
+		}
+	retry:
+		if (fdtol->fdl_refcount == 1) {
+			if (fdp->fd_holdleaderscount > 0 &&
+			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+				/*
+				 * close() or do_dup() has cleared a reference
+				 * in a shared file descriptor table.
+				 */
+				fdp->fd_holdleaderswakeup = 1;
+				sx_sleep(&fdp->fd_holdleaderscount,
+				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
+				goto retry;
+			}
+			if (fdtol->fdl_holdcount > 0) {
+				/*
+				 * Ensure that fdtol->fdl_leader remains
+				 * valid in closef().
+				 */
+				fdtol->fdl_wakeup = 1;
+				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
+				    "fdlhold", 0);
+				goto retry;
+			}
+		}
+		fdtol->fdl_refcount--;
+		if (fdtol->fdl_refcount == 0 &&
+		    fdtol->fdl_holdcount == 0) {
+			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
+			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
+		} else
+			fdtol = NULL;
+		td->td_proc->p_fdtol = NULL;
+		FILEDESC_XUNLOCK(fdp);
+		if (fdtol != NULL)
+			free(fdtol, M_FILEDESC_TO_LEADER);
+	}
+	FILEDESC_XLOCK(fdp);
+	i = --fdp->fd_refcnt;
+	FILEDESC_XUNLOCK(fdp);
+	if (i > 0)
+		return;
+
+	fpp = fdp->fd_ofiles;
+	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
+		if (*fpp) {
+			FILEDESC_XLOCK(fdp);
+			fp = *fpp;
+			*fpp = NULL;
+			FILEDESC_XUNLOCK(fdp);
+			(void) closef(fp, td);
+		}
+	}
+	FILEDESC_XLOCK(fdp);
+
+	/* XXX This should happen earlier. */
+	mtx_lock(&fdesc_mtx);
+	td->td_proc->p_fd = NULL;
+	mtx_unlock(&fdesc_mtx);
+
+	if (fdp->fd_nfiles > NDFILE)
+		free(fdp->fd_ofiles, M_FILEDESC);
+	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
+		free(fdp->fd_map, M_FILEDESC);
+
+	fdp->fd_nfiles = 0;
+
+	cdir = fdp->fd_cdir;
+	fdp->fd_cdir = NULL;
+	rdir = fdp->fd_rdir;
+	fdp->fd_rdir = NULL;
+	jdir = fdp->fd_jdir;
+	fdp->fd_jdir = NULL;
+	FILEDESC_XUNLOCK(fdp);
+
+	if (cdir) {
+		locked = VFS_LOCK_GIANT(cdir->v_mount);
+		vrele(cdir);
+		VFS_UNLOCK_GIANT(locked);
+	}
+	if (rdir) {
+		locked = VFS_LOCK_GIANT(rdir->v_mount);
+		vrele(rdir);
+		VFS_UNLOCK_GIANT(locked);
+	}
+	if (jdir) {
+		locked = VFS_LOCK_GIANT(jdir->v_mount);
+		vrele(jdir);
+		VFS_UNLOCK_GIANT(locked);
+	}
+
+	fddrop(fdp);
+}
+
+/*
+ * For setugid programs, we don't want people to use that setugidness to
+ * generate error messages which write to a file which would otherwise be
+ * off-limits to the process.  We check for filesystems where
+ * the vnode can change out from under us after execve (like [lin]procfs).
+ *
+ * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
+ * sufficient.  We also don't check for setugidness since we know we are.
+ */
+static int
+is_unsafe(struct file *fp)
+{
+	if (fp->f_type == DTYPE_VNODE) {
+		struct vnode *vp = fp->f_vnode;
+
+		if ((vp->v_vflag & VV_PROCDEP) != 0)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Make this setugid thing safe, if at all possible.
+ */
+void
+setugidsafety(struct thread *td)
+{
+	struct filedesc *fdp;
+	int i;
+
+	/* Certain daemons might not have file descriptors. */
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return;
+
+	/*
+	 * Note: fdp->fd_ofiles may be reallocated out from under us while
+	 * we are blocked in a close.  Be careful!
+	 */
+	FILEDESC_XLOCK(fdp);
+	for (i = 0; i <= fdp->fd_lastfile; i++) {
+		if (i > 2)
+			break;
+		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
+			struct file *fp;
+
+			knote_fdclose(td, i);
+			/*
+			 * NULL-out descriptor prior to close to avoid
+			 * a race while close blocks.
+			 */
+			fp = fdp->fd_ofiles[i];
+			fdp->fd_ofiles[i] = NULL;
+			fdp->fd_ofileflags[i] = 0;
+			fdunused(fdp, i);
+			FILEDESC_XUNLOCK(fdp);
+			(void) closef(fp, td);
+			FILEDESC_XLOCK(fdp);
+		}
+	}
+	FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * If a specific file object occupies a specific file descriptor, close the
+ * file descriptor entry and drop a reference on the file object.  This is a
+ * convenience function to handle a subsequent error in a function that calls
+ * falloc() that handles the race that another thread might have closed the
+ * file descriptor out from under the thread creating the file object.
+ */
+void
+fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
+{
+
+	FILEDESC_XLOCK(fdp);
+	if (fdp->fd_ofiles[idx] == fp) {
+		fdp->fd_ofiles[idx] = NULL;
+		fdunused(fdp, idx);
+		FILEDESC_XUNLOCK(fdp);
+		fdrop(fp, td);
+	} else
+		FILEDESC_XUNLOCK(fdp);
+}
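+
+/*
+ * Illustrative error-path sketch for the helper above (placeholder names,
+ * not from a specific caller): after falloc() has installed fp at fd, a
+ * later setup failure is unwound with
+ *
+ *	fdclose(fdp, fp, fd, td);
+ *	fdrop(fp, td);
+ *
+ * which releases the descriptor-table slot (if it is still ours) and the
+ * extra reference falloc() handed back through resultfp.
+ */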
+
+/*
+ * Close any files that are marked close-on-exec.
+ */
+void
+fdcloseexec(struct thread *td)
+{
+	struct filedesc *fdp;
+	int i;
+
+	/* Certain daemons might not have file descriptors. */
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return;
+
+	FILEDESC_XLOCK(fdp);
+
+	/*
+	 * We cannot cache fd_ofiles or fd_ofileflags since operations
+	 * may block and rip them out from under us.
+	 */
+	for (i = 0; i <= fdp->fd_lastfile; i++) {
+		if (fdp->fd_ofiles[i] != NULL &&
+		    (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
+		    (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
+			struct file *fp;
+
+			knote_fdclose(td, i);
+			/*
+			 * NULL-out descriptor prior to close to avoid
+			 * a race while close blocks.
+			 */
+			fp = fdp->fd_ofiles[i];
+			fdp->fd_ofiles[i] = NULL;
+			fdp->fd_ofileflags[i] = 0;
+			fdunused(fdp, i);
+			if (fp->f_type == DTYPE_MQUEUE)
+				mq_fdclose(td, i, fp);
+			FILEDESC_XUNLOCK(fdp);
+			(void) closef(fp, td);
+			FILEDESC_XLOCK(fdp);
+		}
+	}
+	FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * It is unsafe for set[ug]id processes to be started with file
+ * descriptors 0..2 closed, as these descriptors are given implicit
+ * significance in the Standard C library.  fdcheckstd() will create a
+ * descriptor referencing /dev/null for each of stdin, stdout, and
+ * stderr that is not already open.
+ */
+int
+fdcheckstd(struct thread *td)
+{
+	struct filedesc *fdp;
+	register_t retval, save;
+	int i, error, devnull;
+
+	fdp = td->td_proc->p_fd;
+	if (fdp == NULL)
+		return (0);
+	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
+	devnull = -1;
+	error = 0;
+	for (i = 0; i < 3; i++) {
+		if (fdp->fd_ofiles[i] != NULL)
+			continue;
+		if (devnull < 0) {
+			save = td->td_retval[0];
+			error = kern_open(td, "/dev/null", UIO_SYSSPACE,
+			    O_RDWR, 0);
+			devnull = td->td_retval[0];
+			KASSERT(devnull == i, ("oof, we didn't get our fd"));
+			td->td_retval[0] = save;
+			if (error)
+				break;
+		} else {
+			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
+			if (error != 0)
+				break;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Internal form of close.  Decrement reference count on file structure.
+ * Note: td may be NULL when closing a file that was being passed in a
+ * message.
+ *
+ * XXXRW: Giant is not required for the caller, but often will be held; this
+ * makes it moderately likely the Giant will be recursed in the VFS case.
+ */
+int
+closef(struct file *fp, struct thread *td)
+{
+	struct vnode *vp;
+	struct flock lf;
+	struct filedesc_to_leader *fdtol;
+	struct filedesc *fdp;
+
+	/*
+	 * POSIX record locking dictates that any close releases ALL
+	 * locks owned by this process.  This is handled by setting
+	 * a flag in the unlock to free ONLY locks obeying POSIX
+	 * semantics, and not to free BSD-style file locks.
+	 * If the descriptor was in a message, POSIX-style locks
+	 * aren't passed with the descriptor, and the thread pointer
+	 * will be NULL.  Callers should be careful only to pass a
+	 * NULL thread pointer when there really is no owning
+	 * context that might have locks, or the locks will be
+	 * leaked.
+	 */
+	if (fp->f_type == DTYPE_VNODE && td != NULL) {
+		int vfslocked;
+
+		vp = fp->f_vnode;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+			lf.l_whence = SEEK_SET;
+			lf.l_start = 0;
+			lf.l_len = 0;
+			lf.l_type = F_UNLCK;
+			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
+					   F_UNLCK, &lf, F_POSIX);
+		}
+		fdtol = td->td_proc->p_fdtol;
+		if (fdtol != NULL) {
+			/*
+			 * Handle special case where file descriptor table is
+			 * shared between multiple process leaders.
+			 */
+			fdp = td->td_proc->p_fd;
+			FILEDESC_XLOCK(fdp);
+			for (fdtol = fdtol->fdl_next;
+			     fdtol != td->td_proc->p_fdtol;
+			     fdtol = fdtol->fdl_next) {
+				if ((fdtol->fdl_leader->p_flag &
+				     P_ADVLOCK) == 0)
+					continue;
+				fdtol->fdl_holdcount++;
+				FILEDESC_XUNLOCK(fdp);
+				lf.l_whence = SEEK_SET;
+				lf.l_start = 0;
+				lf.l_len = 0;
+				lf.l_type = F_UNLCK;
+				vp = fp->f_vnode;
+				(void) VOP_ADVLOCK(vp,
+						   (caddr_t)fdtol->fdl_leader,
+						   F_UNLCK, &lf, F_POSIX);
+				FILEDESC_XLOCK(fdp);
+				fdtol->fdl_holdcount--;
+				if (fdtol->fdl_holdcount == 0 &&
+				    fdtol->fdl_wakeup != 0) {
+					fdtol->fdl_wakeup = 0;
+					wakeup(fdtol);
+				}
+			}
+			FILEDESC_XUNLOCK(fdp);
+		}
+		VFS_UNLOCK_GIANT(vfslocked);
+	}
+	return (fdrop(fp, td));
+}
+
+/*
+ * Initialize the file pointer with the specified properties.
+ *
+ * The ops are set with release semantics to be certain that the flags, type,
+ * and data are visible when ops is.  This is to prevent ops methods from being
+ * called with bad data.
+ */
+void
+finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
+{
+	fp->f_data = data;
+	fp->f_flag = flag;
+	fp->f_type = type;
+	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
+}
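+
+/*
+ * Illustrative call sequence (a sketch with placeholder names, not copied
+ * from a particular subsystem): a new file is created with falloc(), which
+ * leaves f_ops pointing at badfileops, and is completed with finit() once
+ * its backing object exists, e.g.
+ *
+ *	error = falloc(td, &fp, &fd);
+ *	if (error != 0)
+ *		return (error);
+ *	finit(fp, FREAD | FWRITE, DTYPE_PIPE, my_data, &my_fileops);
+ *	fdrop(fp, td);
+ *	td->td_retval[0] = fd;
+ */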
+#endif /* __rtems__ */
+
+struct file *
+fget_unlocked(struct filedesc *fdp, int fd)
+{
+	struct file *fp;
+	u_int count;
+
+	if (fd < 0 || fd >= fdp->fd_nfiles)
+		return (NULL);
+	/*
+	 * Fetch the descriptor locklessly.  We avoid fdrop() races by
+	 * never raising a refcount above 0.  To accomplish this we have
+	 * to use a cmpset loop rather than an atomic_add.  The descriptor
+	 * must be re-verified once we acquire a reference to be certain
+	 * that the identity is still correct and we did not lose a race
+	 * due to preemption.
+	 */
+	for (;;) {
+		fp = fdp->fd_ofiles[fd];
+		if (fp == NULL)
+			break;
+		count = fp->f_count;
+		if (count == 0)
+			continue;
+		/*
+		 * Use an acquire barrier to prevent caching of fd_ofiles
+		 * so it is refreshed for verification.
+		 */
+		if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
+			continue;
+		if (fp == fdp->fd_ofiles[fd])
+			break;
+		fdrop(fp, curthread);
+	}
+
+	return (fp);
+}
+
+/*
+ * Extract the file pointer associated with the specified descriptor for the
+ * current user process.
+ *
+ * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
+ * returned.
+ *
+ * If an error occurred, the non-zero error is returned and *fpp is set to
+ * NULL.  Otherwise *fpp is held and set and zero is returned.  Caller is
+ * responsible for fdrop().
+ */
+static __inline int
+_fget(struct thread *td, int fd, struct file **fpp, int flags)
+{
+	struct filedesc *fdp;
+	struct file *fp;
+
+	*fpp = NULL;
+	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
+		return (EBADF);
+	if ((fp = fget_unlocked(fdp, fd)) == NULL)
+		return (EBADF);
+	if (fp->f_ops == &badfileops) {
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	/*
+	 * A failed FREAD or FWRITE check returns EBADF, as per POSIX.
+	 *
+	 * Only one flag, or 0, may be specified.
+	 */
+	if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
+	    (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	*fpp = fp;
+	return (0);
+}
+
+int
+fget(struct thread *td, int fd, struct file **fpp)
+{
+
+	return(_fget(td, fd, fpp, 0));
+}
+
+int
+fget_read(struct thread *td, int fd, struct file **fpp)
+{
+
+	return(_fget(td, fd, fpp, FREAD));
+}
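+
+/*
+ * Illustrative caller pattern (a sketch only; fd, com and data are
+ * placeholder arguments): every successful fget() must be balanced by an
+ * fdrop() once the file pointer is no longer needed, e.g.
+ *
+ *	struct file *fp;
+ *	int error;
+ *
+ *	if ((error = fget(td, fd, &fp)) != 0)
+ *		return (error);
+ *	error = fo_ioctl(fp, com, data, td->td_ucred, td);
+ *	fdrop(fp, td);
+ *	return (error);
+ */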
+
+#ifndef __rtems__
+int
+fget_write(struct thread *td, int fd, struct file **fpp)
+{
+
+	return(_fget(td, fd, fpp, FWRITE));
+}
+
+/*
+ * Like fget() but loads the underlying vnode, or returns an error if the
+ * descriptor does not represent a vnode.  Note that pipes use vnodes but
+ * never have VM objects.  The returned vnode will be vref()'d.
+ *
+ * XXX: what about the unused flags?
+ */
+static __inline int
+_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
+{
+	struct file *fp;
+	int error;
+
+	*vpp = NULL;
+	if ((error = _fget(td, fd, &fp, flags)) != 0)
+		return (error);
+	if (fp->f_vnode == NULL) {
+		error = EINVAL;
+	} else {
+		*vpp = fp->f_vnode;
+		vref(*vpp);
+	}
+	fdrop(fp, td);
+
+	return (error);
+}
+
+int
+fgetvp(struct thread *td, int fd, struct vnode **vpp)
+{
+
+	return (_fgetvp(td, fd, vpp, 0));
+}
+
+int
+fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
+{
+
+	return (_fgetvp(td, fd, vpp, FREAD));
+}
+
+#ifdef notyet
+int
+fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
+{
+
+	return (_fgetvp(td, fd, vpp, FWRITE));
+}
+#endif
+
+/*
+ * Like fget() but loads the underlying socket, or returns an error if the
+ * descriptor does not represent a socket.
+ *
+ * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
+ * in the future.
+ *
+ * Note: fgetsock() and fputsock() are deprecated, as consumers should rely
+ * on their file descriptor reference to prevent the socket from being free'd
+ * during use.
+ */
+int
+fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
+{
+	struct file *fp;
+	int error;
+
+	*spp = NULL;
+	if (fflagp != NULL)
+		*fflagp = 0;
+	if ((error = _fget(td, fd, &fp, 0)) != 0)
+		return (error);
+	if (fp->f_type != DTYPE_SOCKET) {
+		error = ENOTSOCK;
+	} else {
+		*spp = fp->f_data;
+		if (fflagp)
+			*fflagp = fp->f_flag;
+		SOCK_LOCK(*spp);
+		soref(*spp);
+		SOCK_UNLOCK(*spp);
+	}
+	fdrop(fp, td);
+
+	return (error);
+}
+
+/*
+ * Drop the reference count on the socket and XXX release the SX lock in the
+ * future.  The last reference closes the socket.
+ *
+ * Note: fputsock() is deprecated, see comment for fgetsock().
+ */
+void
+fputsock(struct socket *so)
+{
+
+	ACCEPT_LOCK();
+	SOCK_LOCK(so);
+	sorele(so);
+}
+#endif /* __rtems__ */
+
+/*
+ * Handle the last reference to a file being closed.
+ */
+int
+_fdrop(struct file *fp, struct thread *td)
+{
+#ifdef __rtems__
+  panic("fdrop: RTEMS unsupported");
+
+#else /* __rtems__ */
+	int error;
+
+	error = 0;
+	if (fp->f_count != 0)
+		panic("fdrop: count %d", fp->f_count);
+	if (fp->f_ops != &badfileops)
+		error = fo_close(fp, td);
+	/*
+	 * The f_cdevpriv cannot be assigned a non-NULL value while we
+	 * are destroying the file.
+	 */
+	if (fp->f_cdevpriv != NULL)
+		devfs_fpdrop(fp);
+	atomic_subtract_int(&openfiles, 1);
+	crfree(fp->f_cred);
+	uma_zfree(file_zone, fp);
+
+	return (error);
+#endif /* __rtems__ */
+}
+
+#ifndef __rtems__
+/*
+ * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on the entire file
+ * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct flock_args {
+	int	fd;
+	int	how;
+};
+#endif
+/* ARGSUSED */
+int
+flock(struct thread *td, struct flock_args *uap)
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct flock lf;
+	int vfslocked;
+	int error;
+
+	if ((error = fget(td, uap->fd, &fp)) != 0)
+		return (error);
+	if (fp->f_type != DTYPE_VNODE) {
+		fdrop(fp, td);
+		return (EOPNOTSUPP);
+	}
+
+	vp = fp->f_vnode;
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	if (uap->how & LOCK_UN) {
+		lf.l_type = F_UNLCK;
+		atomic_clear_int(&fp->f_flag, FHASLOCK);
+		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+		goto done2;
+	}
+	if (uap->how & LOCK_EX)
+		lf.l_type = F_WRLCK;
+	else if (uap->how & LOCK_SH)
+		lf.l_type = F_RDLCK;
+	else {
+		error = EBADF;
+		goto done2;
+	}
+	atomic_set_int(&fp->f_flag, FHASLOCK);
+	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
+	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
+done2:
+	fdrop(fp, td);
+	VFS_UNLOCK_GIANT(vfslocked);
+	return (error);
+}
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ */
+int
+dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
+{
+	struct file *wfp;
+	struct file *fp;
+
+	/*
+	 * If the to-be-dup'd fd number is greater than the allowed number
+	 * of file descriptors, or the fd to be dup'd has already been
+	 * closed, then reject.
+	 */
+	FILEDESC_XLOCK(fdp);
+	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
+	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
+		FILEDESC_XUNLOCK(fdp);
+		return (EBADF);
+	}
+
+	/*
+	 * There are two cases of interest here.
+	 *
+	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
+	 *
+	 * For ENXIO steal away the file structure from (dfd) and store it in
+	 * (indx).  (dfd) is effectively closed by this operation.
+	 *
+	 * Any other error code is just returned.
+	 */
+	switch (error) {
+	case ENODEV:
+		/*
+		 * Check that the mode the file is being opened for is a
+		 * subset of the mode of the existing descriptor.
+		 */
+		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
+			FILEDESC_XUNLOCK(fdp);
+			return (EACCES);
+		}
+		fp = fdp->fd_ofiles[indx];
+		fdp->fd_ofiles[indx] = wfp;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		if (fp == NULL)
+			fdused(fdp, indx);
+		fhold(wfp);
+		FILEDESC_XUNLOCK(fdp);
+		if (fp != NULL)
+			/*
+			 * We now own the reference to fp that the ofiles[]
+			 * array used to own.  Release it.
+			 */
+			fdrop(fp, td);
+		return (0);
+
+	case ENXIO:
+		/*
+		 * Steal away the file pointer from dfd and stuff it into indx.
+		 */
+		fp = fdp->fd_ofiles[indx];
+		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+		fdp->fd_ofiles[dfd] = NULL;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		fdp->fd_ofileflags[dfd] = 0;
+		fdunused(fdp, dfd);
+		if (fp == NULL)
+			fdused(fdp, indx);
+		FILEDESC_XUNLOCK(fdp);
+
+		/*
+		 * We now own the reference to fp that the ofiles[] array
+		 * used to own.  Release it.
+		 */
+		if (fp != NULL)
+			fdrop(fp, td);
+		return (0);
+
+	default:
+		FILEDESC_XUNLOCK(fdp);
+		return (error);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Scan all active processes and prisons to see if any of them have a current
+ * or root directory of `olddp'. If so, replace them with the new mount point.
+ */
+void
+mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
+{
+	struct filedesc *fdp;
+	struct prison *pr;
+	struct proc *p;
+	int nrele;
+
+	if (vrefcnt(olddp) == 1)
+		return;
+	nrele = 0;
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		fdp = fdhold(p);
+		if (fdp == NULL)
+			continue;
+		FILEDESC_XLOCK(fdp);
+		if (fdp->fd_cdir == olddp) {
+			vref(newdp);
+			fdp->fd_cdir = newdp;
+			nrele++;
+		}
+		if (fdp->fd_rdir == olddp) {
+			vref(newdp);
+			fdp->fd_rdir = newdp;
+			nrele++;
+		}
+		if (fdp->fd_jdir == olddp) {
+			vref(newdp);
+			fdp->fd_jdir = newdp;
+			nrele++;
+		}
+		FILEDESC_XUNLOCK(fdp);
+		fddrop(fdp);
+	}
+	sx_sunlock(&allproc_lock);
+	if (rootvnode == olddp) {
+		vref(newdp);
+		rootvnode = newdp;
+		nrele++;
+	}
+	mtx_lock(&prison0.pr_mtx);
+	if (prison0.pr_root == olddp) {
+		vref(newdp);
+		prison0.pr_root = newdp;
+		nrele++;
+	}
+	mtx_unlock(&prison0.pr_mtx);
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		mtx_lock(&pr->pr_mtx);
+		if (pr->pr_root == olddp) {
+			vref(newdp);
+			pr->pr_root = newdp;
+			nrele++;
+		}
+		mtx_unlock(&pr->pr_mtx);
+	}
+	sx_sunlock(&allprison_lock);
+	while (nrele--)
+		vrele(olddp);
+}
+
+struct filedesc_to_leader *
+filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
+{
+	struct filedesc_to_leader *fdtol;
+
+	fdtol = malloc(sizeof(struct filedesc_to_leader),
+	       M_FILEDESC_TO_LEADER,
+	       M_WAITOK);
+	fdtol->fdl_refcount = 1;
+	fdtol->fdl_holdcount = 0;
+	fdtol->fdl_wakeup = 0;
+	fdtol->fdl_leader = leader;
+	if (old != NULL) {
+		FILEDESC_XLOCK(fdp);
+		fdtol->fdl_next = old->fdl_next;
+		fdtol->fdl_prev = old;
+		old->fdl_next = fdtol;
+		fdtol->fdl_next->fdl_prev = fdtol;
+		FILEDESC_XUNLOCK(fdp);
+	} else {
+		fdtol->fdl_next = fdtol;
+		fdtol->fdl_prev = fdtol;
+	}
+	return (fdtol);
+}
+
+/*
+ * Get file structures globally.
+ */
+static int
+sysctl_kern_file(SYSCTL_HANDLER_ARGS)
+{
+	struct xfile xf;
+	struct filedesc *fdp;
+	struct file *fp;
+	struct proc *p;
+	int error, n;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	if (req->oldptr == NULL) {
+		n = 0;
+		sx_slock(&allproc_lock);
+		FOREACH_PROC_IN_SYSTEM(p) {
+			if (p->p_state == PRS_NEW)
+				continue;
+			fdp = fdhold(p);
+			if (fdp == NULL)
+				continue;
+			/* overestimates sparse tables. */
+			if (fdp->fd_lastfile > 0)
+				n += fdp->fd_lastfile;
+			fddrop(fdp);
+		}
+		sx_sunlock(&allproc_lock);
+		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
+	}
+	error = 0;
+	bzero(&xf, sizeof(xf));
+	xf.xf_size = sizeof(xf);
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_state == PRS_NEW)
+			continue;
+		PROC_LOCK(p);
+		if (p_cansee(req->td, p) != 0) {
+			PROC_UNLOCK(p);
+			continue;
+		}
+		xf.xf_pid = p->p_pid;
+		xf.xf_uid = p->p_ucred->cr_uid;
+		PROC_UNLOCK(p);
+		fdp = fdhold(p);
+		if (fdp == NULL)
+			continue;
+		FILEDESC_SLOCK(fdp);
+		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
+			if ((fp = fdp->fd_ofiles[n]) == NULL)
+				continue;
+			xf.xf_fd = n;
+			xf.xf_file = fp;
+			xf.xf_data = fp->f_data;
+			xf.xf_vnode = fp->f_vnode;
+			xf.xf_type = fp->f_type;
+			xf.xf_count = fp->f_count;
+			xf.xf_msgcount = 0;
+			xf.xf_offset = fp->f_offset;
+			xf.xf_flag = fp->f_flag;
+			error = SYSCTL_OUT(req, &xf, sizeof(xf));
+			if (error)
+				break;
+		}
+		FILEDESC_SUNLOCK(fdp);
+		fddrop(fdp);
+		if (error)
+			break;
+	}
+	sx_sunlock(&allproc_lock);
+	return (error);
+}
+
+SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
+    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
+
+#ifdef KINFO_OFILE_SIZE
+CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
+#endif
+
+#ifdef COMPAT_FREEBSD7
+static int
+export_vnode_for_osysctl(struct vnode *vp, int type,
+    struct kinfo_ofile *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+	int error;
+	char *fullpath, *freepath;
+	int vfslocked;
+
+	bzero(kif, sizeof(*kif));
+	kif->kf_structsize = sizeof(*kif);
+
+	vref(vp);
+	kif->kf_fd = type;
+	kif->kf_type = KF_TYPE_VNODE;
+	/* This function only handles directories. */
+	if (vp->v_type != VDIR) {
+		vrele(vp);
+		return (ENOTDIR);
+	}
+	kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+	/*
+	 * This is not a true file descriptor, so we set a bogus refcount
+	 * and offset to indicate these fields should be ignored.
+	 */
+	kif->kf_ref_count = -1;
+	kif->kf_offset = -1;
+
+	freepath = NULL;
+	fullpath = "-";
+	FILEDESC_SUNLOCK(fdp);
+	vn_fullpath(curthread, vp, &fullpath, &freepath);
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	vrele(vp);
+	VFS_UNLOCK_GIANT(vfslocked);
+	strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+	if (freepath != NULL)
+		free(freepath, M_TEMP);
+	error = SYSCTL_OUT(req, kif, sizeof(*kif));
+	FILEDESC_SLOCK(fdp);
+	return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
+{
+	char *fullpath, *freepath;
+	struct kinfo_ofile *kif;
+	struct filedesc *fdp;
+	int error, i, *name;
+	struct socket *so;
+	struct vnode *vp;
+	struct file *fp;
+	struct proc *p;
+	struct tty *tp;
+	int vfslocked;
+
+	name = (int *)arg1;
+	if ((p = pfind((pid_t)name[0])) == NULL)
+		return (ESRCH);
+	if ((error = p_candebug(curthread, p))) {
+		PROC_UNLOCK(p);
+		return (error);
+	}
+	fdp = fdhold(p);
+	PROC_UNLOCK(p);
+	if (fdp == NULL)
+		return (ENOENT);
+	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+	FILEDESC_SLOCK(fdp);
+	if (fdp->fd_cdir != NULL)
+		export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+				fdp, req);
+	if (fdp->fd_rdir != NULL)
+		export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+				fdp, req);
+	if (fdp->fd_jdir != NULL)
+		export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+				fdp, req);
+	for (i = 0; i < fdp->fd_nfiles; i++) {
+		if ((fp = fdp->fd_ofiles[i]) == NULL)
+			continue;
+		bzero(kif, sizeof(*kif));
+		kif->kf_structsize = sizeof(*kif);
+		vp = NULL;
+		so = NULL;
+		tp = NULL;
+		kif->kf_fd = i;
+		switch (fp->f_type) {
+		case DTYPE_VNODE:
+			kif->kf_type = KF_TYPE_VNODE;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_SOCKET:
+			kif->kf_type = KF_TYPE_SOCKET;
+			so = fp->f_data;
+			break;
+
+		case DTYPE_PIPE:
+			kif->kf_type = KF_TYPE_PIPE;
+			break;
+
+		case DTYPE_FIFO:
+			kif->kf_type = KF_TYPE_FIFO;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_KQUEUE:
+			kif->kf_type = KF_TYPE_KQUEUE;
+			break;
+
+		case DTYPE_CRYPTO:
+			kif->kf_type = KF_TYPE_CRYPTO;
+			break;
+
+		case DTYPE_MQUEUE:
+			kif->kf_type = KF_TYPE_MQUEUE;
+			break;
+
+		case DTYPE_SHM:
+			kif->kf_type = KF_TYPE_SHM;
+			break;
+
+		case DTYPE_SEM:
+			kif->kf_type = KF_TYPE_SEM;
+			break;
+
+		case DTYPE_PTS:
+			kif->kf_type = KF_TYPE_PTS;
+			tp = fp->f_data;
+			break;
+
+		default:
+			kif->kf_type = KF_TYPE_UNKNOWN;
+			break;
+		}
+		kif->kf_ref_count = fp->f_count;
+		if (fp->f_flag & FREAD)
+			kif->kf_flags |= KF_FLAG_READ;
+		if (fp->f_flag & FWRITE)
+			kif->kf_flags |= KF_FLAG_WRITE;
+		if (fp->f_flag & FAPPEND)
+			kif->kf_flags |= KF_FLAG_APPEND;
+		if (fp->f_flag & FASYNC)
+			kif->kf_flags |= KF_FLAG_ASYNC;
+		if (fp->f_flag & FFSYNC)
+			kif->kf_flags |= KF_FLAG_FSYNC;
+		if (fp->f_flag & FNONBLOCK)
+			kif->kf_flags |= KF_FLAG_NONBLOCK;
+		if (fp->f_flag & O_DIRECT)
+			kif->kf_flags |= KF_FLAG_DIRECT;
+		if (fp->f_flag & FHASLOCK)
+			kif->kf_flags |= KF_FLAG_HASLOCK;
+		kif->kf_offset = fp->f_offset;
+		if (vp != NULL) {
+			vref(vp);
+			switch (vp->v_type) {
+			case VNON:
+				kif->kf_vnode_type = KF_VTYPE_VNON;
+				break;
+			case VREG:
+				kif->kf_vnode_type = KF_VTYPE_VREG;
+				break;
+			case VDIR:
+				kif->kf_vnode_type = KF_VTYPE_VDIR;
+				break;
+			case VBLK:
+				kif->kf_vnode_type = KF_VTYPE_VBLK;
+				break;
+			case VCHR:
+				kif->kf_vnode_type = KF_VTYPE_VCHR;
+				break;
+			case VLNK:
+				kif->kf_vnode_type = KF_VTYPE_VLNK;
+				break;
+			case VSOCK:
+				kif->kf_vnode_type = KF_VTYPE_VSOCK;
+				break;
+			case VFIFO:
+				kif->kf_vnode_type = KF_VTYPE_VFIFO;
+				break;
+			case VBAD:
+				kif->kf_vnode_type = KF_VTYPE_VBAD;
+				break;
+			default:
+				kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+				break;
+			}
+			/*
+			 * It is OK to drop the filedesc lock here as we will
+			 * re-validate and re-evaluate its properties when
+			 * the loop continues.
+			 */
+			freepath = NULL;
+			fullpath = "-";
+			FILEDESC_SUNLOCK(fdp);
+			vn_fullpath(curthread, vp, &fullpath, &freepath);
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			vrele(vp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			strlcpy(kif->kf_path, fullpath,
+			    sizeof(kif->kf_path));
+			if (freepath != NULL)
+				free(freepath, M_TEMP);
+			FILEDESC_SLOCK(fdp);
+		}
+		if (so != NULL) {
+			struct sockaddr *sa;
+
+			if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+				bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+				bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			kif->kf_sock_domain =
+			    so->so_proto->pr_domain->dom_family;
+			kif->kf_sock_type = so->so_type;
+			kif->kf_sock_protocol = so->so_proto->pr_protocol;
+		}
+		if (tp != NULL) {
+			strlcpy(kif->kf_path, tty_devname(tp),
+			    sizeof(kif->kf_path));
+		}
+		error = SYSCTL_OUT(req, kif, sizeof(*kif));
+		if (error)
+			break;
+	}
+	FILEDESC_SUNLOCK(fdp);
+	fddrop(fdp);
+	free(kif, M_TEMP);
+	return (0);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD,
+    sysctl_kern_proc_ofiledesc, "Process ofiledesc entries");
+#endif	/* COMPAT_FREEBSD7 */
+
+#ifdef KINFO_FILE_SIZE
+CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
+#endif
+
+static int
+export_vnode_for_sysctl(struct vnode *vp, int type,
+    struct kinfo_file *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+	int error;
+	char *fullpath, *freepath;
+	int vfslocked;
+
+	bzero(kif, sizeof(*kif));
+
+	vref(vp);
+	kif->kf_fd = type;
+	kif->kf_type = KF_TYPE_VNODE;
+	/* This function only handles directories. */
+	if (vp->v_type != VDIR) {
+		vrele(vp);
+		return (ENOTDIR);
+	}
+	kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+	/*
+	 * This is not a true file descriptor, so we set a bogus refcount
+	 * and offset to indicate these fields should be ignored.
+	 */
+	kif->kf_ref_count = -1;
+	kif->kf_offset = -1;
+
+	freepath = NULL;
+	fullpath = "-";
+	FILEDESC_SUNLOCK(fdp);
+	vn_fullpath(curthread, vp, &fullpath, &freepath);
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	vrele(vp);
+	VFS_UNLOCK_GIANT(vfslocked);
+	strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+	if (freepath != NULL)
+		free(freepath, M_TEMP);
+	/* Pack record size down */
+	kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+	    strlen(kif->kf_path) + 1;
+	kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
+	error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+	FILEDESC_SLOCK(fdp);
+	return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
+{
+	char *fullpath, *freepath;
+	struct kinfo_file *kif;
+	struct filedesc *fdp;
+	int error, i, *name;
+	struct socket *so;
+	struct vnode *vp;
+	struct file *fp;
+	struct proc *p;
+	struct tty *tp;
+	int vfslocked;
+	size_t oldidx;
+
+	name = (int *)arg1;
+	if ((p = pfind((pid_t)name[0])) == NULL)
+		return (ESRCH);
+	if ((error = p_candebug(curthread, p))) {
+		PROC_UNLOCK(p);
+		return (error);
+	}
+	fdp = fdhold(p);
+	PROC_UNLOCK(p);
+	if (fdp == NULL)
+		return (ENOENT);
+	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+	FILEDESC_SLOCK(fdp);
+	if (fdp->fd_cdir != NULL)
+		export_vnode_for_sysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+				fdp, req);
+	if (fdp->fd_rdir != NULL)
+		export_vnode_for_sysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+				fdp, req);
+	if (fdp->fd_jdir != NULL)
+		export_vnode_for_sysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+				fdp, req);
+	for (i = 0; i < fdp->fd_nfiles; i++) {
+		if ((fp = fdp->fd_ofiles[i]) == NULL)
+			continue;
+		bzero(kif, sizeof(*kif));
+		vp = NULL;
+		so = NULL;
+		tp = NULL;
+		kif->kf_fd = i;
+		switch (fp->f_type) {
+		case DTYPE_VNODE:
+			kif->kf_type = KF_TYPE_VNODE;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_SOCKET:
+			kif->kf_type = KF_TYPE_SOCKET;
+			so = fp->f_data;
+			break;
+
+		case DTYPE_PIPE:
+			kif->kf_type = KF_TYPE_PIPE;
+			break;
+
+		case DTYPE_FIFO:
+			kif->kf_type = KF_TYPE_FIFO;
+			vp = fp->f_vnode;
+			break;
+
+		case DTYPE_KQUEUE:
+			kif->kf_type = KF_TYPE_KQUEUE;
+			break;
+
+		case DTYPE_CRYPTO:
+			kif->kf_type = KF_TYPE_CRYPTO;
+			break;
+
+		case DTYPE_MQUEUE:
+			kif->kf_type = KF_TYPE_MQUEUE;
+			break;
+
+		case DTYPE_SHM:
+			kif->kf_type = KF_TYPE_SHM;
+			break;
+
+		case DTYPE_SEM:
+			kif->kf_type = KF_TYPE_SEM;
+			break;
+
+		case DTYPE_PTS:
+			kif->kf_type = KF_TYPE_PTS;
+			tp = fp->f_data;
+			break;
+
+		default:
+			kif->kf_type = KF_TYPE_UNKNOWN;
+			break;
+		}
+		kif->kf_ref_count = fp->f_count;
+		if (fp->f_flag & FREAD)
+			kif->kf_flags |= KF_FLAG_READ;
+		if (fp->f_flag & FWRITE)
+			kif->kf_flags |= KF_FLAG_WRITE;
+		if (fp->f_flag & FAPPEND)
+			kif->kf_flags |= KF_FLAG_APPEND;
+		if (fp->f_flag & FASYNC)
+			kif->kf_flags |= KF_FLAG_ASYNC;
+		if (fp->f_flag & FFSYNC)
+			kif->kf_flags |= KF_FLAG_FSYNC;
+		if (fp->f_flag & FNONBLOCK)
+			kif->kf_flags |= KF_FLAG_NONBLOCK;
+		if (fp->f_flag & O_DIRECT)
+			kif->kf_flags |= KF_FLAG_DIRECT;
+		if (fp->f_flag & FHASLOCK)
+			kif->kf_flags |= KF_FLAG_HASLOCK;
+		kif->kf_offset = fp->f_offset;
+		if (vp != NULL) {
+			vref(vp);
+			switch (vp->v_type) {
+			case VNON:
+				kif->kf_vnode_type = KF_VTYPE_VNON;
+				break;
+			case VREG:
+				kif->kf_vnode_type = KF_VTYPE_VREG;
+				break;
+			case VDIR:
+				kif->kf_vnode_type = KF_VTYPE_VDIR;
+				break;
+			case VBLK:
+				kif->kf_vnode_type = KF_VTYPE_VBLK;
+				break;
+			case VCHR:
+				kif->kf_vnode_type = KF_VTYPE_VCHR;
+				break;
+			case VLNK:
+				kif->kf_vnode_type = KF_VTYPE_VLNK;
+				break;
+			case VSOCK:
+				kif->kf_vnode_type = KF_VTYPE_VSOCK;
+				break;
+			case VFIFO:
+				kif->kf_vnode_type = KF_VTYPE_VFIFO;
+				break;
+			case VBAD:
+				kif->kf_vnode_type = KF_VTYPE_VBAD;
+				break;
+			default:
+				kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+				break;
+			}
+			/*
+			 * It is OK to drop the filedesc lock here as we will
+			 * re-validate and re-evaluate its properties when
+			 * the loop continues.
+			 */
+			freepath = NULL;
+			fullpath = "-";
+			FILEDESC_SUNLOCK(fdp);
+			vn_fullpath(curthread, vp, &fullpath, &freepath);
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			vrele(vp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			strlcpy(kif->kf_path, fullpath,
+			    sizeof(kif->kf_path));
+			if (freepath != NULL)
+				free(freepath, M_TEMP);
+			FILEDESC_SLOCK(fdp);
+		}
+		if (so != NULL) {
+			struct sockaddr *sa;
+
+			if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+				bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+				bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			kif->kf_sock_domain =
+			    so->so_proto->pr_domain->dom_family;
+			kif->kf_sock_type = so->so_type;
+			kif->kf_sock_protocol = so->so_proto->pr_protocol;
+		}
+		if (tp != NULL) {
+			strlcpy(kif->kf_path, tty_devname(tp),
+			    sizeof(kif->kf_path));
+		}
+		/* Pack record size down */
+		kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+		    strlen(kif->kf_path) + 1;
+		kif->kf_structsize = roundup(kif->kf_structsize,
+		    sizeof(uint64_t));
+		oldidx = req->oldidx;
+		error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+		if (error) {
+			if (error == ENOMEM) {
+				/*
+				 * This hack keeps the ABI of the
+				 * kern.proc.filedesc sysctl intact
+				 * while not accounting a partially
+				 * copied kinfo_file in oldidx.
+				 */
+				req->oldidx = oldidx;
+				error = 0;
+			}
+			break;
+		}
+	}
+	FILEDESC_SUNLOCK(fdp);
+	fddrop(fdp);
+	free(kif, M_TEMP);
+	return (error);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
+    sysctl_kern_proc_filedesc, "Process filedesc entries");
+
+#ifdef DDB
+/*
+ * For the purposes of debugging, generate a human-readable string for the
+ * file type.
+ */
+static const char *
+file_type_to_name(short type)
+{
+
+	switch (type) {
+	case 0:
+		return ("zero");
+	case DTYPE_VNODE:
+		return ("vnod");
+	case DTYPE_SOCKET:
+		return ("sock");
+	case DTYPE_PIPE:
+		return ("pipe");
+	case DTYPE_FIFO:
+		return ("fifo");
+	case DTYPE_KQUEUE:
+		return ("kque");
+	case DTYPE_CRYPTO:
+		return ("crpt");
+	case DTYPE_MQUEUE:
+		return ("mque");
+	case DTYPE_SHM:
+		return ("shm");
+	case DTYPE_SEM:
+		return ("ksem");
+	default:
+		return ("unkn");
+	}
+}
+
+/*
+ * For the purposes of debugging, identify a process (if any, perhaps one of
+ * many) that references the passed file in its file descriptor array. Return
+ * NULL if none.
+ */
+static struct proc *
+file_to_first_proc(struct file *fp)
+{
+	struct filedesc *fdp;
+	struct proc *p;
+	int n;
+
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_state == PRS_NEW)
+			continue;
+		fdp = p->p_fd;
+		if (fdp == NULL)
+			continue;
+		for (n = 0; n < fdp->fd_nfiles; n++) {
+			if (fp == fdp->fd_ofiles[n])
+				return (p);
+		}
+	}
+	return (NULL);
+}
+
+static void
+db_print_file(struct file *fp, int header)
+{
+	struct proc *p;
+
+	if (header)
+		db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
+		    "File", "Type", "Data", "Flag", "GCFl", "Count",
+		    "MCount", "Vnode", "FPID", "FCmd");
+	p = file_to_first_proc(fp);
+	db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
+	    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
+	    0, fp->f_count, 0, fp->f_vnode,
+	    p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
+}
+
+DB_SHOW_COMMAND(file, db_show_file)
+{
+	struct file *fp;
+
+	if (!have_addr) {
+		db_printf("usage: show file <addr>\n");
+		return;
+	}
+	fp = (struct file *)addr;
+	db_print_file(fp, 1);
+}
+
+DB_SHOW_COMMAND(files, db_show_files)
+{
+	struct filedesc *fdp;
+	struct file *fp;
+	struct proc *p;
+	int header;
+	int n;
+
+	header = 1;
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_state == PRS_NEW)
+			continue;
+		if ((fdp = p->p_fd) == NULL)
+			continue;
+		for (n = 0; n < fdp->fd_nfiles; ++n) {
+			if ((fp = fdp->fd_ofiles[n]) == NULL)
+				continue;
+			db_print_file(fp, header);
+			header = 0;
+		}
+	}
+}
+#endif
+
+SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
+    &maxfilesperproc, 0, "Maximum files allowed open per process");
+
+SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
+    &maxfiles, 0, "Maximum number of files");
+
+SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
+    __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
+
+/* ARGSUSED*/
+static void
+filelistinit(void *dummy)
+{
+
+	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
+	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
+	mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
+}
+SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
+#endif /* __rtems__ */
+
+/*-------------------------------------------------------------------*/
+
+static int
+badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td)
+{
+
+	return (EINVAL);
+}
+
+static int
+badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td)
+{
+
+	return (0);
+}
+
+static int
+badfo_kqfilter(struct file *fp, struct knote *kn)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+static int
+badfo_close(struct file *fp, struct thread *td)
+{
+
+	return (EBADF);
+}
+
+struct fileops badfileops = {
+	.fo_read = badfo_readwrite,
+	.fo_write = badfo_readwrite,
+	.fo_truncate = badfo_truncate,
+	.fo_ioctl = badfo_ioctl,
+	.fo_poll = badfo_poll,
+	.fo_kqfilter = badfo_kqfilter,
+	.fo_stat = badfo_stat,
+	.fo_close = badfo_close,
+};
+
+#ifndef __rtems__
+/*-------------------------------------------------------------------*/
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process.  Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ *
+ * XXX: we could give this one a cloning event handler if necessary.
+ */
+
+/* ARGSUSED */
+static int
+fdopen(struct cdev *dev, int mode, int type, struct thread *td)
+{
+
+	/*
+	 * XXX Kludge: set curthread->td_dupfd to contain the value of
+	 * the file descriptor being sought for duplication. The error
+	 * return ensures that the vnode for this device will be released
+	 * by vn_open. Open will detect this special error and take the
+	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
+	 * will simply report the error.
+	 */
+	td->td_dupfd = dev2unit(dev);
+	return (ENODEV);
+}
+
+static struct cdevsw fildesc_cdevsw = {
+	.d_version =	D_VERSION,
+	.d_open =	fdopen,
+	.d_name =	"FD",
+};
+
+static void
+fildesc_drvinit(void *unused)
+{
+	struct cdev *dev;
+
+	dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
+	make_dev_alias(dev, "stdin");
+	dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
+	make_dev_alias(dev, "stdout");
+	dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
+	make_dev_alias(dev, "stderr");
+}
+
+SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL);
+#endif /* __rtems__ */
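
The badfileops table above plugs every fileops slot with a handler that fails
safely (EBADF for most methods, EINVAL for truncate, 0 for poll), so a file
whose backing object has gone away still carries a valid operations vector and
stray fo_*() calls return an error instead of dereferencing garbage.  A minimal
sketch of that dispatch pattern, using illustrative stand-in types rather than
the kernel's real struct file and struct fileops:

#include <errno.h>
#include <stdio.h>

struct xfile;

struct xfileops {
	int (*fo_read)(struct xfile *fp, char *buf, int n);
	int (*fo_close)(struct xfile *fp);
};

struct xfile {
	const struct xfileops *f_ops;	/* per-file operations vector */
};

static int bad_read(struct xfile *fp, char *buf, int n) { return EBADF; }
static int bad_close(struct xfile *fp) { return EBADF; }

/* analogue of badfileops: every slot fails cleanly */
static const struct xfileops badops = { bad_read, bad_close };

/* analogue of the fo_read() wrapper: dispatch through the per-file vector */
static int
xfo_read(struct xfile *fp, char *buf, int n)
{
	return fp->f_ops->fo_read(fp, buf, n);
}

int main(void)
{
	struct xfile dead = { &badops };
	char buf[8];

	printf("read on a dead file -> %d (EBADF is %d)\n",
	    xfo_read(&dead, buf, (int)sizeof(buf)), EBADF);
	return 0;
}
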
diff --git a/freebsd/kern/kern_mtxpool.c b/freebsd/kern/kern_mtxpool.c
new file mode 100644
index 0000000..b2c0267
--- /dev/null
+++ b/freebsd/kern/kern_mtxpool.c
@@ -0,0 +1,220 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Matthew Dillon.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Mutex pool routines.  These routines are designed to be used as short
+ * term leaf mutexes (e.g. the last mutex you might acquire other than
+ * calling msleep()).  They operate using a shared pool.  A mutex is chosen
+ * from the pool based on the supplied pointer (which may or may not be
+ * valid).
+ *
+ * Advantages:
+ *	- no structural overhead.  Mutexes can be associated with structures
+ *	  without adding bloat to the structures.
+ *	- mutexes can be obtained for invalid pointers, useful when using
+ *	  mutexes to interlock destructor ops.
+ *	- no initialization/destructor overhead.
+ *	- can be used with msleep.
+ *
+ * Disadvantages:
+ *	- should generally only be used as leaf mutexes.
+ *	- pool/pool dependency ordering cannot be depended on.
+ *	- possible L1 cache mastership contention between CPUs.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/systm.h>
+
+
+static MALLOC_DEFINE(M_MTXPOOL, "mtx_pool", "mutex pool");
+
+/* Pool sizes must be a power of two */
+#ifndef MTX_POOL_LOCKBUILDER_SIZE
+#define MTX_POOL_LOCKBUILDER_SIZE	128
+#endif
+#ifndef MTX_POOL_SLEEP_SIZE
+#define MTX_POOL_SLEEP_SIZE		128
+#endif
+
+struct mtxpool_header {
+	int		mtxpool_size;
+	int		mtxpool_mask;
+	int		mtxpool_shift;
+	int		mtxpool_next;
+};
+
+struct mtx_pool {
+	struct mtxpool_header mtx_pool_header;
+	struct mtx	mtx_pool_ary[1];
+};
+
+static struct mtx_pool_lockbuilder {
+	struct mtxpool_header mtx_pool_header;
+	struct mtx	mtx_pool_ary[MTX_POOL_LOCKBUILDER_SIZE];
+} lockbuilder_pool;
+
+#define mtx_pool_size	mtx_pool_header.mtxpool_size
+#define mtx_pool_mask	mtx_pool_header.mtxpool_mask
+#define mtx_pool_shift	mtx_pool_header.mtxpool_shift
+#define mtx_pool_next	mtx_pool_header.mtxpool_next
+
+struct mtx_pool *mtxpool_sleep;
+struct mtx_pool *mtxpool_lockbuilder;
+
+#if UINTPTR_MAX == UINT64_MAX	/* 64 bits */
+# define POINTER_BITS		64
+# define HASH_MULTIPLIER	11400714819323198485u /* (2^64)*(sqrt(5)-1)/2 */
+#else				/* assume 32 bits */
+# define POINTER_BITS		32
+# define HASH_MULTIPLIER	2654435769u	      /* (2^32)*(sqrt(5)-1)/2 */
+#endif
+
+/*
+ * Return the (shared) pool mutex associated with the specified address.
+ * The returned mutex is a leaf level mutex, meaning that if you obtain it
+ * you cannot obtain any other mutexes until you release it.  You can
+ * legally msleep() on the mutex.
+ */
+struct mtx *
+mtx_pool_find(struct mtx_pool *pool, void *ptr)
+{
+	int p;
+
+	KASSERT(pool != NULL, ("_mtx_pool_find(): null pool"));
+	/*
+	 * Fibonacci hash, see Knuth's
+	 * _Art of Computer Programming, Volume 3 / Sorting and Searching_
+	 */
+	p = ((HASH_MULTIPLIER * (uintptr_t)ptr) >> pool->mtx_pool_shift) &
+	    pool->mtx_pool_mask;
+	return (&pool->mtx_pool_ary[p]);
+}
+
+static void
+mtx_pool_initialize(struct mtx_pool *pool, const char *mtx_name, int pool_size,
+    int opts)
+{
+	int i, maskbits;
+
+	pool->mtx_pool_size = pool_size;
+	pool->mtx_pool_mask = pool_size - 1;
+	for (i = 1, maskbits = 0; (i & pool_size) == 0; i = i << 1)
+		maskbits++;
+	pool->mtx_pool_shift = POINTER_BITS - maskbits;
+	pool->mtx_pool_next = 0;
+	for (i = 0; i < pool_size; ++i)
+		mtx_init(&pool->mtx_pool_ary[i], mtx_name, NULL, opts);
+}
+
+struct mtx_pool *
+mtx_pool_create(const char *mtx_name, int pool_size, int opts)
+{
+	struct mtx_pool *pool;
+
+	if (pool_size <= 0 || !powerof2(pool_size)) {
+		printf("WARNING: %s pool size is not a power of 2.\n",
+		    mtx_name);
+		pool_size = 128;
+	}
+	pool = malloc(sizeof (struct mtx_pool) +
+	    ((pool_size - 1) * sizeof (struct mtx)),
+	    M_MTXPOOL, M_WAITOK | M_ZERO);
+	mtx_pool_initialize(pool, mtx_name, pool_size, opts);
+	return pool;
+}
+
+void
+mtx_pool_destroy(struct mtx_pool **poolp)
+{
+	int i;
+	struct mtx_pool *pool = *poolp;
+
+	for (i = pool->mtx_pool_size - 1; i >= 0; --i)
+		mtx_destroy(&pool->mtx_pool_ary[i]);
+	free(pool, M_MTXPOOL);
+	*poolp = NULL;
+}
+
+static void
+mtx_pool_setup_static(void *dummy __unused)
+{
+	mtx_pool_initialize((struct mtx_pool *)&lockbuilder_pool,
+	    "lockbuilder mtxpool", MTX_POOL_LOCKBUILDER_SIZE,
+	    MTX_DEF | MTX_NOWITNESS | MTX_QUIET);
+	mtxpool_lockbuilder = (struct mtx_pool *)&lockbuilder_pool;
+}
+
+static void
+mtx_pool_setup_dynamic(void *dummy __unused)
+{
+	mtxpool_sleep = mtx_pool_create("sleep mtxpool",
+	    MTX_POOL_SLEEP_SIZE, MTX_DEF);
+}
+
+/*
+ * Obtain a (shared) mutex from the pool.  The returned mutex is a leaf
+ * level mutex, meaning that if you obtain it you cannot obtain any other
+ * mutexes until you release it.  You can legally msleep() on the mutex.
+ */
+struct mtx *
+mtx_pool_alloc(struct mtx_pool *pool)
+{
+	int i;
+
+	KASSERT(pool != NULL, ("mtx_pool_alloc(): null pool"));
+	/*
+	 * mtx_pool_next is unprotected against multiple accesses,
+	 * but simultaneous access by two CPUs should not be very
+	 * harmful.
+	 */
+	i = pool->mtx_pool_next;
+	pool->mtx_pool_next = (i + 1) & pool->mtx_pool_mask;
+	return (&pool->mtx_pool_ary[i]);
+}
+
+/*
+ * The lockbuilder pool must be initialized early because the lockmgr
+ * and sx locks depend on it.  The sx locks are used in the kernel
+ * memory allocator.  The lockmgr subsystem is initialized by
+ * SYSINIT(..., SI_SUB_LOCKMGR, ...).
+ *
+ * We can't call malloc() to dynamically allocate the sleep pool
+ * until after kmeminit() has been called, which is done by
+ * SYSINIT(..., SI_SUB_KMEM, ...).
+ */
+SYSINIT(mtxpooli1, SI_SUB_MTX_POOL_STATIC, SI_ORDER_FIRST,
+    mtx_pool_setup_static, NULL);
+SYSINIT(mtxpooli2, SI_SUB_MTX_POOL_DYNAMIC, SI_ORDER_FIRST,
+    mtx_pool_setup_dynamic, NULL);
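
mtx_pool_find() above maps an arbitrary pointer to one of the pool's mutexes
with a Fibonacci hash: multiply by (2^N)*(sqrt(5)-1)/2 and keep the top
log2(pool_size) bits of the product.  A standalone sketch of just that index
computation, assuming a 64-bit build and the 128-entry size used for the
lockbuilder pool (all names here are illustrative):

#include <stdint.h>
#include <stdio.h>

#define POOL_SIZE	128u			/* must be a power of two */
#define POOL_MASK	(POOL_SIZE - 1u)
#define POINTER_BITS	64
#define HASH_MULTIPLIER	11400714819323198485u	/* (2^64)*(sqrt(5)-1)/2 */

/* Same arithmetic as mtx_pool_find(): top log2(POOL_SIZE) bits of the product. */
static unsigned
pool_slot(const void *ptr)
{
	unsigned i, maskbits = 0;

	for (i = 1; (i & POOL_SIZE) == 0; i <<= 1)
		maskbits++;			/* maskbits ends up as log2(POOL_SIZE) */
	return (unsigned)(((HASH_MULTIPLIER * (uintptr_t)ptr) >>
	    (POINTER_BITS - maskbits)) & POOL_MASK);
}

int main(void)
{
	int object;				/* any object address will do */

	printf("%p -> pool slot %u of %u\n",
	    (void *)&object, pool_slot(&object), POOL_SIZE);
	return 0;
}
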
diff --git a/freebsd/kern/kern_subr.c b/freebsd/kern/kern_subr.c
index fecb91c..9a28a7d 100644
--- a/freebsd/kern/kern_subr.c
+++ b/freebsd/kern/kern_subr.c
@@ -545,7 +545,6 @@ copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
 	return (error);
 }
 
-#ifndef __rtems__
 int
 copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
 {
@@ -582,6 +581,7 @@ copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
 	return (0);
 }
 
+#ifndef __rtems__
 struct uio *
 cloneuio(struct uio *uiop)
 {
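
The kern_subr.c hunk above moves copyinuio() out of the __rtems__ guard because
readv(), writev() and their positioned variants in sys_generic.c (added below)
use it to copy a caller's iovec array into a freshly allocated struct uio.  The
essential bookkeeping is summing the segment lengths into uio_resid while
rejecting totals above INT_MAX; a simplified illustration of that check with
stand-in types (this is not the kernel code itself):

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

struct miov { void *iov_base; size_t iov_len; };	/* stand-in for struct iovec */

/* Total transfer size, or -1 for the overflow case copyinuio() rejects with EINVAL. */
static long
total_resid(const struct miov *iov, unsigned iovcnt)
{
	long resid = 0;
	unsigned i;

	for (i = 0; i < iovcnt; i++) {
		if (iov[i].iov_len > (size_t)(INT_MAX - resid))
			return -1;
		resid += (long)iov[i].iov_len;
	}
	return resid;
}

int main(void)
{
	char a[16], b[32];
	struct miov iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

	printf("uio_resid for this readv() would be %ld bytes\n",
	    total_resid(iov, 2));
	return 0;
}
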
diff --git a/freebsd/kern/kern_time.c b/freebsd/kern/kern_time.c
index e07abc3..8c760b4 100644
--- a/freebsd/kern/kern_time.c
+++ b/freebsd/kern/kern_time.c
@@ -698,6 +698,7 @@ realitexpire(void *arg)
 	}
 	/*NOTREACHED*/
 }
+#endif /* __rtems__ */
 
 /*
  * Check that a proposed value to load into the .it_value or
@@ -716,6 +717,7 @@ itimerfix(struct timeval *tv)
 	return (0);
 }
 
+#ifndef __rtems__
 /*
  * Decrement an interval timer by a specified number
  * of microseconds, which must be less than a second,
@@ -756,6 +758,7 @@ expire:
 		itp->it_value.tv_usec = 0;		/* sec is already 0 */
 	return (0);
 }
+#endif /* __rtems__ */
 
 /*
  * Add and subtract routines for timevals.
@@ -772,7 +775,6 @@ timevaladd(struct timeval *t1, const struct timeval *t2)
 	t1->tv_usec += t2->tv_usec;
 	timevalfix(t1);
 }
-#endif /* __rtems__ */
 
 void
 timevalsub(struct timeval *t1, const struct timeval *t2)
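
These kern_time.c hunks make itimerfix() and timevaladd() visible to the RTEMS
build; together with timevalsub() they let kern_select() and poll() validate a
relative timeout, convert it into an absolute uptime deadline, and recompute
the remaining ticks after every wakeup.  A userspace sketch of that arithmetic
with local copies of the timeval helpers (HZ and the fixed "uptime" below are
illustrative stand-ins for the kernel's hz and getmicrouptime()):

#include <stdio.h>

#define HZ	100				/* illustrative tick rate */

struct tv { long sec; long usec; };

static void
tvfix(struct tv *t)				/* mirrors timevalfix() */
{
	if (t->usec < 0)        { t->sec--; t->usec += 1000000; }
	if (t->usec >= 1000000) { t->sec++; t->usec -= 1000000; }
}

static void tvadd(struct tv *a, const struct tv *b) { a->sec += b->sec; a->usec += b->usec; tvfix(a); }
static void tvsub(struct tv *a, const struct tv *b) { a->sec -= b->sec; a->usec -= b->usec; tvfix(a); }

int main(void)
{
	struct tv uptime = { 10, 250000 };	/* pretend getmicrouptime() result */
	struct tv atv    = { 2, 500000 };	/* caller's relative timeout */
	struct tv ttv;
	int timo;

	tvadd(&atv, &uptime);			/* kern_select(): timevaladd(&atv, &rtv) */
	ttv = atv;
	tvsub(&ttv, &uptime);			/* time left until the deadline */
	timo = (int)(ttv.sec * HZ + ttv.usec / (1000000 / HZ));
	printf("deadline at %ld.%06ld, sleep for at most %d ticks\n",
	    atv.sec, atv.usec, timo);
	return 0;
}
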
diff --git a/freebsd/kern/sys_generic.c b/freebsd/kern/sys_generic.c
new file mode 100644
index 0000000..c90d632
--- /dev/null
+++ b/freebsd/kern/sys_generic.c
@@ -0,0 +1,1665 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/sysproto.h>
+#include <freebsd/sys/filedesc.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/file.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/poll.h>
+#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/selinfo.h>
+#include <freebsd/sys/sleepqueue.h>
+#include <freebsd/sys/syscallsubr.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/sysent.h>
+#include <freebsd/sys/vnode.h>
+#include <freebsd/sys/bio.h>
+#ifndef __rtems__
+#include <freebsd/sys/buf.h>
+#endif
+#include <freebsd/sys/condvar.h>
+#ifdef KTRACE
+#include <freebsd/sys/ktrace.h>
+#endif
+
+#include <freebsd/security/audit/audit.h>
+
+#ifdef __rtems__
+typedef long fd_mask;
+#include <freebsd/vm/uma.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/machine/rtems-bsd-symbols.h>
+#endif /* __rtems__ */
+
+static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
+static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
+#ifndef __rtems__
+MALLOC_DEFINE(M_IOV, "iov", "large iov's");
+#endif /* __rtems__ */
+
+static int	pollout(struct thread *, struct pollfd *, struct pollfd *,
+		    u_int);
+static int	pollscan(struct thread *, struct pollfd *, u_int);
+static int	pollrescan(struct thread *);
+static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
+static int	selrescan(struct thread *, fd_mask **, fd_mask **);
+static void	selfdalloc(struct thread *, void *);
+static void	selfdfree(struct seltd *, struct selfd *);
+static int	dofileread(struct thread *, int, struct file *, struct uio *,
+		    off_t, int);
+static int	dofilewrite(struct thread *, int, struct file *, struct uio *,
+		    off_t, int);
+static void	doselwakeup(struct selinfo *, int);
+static void	seltdinit(struct thread *);
+static int	seltdwait(struct thread *, int);
+static void	seltdclear(struct thread *);
+
+/*
+ * One seltd per-thread allocated on demand as needed.
+ *
+ *	t - protected by st_mtx
+ * 	k - Only accessed by curthread or read-only
+ */
+struct seltd {
+	STAILQ_HEAD(, selfd)	st_selq;	/* (k) List of selfds. */
+	struct selfd		*st_free1;	/* (k) free fd for read set. */
+	struct selfd		*st_free2;	/* (k) free fd for write set. */
+	struct mtx		st_mtx;		/* Protects struct seltd */
+	struct cv		st_wait;	/* (t) Wait channel. */
+	int			st_flags;	/* (t) SELTD_ flags. */
+};
+
+#define	SELTD_PENDING	0x0001			/* We have pending events. */
+#define	SELTD_RESCAN	0x0002			/* Doing a rescan. */
+
+/*
+ * One selfd allocated per-thread per-file-descriptor.
+ *	f - protected by sf_mtx
+ */
+struct selfd {
+	STAILQ_ENTRY(selfd)	sf_link;	/* (k) fds owned by this td. */
+	TAILQ_ENTRY(selfd)	sf_threads;	/* (f) fds on this selinfo. */
+	struct selinfo		*sf_si;		/* (f) selinfo when linked. */
+	struct mtx		*sf_mtx;	/* Pointer to selinfo mtx. */
+	struct seltd		*sf_td;		/* (k) owning seltd. */
+	void			*sf_cookie;	/* (k) fd or pollfd. */
+};
+
+static uma_zone_t selfd_zone;
+static struct mtx_pool *mtxpool_select;
+
+#ifndef _SYS_SYSPROTO_H_
+struct read_args {
+	int	fd;
+	void	*buf;
+	size_t	nbyte;
+};
+#endif
+#ifndef __rtems__
+int
+read(td, uap)
+	struct thread *td;
+	struct read_args *uap;
+{
+	struct uio auio;
+	struct iovec aiov;
+	int error;
+
+	if (uap->nbyte > INT_MAX)
+		return (EINVAL);
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	auio.uio_segflg = UIO_USERSPACE;
+	error = kern_readv(td, uap->fd, &auio);
+	return(error);
+}
+#endif
+
+/*
+ * Positioned read system call
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct pread_args {
+	int	fd;
+	void	*buf;
+	size_t	nbyte;
+	int	pad;
+	off_t	offset;
+};
+#endif
+int
+pread(td, uap)
+	struct thread *td;
+	struct pread_args *uap;
+{
+	struct uio auio;
+	struct iovec aiov;
+	int error;
+
+	if (uap->nbyte > INT_MAX)
+		return (EINVAL);
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	auio.uio_segflg = UIO_USERSPACE;
+	error = kern_preadv(td, uap->fd, &auio, uap->offset);
+	return(error);
+}
+
+#ifndef __rtems__
+int
+freebsd6_pread(td, uap)
+	struct thread *td;
+	struct freebsd6_pread_args *uap;
+{
+	struct pread_args oargs;
+
+	oargs.fd = uap->fd;
+	oargs.buf = uap->buf;
+	oargs.nbyte = uap->nbyte;
+	oargs.offset = uap->offset;
+	return (pread(td, &oargs));
+}
+#endif /* __rtems__ */
+
+/*
+ * Scatter read system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct readv_args {
+	int	fd;
+	struct	iovec *iovp;
+	u_int	iovcnt;
+};
+#endif
+int
+readv(struct thread *td, struct readv_args *uap)
+{
+	struct uio *auio;
+	int error;
+
+	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+	if (error)
+		return (error);
+	error = kern_readv(td, uap->fd, auio);
+	free(auio, M_IOV);
+	return (error);
+}
+
+int
+kern_readv(struct thread *td, int fd, struct uio *auio)
+{
+	struct file *fp;
+	int error;
+
+	error = fget_read(td, fd, &fp);
+	if (error)
+		return (error);
+	error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
+	fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * Scatter positioned read system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct preadv_args {
+	int	fd;
+	struct	iovec *iovp;
+	u_int	iovcnt;
+	off_t	offset;
+};
+#endif
+int
+preadv(struct thread *td, struct preadv_args *uap)
+{
+	struct uio *auio;
+	int error;
+
+	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+	if (error)
+		return (error);
+	error = kern_preadv(td, uap->fd, auio, uap->offset);
+	free(auio, M_IOV);
+	return (error);
+}
+
+int
+kern_preadv(td, fd, auio, offset)
+	struct thread *td;
+	int fd;
+	struct uio *auio;
+	off_t offset;
+{
+	struct file *fp;
+	int error;
+
+	error = fget_read(td, fd, &fp);
+	if (error)
+		return (error);
+	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
+		error = ESPIPE;
+#ifndef __rtems__
+	else if (offset < 0 && fp->f_vnode->v_type != VCHR)
+		error = EINVAL;
+#endif /* __rtems__ */
+	else
+		error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET);
+	fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * Common code for readv and preadv that reads data in
+ * from a file using the passed in uio, offset, and flags.
+ */
+static int
+dofileread(td, fd, fp, auio, offset, flags)
+	struct thread *td;
+	int fd;
+	struct file *fp;
+	struct uio *auio;
+	off_t offset;
+	int flags;
+{
+	ssize_t cnt;
+	int error;
+#ifdef KTRACE
+	struct uio *ktruio = NULL;
+#endif
+
+	/* Finish zero length reads right here */
+	if (auio->uio_resid == 0) {
+		td->td_retval[0] = 0;
+		return(0);
+	}
+	auio->uio_rw = UIO_READ;
+	auio->uio_offset = offset;
+	auio->uio_td = td;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_GENIO)) 
+		ktruio = cloneuio(auio);
+#endif
+	cnt = auio->uio_resid;
+	if ((error = fo_read(fp, auio, td->td_ucred, flags, td))) {
+		if (auio->uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+	}
+	cnt -= auio->uio_resid;
+#ifdef KTRACE
+	if (ktruio != NULL) {
+		ktruio->uio_resid = cnt;
+		ktrgenio(fd, UIO_READ, ktruio, error);
+	}
+#endif
+	td->td_retval[0] = cnt;
+	return (error);
+}
+
+#ifndef __rtems__
+#ifndef _SYS_SYSPROTO_H_
+struct write_args {
+	int	fd;
+	const void *buf;
+	size_t	nbyte;
+};
+#endif
+int
+write(td, uap)
+	struct thread *td;
+	struct write_args *uap;
+{
+	struct uio auio;
+	struct iovec aiov;
+	int error;
+
+	if (uap->nbyte > INT_MAX)
+		return (EINVAL);
+	aiov.iov_base = (void *)(uintptr_t)uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	auio.uio_segflg = UIO_USERSPACE;
+	error = kern_writev(td, uap->fd, &auio);
+	return(error);
+}
+
+/*
+ * Positioned write system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct pwrite_args {
+	int	fd;
+	const void *buf;
+	size_t	nbyte;
+	int	pad;
+	off_t	offset;
+};
+#endif
+int
+pwrite(td, uap)
+	struct thread *td;
+	struct pwrite_args *uap;
+{
+	struct uio auio;
+	struct iovec aiov;
+	int error;
+
+	if (uap->nbyte > INT_MAX)
+		return (EINVAL);
+	aiov.iov_base = (void *)(uintptr_t)uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	auio.uio_segflg = UIO_USERSPACE;
+	error = kern_pwritev(td, uap->fd, &auio, uap->offset);
+	return(error);
+}
+
+int
+freebsd6_pwrite(td, uap)
+	struct thread *td;
+	struct freebsd6_pwrite_args *uap;
+{
+	struct pwrite_args oargs;
+
+	oargs.fd = uap->fd;
+	oargs.buf = uap->buf;
+	oargs.nbyte = uap->nbyte;
+	oargs.offset = uap->offset;
+	return (pwrite(td, &oargs));
+}
+
+/*
+ * Gather write system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct writev_args {
+	int	fd;
+	struct	iovec *iovp;
+	u_int	iovcnt;
+};
+#endif
+int
+writev(struct thread *td, struct writev_args *uap)
+{
+	struct uio *auio;
+	int error;
+
+	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+	if (error)
+		return (error);
+	error = kern_writev(td, uap->fd, auio);
+	free(auio, M_IOV);
+	return (error);
+}
+
+int
+kern_writev(struct thread *td, int fd, struct uio *auio)
+{
+	struct file *fp;
+	int error;
+
+	error = fget_write(td, fd, &fp);
+	if (error)
+		return (error);
+	error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
+	fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * Gather positioned write system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct pwritev_args {
+	int	fd;
+	struct	iovec *iovp;
+	u_int	iovcnt;
+	off_t	offset;
+};
+#endif
+int
+pwritev(struct thread *td, struct pwritev_args *uap)
+{
+	struct uio *auio;
+	int error;
+
+	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+	if (error)
+		return (error);
+	error = kern_pwritev(td, uap->fd, auio, uap->offset);
+	free(auio, M_IOV);
+	return (error);
+}
+
+int
+kern_pwritev(td, fd, auio, offset)
+	struct thread *td;
+	struct uio *auio;
+	int fd;
+	off_t offset;
+{
+	struct file *fp;
+	int error;
+
+	error = fget_write(td, fd, &fp);
+	if (error)
+		return (error);
+	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
+		error = ESPIPE;
+	else if (offset < 0 && fp->f_vnode->v_type != VCHR)
+		error = EINVAL;
+	else
+		error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET);
+	fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * Common code for writev and pwritev that writes data to
+ * a file using the passed in uio, offset, and flags.
+ */
+static int
+dofilewrite(td, fd, fp, auio, offset, flags)
+	struct thread *td;
+	int fd;
+	struct file *fp;
+	struct uio *auio;
+	off_t offset;
+	int flags;
+{
+	ssize_t cnt;
+	int error;
+#ifdef KTRACE
+	struct uio *ktruio = NULL;
+#endif
+
+	auio->uio_rw = UIO_WRITE;
+	auio->uio_td = td;
+	auio->uio_offset = offset;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_GENIO))
+		ktruio = cloneuio(auio);
+#endif
+	cnt = auio->uio_resid;
+	if (fp->f_type == DTYPE_VNODE)
+		bwillwrite();
+	if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) {
+		if (auio->uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		/* Socket layer is responsible for issuing SIGPIPE. */
+		if (fp->f_type != DTYPE_SOCKET && error == EPIPE) {
+			PROC_LOCK(td->td_proc);
+			tdksignal(td, SIGPIPE, NULL);
+			PROC_UNLOCK(td->td_proc);
+		}
+	}
+	cnt -= auio->uio_resid;
+#ifdef KTRACE
+	if (ktruio != NULL) {
+		ktruio->uio_resid = cnt;
+		ktrgenio(fd, UIO_WRITE, ktruio, error);
+	}
+#endif
+	td->td_retval[0] = cnt;
+	return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ *
+ * Can't use fget_write() here, since must return EINVAL and not EBADF if the
+ * descriptor isn't writable.
+ */
+int
+kern_ftruncate(td, fd, length)
+	struct thread *td;
+	int fd;
+	off_t length;
+{
+	struct file *fp;
+	int error;
+
+	AUDIT_ARG_FD(fd);
+	if (length < 0)
+		return (EINVAL);
+	error = fget(td, fd, &fp);
+	if (error)
+		return (error);
+	AUDIT_ARG_FILE(td->td_proc, fp);
+	if (!(fp->f_flag & FWRITE)) {
+		fdrop(fp, td);
+		return (EINVAL);
+	}
+	error = fo_truncate(fp, length, td->td_ucred, td);
+	fdrop(fp, td);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct ftruncate_args {
+	int	fd;
+	int	pad;
+	off_t	length;
+};
+#endif
+int
+ftruncate(td, uap)
+	struct thread *td;
+	struct ftruncate_args *uap;
+{
+
+	return (kern_ftruncate(td, uap->fd, uap->length));
+}
+
+#if defined(COMPAT_43)
+#ifndef _SYS_SYSPROTO_H_
+struct oftruncate_args {
+	int	fd;
+	long	length;
+};
+#endif
+int
+oftruncate(td, uap)
+	struct thread *td;
+	struct oftruncate_args *uap;
+{
+
+	return (kern_ftruncate(td, uap->fd, uap->length));
+}
+#endif /* COMPAT_43 */
+
+#ifndef _SYS_SYSPROTO_H_
+struct ioctl_args {
+	int	fd;
+	u_long	com;
+	caddr_t	data;
+};
+#endif
+/* ARGSUSED */
+int
+ioctl(struct thread *td, struct ioctl_args *uap)
+{
+	u_long com;
+	int arg, error;
+	u_int size;
+	caddr_t data;
+
+	if (uap->com > 0xffffffff) {
+		printf(
+		    "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n",
+		    td->td_proc->p_pid, td->td_name, uap->com);
+		uap->com &= 0xffffffff;
+	}
+	com = uap->com;
+
+	/*
+	 * Interpret high order word to find amount of data to be
+	 * copied to/from the user's address space.
+	 */
+	size = IOCPARM_LEN(com);
+	if ((size > IOCPARM_MAX) ||
+	    ((com & (IOC_VOID  | IOC_IN | IOC_OUT)) == 0) ||
+#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
+	    ((com & IOC_OUT) && size == 0) ||
+#else
+	    ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
+#endif
+	    ((com & IOC_VOID) && size > 0 && size != sizeof(int)))
+		return (ENOTTY);
+
+	if (size > 0) {
+		if (com & IOC_VOID) {
+			/* Integer argument. */
+			arg = (intptr_t)uap->data;
+			data = (void *)&arg;
+			size = 0;
+		} else
+			data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
+	} else
+		data = (void *)&uap->data;
+	if (com & IOC_IN) {
+		error = copyin(uap->data, data, (u_int)size);
+		if (error) {
+			if (size > 0)
+				free(data, M_IOCTLOPS);
+			return (error);
+		}
+	} else if (com & IOC_OUT) {
+		/*
+		 * Zero the buffer so the user always
+		 * gets back something deterministic.
+		 */
+		bzero(data, size);
+	}
+
+	error = kern_ioctl(td, uap->fd, com, data);
+
+	if (error == 0 && (com & IOC_OUT))
+		error = copyout(data, uap->data, (u_int)size);
+
+	if (size > 0)
+		free(data, M_IOCTLOPS);
+	return (error);
+}
+
+int
+kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
+{
+	struct file *fp;
+	struct filedesc *fdp;
+	int error;
+	int tmp;
+
+	AUDIT_ARG_FD(fd);
+	AUDIT_ARG_CMD(com);
+	if ((error = fget(td, fd, &fp)) != 0)
+		return (error);
+	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	fdp = td->td_proc->p_fd;
+	switch (com) {
+	case FIONCLEX:
+		FILEDESC_XLOCK(fdp);
+		fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
+		FILEDESC_XUNLOCK(fdp);
+		goto out;
+	case FIOCLEX:
+		FILEDESC_XLOCK(fdp);
+		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+		FILEDESC_XUNLOCK(fdp);
+		goto out;
+	case FIONBIO:
+		if ((tmp = *(int *)data))
+			atomic_set_int(&fp->f_flag, FNONBLOCK);
+		else
+			atomic_clear_int(&fp->f_flag, FNONBLOCK);
+		data = (void *)&tmp;
+		break;
+	case FIOASYNC:
+		if ((tmp = *(int *)data))
+			atomic_set_int(&fp->f_flag, FASYNC);
+		else
+			atomic_clear_int(&fp->f_flag, FASYNC);
+		data = (void *)&tmp;
+		break;
+	}
+
+	error = fo_ioctl(fp, com, data, td->td_ucred, td);
+out:
+	fdrop(fp, td);
+	return (error);
+}
+
+int
+poll_no_poll(int events)
+{
+	/*
+	 * Return true for read/write.  If the user asked for something
+	 * special, return POLLNVAL, so that clients have a way of
+	 * determining reliably whether or not the extended
+	 * functionality is present without hard-coding knowledge
+	 * of specific filesystem implementations.
+	 */
+	if (events & ~POLLSTANDARD)
+		return (POLLNVAL);
+
+	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
+}
+
+int
+pselect(struct thread *td, struct pselect_args *uap)
+{
+	struct timespec ts;
+	struct timeval tv, *tvp;
+	sigset_t set, *uset;
+	int error;
+
+	if (uap->ts != NULL) {
+		error = copyin(uap->ts, &ts, sizeof(ts));
+		if (error != 0)
+		    return (error);
+		TIMESPEC_TO_TIMEVAL(&tv, &ts);
+		tvp = &tv;
+	} else
+		tvp = NULL;
+	if (uap->sm != NULL) {
+		error = copyin(uap->sm, &set, sizeof(set));
+		if (error != 0)
+			return (error);
+		uset = &set;
+	} else
+		uset = NULL;
+	return (kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
+	    uset, NFDBITS));
+}
+
+int
+kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex,
+    struct timeval *tvp, sigset_t *uset, int abi_nfdbits)
+{
+	int error;
+
+	if (uset != NULL) {
+		error = kern_sigprocmask(td, SIG_SETMASK, uset,
+		    &td->td_oldsigmask, 0);
+		if (error != 0)
+			return (error);
+		td->td_pflags |= TDP_OLDMASK;
+		/*
+		 * Make sure that ast() is called on return to
+		 * usermode and TDP_OLDMASK is cleared, restoring old
+		 * sigmask.
+		 */
+		thread_lock(td);
+		td->td_flags |= TDF_ASTPENDING;
+		thread_unlock(td);
+	}
+	error = kern_select(td, nd, in, ou, ex, tvp, abi_nfdbits);
+	return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct select_args {
+	int	nd;
+	fd_set	*in, *ou, *ex;
+	struct	timeval *tv;
+};
+#endif
+int
+select(struct thread *td, struct select_args *uap)
+{
+	struct timeval tv, *tvp;
+	int error;
+
+	if (uap->tv != NULL) {
+		error = copyin(uap->tv, &tv, sizeof(tv));
+		if (error)
+			return (error);
+		tvp = &tv;
+	} else
+		tvp = NULL;
+
+	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
+	    NFDBITS));
+}
+
+int
+kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
+    fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits)
+{
+	struct filedesc *fdp;
+	/*
+	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
+	 * infds with the new FD_SETSIZE of 1024, and more than enough for
+	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
+	 * of 256.
+	 */
+	fd_mask s_selbits[howmany(2048, NFDBITS)];
+	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
+	struct timeval atv, rtv, ttv;
+	int error, timo;
+	u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
+
+	if (nd < 0)
+		return (EINVAL);
+	fdp = td->td_proc->p_fd;
+	if (nd > fdp->fd_lastfile + 1)
+		nd = fdp->fd_lastfile + 1;
+
+	/*
+	 * Allocate just enough bits for the non-null fd_sets.  Use the
+	 * preallocated auto buffer if possible.
+	 */
+	nfdbits = roundup(nd, NFDBITS);
+	ncpbytes = nfdbits / NBBY;
+	ncpubytes = roundup(nd, abi_nfdbits) / NBBY;
+	nbufbytes = 0;
+	if (fd_in != NULL)
+		nbufbytes += 2 * ncpbytes;
+	if (fd_ou != NULL)
+		nbufbytes += 2 * ncpbytes;
+	if (fd_ex != NULL)
+		nbufbytes += 2 * ncpbytes;
+	if (nbufbytes <= sizeof s_selbits)
+		selbits = &s_selbits[0];
+	else
+		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
+
+	/*
+	 * Assign pointers into the bit buffers and fetch the input bits.
+	 * Put the output buffers together so that they can be bzeroed
+	 * together.
+	 */
+	sbp = selbits;
+#define	getbits(name, x) \
+	do {								\
+		if (name == NULL) {					\
+			ibits[x] = NULL;				\
+			obits[x] = NULL;				\
+		} else {						\
+			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
+			obits[x] = sbp;					\
+			sbp += ncpbytes / sizeof *sbp;			\
+			error = copyin(name, ibits[x], ncpubytes);	\
+			if (error != 0)					\
+				goto done;				\
+			bzero((char *)ibits[x] + ncpubytes,		\
+			    ncpbytes - ncpubytes);			\
+		}							\
+	} while (0)
+	getbits(fd_in, 0);
+	getbits(fd_ou, 1);
+	getbits(fd_ex, 2);
+#undef	getbits
+
+#if BYTE_ORDER == BIG_ENDIAN && defined(__LP64__)
+	/*
+	 * XXX: swizzle_fdset assumes that if abi_nfdbits != NFDBITS,
+	 * we are running under 32-bit emulation. This should be more
+	 * generic.
+	 */
+#define swizzle_fdset(bits)						\
+	if (abi_nfdbits != NFDBITS && bits != NULL) {			\
+		int i;							\
+		for (i = 0; i < ncpbytes / sizeof *sbp; i++)		\
+			bits[i] = (bits[i] >> 32) | (bits[i] << 32);	\
+	}
+#else
+#define swizzle_fdset(bits)
+#endif
+
+	/* Make sure the bit order makes it through an ABI transition */
+	swizzle_fdset(ibits[0]);
+	swizzle_fdset(ibits[1]);
+	swizzle_fdset(ibits[2]);
+	
+	if (nbufbytes != 0)
+		bzero(selbits, nbufbytes / 2);
+
+	if (tvp != NULL) {
+		atv = *tvp;
+		if (itimerfix(&atv)) {
+			error = EINVAL;
+			goto done;
+		}
+		getmicrouptime(&rtv);
+		timevaladd(&atv, &rtv);
+	} else {
+		atv.tv_sec = 0;
+		atv.tv_usec = 0;
+	}
+	timo = 0;
+	seltdinit(td);
+	/* Iterate until the timeout expires or descriptors become ready. */
+	for (;;) {
+		error = selscan(td, ibits, obits, nd);
+		if (error || td->td_retval[0] != 0)
+			break;
+		if (atv.tv_sec || atv.tv_usec) {
+			getmicrouptime(&rtv);
+			if (timevalcmp(&rtv, &atv, >=))
+				break;
+			ttv = atv;
+			timevalsub(&ttv, &rtv);
+			timo = ttv.tv_sec > 24 * 60 * 60 ?
+			    24 * 60 * 60 * hz : tvtohz(&ttv);
+		}
+		error = seltdwait(td, timo);
+		if (error)
+			break;
+		error = selrescan(td, ibits, obits);
+		if (error || td->td_retval[0] != 0)
+			break;
+	}
+	seltdclear(td);
+
+done:
+	/* select is not restarted after signals... */
+	if (error == ERESTART)
+		error = EINTR;
+	if (error == EWOULDBLOCK)
+		error = 0;
+
+	/* swizzle bit order back, if necessary */
+	swizzle_fdset(obits[0]);
+	swizzle_fdset(obits[1]);
+	swizzle_fdset(obits[2]);
+#undef swizzle_fdset
+
+#define	putbits(name, x) \
+	if (name && (error2 = copyout(obits[x], name, ncpubytes))) \
+		error = error2;
+	if (error == 0) {
+		int error2;
+
+		putbits(fd_in, 0);
+		putbits(fd_ou, 1);
+		putbits(fd_ex, 2);
+#undef putbits
+	}
+	if (selbits != &s_selbits[0])
+		free(selbits, M_SELECT);
+
+	return (error);
+}
+/* 
+ * Convert a select bit set to poll flags.
+ *
+ * The backend always returns POLLHUP/POLLERR if appropriate and we
+ * return this as a set bit in any set.
+ */
+static int select_flags[3] = {
+    POLLRDNORM | POLLHUP | POLLERR,
+    POLLWRNORM | POLLHUP | POLLERR,
+    POLLRDBAND | POLLERR
+};
+
+/*
+ * Compute the fo_poll flags required for a fd given by the index and
+ * bit position in the fd_mask array.
+ */
+static __inline int
+selflags(fd_mask **ibits, int idx, fd_mask bit)
+{
+	int flags;
+	int msk;
+
+	flags = 0;
+	for (msk = 0; msk < 3; msk++) {
+		if (ibits[msk] == NULL)
+			continue;
+		if ((ibits[msk][idx] & bit) == 0)
+			continue;
+		flags |= select_flags[msk];
+	}
+	return (flags);
+}
+
+/*
+ * Set the appropriate output bits given a mask of fired events and the
+ * input bits originally requested.
+ */
+static __inline int
+selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
+{
+	int msk;
+	int n;
+
+	n = 0;
+	for (msk = 0; msk < 3; msk++) {
+		if ((events & select_flags[msk]) == 0)
+			continue;
+		if (ibits[msk] == NULL)
+			continue;
+		if ((ibits[msk][idx] & bit) == 0)
+			continue;
+		/*
+		 * XXX Check for a duplicate set.  This can occur because a
+		 * socket calls selrecord() twice for each poll() call
+		 * resulting in two selfds per real fd.  selrescan() will
+		 * call selsetbits twice as a result.
+		 */
+		if ((obits[msk][idx] & bit) != 0)
+			continue;
+		obits[msk][idx] |= bit;
+		n++;
+	}
+
+	return (n);
+}
+
+/*
+ * Traverse the list of fds attached to this thread's seltd and check for
+ * completion.
+ */
+static int
+selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
+{
+	struct filedesc *fdp;
+	struct selinfo *si;
+	struct seltd *stp;
+	struct selfd *sfp;
+	struct selfd *sfn;
+	struct file *fp;
+	fd_mask bit;
+	int fd, ev, n, idx;
+
+	fdp = td->td_proc->p_fd;
+	stp = td->td_sel;
+	n = 0;
+	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+		fd = (int)(uintptr_t)sfp->sf_cookie;
+		si = sfp->sf_si;
+		selfdfree(stp, sfp);
+		/* If the selinfo wasn't cleared the event didn't fire. */
+		if (si != NULL)
+			continue;
+		if ((fp = fget_unlocked(fdp, fd)) == NULL)
+			return (EBADF);
+		idx = fd / NFDBITS;
+		bit = (fd_mask)1 << (fd % NFDBITS);
+		ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
+		fdrop(fp, td);
+		if (ev != 0)
+			n += selsetbits(ibits, obits, idx, bit, ev);
+	}
+	stp->st_flags = 0;
+	td->td_retval[0] = n;
+	return (0);
+}
+
+/*
+ * Perform the initial filedescriptor scan and register ourselves with
+ * each selinfo.
+ */
+static int
+selscan(td, ibits, obits, nfd)
+	struct thread *td;
+	fd_mask **ibits, **obits;
+	int nfd;
+{
+	struct filedesc *fdp;
+	struct file *fp;
+	fd_mask bit;
+	int ev, flags, end, fd;
+	int n, idx;
+
+	fdp = td->td_proc->p_fd;
+	n = 0;
+	for (idx = 0, fd = 0; fd < nfd; idx++) {
+		end = imin(fd + NFDBITS, nfd);
+		for (bit = 1; fd < end; bit <<= 1, fd++) {
+			/* Compute the list of events we're interested in. */
+			flags = selflags(ibits, idx, bit);
+			if (flags == 0)
+				continue;
+			if ((fp = fget_unlocked(fdp, fd)) == NULL)
+				return (EBADF);
+			selfdalloc(td, (void *)(uintptr_t)fd);
+			ev = fo_poll(fp, flags, td->td_ucred, td);
+			fdrop(fp, td);
+			if (ev != 0)
+				n += selsetbits(ibits, obits, idx, bit, ev);
+		}
+	}
+
+	td->td_retval[0] = n;
+	return (0);
+}
+#endif /* __rtems__ */
+
+#ifndef _SYS_SYSPROTO_H_
+struct poll_args {
+	struct pollfd *fds;
+	u_int	nfds;
+	int	timeout;
+};
+#endif
+int
+#ifdef __rtems__
+kern_poll(td, uap)
+#else
+poll(td, uap)
+#endif /* __rtems__ */
+	struct thread *td;
+	struct poll_args *uap;
+{
+	struct pollfd *bits;
+	struct pollfd smallbits[32];
+	struct timeval atv, rtv, ttv;
+	int error = 0, timo;
+	u_int nfds;
+	size_t ni;
+
+	nfds = uap->nfds;
+	if (nfds > maxfilesperproc && nfds > FD_SETSIZE) 
+		return (EINVAL);
+	ni = nfds * sizeof(struct pollfd);
+	if (ni > sizeof(smallbits))
+		bits = malloc(ni, M_TEMP, M_WAITOK);
+	else
+		bits = smallbits;
+	error = copyin(uap->fds, bits, ni);
+	if (error)
+		goto done;
+	if (uap->timeout != INFTIM) {
+		atv.tv_sec = uap->timeout / 1000;
+		atv.tv_usec = (uap->timeout % 1000) * 1000;
+		if (itimerfix(&atv)) {
+			error = EINVAL;
+			goto done;
+		}
+		getmicrouptime(&rtv);
+		timevaladd(&atv, &rtv);
+	} else {
+		atv.tv_sec = 0;
+		atv.tv_usec = 0;
+	}
+	timo = 0;
+	seltdinit(td);
+	/* Iterate until the timeout expires or descriptors become ready. */
+	for (;;) {
+		error = pollscan(td, bits, nfds);
+		if (error || td->td_retval[0] != 0)
+			break;
+		if (atv.tv_sec || atv.tv_usec) {
+			getmicrouptime(&rtv);
+			if (timevalcmp(&rtv, &atv, >=))
+				break;
+			ttv = atv;
+			timevalsub(&ttv, &rtv);
+			timo = ttv.tv_sec > 24 * 60 * 60 ?
+			    24 * 60 * 60 * hz : tvtohz(&ttv);
+		}
+		error = seltdwait(td, timo);
+		if (error)
+			break;
+		error = pollrescan(td);
+		if (error || td->td_retval[0] != 0)
+			break;
+	}
+	seltdclear(td);
+
+done:
+	/* poll is not restarted after signals... */
+	if (error == ERESTART)
+		error = EINTR;
+	if (error == EWOULDBLOCK)
+		error = 0;
+	if (error == 0) {
+		error = pollout(td, bits, uap->fds, nfds);
+		if (error)
+			goto out;
+	}
+out:
+	if (ni > sizeof(smallbits))
+		free(bits, M_TEMP);
+	return (error);
+}
+
+static int
+pollrescan(struct thread *td)
+{
+	struct seltd *stp;
+	struct selfd *sfp;
+	struct selfd *sfn;
+	struct selinfo *si;
+	struct filedesc *fdp;
+	struct file *fp;
+	struct pollfd *fd;
+	int n;
+
+	n = 0;
+	fdp = td->td_proc->p_fd;
+	stp = td->td_sel;
+	FILEDESC_SLOCK(fdp);
+	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+		fd = (struct pollfd *)sfp->sf_cookie;
+		si = sfp->sf_si;
+		selfdfree(stp, sfp);
+		/* If the selinfo wasn't cleared the event didn't fire. */
+		if (si != NULL)
+			continue;
+		fp = fdp->fd_ofiles[fd->fd];
+		if (fp == NULL) {
+			fd->revents = POLLNVAL;
+			n++;
+			continue;
+		}
+		/*
+		 * Note: backend also returns POLLHUP and
+		 * POLLERR if appropriate.
+		 */
+		fd->revents = fo_poll(fp, fd->events, td->td_ucred, td);
+		if (fd->revents != 0)
+			n++;
+	}
+	FILEDESC_SUNLOCK(fdp);
+	stp->st_flags = 0;
+	td->td_retval[0] = n;
+	return (0);
+}
+
+
+static int
+pollout(td, fds, ufds, nfd)
+	struct thread *td;
+	struct pollfd *fds;
+	struct pollfd *ufds;
+	u_int nfd;
+{
+	int error = 0;
+	u_int i = 0;
+	u_int n = 0;
+
+	for (i = 0; i < nfd; i++) {
+		error = copyout(&fds->revents, &ufds->revents,
+		    sizeof(ufds->revents));
+		if (error)
+			return (error);
+		if (fds->revents != 0)
+			n++;
+		fds++;
+		ufds++;
+	}
+	td->td_retval[0] = n;
+	return (0);
+}
+
+static int
+pollscan(td, fds, nfd)
+	struct thread *td;
+	struct pollfd *fds;
+	u_int nfd;
+{
+	struct filedesc *fdp = td->td_proc->p_fd;
+	int i;
+	struct file *fp;
+	int n = 0;
+
+	FILEDESC_SLOCK(fdp);
+	for (i = 0; i < nfd; i++, fds++) {
+		if (fds->fd >= fdp->fd_nfiles) {
+			fds->revents = POLLNVAL;
+			n++;
+		} else if (fds->fd < 0) {
+			fds->revents = 0;
+		} else {
+			fp = fdp->fd_ofiles[fds->fd];
+			if (fp == NULL) {
+				fds->revents = POLLNVAL;
+				n++;
+			} else {
+				/*
+				 * Note: backend also returns POLLHUP and
+				 * POLLERR if appropriate.
+				 */
+				selfdalloc(td, fds);
+				fds->revents = fo_poll(fp, fds->events,
+				    td->td_ucred, td);
+				/*
+				 * POSIX requires POLLOUT to be never
+				 * set simultaneously with POLLHUP.
+				 */
+				if ((fds->revents & POLLHUP) != 0)
+					fds->revents &= ~POLLOUT;
+
+				if (fds->revents != 0)
+					n++;
+			}
+		}
+	}
+	FILEDESC_SUNLOCK(fdp);
+	td->td_retval[0] = n;
+	return (0);
+}
+
+/*
+ * OpenBSD poll system call.
+ *
+ * XXX this isn't quite a true representation..  OpenBSD uses select ops.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct openbsd_poll_args {
+	struct pollfd *fds;
+	u_int	nfds;
+	int	timeout;
+};
+#endif
+int
+openbsd_poll(td, uap)
+	register struct thread *td;
+	register struct openbsd_poll_args *uap;
+{
+#ifdef __rtems__
+	return (kern_poll(td, (struct poll_args *)uap));
+#else
+	return (poll(td, (struct poll_args *)uap));
+#endif
+}
+
+/*
+ * XXX This was created specifically to support netncp and netsmb.  This
+ * allows the caller to specify a socket to wait for events on.  It returns
+ * 0 if any events matched and an error otherwise.  There is no way to
+ * determine which events fired.
+ */
+int
+selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
+{
+	struct timeval atv, rtv, ttv;
+	int error, timo;
+
+	if (tvp != NULL) {
+		atv = *tvp;
+		if (itimerfix(&atv))
+			return (EINVAL);
+		getmicrouptime(&rtv);
+		timevaladd(&atv, &rtv);
+	} else {
+		atv.tv_sec = 0;
+		atv.tv_usec = 0;
+	}
+
+	timo = 0;
+	seltdinit(td);
+	/*
+	 * Iterate until the timeout expires or the socket becomes ready.
+	 */
+	for (;;) {
+		selfdalloc(td, NULL);
+		error = sopoll(so, events, NULL, td);
+		/* error here is actually the ready events. */
+		if (error)
+			return (0);
+		if (atv.tv_sec || atv.tv_usec) {
+			getmicrouptime(&rtv);
+			if (timevalcmp(&rtv, &atv, >=)) {
+				seltdclear(td);
+				return (EWOULDBLOCK);
+			}
+			ttv = atv;
+			timevalsub(&ttv, &rtv);
+			timo = ttv.tv_sec > 24 * 60 * 60 ?
+			    24 * 60 * 60 * hz : tvtohz(&ttv);
+		}
+		error = seltdwait(td, timo);
+		seltdclear(td);
+		if (error)
+			break;
+	}
+	/* XXX Duplicates ncp/smb behavior. */
+	if (error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Preallocate two selfds associated with 'cookie'.  Some fo_poll routines
+ * have two select sets, one for read and another for write.
+ */
+static void
+selfdalloc(struct thread *td, void *cookie)
+{
+	struct seltd *stp;
+
+	stp = td->td_sel;
+	if (stp->st_free1 == NULL)
+		stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+	stp->st_free1->sf_td = stp;
+	stp->st_free1->sf_cookie = cookie;
+	if (stp->st_free2 == NULL)
+		stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+	stp->st_free2->sf_td = stp;
+	stp->st_free2->sf_cookie = cookie;
+}
+
+static void
+selfdfree(struct seltd *stp, struct selfd *sfp)
+{
+	STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
+	mtx_lock(sfp->sf_mtx);
+	if (sfp->sf_si)
+		TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
+	mtx_unlock(sfp->sf_mtx);
+	uma_zfree(selfd_zone, sfp);
+}
+
+/*
+ * Record a select request.
+ */
+void
+selrecord(selector, sip)
+	struct thread *selector;
+	struct selinfo *sip;
+{
+	struct selfd *sfp;
+	struct seltd *stp;
+	struct mtx *mtxp;
+
+	stp = selector->td_sel;
+	/*
+	 * Don't record when doing a rescan.
+	 */
+	if (stp->st_flags & SELTD_RESCAN)
+		return;
+	/*
+	 * Grab one of the preallocated descriptors.
+	 */
+	sfp = NULL;
+	if ((sfp = stp->st_free1) != NULL)
+		stp->st_free1 = NULL;
+	else if ((sfp = stp->st_free2) != NULL)
+		stp->st_free2 = NULL;
+	else
+		panic("selrecord: No free selfd on selq");
+	mtxp = sip->si_mtx;
+	if (mtxp == NULL)
+		mtxp = mtx_pool_find(mtxpool_select, sip);
+	/*
+	 * Initialize the sfp and queue it in the thread.
+	 */
+	sfp->sf_si = sip;
+	sfp->sf_mtx = mtxp;
+	STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
+	/*
+	 * Now that we've locked the sip, check for initialization.
+	 */
+	mtx_lock(mtxp);
+	if (sip->si_mtx == NULL) {
+		sip->si_mtx = mtxp;
+		TAILQ_INIT(&sip->si_tdlist);
+	}
+	/*
+	 * Add this thread to the list of selfds listening on this selinfo.
+	 */
+	TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
+	mtx_unlock(sip->si_mtx);
+}
+
+/* Wake up a selecting thread. */
+void
+selwakeup(sip)
+	struct selinfo *sip;
+{
+	doselwakeup(sip, -1);
+}
+
+/* Wake up a selecting thread, and set its priority. */
+void
+selwakeuppri(sip, pri)
+	struct selinfo *sip;
+	int pri;
+{
+	doselwakeup(sip, pri);
+}
+
+/*
+ * Do a wakeup when a selectable event occurs.
+ */
+static void
+doselwakeup(sip, pri)
+	struct selinfo *sip;
+	int pri;
+{
+	struct selfd *sfp;
+	struct selfd *sfn;
+	struct seltd *stp;
+
+	/* If it's not initialized there can't be any waiters. */
+	if (sip->si_mtx == NULL)
+		return;
+	/*
+	 * Locking the selinfo locks all selfds associated with it.
+	 */
+	mtx_lock(sip->si_mtx);
+	TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) {
+		/*
+		 * Once we remove this sfp from the list and clear the
+		 * sf_si seltdclear will know to ignore this si.
+		 */
+		TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads);
+		sfp->sf_si = NULL;
+		stp = sfp->sf_td;
+		mtx_lock(&stp->st_mtx);
+		stp->st_flags |= SELTD_PENDING;
+		cv_broadcastpri(&stp->st_wait, pri);
+		mtx_unlock(&stp->st_mtx);
+	}
+	mtx_unlock(sip->si_mtx);
+}
+
+static void
+seltdinit(struct thread *td)
+{
+	struct seltd *stp;
+
+	if ((stp = td->td_sel) != NULL)
+		goto out;
+	td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO);
+	mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF);
+	cv_init(&stp->st_wait, "select");
+out:
+	stp->st_flags = 0;
+	STAILQ_INIT(&stp->st_selq);
+}
+
+static int
+seltdwait(struct thread *td, int timo)
+{
+	struct seltd *stp;
+	int error;
+
+	stp = td->td_sel;
+	/*
+	 * An event of interest may occur while we do not hold the seltd
+	 * locked so check the pending flag before we sleep.
+	 */
+	mtx_lock(&stp->st_mtx);
+	/*
+	 * Any further calls to selrecord will be a rescan.
+	 */
+	stp->st_flags |= SELTD_RESCAN;
+	if (stp->st_flags & SELTD_PENDING) {
+		mtx_unlock(&stp->st_mtx);
+		return (0);
+	}
+	if (timo > 0)
+		error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
+	else
+		error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
+	mtx_unlock(&stp->st_mtx);
+
+	return (error);
+}
+
+void
+seltdfini(struct thread *td)
+{
+	struct seltd *stp;
+
+	stp = td->td_sel;
+	if (stp == NULL)
+		return;
+	if (stp->st_free1)
+		uma_zfree(selfd_zone, stp->st_free1);
+	if (stp->st_free2)
+		uma_zfree(selfd_zone, stp->st_free2);
+	td->td_sel = NULL;
+	free(stp, M_SELECT);
+}
+
+/*
+ * Remove the references to the thread from all of the objects we were
+ * polling.
+ */
+static void
+seltdclear(struct thread *td)
+{
+	struct seltd *stp;
+	struct selfd *sfp;
+	struct selfd *sfn;
+
+	stp = td->td_sel;
+	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn)
+		selfdfree(stp, sfp);
+	stp->st_flags = 0;
+}
+
+static void selectinit(void *);
+SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL);
+static void
+selectinit(void *dummy __unused)
+{
+
+	selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL,
+	    NULL, NULL, UMA_ALIGN_PTR, 0);
+	mtxpool_select = mtx_pool_create("select mtxpool", 128, MTX_DEF);
+}
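
Throughout selscan(), selrescan() and selsetbits() above, a descriptor number
is addressed inside the flat fd_mask arrays built by getbits() as a word/bit
pair: idx = fd / NFDBITS and bit = 1 << (fd % NFDBITS).  A small standalone
sketch of that mapping (the fd_mask typedef and NFDBITS here are derived
locally for illustration):

#include <stdio.h>

typedef unsigned long fd_mask;
#define NFDBITS	(8 * (int)sizeof(fd_mask))	/* bits per fd_mask word */

int main(void)
{
	fd_mask obits[4] = { 0 };	/* room for 4 * NFDBITS descriptors */
	int fd = 70;			/* example descriptor */
	int idx = fd / NFDBITS;
	fd_mask bit = (fd_mask)1 << (fd % NFDBITS);

	obits[idx] |= bit;		/* what selsetbits() does when an event fired */
	printf("fd %d -> word %d, bit 0x%lx, now set: %d\n",
	    fd, idx, (unsigned long)bit, (obits[idx] & bit) != 0);
	return 0;
}
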
diff --git a/freebsd/sys/buf.h b/freebsd/sys/buf.h
new file mode 100644
index 0000000..88e55d9
--- /dev/null
+++ b/freebsd/sys/buf.h
@@ -0,0 +1,526 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)buf.h	8.9 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_BUF_HH_
+#define	_SYS_BUF_HH_
+
+#include <freebsd/sys/bufobj.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/lockmgr.h>
+
+struct bio;
+struct buf;
+struct bufobj;
+struct mount;
+struct vnode;
+struct uio;
+
+/*
+ * To avoid including <ufs/ffs/softdep.h> 
+ */   
+LIST_HEAD(workhead, worklist);
+/*
+ * These are currently used only by the soft dependency code, hence
+ * are stored once in a global variable. If other subsystems wanted
+ * to use these hooks, a pointer to a set of bio_ops could be added
+ * to each buffer.
+ */
+extern struct bio_ops {
+	void	(*io_start)(struct buf *);
+	void	(*io_complete)(struct buf *);
+	void	(*io_deallocate)(struct buf *);
+	int	(*io_countdeps)(struct buf *, int);
+} bioops;
+
+struct vm_object;
+
+typedef unsigned char b_xflags_t;
+
+/*
+ * The buffer header describes an I/O operation in the kernel.
+ *
+ * NOTES:
+ *	b_bufsize, b_bcount.  b_bufsize is the allocation size of the
+ *	buffer, either DEV_BSIZE or PAGE_SIZE aligned.  b_bcount is the
+ *	originally requested buffer size and can serve as a bounds check
+ *	against EOF.  For most, but not all uses, b_bcount == b_bufsize.
+ *
+ *	b_dirtyoff, b_dirtyend.  Buffers support piecemeal, unaligned
+ *	ranges of dirty data that need to be written to backing store.
+ *	The range is typically clipped at b_bcount ( not b_bufsize ).
+ *
+ *	b_resid.  Number of bytes remaining in I/O.  After an I/O operation
+ *	completes, b_resid is usually 0 indicating 100% success.
+ *
+ *	All fields are protected by the buffer lock except those marked:
+ *		V - Protected by owning bufobj lock
+ *		Q - Protected by the buf queue lock
+ *		D - Protected by a dependency implementation specific lock
+ */
+struct buf {
+	struct bufobj	*b_bufobj;
+	long		b_bcount;
+	void		*b_caller1;
+	caddr_t		b_data;
+	int		b_error;
+	uint8_t		b_iocmd;
+	uint8_t		b_ioflags;
+	off_t		b_iooffset;
+	long		b_resid;
+	void	(*b_iodone)(struct buf *);
+	daddr_t b_blkno;		/* Underlying physical block number. */
+	off_t	b_offset;		/* Offset into file. */
+	TAILQ_ENTRY(buf) b_bobufs;	/* (V) Buffer's associated vnode. */
+	struct buf	*b_left;	/* (V) splay tree link */
+	struct buf	*b_right;	/* (V) splay tree link */
+	uint32_t	b_vflags;	/* (V) BV_* flags */
+	TAILQ_ENTRY(buf) b_freelist;	/* (Q) Free list position inactive. */
+	unsigned short b_qindex;	/* (Q) buffer queue index */
+	uint32_t	b_flags;	/* B_* flags. */
+	b_xflags_t b_xflags;		/* extra flags */
+	struct lock b_lock;		/* Buffer lock */
+	long	b_bufsize;		/* Allocated buffer size. */
+	long	b_runningbufspace;	/* when I/O is running, pipelining */
+	caddr_t	b_kvabase;		/* base kva for buffer */
+	int	b_kvasize;		/* size of kva for buffer */
+	daddr_t b_lblkno;		/* Logical block number. */
+	struct	vnode *b_vp;		/* Device vnode. */
+	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
+	int	b_dirtyend;		/* Offset of end of dirty region. */
+	struct	ucred *b_rcred;		/* Read credentials reference. */
+	struct	ucred *b_wcred;		/* Write credentials reference. */
+	void	*b_saveaddr;		/* Original b_addr for physio. */
+	union	pager_info {
+		int	pg_reqpage;
+	} b_pager;
+	union	cluster_info {
+		TAILQ_HEAD(cluster_list_head, buf) cluster_head;
+		TAILQ_ENTRY(buf) cluster_entry;
+	} b_cluster;
+	struct	vm_page *b_pages[btoc(MAXPHYS)];
+	int		b_npages;
+	struct	workhead b_dep;		/* (D) List of filesystem dependencies. */
+	void	*b_fsprivate1;
+	void	*b_fsprivate2;
+	void	*b_fsprivate3;
+	int	b_pin_count;
+};
+
+#define b_object	b_bufobj->bo_object
+
+/*
+ * These flags are kept in b_flags.
+ *
+ * Notes:
+ *
+ *	B_ASYNC		VOP calls on bp's are usually async whether or not
+ *			B_ASYNC is set, but some subsystems, such as NFS, like 
+ *			to know what is best for the caller so they can
+ *			optimize the I/O.
+ *
+ *	B_PAGING	Indicates that bp is being used by the paging system or
+ *			some paging system and that the bp is not linked into
+ *			the b_vp's clean/dirty linked lists or ref counts.
+ *			Buffer vp reassignments are illegal in this case.
+ *
+ *	B_CACHE		This may only be set if the buffer is entirely valid.
+ *			The situation where B_DELWRI is set and B_CACHE is
+ *			clear MUST be committed to disk by getblk() so 
+ *			B_DELWRI can also be cleared.  See the comments for
+ *			getblk() in kern/vfs_bio.c.  If B_CACHE is clear,
+ *			the caller is expected to clear BIO_ERROR and B_INVAL,
+ *			set BIO_READ, and initiate an I/O.
+ *
+ *			The 'entire buffer' is defined to be the range from
+ *			0 through b_bcount.
+ *
+ *	B_MALLOC	Request that the buffer be allocated from the malloc
+ *			pool, DEV_BSIZE aligned instead of PAGE_SIZE aligned.
+ *
+ *	B_CLUSTEROK	This flag is typically set for B_DELWRI buffers
+ *			by filesystems that allow clustering when the buffer
+ *			is fully dirty and indicates that it may be clustered
+ *			with other adjacent dirty buffers.  Note the clustering
+ *			may not be used with the stage 1 data write under NFS
+ *			but may be used for the commit rpc portion.
+ *
+ *	B_VMIO		Indicates that the buffer is tied into an VM object.
+ *			The buffer's data is always PAGE_SIZE aligned even
+ *			if b_bufsize and b_bcount are not.  ( b_bufsize is 
+ *			always at least DEV_BSIZE aligned, though ).
+ *
+ *	B_DIRECT	Hint that we should attempt to completely free
+ *			the pages underlying the buffer.  B_DIRECT is
+ *			sticky until the buffer is released and typically
+ *			only has an effect when B_RELBUF is also set.
+ *
+ */
+
+#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
+#define	B_NEEDCOMMIT	0x00000002	/* Append-write in progress. */
+#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
+#define	B_DIRECT	0x00000008	/* direct I/O flag (pls free vmio) */
+#define	B_DEFERRED	0x00000010	/* Skipped over for cleaning */
+#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
+#define	B_VALIDSUSPWRT	0x00000040	/* Valid write during suspension. */
+#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
+#define	B_PERSISTENT	0x00000100	/* Perm. ref'ed while EXT2FS mounted. */
+#define	B_DONE		0x00000200	/* I/O completed. */
+#define	B_EINTR		0x00000400	/* I/O was interrupted */
+#define	B_00000800	0x00000800	/* Available flag. */
+#define	B_00001000	0x00001000	/* Available flag. */
+#define	B_INVAL		0x00002000	/* Does not contain valid info. */
+#define	B_00004000	0x00004000	/* Available flag. */
+#define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
+#define	B_MALLOC	0x00010000	/* malloced b_data */
+#define	B_CLUSTEROK	0x00020000	/* May be clustered with adjacent dirty buffers. */
+#define	B_000400000	0x00040000	/* Available flag. */
+#define	B_000800000	0x00080000	/* Available flag. */
+#define	B_00100000	0x00100000	/* Available flag. */
+#define	B_DIRTY		0x00200000	/* Needs writing later (in EXT2FS). */
+#define	B_RELBUF	0x00400000	/* Release VMIO buffer. */
+#define	B_00800000	0x00800000	/* Available flag. */
+#define	B_01000000	0x01000000	/* Available flag. */
+#define	B_NEEDSGIANT	0x02000000	/* Buffer's vnode needs giant. */
+#define	B_PAGING	0x04000000	/* volatile paging I/O -- bypass VMIO */
+#define B_MANAGED	0x08000000	/* Managed by FS. */
+#define B_RAM		0x10000000	/* Read ahead mark (flag) */
+#define B_VMIO		0x20000000	/* VMIO flag */
+#define B_CLUSTER	0x40000000	/* pagein op, so swap() can count it */
+#define B_REMFREE	0x80000000	/* Delayed bremfree */
+
+#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34b27" \
+	"\33paging\32b25\31b24\30b23\27relbuf\26dirty\25b20" \
+	"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
+	"\15b12\14b11\13eintr\12done\11persist\10delwri\7validsuspwrt" \
+	"\6cache\5deferred\4direct\3async\2needcommit\1age"
+
+/*
+ * These flags are kept in b_xflags.
+ */
+#define	BX_VNDIRTY	0x00000001	/* On vnode dirty list */
+#define	BX_VNCLEAN	0x00000002	/* On vnode clean list */
+#define	BX_BKGRDWRITE	0x00000010	/* Do writes in background */
+#define BX_BKGRDMARKER	0x00000020	/* Mark buffer for splay tree */
+#define	BX_ALTDATA	0x00000040	/* Holds extended data */
+
+#define	NOOFFSET	(-1LL)		/* No buffer offset calculated yet */
+
+/*
+ * These flags are kept in b_vflags.
+ */
+#define	BV_SCANNED	0x00000001	/* VOP_FSYNC funcs mark written bufs */
+#define	BV_BKGRDINPROG	0x00000002	/* Background write in progress */
+#define	BV_BKGRDWAIT	0x00000004	/* Background write waiting */
+#define	BV_INFREECNT	0x80000000	/* buf is counted in numfreebufs */
+
+#ifdef _KERNEL
+/*
+ * Buffer locking
+ */
+extern const char *buf_wmesg;		/* Default buffer lock message */
+#define BUF_WMESG "bufwait"
+#include <freebsd/sys/proc.h>			/* XXX for curthread */
+#include <freebsd/sys/mutex.h>
+
+/*
+ * Initialize a lock.
+ */
+#define BUF_LOCKINIT(bp)						\
+	lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
+/*
+ * Get a lock, sleeping non-interruptibly until it becomes available.
+ */
+#define	BUF_LOCK(bp, locktype, interlock)				\
+	_lockmgr_args(&(bp)->b_lock, (locktype), (interlock),		\
+	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,		\
+	    LOCK_FILE, LOCK_LINE)
+
+/*
+ * Get a lock, sleeping with the specified interruptibility and timeout.
+ */
+#define	BUF_TIMELOCK(bp, locktype, interlock, wmesg, catch, timo)	\
+	_lockmgr_args(&(bp)->b_lock, (locktype) | LK_TIMELOCK,		\
+	    (interlock), (wmesg), (PRIBIO + 4) | (catch), (timo),	\
+	    LOCK_FILE, LOCK_LINE)
+
+/*
+ * Release a lock. Only the acquiring process may free the lock unless
+ * it has been handed off to biodone.
+ */
+#define	BUF_UNLOCK(bp) do {						\
+	KASSERT(((bp)->b_flags & B_REMFREE) == 0,			\
+	    ("BUF_UNLOCK %p while B_REMFREE is still set.", (bp)));	\
+									\
+	(void)_lockmgr_args(&(bp)->b_lock, LK_RELEASE, NULL,		\
+	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,		\
+	    LOCK_FILE, LOCK_LINE);					\
+} while (0)
+
+/*
+ * Check if a buffer lock is recursed.
+ */
+#define	BUF_LOCKRECURSED(bp)						\
+	lockmgr_recursed(&(bp)->b_lock)
+
+/*
+ * Check if a buffer lock is currently held.
+ */
+#define	BUF_ISLOCKED(bp)						\
+	lockstatus(&(bp)->b_lock)
+/*
+ * Free a buffer lock.
+ */
+#define BUF_LOCKFREE(bp) 						\
+	lockdestroy(&(bp)->b_lock)
+
+/*
+ * Buffer lock assertions.
+ */
+#if defined(INVARIANTS) && defined(INVARIANT_SUPPORT)
+#define	BUF_ASSERT_LOCKED(bp)						\
+	_lockmgr_assert(&(bp)->b_lock, KA_LOCKED, LOCK_FILE, LOCK_LINE)
+#define	BUF_ASSERT_SLOCKED(bp)						\
+	_lockmgr_assert(&(bp)->b_lock, KA_SLOCKED, LOCK_FILE, LOCK_LINE)
+#define	BUF_ASSERT_XLOCKED(bp)						\
+	_lockmgr_assert(&(bp)->b_lock, KA_XLOCKED, LOCK_FILE, LOCK_LINE)
+#define	BUF_ASSERT_UNLOCKED(bp)						\
+	_lockmgr_assert(&(bp)->b_lock, KA_UNLOCKED, LOCK_FILE, LOCK_LINE)
+#define	BUF_ASSERT_HELD(bp)
+#define	BUF_ASSERT_UNHELD(bp)
+#else
+#define	BUF_ASSERT_LOCKED(bp)
+#define	BUF_ASSERT_SLOCKED(bp)
+#define	BUF_ASSERT_XLOCKED(bp)
+#define	BUF_ASSERT_UNLOCKED(bp)
+#define	BUF_ASSERT_HELD(bp)
+#define	BUF_ASSERT_UNHELD(bp)
+#endif
+
+#ifdef _SYS_PROC_HH_	/* Avoid #include <freebsd/sys/proc.h> pollution */
+/*
+ * When initiating asynchronous I/O, change ownership of the lock to the
+ * kernel. Once done, the lock may legally be released by biodone. The
+ * original owning process can no longer acquire it recursively, but must
+ * wait until the I/O is completed and the lock has been freed by biodone.
+ */
+#define	BUF_KERNPROC(bp)						\
+	_lockmgr_disown(&(bp)->b_lock, LOCK_FILE, LOCK_LINE)
+#endif
+
+/*
+ * Find out if the lock has waiters or not.
+ */
+#define	BUF_LOCKWAITERS(bp)						\
+	lockmgr_waiters(&(bp)->b_lock)
+
+#endif /* _KERNEL */
+
+struct buf_queue_head {
+	TAILQ_HEAD(buf_queue, buf) queue;
+	daddr_t last_pblkno;
+	struct	buf *insert_point;
+	struct	buf *switch_point;
+};
+
+/*
+ * This structure describes a clustered I/O.  It is stored in the b_saveaddr
+ * field of the buffer on which I/O is done.  At I/O completion, cluster
+ * callback uses the structure to parcel I/Os out to individual buffers, and
+ * then frees this structure.
+ */
+struct cluster_save {
+	long	bs_bcount;		/* Saved b_bcount. */
+	long	bs_bufsize;		/* Saved b_bufsize. */
+	void	*bs_saveaddr;		/* Saved b_addr. */
+	int	bs_nchildren;		/* Number of associated buffers. */
+	struct buf **bs_children;	/* List of associated buffers. */
+};
+
+#ifdef _KERNEL
+
+static __inline int
+bwrite(struct buf *bp)
+{
+
+	KASSERT(bp->b_bufobj != NULL, ("bwrite: no bufobj bp=%p", bp));
+	KASSERT(bp->b_bufobj->bo_ops != NULL, ("bwrite: no bo_ops bp=%p", bp));
+	KASSERT(bp->b_bufobj->bo_ops->bop_write != NULL,
+	    ("bwrite: no bop_write bp=%p", bp));
+	return (BO_WRITE(bp->b_bufobj, bp));
+}
+
+static __inline void
+bstrategy(struct buf *bp)
+{
+
+	KASSERT(bp->b_bufobj != NULL, ("bstrategy: no bufobj bp=%p", bp));
+	KASSERT(bp->b_bufobj->bo_ops != NULL,
+	    ("bstrategy: no bo_ops bp=%p", bp));
+	KASSERT(bp->b_bufobj->bo_ops->bop_strategy != NULL,
+	    ("bstrategy: no bop_strategy bp=%p", bp));
+	BO_STRATEGY(bp->b_bufobj, bp);
+}
+
+static __inline void
+buf_start(struct buf *bp)
+{
+	if (bioops.io_start)
+		(*bioops.io_start)(bp);
+}
+
+static __inline void
+buf_complete(struct buf *bp)
+{
+	if (bioops.io_complete)
+		(*bioops.io_complete)(bp);
+}
+
+static __inline void
+buf_deallocate(struct buf *bp)
+{
+	if (bioops.io_deallocate)
+		(*bioops.io_deallocate)(bp);
+	BUF_LOCKFREE(bp);
+}
+
+static __inline int
+buf_countdeps(struct buf *bp, int i)
+{
+	if (bioops.io_countdeps)
+		return ((*bioops.io_countdeps)(bp, i));
+	else
+		return (0);
+}
+
+#endif /* _KERNEL */
+
+/*
+ * Zero out the buffer's data area.
+ */
+#define	clrbuf(bp) {							\
+	bzero((bp)->b_data, (u_int)(bp)->b_bcount);			\
+	(bp)->b_resid = 0;						\
+}
+
+/*
+ * Flags for getblk's last parameter.
+ */
+#define	GB_LOCK_NOWAIT	0x0001		/* Fail if we block on a buf lock. */
+#define	GB_NOCREAT	0x0002		/* Don't create a buf if not found. */
+#define	GB_NOWAIT_BD	0x0004		/* Do not wait for bufdaemon */
+
+#ifdef _KERNEL
+extern int	nbuf;			/* The number of buffer headers */
+extern long	maxswzone;		/* Max KVA for swap structures */
+extern long	maxbcache;		/* Max KVA for buffer cache */
+extern long	runningbufspace;
+extern long	hibufspace;
+extern int	dirtybufthresh;
+extern int	bdwriteskip;
+extern int	dirtybufferflushes;
+extern int	altbufferflushes;
+extern int      buf_maxio;              /* nominal maximum I/O for buffer */
+extern struct	buf *buf;		/* The buffer headers. */
+extern char	*buffers;		/* The buffer contents. */
+extern int	bufpages;		/* Number of memory pages in the buffer pool. */
+extern struct	buf *swbuf;		/* Swap I/O buffer headers. */
+extern int	nswbuf;			/* Number of swap I/O buffer headers. */
+extern int	cluster_pbuf_freecnt;	/* Number of pbufs for clusters */
+extern int	vnode_pbuf_freecnt;	/* Number of pbufs for vnode pager */
+
+void	runningbufwakeup(struct buf *);
+void	waitrunningbufspace(void);
+caddr_t	kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est);
+void	bufinit(void);
+void	bwillwrite(void);
+int	buf_dirty_count_severe(void);
+void	bremfree(struct buf *);
+void	bremfreef(struct buf *);	/* XXX Force bremfree, only for nfs. */
+int	bread(struct vnode *, daddr_t, int, struct ucred *, struct buf **);
+void	breada(struct vnode *, daddr_t *, int *, int, struct ucred *);
+int	breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
+	    struct ucred *, struct buf **);
+void	bdwrite(struct buf *);
+void	bawrite(struct buf *);
+void	bdirty(struct buf *);
+void	bundirty(struct buf *);
+void	bufstrategy(struct bufobj *, struct buf *);
+void	brelse(struct buf *);
+void	bqrelse(struct buf *);
+int	vfs_bio_awrite(struct buf *);
+struct buf *     getpbuf(int *);
+struct buf *incore(struct bufobj *, daddr_t);
+struct buf *gbincore(struct bufobj *, daddr_t);
+struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
+struct buf *geteblk(int, int);
+int	bufwait(struct buf *);
+int	bufwrite(struct buf *);
+void	bufdone(struct buf *);
+void	bufdone_finish(struct buf *);
+
+int	cluster_read(struct vnode *, u_quad_t, daddr_t, long,
+	    struct ucred *, long, int, struct buf **);
+int	cluster_wbuild(struct vnode *, long, daddr_t, int);
+void	cluster_write(struct vnode *, struct buf *, u_quad_t, int);
+void	vfs_bio_set_valid(struct buf *, int base, int size);
+void	vfs_bio_clrbuf(struct buf *);
+void	vfs_busy_pages(struct buf *, int clear_modify);
+void	vfs_unbusy_pages(struct buf *);
+int	vmapbuf(struct buf *);
+void	vunmapbuf(struct buf *);
+void	relpbuf(struct buf *, int *);
+void	brelvp(struct buf *);
+void	bgetvp(struct vnode *, struct buf *);
+void	pbgetbo(struct bufobj *bo, struct buf *bp);
+void	pbgetvp(struct vnode *, struct buf *);
+void	pbrelbo(struct buf *);
+void	pbrelvp(struct buf *);
+int	allocbuf(struct buf *bp, int size);
+void	reassignbuf(struct buf *);
+struct	buf *trypbuf(int *);
+void	bwait(struct buf *, u_char, const char *);
+void	bdone(struct buf *);
+void	bpin(struct buf *);
+void	bunpin(struct buf *);
+void 	bunpin_wait(struct buf *);
+
+#endif /* _KERNEL */
+
+#endif /* !_SYS_BUF_HH_ */
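
As a rough illustration (not part of the commit itself), the locking and
buffer macros defined in buf.h above are normally wrapped around any access
to a buffer's fields.  In this hedged sketch `bp' is an assumed, already
allocated buffer and LK_EXCLUSIVE comes from <sys/lockmgr.h>:

    #include <freebsd/sys/buf.h>

    static __inline void
    example_touch_buffer(struct buf *bp)
    {
    	/* Sleep non-interruptibly until the exclusive buffer lock is free. */
    	BUF_LOCK(bp, LK_EXCLUSIVE, NULL);

    	/* Zero the data area and reset b_resid using the clrbuf() macro. */
    	clrbuf(bp);

    	/* Drop the lock; the macro asserts B_REMFREE is no longer pending. */
    	BUF_UNLOCK(bp);
    }
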
diff --git a/freebsd/sys/mqueue.h b/freebsd/sys/mqueue.h
new file mode 100644
index 0000000..80d4047
--- /dev/null
+++ b/freebsd/sys/mqueue.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2005 David Xu <davidxu at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_MQUEUE_HH_
+#define _SYS_MQUEUE_HH_
+
+struct mq_attr {
+	long	mq_flags;	/* Message queue flags. */
+	long	mq_maxmsg;	/* Maximum number of messages. */
+	long	mq_msgsize;	/* Maximum message size. */
+	long	mq_curmsgs;	/* Number of messages currently queued. */
+	long    __reserved[4];  /* Ignored for input, zeroed for output */
+};
+
+#ifdef _KERNEL
+struct thread;
+struct file;
+extern void	(*mq_fdclose)(struct thread *td, int fd, struct file *fp);
+#endif
+#endif
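
For orientation only: struct mq_attr above is the attribute block handed to
the POSIX message-queue calls.  A minimal, purely illustrative
initialization (all values are assumptions) looks like this:

    struct mq_attr attr = {
    	.mq_flags   = 0,	/* blocking queue */
    	.mq_maxmsg  = 16,	/* at most 16 queued messages */
    	.mq_msgsize = 128,	/* each message up to 128 bytes */
    	.mq_curmsgs = 0,	/* output-only field, zero it on input */
    };
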
diff --git a/freebsd/sys/proc.h b/freebsd/sys/proc.h
index 39ddd78..04022ee 100644
--- a/freebsd/sys/proc.h
+++ b/freebsd/sys/proc.h
@@ -208,7 +208,9 @@ struct thread {
 	TAILQ_ENTRY(thread) td_slpq;	/* (t) Sleep queue. */
 	TAILQ_ENTRY(thread) td_lockq;	/* (t) Lock queue. */
 	struct cpuset	*td_cpuset;	/* (t) CPU affinity mask. */
+#endif /* __rtems__ */
 	struct seltd	*td_sel;	/* Select queue/channel. */
+#ifndef __rtems__
 	struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
 	struct turnstile *td_turnstile;	/* (k) Associated turnstile. */
 	struct umtx_q   *td_umtxq;	/* (c?) Link for when we're blocked. */
@@ -478,7 +480,9 @@ struct proc {
 	TAILQ_HEAD(, thread) p_threads;	/* (c) all threads. */
 	struct mtx	p_slock;	/* process spin lock */
 	struct ucred	*p_ucred;	/* (c) Process owner's identity. */
+#endif /* __rtems__ */
 	struct filedesc	*p_fd;		/* (b) Open files. */
+#ifndef __rtems__
 	struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */
 	struct pstats	*p_stats;	/* (b) Accounting/statistics (CPU). */
 	struct plimit	*p_limit;	/* (c) Process limits. */
@@ -572,6 +576,9 @@ struct proc {
 	struct kdtrace_proc	*p_dtrace; /* (*) DTrace-specific data. */
 	struct cv	p_pwait;	/* (*) wait cv for exit/exec */
 #else /* __rtems__ */
+  struct sigiolst	p_sigiolst;	/* (c) List of sigio sources. */
+  int		p_flag;		/* (c) P_* flags. */
+  struct proc	*p_leader;	/* (b) */
   struct ucred  *p_ucred; /* (c) Process owner's identity. */
   struct mtx  p_mtx;    /* (n) Lock for this struct. */
   rtems_id p_pid;
diff --git a/freebsd/sys/tty.h b/freebsd/sys/tty.h
new file mode 100644
index 0000000..a56d38f
--- /dev/null
+++ b/freebsd/sys/tty.h
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTY_HH_
+#define	_SYS_TTY_HH_
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/condvar.h>
+#include <freebsd/sys/selinfo.h>
+#ifndef __rtems__
+#include <freebsd/sys/termios.h>
+#endif
+#include <freebsd/sys/ttycom.h>
+#include <freebsd/sys/ttyqueue.h>
+
+struct cdev;
+struct file;
+struct pgrp;
+struct session;
+struct ucred;
+
+struct ttydevsw;
+
+/*
+ * Per-TTY structure, containing buffers, etc.
+ *
+ * List of locks
+ * (t)	locked by t_mtx
+ * (l)	locked by tty_list_sx
+ * (c)	const until freeing
+ */
+struct tty {
+	struct mtx	*t_mtx;		/* TTY lock. */
+	struct mtx	t_mtxobj;	/* Per-TTY lock (when not borrowing). */
+	TAILQ_ENTRY(tty) t_list;	/* (l) TTY list entry. */
+	unsigned int	t_flags;	/* (t) Terminal option flags. */
+/* Keep flags in sync with db_show_tty and pstat(8). */
+#define	TF_NOPREFIX	0x00001	/* Don't prepend "tty" to device name. */
+#define	TF_INITLOCK	0x00002	/* Create init/lock state devices. */
+#define	TF_CALLOUT	0x00004	/* Create "cua" devices. */
+#define	TF_OPENED_IN	0x00008	/* "tty" node is in use. */
+#define	TF_OPENED_OUT	0x00010	/* "cua" node is in use. */
+#define	TF_OPENED_CONS	0x00020 /* Device in use as console. */
+#define	TF_OPENED	(TF_OPENED_IN|TF_OPENED_OUT|TF_OPENED_CONS)
+#define	TF_GONE		0x00040	/* Device node is gone. */
+#define	TF_OPENCLOSE	0x00080	/* Device is in open()/close(). */
+#define	TF_ASYNC	0x00100	/* Asynchronous I/O enabled. */
+#define	TF_LITERAL	0x00200	/* Accept the next character literally. */
+#define	TF_HIWAT_IN	0x00400	/* We've reached the input watermark. */
+#define	TF_HIWAT_OUT	0x00800	/* We've reached the output watermark. */
+#define	TF_HIWAT	(TF_HIWAT_IN|TF_HIWAT_OUT)
+#define	TF_STOPPED	0x01000	/* Output flow control - stopped. */
+#define	TF_EXCLUDE	0x02000	/* Exclusive access. */
+#define	TF_BYPASS	0x04000	/* Optimized input path. */
+#define	TF_ZOMBIE	0x08000	/* Modem disconnect received. */
+#define	TF_HOOK		0x10000	/* TTY has hook attached. */
+#define	TF_BUSY_IN	0x20000	/* Process busy in read() -- not supported. */
+#define	TF_BUSY_OUT	0x40000	/* Process busy in write(). */
+#define	TF_BUSY		(TF_BUSY_IN|TF_BUSY_OUT)
+	unsigned int	t_revokecnt;	/* (t) revoke() count. */
+
+	/* Buffering mechanisms. */
+	struct ttyinq	t_inq;		/* (t) Input queue. */
+	size_t		t_inlow;	/* (t) Input low watermark. */
+	struct ttyoutq	t_outq;		/* (t) Output queue. */
+	size_t		t_outlow;	/* (t) Output low watermark. */
+
+	/* Sleeping mechanisms. */
+	struct cv	t_inwait;	/* (t) Input wait queue. */
+	struct cv	t_outwait;	/* (t) Output wait queue. */
+	struct cv	t_outserwait;	/* (t) Serial output wait queue. */
+	struct cv	t_bgwait;	/* (t) Background wait queue. */
+	struct cv	t_dcdwait;	/* (t) Carrier Detect wait queue. */
+
+	/* Polling mechanisms. */
+	struct selinfo	t_inpoll;	/* (t) Input poll queue. */
+	struct selinfo	t_outpoll;	/* (t) Output poll queue. */
+	struct sigio	*t_sigio;	/* (t) Asynchronous I/O. */
+
+	struct termios	t_termios;	/* (t) I/O processing flags. */
+	struct winsize	t_winsize;	/* (t) Window size. */
+	unsigned int	t_column;	/* (t) Current cursor position. */
+	unsigned int	t_writepos;	/* (t) Where input was interrupted. */
+	int		t_compatflags;	/* (t) COMPAT_43TTY flags. */
+
+	/* Init/lock-state devices. */
+	struct termios	t_termios_init_in;	/* tty%s.init. */
+	struct termios	t_termios_lock_in;	/* tty%s.lock. */
+	struct termios	t_termios_init_out;	/* cua%s.init. */
+	struct termios	t_termios_lock_out;	/* cua%s.lock. */
+
+	struct ttydevsw	*t_devsw;	/* (c) Driver hooks. */
+	struct ttyhook	*t_hook;	/* (t) Capture/inject hook. */
+
+	/* Process signal delivery. */
+	struct pgrp	*t_pgrp;	/* (t) Foreground process group. */
+	struct session	*t_session;	/* (t) Associated session. */
+	unsigned int	t_sessioncnt;	/* (t) Backpointing sessions. */
+
+	void		*t_devswsoftc;	/* (c) Soft config, for drivers. */
+	void		*t_hooksoftc;	/* (t) Soft config, for hooks. */
+	struct cdev	*t_dev;		/* (c) Primary character device. */
+};
+
+/*
+ * Userland version of struct tty, for sysctl kern.ttys
+ */
+struct xtty {
+	size_t	xt_size;	/* Structure size. */
+	size_t	xt_insize;	/* Input queue size. */
+	size_t	xt_incc;	/* Canonicalized characters. */
+	size_t	xt_inlc;	/* Input line characters. */
+	size_t	xt_inlow;	/* Input low watermark. */
+	size_t	xt_outsize;	/* Output queue size. */
+	size_t	xt_outcc;	/* Output queue usage. */
+	size_t	xt_outlow;	/* Output low watermark. */
+	unsigned int xt_column;	/* Current column position. */
+	pid_t	xt_pgid;	/* Foreground process group. */
+	pid_t	xt_sid;		/* Session. */
+	unsigned int xt_flags;	/* Terminal option flags. */
+	dev_t	xt_dev;		/* Userland device. */
+};
+
+#ifdef _KERNEL
+
+/* Allocation and deallocation. */
+struct tty *tty_alloc(struct ttydevsw *tsw, void *softc);
+struct tty *tty_alloc_mutex(struct ttydevsw *tsw, void *softc, struct mtx *mtx);
+void	tty_rel_pgrp(struct tty *tp, struct pgrp *pgrp);
+void	tty_rel_sess(struct tty *tp, struct session *sess);
+void	tty_rel_gone(struct tty *tp);
+
+#define	tty_lock(tp)		mtx_lock((tp)->t_mtx)
+#define	tty_unlock(tp)		mtx_unlock((tp)->t_mtx)
+#define	tty_lock_assert(tp,ma)	mtx_assert((tp)->t_mtx, (ma))
+#define	tty_getlock(tp)		((tp)->t_mtx)
+
+/* Device node creation. */
+void	tty_makedev(struct tty *tp, struct ucred *cred, const char *fmt, ...)
+    __printflike(3, 4);
+#define	tty_makealias(tp,fmt,...) \
+	make_dev_alias((tp)->t_dev, fmt, ## __VA_ARGS__)
+
+/* Signalling processes. */
+void	tty_signal_sessleader(struct tty *tp, int signal);
+void	tty_signal_pgrp(struct tty *tp, int signal);
+/* Waking up readers/writers. */
+int	tty_wait(struct tty *tp, struct cv *cv);
+int	tty_timedwait(struct tty *tp, struct cv *cv, int timo);
+void	tty_wakeup(struct tty *tp, int flags);
+
+/* System messages. */
+int	tty_checkoutq(struct tty *tp);
+int	tty_putchar(struct tty *tp, char c);
+
+int	tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag,
+    struct thread *td);
+int	tty_ioctl_compat(struct tty *tp, u_long cmd, caddr_t data,
+    int fflag, struct thread *td);
+void	tty_init_console(struct tty *tp, speed_t speed);
+void	tty_flush(struct tty *tp, int flags);
+void	tty_hiwat_in_block(struct tty *tp);
+void	tty_hiwat_in_unblock(struct tty *tp);
+dev_t	tty_udev(struct tty *tp);
+#define	tty_opened(tp)		((tp)->t_flags & TF_OPENED)
+#define	tty_gone(tp)		((tp)->t_flags & TF_GONE)
+#define	tty_softc(tp)		((tp)->t_devswsoftc)
+#define	tty_devname(tp)		devtoname((tp)->t_dev)
+
+/* Status line printing. */
+void	tty_info(struct tty *tp);
+
+/* /dev/console selection. */
+void	ttyconsdev_select(const char *name);
+
+/* Pseudo-terminal hooks. */
+int	pts_alloc_external(int fd, struct thread *td, struct file *fp,
+    struct cdev *dev, const char *name);
+
+/* Drivers and line disciplines also need to call these. */
+#include <freebsd/sys/ttydisc.h>
+#include <freebsd/sys/ttydevsw.h>
+#include <freebsd/sys/ttyhook.h>
+#endif /* _KERNEL */
+
+#endif /* !_SYS_TTY_HH_ */
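
A hedged sketch of how a driver typically uses the locking macros and the
wakeup routine declared in tty.h above; `tp' is an assumed, valid TTY and
FREAD/FWRITE come from <sys/fcntl.h>:

    #include <freebsd/sys/fcntl.h>
    #include <freebsd/sys/tty.h>

    static void
    example_wakeup(struct tty *tp)
    {
    	tty_lock(tp);
    	if (tty_opened(tp) && !tty_gone(tp))
    		tty_wakeup(tp, FREAD | FWRITE);	/* wake readers and writers */
    	tty_unlock(tp);
    }
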
diff --git a/freebsd/sys/ttydevsw.h b/freebsd/sys/ttydevsw.h
new file mode 100644
index 0000000..a403558
--- /dev/null
+++ b/freebsd/sys/ttydevsw.h
@@ -0,0 +1,169 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYDEVSW_HH_
+#define	_SYS_TTYDEVSW_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+/*
+ * Driver routines that are called from the line discipline to adjust
+ * hardware parameters and such.
+ */
+typedef int tsw_open_t(struct tty *tp);
+typedef void tsw_close_t(struct tty *tp);
+typedef void tsw_outwakeup_t(struct tty *tp);
+typedef void tsw_inwakeup_t(struct tty *tp);
+typedef int tsw_ioctl_t(struct tty *tp, u_long cmd, caddr_t data,
+    struct thread *td);
+typedef int tsw_param_t(struct tty *tp, struct termios *t);
+typedef int tsw_modem_t(struct tty *tp, int sigon, int sigoff);
+typedef int tsw_mmap_t(struct tty *tp, vm_offset_t offset,
+    vm_paddr_t * paddr, int nprot);
+typedef void tsw_pktnotify_t(struct tty *tp, char event);
+typedef void tsw_free_t(void *softc);
+
+struct ttydevsw {
+	unsigned int	tsw_flags;	/* Default TTY flags. */
+
+	tsw_open_t	*tsw_open;	/* Device opening. */
+	tsw_close_t	*tsw_close;	/* Device closure. */
+
+	tsw_outwakeup_t	*tsw_outwakeup;	/* Output available. */
+	tsw_inwakeup_t	*tsw_inwakeup;	/* Input can be stored again. */
+
+	tsw_ioctl_t	*tsw_ioctl;	/* ioctl() hooks. */
+	tsw_param_t	*tsw_param;	/* TIOCSETA device parameter setting. */
+	tsw_modem_t	*tsw_modem;	/* Modem sigon/sigoff. */
+
+	tsw_mmap_t	*tsw_mmap;	/* mmap() hooks. */
+	tsw_pktnotify_t	*tsw_pktnotify;	/* TIOCPKT events. */
+
+	tsw_free_t	*tsw_free;	/* Destructor. */
+};
+
+static __inline int
+ttydevsw_open(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	return tp->t_devsw->tsw_open(tp);
+}
+
+static __inline void
+ttydevsw_close(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	tp->t_devsw->tsw_close(tp);
+}
+
+static __inline void
+ttydevsw_outwakeup(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	/* Prevent spurious wakeups. */
+	if (ttydisc_getc_poll(tp) == 0)
+		return;
+
+	tp->t_devsw->tsw_outwakeup(tp);
+}
+
+static __inline void
+ttydevsw_inwakeup(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	/* Prevent spurious wakeups. */
+	if (tp->t_flags & TF_HIWAT_IN)
+		return;
+
+	tp->t_devsw->tsw_inwakeup(tp);
+}
+
+static __inline int
+ttydevsw_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	return tp->t_devsw->tsw_ioctl(tp, cmd, data, td);
+}
+
+static __inline int
+ttydevsw_param(struct tty *tp, struct termios *t)
+{
+	MPASS(!tty_gone(tp));
+
+	return tp->t_devsw->tsw_param(tp, t);
+}
+
+static __inline int
+ttydevsw_modem(struct tty *tp, int sigon, int sigoff)
+{
+	MPASS(!tty_gone(tp));
+
+	return tp->t_devsw->tsw_modem(tp, sigon, sigoff);
+}
+
+static __inline int
+ttydevsw_mmap(struct tty *tp, vm_offset_t offset, vm_paddr_t *paddr, int nprot)
+{
+	MPASS(!tty_gone(tp));
+
+	return tp->t_devsw->tsw_mmap(tp, offset, paddr, nprot);
+}
+
+static __inline void
+ttydevsw_pktnotify(struct tty *tp, char event)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	tp->t_devsw->tsw_pktnotify(tp, event);
+}
+
+static __inline void
+ttydevsw_free(struct tty *tp)
+{
+	MPASS(tty_gone(tp));
+
+	tp->t_devsw->tsw_free(tty_softc(tp));
+}
+
+#endif /* !_SYS_TTYDEVSW_HH_ */
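
As a sketch under assumed names, a driver fills in a struct ttydevsw with
the hooks it actually implements (tty_alloc() falls back to defaults for the
ones left NULL) and then creates the device nodes.  The example_* callback,
`sc' and `unit' below are hypothetical:

    static tsw_outwakeup_t example_outwakeup;	/* hypothetical driver hook */

    static struct ttydevsw example_ttydevsw = {
    	.tsw_flags     = TF_INITLOCK | TF_CALLOUT,
    	.tsw_outwakeup = example_outwakeup,
    };

    static void
    example_attach(void *sc, int unit)
    {
    	struct tty *tp;

    	tp = tty_alloc(&example_ttydevsw, sc);
    	tty_makedev(tp, NULL, "example%d", unit);
    }
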
diff --git a/freebsd/sys/ttydisc.h b/freebsd/sys/ttydisc.h
new file mode 100644
index 0000000..0019498
--- /dev/null
+++ b/freebsd/sys/ttydisc.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYDISC_HH_
+#define	_SYS_TTYDISC_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+struct cv;
+struct thread;
+struct tty;
+struct uio;
+
+/* Top half routines. */
+void	ttydisc_open(struct tty *tp);
+void	ttydisc_close(struct tty *tp);
+int	ttydisc_read(struct tty *tp, struct uio *uio, int ioflag);
+int	ttydisc_write(struct tty *tp, struct uio *uio, int ioflag);
+void	ttydisc_optimize(struct tty *tp);
+
+/* Bottom half routines. */
+void	ttydisc_modem(struct tty *tp, int open);
+#define ttydisc_can_bypass(tp) ((tp)->t_flags & TF_BYPASS)
+int	ttydisc_rint(struct tty *tp, char c, int flags);
+size_t	ttydisc_rint_bypass(struct tty *tp, const void *buf, size_t len);
+void	ttydisc_rint_done(struct tty *tp);
+size_t	ttydisc_rint_poll(struct tty *tp);
+size_t	ttydisc_getc(struct tty *tp, void *buf, size_t len);
+int	ttydisc_getc_uio(struct tty *tp, struct uio *uio);
+size_t	ttydisc_getc_poll(struct tty *tp);
+
+/* Error codes for ttydisc_rint(). */
+#define	TRE_FRAMING	0x01
+#define	TRE_PARITY	0x02
+#define	TRE_OVERRUN	0x04
+#define	TRE_BREAK	0x08
+
+static __inline size_t
+ttydisc_read_poll(struct tty *tp)
+{
+
+	tty_lock_assert(tp, MA_OWNED);
+
+	return ttyinq_bytescanonicalized(&tp->t_inq);
+}
+
+static __inline size_t
+ttydisc_write_poll(struct tty *tp)
+{
+
+	tty_lock_assert(tp, MA_OWNED);
+
+	return ttyoutq_bytesleft(&tp->t_outq);
+}
+
+#endif /* !_SYS_TTYDISC_HH_ */
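
For illustration (assumed driver context), received characters are pushed
through the bottom-half entry points declared above while the TTY lock is
held; `tp' and `c' are assumptions:

    static void
    example_rx_char(struct tty *tp, char c)
    {
    	tty_lock(tp);
    	if (!tty_gone(tp)) {
    		ttydisc_rint(tp, c, 0);	/* queue one input character */
    		ttydisc_rint_done(tp);	/* wake up readers and poll()ers */
    	}
    	tty_unlock(tp);
    }
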
diff --git a/freebsd/sys/ttyhook.h b/freebsd/sys/ttyhook.h
new file mode 100644
index 0000000..a15fbbb
--- /dev/null
+++ b/freebsd/sys/ttyhook.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYHOOK_HH_
+#define	_SYS_TTYHOOK_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+struct tty;
+
+/*
+ * Hooks interface, which allows capturing and injecting traffic into the
+ * input and output paths of a TTY.
+ */
+
+typedef int th_rint_t(struct tty *tp, char c, int flags);
+typedef size_t th_rint_bypass_t(struct tty *tp, const void *buf, size_t len);
+typedef void th_rint_done_t(struct tty *tp);
+typedef size_t th_rint_poll_t(struct tty *tp);
+
+typedef size_t th_getc_inject_t(struct tty *tp, void *buf, size_t len);
+typedef void th_getc_capture_t(struct tty *tp, const void *buf, size_t len);
+typedef size_t th_getc_poll_t(struct tty *tp);
+
+typedef void th_close_t(struct tty *tp);
+
+struct ttyhook {
+	/* Character input. */
+	th_rint_t		*th_rint;
+	th_rint_bypass_t	*th_rint_bypass;
+	th_rint_done_t		*th_rint_done;
+	th_rint_poll_t		*th_rint_poll;
+
+	/* Character output. */
+	th_getc_inject_t	*th_getc_inject;
+	th_getc_capture_t	*th_getc_capture;
+	th_getc_poll_t		*th_getc_poll;
+
+	th_close_t		*th_close;
+};
+
+int	ttyhook_register(struct tty **, struct proc *, int,
+    struct ttyhook *, void *);
+void	ttyhook_unregister(struct tty *);
+#define	ttyhook_softc(tp)		((tp)->t_hooksoftc)
+#define	ttyhook_hashook(tp,hook)	((tp)->t_hook != NULL && \
+					(tp)->t_hook->th_ ## hook != NULL)
+
+static __inline int
+ttyhook_rint(struct tty *tp, char c, int flags)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	return tp->t_hook->th_rint(tp, c, flags);
+}
+
+static __inline size_t
+ttyhook_rint_bypass(struct tty *tp, const void *buf, size_t len)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	return tp->t_hook->th_rint_bypass(tp, buf, len);
+}
+
+static __inline void
+ttyhook_rint_done(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	tp->t_hook->th_rint_done(tp);
+}
+
+static __inline size_t
+ttyhook_rint_poll(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	return tp->t_hook->th_rint_poll(tp);
+}
+
+static __inline size_t
+ttyhook_getc_inject(struct tty *tp, void *buf, size_t len)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	return tp->t_hook->th_getc_inject(tp, buf, len);
+}
+
+static __inline void
+ttyhook_getc_capture(struct tty *tp, const void *buf, size_t len)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	tp->t_hook->th_getc_capture(tp, buf, len);
+}
+
+static __inline size_t
+ttyhook_getc_poll(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+	MPASS(!tty_gone(tp));
+
+	return tp->t_hook->th_getc_poll(tp);
+}
+
+static __inline void
+ttyhook_close(struct tty *tp)
+{
+	tty_lock_assert(tp, MA_OWNED);
+
+	tp->t_hook->th_close(tp);
+}
+
+#endif /* !_SYS_TTYHOOK_HH_ */
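
A hedged sketch of the hook interface above: a capture-only hook that snoops
outbound TTY data, registered against a target process/descriptor pair.  All
example_* names, `p', `fd' and `softc' are assumptions:

    static th_getc_capture_t example_capture;	/* copies outbound bytes away */

    static struct ttyhook example_hook = {
    	.th_getc_capture = example_capture,
    };

    static int
    example_attach_hook(struct proc *p, int fd, void *softc)
    {
    	struct tty *tp;

    	return (ttyhook_register(&tp, p, fd, &example_hook, softc));
    }
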
diff --git a/freebsd/sys/ttyqueue.h b/freebsd/sys/ttyqueue.h
new file mode 100644
index 0000000..b9228bd
--- /dev/null
+++ b/freebsd/sys/ttyqueue.h
@@ -0,0 +1,178 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYQUEUE_HH_
+#define	_SYS_TTYQUEUE_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+struct tty;
+struct ttyinq_block;
+struct ttyoutq_block;
+struct uio;
+
+/* Data input queue. */
+struct ttyinq {
+	struct ttyinq_block	*ti_firstblock;
+	struct ttyinq_block	*ti_startblock;
+	struct ttyinq_block	*ti_reprintblock;
+	struct ttyinq_block	*ti_lastblock;
+	unsigned int		ti_begin;
+	unsigned int		ti_linestart;
+	unsigned int		ti_reprint;
+	unsigned int		ti_end;
+	unsigned int		ti_nblocks;
+	unsigned int		ti_quota;
+};
+#define TTYINQ_DATASIZE 128
+
+/* Data output queue. */
+struct ttyoutq {
+	struct ttyoutq_block	*to_firstblock;
+	struct ttyoutq_block	*to_lastblock;
+	unsigned int		to_begin;
+	unsigned int		to_end;
+	unsigned int		to_nblocks;
+	unsigned int		to_quota;
+};
+#define TTYOUTQ_DATASIZE (256 - sizeof(struct ttyoutq_block *))
+
+#ifdef _KERNEL
+/* Input queue handling routines. */
+void	ttyinq_setsize(struct ttyinq *ti, struct tty *tp, size_t len);
+void	ttyinq_free(struct ttyinq *ti);
+int	ttyinq_read_uio(struct ttyinq *ti, struct tty *tp, struct uio *uio,
+    size_t readlen, size_t flushlen);
+size_t	ttyinq_write(struct ttyinq *ti, const void *buf, size_t len,
+    int quote);
+int	ttyinq_write_nofrag(struct ttyinq *ti, const void *buf, size_t len,
+    int quote);
+void	ttyinq_canonicalize(struct ttyinq *ti);
+size_t	ttyinq_findchar(struct ttyinq *ti, const char *breakc, size_t maxlen,
+    char *lastc);
+void	ttyinq_flush(struct ttyinq *ti);
+int	ttyinq_peekchar(struct ttyinq *ti, char *c, int *quote);
+void	ttyinq_unputchar(struct ttyinq *ti);
+void	ttyinq_reprintpos_set(struct ttyinq *ti);
+void	ttyinq_reprintpos_reset(struct ttyinq *ti);
+
+static __inline size_t
+ttyinq_getsize(struct ttyinq *ti)
+{
+	return (ti->ti_nblocks * TTYINQ_DATASIZE);
+}
+
+static __inline size_t
+ttyinq_getallocatedsize(struct ttyinq *ti)
+{
+
+	return (ti->ti_quota * TTYINQ_DATASIZE);
+}
+
+static __inline size_t
+ttyinq_bytesleft(struct ttyinq *ti)
+{
+	size_t len;
+
+	/* Make sure the usage never exceeds the length. */
+	len = ti->ti_nblocks * TTYINQ_DATASIZE;
+	MPASS(len >= ti->ti_end);
+
+	return (len - ti->ti_end);
+}
+
+static __inline size_t
+ttyinq_bytescanonicalized(struct ttyinq *ti)
+{
+	MPASS(ti->ti_begin <= ti->ti_linestart);
+
+	return (ti->ti_linestart - ti->ti_begin);
+}
+
+static __inline size_t
+ttyinq_bytesline(struct ttyinq *ti)
+{
+	MPASS(ti->ti_linestart <= ti->ti_end);
+
+	return (ti->ti_end - ti->ti_linestart);
+}
+
+/* Input buffer iteration. */
+typedef void ttyinq_line_iterator_t(void *data, char c, int flags);
+void	ttyinq_line_iterate_from_linestart(struct ttyinq *ti,
+    ttyinq_line_iterator_t *iterator, void *data);
+void	ttyinq_line_iterate_from_reprintpos(struct ttyinq *ti,
+    ttyinq_line_iterator_t *iterator, void *data);
+
+/* Output queue handling routines. */
+void	ttyoutq_flush(struct ttyoutq *to);
+void	ttyoutq_setsize(struct ttyoutq *to, struct tty *tp, size_t len);
+void	ttyoutq_free(struct ttyoutq *to);
+size_t	ttyoutq_read(struct ttyoutq *to, void *buf, size_t len);
+int	ttyoutq_read_uio(struct ttyoutq *to, struct tty *tp, struct uio *uio);
+size_t	ttyoutq_write(struct ttyoutq *to, const void *buf, size_t len);
+int	ttyoutq_write_nofrag(struct ttyoutq *to, const void *buf, size_t len);
+
+static __inline size_t
+ttyoutq_getsize(struct ttyoutq *to)
+{
+	return (to->to_nblocks * TTYOUTQ_DATASIZE);
+}
+
+static __inline size_t
+ttyoutq_getallocatedsize(struct ttyoutq *to)
+{
+
+	return (to->to_quota * TTYOUTQ_DATASIZE);
+}
+
+static __inline size_t
+ttyoutq_bytesleft(struct ttyoutq *to)
+{
+	size_t len;
+
+	/* Make sure the usage never exceeds the length. */
+	len = to->to_nblocks * TTYOUTQ_DATASIZE;
+	MPASS(len >= to->to_end);
+
+	return (len - to->to_end);
+}
+
+static __inline size_t
+ttyoutq_bytesused(struct ttyoutq *to)
+{
+	return (to->to_end - to->to_begin);
+}
+#endif /* _KERNEL */
+
+#endif /* !_SYS_TTYQUEUE_HH_ */
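
As an assumed usage sketch, the inline accounting helpers above let a caller
check free space before copying data into the input queue; `tp', `buf' and
`len' are hypothetical and the TTY lock is assumed to be held:

    static void
    example_store_input(struct tty *tp, const void *buf, size_t len)
    {
    	if (ttyinq_bytesleft(&tp->t_inq) >= len)
    		(void)ttyinq_write(&tp->t_inq, buf, len, 0);	/* unquoted data */
    	else
    		tty_hiwat_in_block(tp);	/* apply input flow control */
    }
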
diff --git a/freebsd/sys/user.h b/freebsd/sys/user.h
new file mode 100644
index 0000000..df788c0
--- /dev/null
+++ b/freebsd/sys/user.h
@@ -0,0 +1,414 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.
+ * Copyright (c) 2007 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)user.h	8.2 (Berkeley) 9/23/93
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_USER_HH_
+#define _SYS_USER_HH_
+
+#include <freebsd/machine/pcb.h>
+#ifndef _KERNEL
+/* stuff that *used* to be included by user.h, or is now needed */
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/resource.h>
+#include <freebsd/sys/ucred.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/_lock.h>
+#include <freebsd/sys/_mutex.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/vm/vm.h>		/* XXX */
+#include <freebsd/vm/vm_param.h>	/* XXX */
+#include <freebsd/vm/pmap.h>		/* XXX */
+#include <freebsd/vm/vm_map.h>		/* XXX */
+#endif /* !_KERNEL */
+#ifndef _SYS_RESOURCEVAR_HH_
+#include <freebsd/sys/resourcevar.h>
+#endif
+#ifndef _SYS_SIGNALVAR_HH_
+#include <freebsd/sys/signalvar.h>
+#endif
+#ifndef _SYS_SOCKET_VAR_HH_
+#include <freebsd/sys/socket.h>
+#endif
+
+/*
+ * KERN_PROC subtype ops return arrays of selected proc structure entries:
+ *
+ * This struct includes several arrays of spare space, with different arrays
+ * for different standard C-types.  When adding new variables to this struct,
+ * the space for byte-aligned data should be taken from the ki_sparestrings,
+ * pointers from ki_spareptrs, word-aligned data from ki_spareints, and
+ * doubleword-aligned data from ki_sparelongs.  Make sure the space for new
+ * variables comes from the array which matches the size and alignment of
+ * those variables on ALL hardware platforms, and then adjust the appropriate
+ * KI_NSPARE_* value(s) to match.
+ *
+ * Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all
+ * platforms after you have added new variables.  Note that if you change
+ * the value of KINFO_PROC_SIZE, then many userland programs will stop
+ * working until they are recompiled!
+ *
+ * Once you have added the new field, you will need to add code to initialize
+ * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and
+ * function kvm_proclist in lib/libkvm/kvm_proc.c .
+ */
+#define	KI_NSPARE_INT	9
+#define	KI_NSPARE_LONG	12
+#define	KI_NSPARE_PTR	6
+
+#ifndef _KERNEL
+#ifndef KINFO_PROC_SIZE
+#error "Unknown architecture"
+#endif
+#endif /* !_KERNEL */
+
+#define	WMESGLEN	8		/* size of returned wchan message */
+#define	LOCKNAMELEN	8		/* size of returned lock name */
+#define	OCOMMLEN	16		/* size of returned thread name */
+#define	COMMLEN		19		/* size of returned ki_comm name */
+#define	KI_EMULNAMELEN	16		/* size of returned ki_emul */
+#define KI_NGROUPS	16		/* number of groups in ki_groups */
+#define	LOGNAMELEN	17		/* size of returned ki_login */
+
+/*
+ * Steal a bit from ki_cr_flags (cr_flags is never used) to indicate
+ * that the cred had more than KI_NGROUPS groups.
+ */
+#define KI_CRF_GRP_OVERFLOW	0x80000000
+
+struct kinfo_proc {
+	int	ki_structsize;		/* size of this structure */
+	int	ki_layout;		/* reserved: layout identifier */
+	struct	pargs *ki_args;		/* address of command arguments */
+	struct	proc *ki_paddr;		/* address of proc */
+	struct	user *ki_addr;		/* kernel virtual addr of u-area */
+	struct	vnode *ki_tracep;	/* pointer to trace file */
+	struct	vnode *ki_textvp;	/* pointer to executable file */
+	struct	filedesc *ki_fd;	/* pointer to open file info */
+	struct	vmspace *ki_vmspace;	/* pointer to kernel vmspace struct */
+	void	*ki_wchan;		/* sleep address */
+	pid_t	ki_pid;			/* Process identifier */
+	pid_t	ki_ppid;		/* parent process id */
+	pid_t	ki_pgid;		/* process group id */
+	pid_t	ki_tpgid;		/* tty process group id */
+	pid_t	ki_sid;			/* Process session ID */
+	pid_t	ki_tsid;		/* Terminal session ID */
+	short	ki_jobc;		/* job control counter */
+	short	ki_spare_short1;	/* unused (just here for alignment) */
+	dev_t	ki_tdev;		/* controlling tty dev */
+	sigset_t ki_siglist;		/* Signals arrived but not delivered */
+	sigset_t ki_sigmask;		/* Current signal mask */
+	sigset_t ki_sigignore;		/* Signals being ignored */
+	sigset_t ki_sigcatch;		/* Signals being caught by user */
+	uid_t	ki_uid;			/* effective user id */
+	uid_t	ki_ruid;		/* Real user id */
+	uid_t	ki_svuid;		/* Saved effective user id */
+	gid_t	ki_rgid;		/* Real group id */
+	gid_t	ki_svgid;		/* Saved effective group id */
+	short	ki_ngroups;		/* number of groups */
+	short	ki_spare_short2;	/* unused (just here for alignment) */
+	gid_t 	ki_groups[KI_NGROUPS];	/* groups */
+	vm_size_t ki_size;		/* virtual size */
+	segsz_t ki_rssize;		/* current resident set size in pages */
+	segsz_t ki_swrss;		/* resident set size before last swap */
+	segsz_t ki_tsize;		/* text size (pages) XXX */
+	segsz_t ki_dsize;		/* data size (pages) XXX */
+	segsz_t ki_ssize;		/* stack size (pages) */
+	u_short	ki_xstat;		/* Exit status for wait & stop signal */
+	u_short	ki_acflag;		/* Accounting flags */
+	fixpt_t	ki_pctcpu;	 	/* %cpu for process during ki_swtime */
+	u_int	ki_estcpu;	 	/* Time averaged value of ki_cpticks */
+	u_int	ki_slptime;	 	/* Time since last blocked */
+	u_int	ki_swtime;	 	/* Time swapped in or out */
+	int	ki_spareint1;	 	/* unused (just here for alignment) */
+	u_int64_t ki_runtime;		/* Real time in microsec */
+	struct	timeval ki_start;	/* starting time */
+	struct	timeval ki_childtime;	/* time used by process children */
+	long	ki_flag;		/* P_* flags */
+	long	ki_kiflag;		/* KI_* flags (below) */
+	int	ki_traceflag;		/* Kernel trace points */
+	char	ki_stat;		/* S* process status */
+	signed char ki_nice;		/* Process "nice" value */
+	char	ki_lock;		/* Process lock (prevent swap) count */
+	char	ki_rqindex;		/* Run queue index */
+	u_char	ki_oncpu;		/* Which cpu we are on */
+	u_char	ki_lastcpu;		/* Last cpu we were on */
+	char	ki_ocomm[OCOMMLEN+1];	/* thread name */
+	char	ki_wmesg[WMESGLEN+1];	/* wchan message */
+	char	ki_login[LOGNAMELEN+1];	/* setlogin name */
+	char	ki_lockname[LOCKNAMELEN+1]; /* lock name */
+	char	ki_comm[COMMLEN+1];	/* command name */
+	char	ki_emul[KI_EMULNAMELEN+1];  /* emulation name */
+	/*
+	 * When adding new variables, take space for char-strings from the
+	 * front of ki_sparestrings, and ints from the end of ki_spareints.
+	 * That way the spare room from both arrays will remain contiguous.
+	 */
+	char	ki_sparestrings[68];	/* spare string space */
+	int	ki_spareints[KI_NSPARE_INT];	/* spare room for growth */
+	u_int	ki_cr_flags;		/* Credential flags */
+	int	ki_jid;			/* Process jail ID */
+	int	ki_numthreads;		/* XXXKSE number of threads in total */
+	lwpid_t	ki_tid;			/* XXXKSE thread id */
+	struct	priority ki_pri;	/* process priority */
+	struct	rusage ki_rusage;	/* process rusage statistics */
+	/* XXX - most fields in ki_rusage_ch are not (yet) filled in */
+	struct	rusage ki_rusage_ch;	/* rusage of children processes */
+	struct	pcb *ki_pcb;		/* kernel virtual addr of pcb */
+	void	*ki_kstack;		/* kernel virtual addr of stack */
+	void	*ki_udata;		/* User convenience pointer */
+	struct	thread *ki_tdaddr;	/* address of thread */
+	/*
+	 * When adding new variables, take space for pointers from the
+	 * front of ki_spareptrs, and longs from the end of ki_sparelongs.
+	 * That way the spare room from both arrays will remain contiguous.
+	 */
+	void	*ki_spareptrs[KI_NSPARE_PTR];	/* spare room for growth */
+	long	ki_sparelongs[KI_NSPARE_LONG];	/* spare room for growth */
+	long	ki_sflag;		/* PS_* flags */
+	long	ki_tdflags;		/* XXXKSE kthread flag */
+};
+void fill_kinfo_proc(struct proc *, struct kinfo_proc *);
+/* XXX - the following two defines are temporary */
+#define	ki_childstime	ki_rusage_ch.ru_stime
+#define	ki_childutime	ki_rusage_ch.ru_utime
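
For orientation only, a userland sketch of the KERN_PROC sysctl interface
through which kinfo_proc records such as the one above are returned;
CTL_KERN, KERN_PROC and KERN_PROC_PID come from <sys/sysctl.h> and error
handling is abbreviated:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <sys/user.h>
    #include <unistd.h>

    static int
    example_self_kinfo(struct kinfo_proc *kp)
    {
    	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, (int)getpid() };
    	size_t len = sizeof(*kp);

    	return (sysctl(mib, 4, kp, &len, NULL, 0));
    }
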
+
+/*
+ *  Legacy PS_ flag.  This moved to p_flag but is maintained for
+ *  compatibility.
+ */
+#define	PS_INMEM	0x00001		/* Loaded into memory. */
+
+/* ki_sessflag values */
+#define	KI_CTTY		0x00000001	/* controlling tty vnode active */
+#define	KI_SLEADER	0x00000002	/* session leader */
+#define	KI_LOCKBLOCK	0x00000004	/* proc blocked on lock ki_lockname */
+
+/*
+ * This used to be the per-process structure containing data that
+ * isn't needed in core when the process is swapped out, but now it
+ * remains only for the benefit of a.out core dumps.
+ */
+struct user {
+	struct	pstats u_stats;		/* *p_stats */
+	struct	kinfo_proc u_kproc;	/* eproc */
+};
+
+/*
+ * The KERN_PROC_FILE sysctl allows a process to dump the file descriptor
+ * array of another process.
+ */
+#define	KF_TYPE_NONE	0
+#define	KF_TYPE_VNODE	1
+#define	KF_TYPE_SOCKET	2
+#define	KF_TYPE_PIPE	3
+#define	KF_TYPE_FIFO	4
+#define	KF_TYPE_KQUEUE	5
+#define	KF_TYPE_CRYPTO	6
+#define	KF_TYPE_MQUEUE	7
+#define	KF_TYPE_SHM	8
+#define	KF_TYPE_SEM	9
+#define	KF_TYPE_PTS	10
+#define	KF_TYPE_UNKNOWN	255
+
+#define	KF_VTYPE_VNON	0
+#define	KF_VTYPE_VREG	1
+#define	KF_VTYPE_VDIR	2
+#define	KF_VTYPE_VBLK	3
+#define	KF_VTYPE_VCHR	4
+#define	KF_VTYPE_VLNK	5
+#define	KF_VTYPE_VSOCK	6
+#define	KF_VTYPE_VFIFO	7
+#define	KF_VTYPE_VBAD	8
+#define	KF_VTYPE_UNKNOWN	255
+
+#define	KF_FD_TYPE_CWD	-1	/* Current working directory */
+#define	KF_FD_TYPE_ROOT	-2	/* Root directory */
+#define	KF_FD_TYPE_JAIL	-3	/* Jail directory */
+
+#define	KF_FLAG_READ		0x00000001
+#define	KF_FLAG_WRITE		0x00000002
+#define	KF_FLAG_APPEND		0x00000004
+#define	KF_FLAG_ASYNC		0x00000008
+#define	KF_FLAG_FSYNC		0x00000010
+#define	KF_FLAG_NONBLOCK	0x00000020
+#define	KF_FLAG_DIRECT		0x00000040
+#define	KF_FLAG_HASLOCK		0x00000080
+
+/*
+ * Old format.  Has variable hidden padding due to alignment.
+ * This is a compatibility hack for pre-built 7.1 packages.
+ */
+#if defined(__amd64__)
+#define	KINFO_OFILE_SIZE	1328
+#endif
+#if defined(__i386__)
+#define	KINFO_OFILE_SIZE	1324
+#endif
+
+struct kinfo_ofile {
+	int	kf_structsize;			/* Size of kinfo_file. */
+	int	kf_type;			/* Descriptor type. */
+	int	kf_fd;				/* Array index. */
+	int	kf_ref_count;			/* Reference count. */
+	int	kf_flags;			/* Flags. */
+	/* XXX Hidden alignment padding here on amd64 */
+	off_t	kf_offset;			/* Seek location. */
+	int	kf_vnode_type;			/* Vnode type. */
+	int	kf_sock_domain;			/* Socket domain. */
+	int	kf_sock_type;			/* Socket type. */
+	int	kf_sock_protocol;		/* Socket protocol. */
+	char	kf_path[PATH_MAX];	/* Path to file, if any. */
+	struct sockaddr_storage kf_sa_local;	/* Socket address. */
+	struct sockaddr_storage	kf_sa_peer;	/* Peer address. */
+};
+
+#if defined(__amd64__) || defined(__i386__)
+#define	KINFO_FILE_SIZE	1392
+#endif
+
+struct kinfo_file {
+	int	kf_structsize;			/* Variable size of record. */
+	int	kf_type;			/* Descriptor type. */
+	int	kf_fd;				/* Array index. */
+	int	kf_ref_count;			/* Reference count. */
+	int	kf_flags;			/* Flags. */
+	int	_kf_pad0;			/* Round to 64 bit alignment */
+	int64_t	kf_offset;			/* Seek location. */
+	int	kf_vnode_type;			/* Vnode type. */
+	int	kf_sock_domain;			/* Socket domain. */
+	int	kf_sock_type;			/* Socket type. */
+	int	kf_sock_protocol;		/* Socket protocol. */
+	struct sockaddr_storage kf_sa_local;	/* Socket address. */
+	struct sockaddr_storage	kf_sa_peer;	/* Peer address. */
+	int	_kf_ispare[16];			/* Space for more stuff. */
+	/* Truncated before copyout in sysctl */
+	char	kf_path[PATH_MAX];		/* Path to file, if any. */
+};
+
+/*
+ * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of
+ * another process as a series of entries.
+ */
+#define	KVME_TYPE_NONE		0
+#define	KVME_TYPE_DEFAULT	1
+#define	KVME_TYPE_VNODE		2
+#define	KVME_TYPE_SWAP		3
+#define	KVME_TYPE_DEVICE	4
+#define	KVME_TYPE_PHYS		5
+#define	KVME_TYPE_DEAD		6
+#define	KVME_TYPE_SG		7
+#define	KVME_TYPE_UNKNOWN	255
+
+#define	KVME_PROT_READ		0x00000001
+#define	KVME_PROT_WRITE		0x00000002
+#define	KVME_PROT_EXEC		0x00000004
+
+#define	KVME_FLAG_COW		0x00000001
+#define	KVME_FLAG_NEEDS_COPY	0x00000002
+#define	KVME_FLAG_NOCOREDUMP	0x00000004
+
+#if defined(__amd64__)
+#define	KINFO_OVMENTRY_SIZE	1168
+#endif
+#if defined(__i386__)
+#define	KINFO_OVMENTRY_SIZE	1128
+#endif
+
+struct kinfo_ovmentry {
+	int	 kve_structsize;		/* Size of kinfo_vmmapentry. */
+	int	 kve_type;			/* Type of map entry. */
+	void	*kve_start;			/* Starting address. */
+	void	*kve_end;			/* Finishing address. */
+	int	 kve_flags;			/* Flags on map entry. */
+	int	 kve_resident;			/* Number of resident pages. */
+	int	 kve_private_resident;		/* Number of private pages. */
+	int	 kve_protection;		/* Protection bitmask. */
+	int	 kve_ref_count;			/* VM obj ref count. */
+	int	 kve_shadow_count;		/* VM obj shadow count. */
+	char	 kve_path[PATH_MAX];		/* Path to VM obj, if any. */
+	void	*_kve_pspare[8];		/* Space for more stuff. */
+	off_t	 kve_offset;			/* Mapping offset in object */
+	uint64_t kve_fileid;			/* inode number if vnode */
+	dev_t	 kve_fsid;			/* dev_t of vnode location */
+	int	 _kve_ispare[3];		/* Space for more stuff. */
+};
+
+#if defined(__amd64__) || defined(__i386__)
+#define	KINFO_VMENTRY_SIZE	1160
+#endif
+
+struct kinfo_vmentry {
+	int	 kve_structsize;		/* Variable size of record. */
+	int	 kve_type;			/* Type of map entry. */
+	uint64_t kve_start;			/* Starting address. */
+	uint64_t kve_end;			/* Finishing address. */
+	uint64_t kve_offset;			/* Mapping offset in object */
+	uint64_t kve_fileid;			/* inode number if vnode */
+	uint32_t kve_fsid;			/* dev_t of vnode location */
+	int	 kve_flags;			/* Flags on map entry. */
+	int	 kve_resident;			/* Number of resident pages. */
+	int	 kve_private_resident;		/* Number of private pages. */
+	int	 kve_protection;		/* Protection bitmask. */
+	int	 kve_ref_count;			/* VM obj ref count. */
+	int	 kve_shadow_count;		/* VM obj shadow count. */
+	int	 _kve_pad0;			/* 64bit align next field */
+	int	 _kve_ispare[16];		/* Space for more stuff. */
+	/* Truncated before copyout in sysctl */
+	char	 kve_path[PATH_MAX];		/* Path to VM obj, if any. */
+};
+
+/*
+ * The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
+ * another process as a series of entries.  Each stack is represented by a
+ * series of symbol names and offsets as generated by stack_sbuf_print(9).
+ */
+#define	KKST_MAXLEN	1024
+
+#define	KKST_STATE_STACKOK	0		/* Stack is valid. */
+#define	KKST_STATE_SWAPPED	1		/* Stack swapped out. */
+#define	KKST_STATE_RUNNING	2		/* Stack ephemeral. */
+
+#if defined(__amd64__) || defined(__i386__)
+#define	KINFO_KSTACK_SIZE	1096
+#endif
+
+struct kinfo_kstack {
+	lwpid_t	 kkst_tid;			/* ID of thread. */
+	int	 kkst_state;			/* Validity of stack. */
+	char	 kkst_trace[KKST_MAXLEN];	/* String representing stack. */
+	int	 _kkst_ispare[16];		/* Space for more stuff. */
+};
+
+#endif
diff --git a/rtemsbsd/src/rtems-bsd-condvar.c b/rtemsbsd/src/rtems-bsd-condvar.c
index 7c16940..63f3000 100644
--- a/rtemsbsd/src/rtems-bsd-condvar.c
+++ b/rtemsbsd/src/rtems-bsd-condvar.c
@@ -183,3 +183,20 @@ cv_broadcastpri(struct cv *cv, int pri)
 	rv = pthread_cond_broadcast(&cv->cv_id);
 	BSD_ASSERT_RV(rv);
 }
+int
+_cv_wait_sig(struct cv *cvp, struct lock_object *lock)
+{
+	/* XXX signal semantics are not honored; behaves like _cv_wait() */
+	return _cv_wait_support(cvp, lock, 0, true);
+}
+
+int
+_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+{
+	/* XXX signal semantics are not honored; clamp timo to at least one tick */
+	if (timo <= 0) {
+		timo = 1;
+	}
+
+	return _cv_wait_support(cvp, lock, timo, true);
+}
diff --git a/rtemsbsd/src/rtems-bsd-thread.c b/rtemsbsd/src/rtems-bsd-thread.c
index fc41411..ca1ff1f 100644
--- a/rtemsbsd/src/rtems-bsd-thread.c
+++ b/rtemsbsd/src/rtems-bsd-thread.c
@@ -49,6 +49,7 @@
 #include <freebsd/sys/mutex.h>
 #include <freebsd/sys/jail.h>
 #include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/filedesc.h>
 
 RTEMS_CHAIN_DEFINE_EMPTY(rtems_bsd_thread_chain);
 
@@ -56,6 +57,9 @@ RTEMS_CHAIN_DEFINE_EMPTY(rtems_bsd_thread_chain);
 static struct ucred FIXME_ucred = {
   .cr_ref = 1				/* reference count */
 };
+static struct filedesc FIXME_fd = {
+  .fd_ofiles = NULL	/* file structures for open files */
+};
 static struct proc  FIXME_proc = {
   .p_ucred = NULL /* (c) Process owner's identity. */
 };
@@ -67,29 +71,34 @@ static struct prison FIXME_prison = {
 static struct uidinfo	FIXME_uidinfo;	/* per euid resource consumption */
 static struct uidinfo	FIXME_ruidinfo;	/* per ruid resource consumption */
 
+static struct thread *rtems_bsd_current_td = NULL;
+
+static void rtems_bsd_thread_descriptor_dtor(void *td)
+{
+	// XXX are there other pieces to clean up?
+	free(td, M_TEMP);
+}
+
 static struct thread *
-rtems_bsd_thread_init_note( rtems_id id )
+rtems_bsd_thread_init( rtems_id id )
 {
 	rtems_status_code sc = RTEMS_SUCCESSFUL;
 	unsigned index = 0;
 	char name [5] = "_???";
-	struct thread *td = malloc(sizeof(struct thread), M_TEMP, M_WAITOK | M_ZERO);
-  struct proc   *proc;
-
-	if ( td == NULL )
-		return td;
+	struct thread *td;
+	struct proc   *proc;
 
-	sc = rtems_task_set_note( id, RTEMS_NOTEPAD_0, ( uint32_t )td );
-	if (sc != RTEMS_SUCCESSFUL) {
-		free(td, M_TEMP);
+	td = malloc(sizeof(struct thread), M_TEMP, M_WAITOK | M_ZERO);
+	if (td == NULL)
 		return NULL;
-	}
 
+	// Initialize the thread descriptor
 	index = rtems_object_id_get_index(id);
 	snprintf(name + 1, sizeof(name) - 1, "%03u", index);
 	sc = rtems_object_set_name(id, name);
 	if (sc != RTEMS_SUCCESSFUL) {
-		rtems_task_delete(id);
+		// XXX does the thread get deleted? Seems wrong
+		// rtems_task_delete(id);
 		free(td, M_TEMP);
 		return 	NULL;
 	}
@@ -98,55 +107,62 @@ rtems_bsd_thread_init_note( rtems_id id )
 	td->td_ucred = crhold(&FIXME_ucred);
   
 	td->td_proc = &FIXME_proc;
-	if (td->td_proc->p_ucred != NULL)
-		return td;
-
-  if (prison_init ) {
-    mtx_init(&FIXME_prison.pr_mtx, "prison lock", NULL, MTX_DEF | MTX_DUPOK);
-
-    prison_init = 0;
-  }
+	if (td->td_proc->p_ucred == NULL) {
+		if (prison_init) {
+			mtx_init(&FIXME_prison.pr_mtx, "prison lock", NULL, MTX_DEF | MTX_DUPOK);
+			prison_init = 0;
+		}
+		FIXME_ucred.cr_prison   = &FIXME_prison;    /* jail(2) */
+		FIXME_ucred.cr_uidinfo  = uifind(0);
+		FIXME_ucred.cr_ruidinfo = uifind(0);
+		FIXME_ucred.cr_ngroups = 1;     /* group 0 */
+
+		td->td_proc->p_ucred = crhold(&FIXME_ucred);
+		mtx_init(&td->td_proc->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
+		td->td_proc->p_pid = getpid();
+		td->td_proc->p_fibnum = 0;
+		td->td_proc->p_fd = &FIXME_fd;
+		sx_init_flags(&FIXME_fd.fd_sx, "config SX thread lock", SX_DUPOK);
+	}
 
-  FIXME_ucred.cr_prison   = &FIXME_prison;    /* jail(2) */
-  FIXME_ucred.cr_uidinfo  = uifind(0);
-  FIXME_ucred.cr_ruidinfo = uifind(0);
-  FIXME_ucred.cr_ngroups = 1;     /* group 0 */
+	// Actually set the global pointer 
+	rtems_bsd_current_td = td;
 
-	td->td_proc->p_ucred = crhold(&FIXME_ucred);
-	mtx_init(&td->td_proc->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
-	td->td_proc->p_pid = getpid();
-	td->td_proc->p_fibnum = 0;
+	// Now add the thread descriptor as a per-task variable
+	sc = rtems_task_variable_add(
+		id,
+		&rtems_bsd_current_td,
+		rtems_bsd_thread_descriptor_dtor
+	);
+	if (sc != RTEMS_SUCCESSFUL) {
+		free(td, M_TEMP);
+		return NULL;
+	}
 
-  return td;
+  	return td;
 }
 
 /*
- *  XXX Threads which delete themselves will leak this
- *  XXX Maybe better integrated into the TCB OR a task variable.
- *  XXX but this is OK for now
+ *  Threads which delete themselves would leak the thread
+ *  descriptor, so it is stored in a per-task variable whose
+ *  destructor frees it when the task is deleted.
  */
 struct thread *rtems_get_curthread(void)
 {
 	struct thread *td;
-	rtems_status_code sc;
-	rtems_id id;
 
 	/*
 	 * If we already have a struct thread associated with this thread,
-	 * obtain it
+	 * obtain it. Otherwise, allocate and initialize one.
 	 */
-  id = rtems_task_self();
-
-	sc = rtems_task_get_note( id, RTEMS_NOTEPAD_0, (uint32_t *) &td );
-	if (sc != RTEMS_SUCCESSFUL) {
-			panic("rtems_get_curthread: get note Error\n");
+	td = rtems_bsd_current_td;
+	if ( td == NULL ) {
+		td = rtems_bsd_thread_init( rtems_task_self() );
+		if ( td == NULL ){
+			panic("rtems_get_curthread: Unable to initialize thread descriptor\n");
+		}
 	}
 
-  td = rtems_bsd_thread_init_note( id);
-  if ( td == NULL ){
-		panic("rtems_get_curthread: Unable to generate thread note\n");
-  }
-
   return td;
 }
 
@@ -163,6 +179,8 @@ rtems_bsd_thread_start(struct thread **td_ptr, void (*func)(void *), void *arg,
 
 		BSD_ASSERT(pages >= 0);
 
+		memset( td, 0, sizeof(struct thread) );
+
 		sc = rtems_task_create(
 			rtems_build_name('_', 'T', 'S', 'K'),
 			BSD_TASK_PRIORITY_NORMAL,
@@ -177,8 +195,8 @@ rtems_bsd_thread_start(struct thread **td_ptr, void (*func)(void *), void *arg,
 			return ENOMEM;
 		}
 
-    td = rtems_bsd_thread_init_note( id );
-    if (!td)
+		td = rtems_bsd_thread_init( id );
+		if (!td)
 			return ENOMEM;
 		
 		sc = rtems_task_start(id, (rtems_task_entry) func, (rtems_task_argument) arg);
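
For reference, the reworked rtems_get_curthread() above follows a lazy
per-task initialization pattern: the descriptor is allocated on first use,
published through a per-task variable, and reclaimed by that variable's
destructor when the task is deleted.  A minimal sketch of the pattern is
below; the descriptor type and function names are illustrative placeholders
rather than code from the commit, and only the classic RTEMS task-variable
API calls (rtems_task_variable_add, rtems_task_self) come from the change
itself.

#include <rtems.h>
#include <stdlib.h>

/* Placeholder for the FreeBSD struct thread used in the commit. */
struct descriptor {
	int id;
};

/* Behaves as a per-task variable once registered with the executive. */
static struct descriptor *current_desc;

/* Called by RTEMS when the owning task is deleted, so nothing leaks. */
static void descriptor_dtor(void *ptr)
{
	free(ptr);
}

struct descriptor *get_current_descriptor(void)
{
	if (current_desc == NULL) {
		struct descriptor *d = calloc(1, sizeof(*d));

		if (d == NULL)
			return NULL;

		/* Publish the new descriptor, then register it so the
		 * executive swaps it per task and runs the dtor on delete. */
		current_desc = d;
		if (rtems_task_variable_add(rtems_task_self(),
		    (void **) &current_desc, descriptor_dtor)
		    != RTEMS_SUCCESSFUL) {
			current_desc = NULL;
			free(d);
			return NULL;
		}
	}

	return current_desc;
}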



