/* ==== fd_kern.c ============================================================
 * Copyright (c) 1993, 1994 by Chris Provenzano, proven@mit.edu
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *  This product includes software developed by Chris Provenzano.
 * 4. The name of Chris Provenzano may not be used to endorse or promote 
 *	  products derived from this software without specific prior written
 *	  permission.
 *
 * THIS SOFTWARE IS PROVIDED BY CHRIS PROVENZANO ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL CHRIS PROVENZANO BE LIABLE FOR ANY 
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
 * SUCH DAMAGE.
 *
 * Description : Deals with the valid kernel fds.
 *
 *  1.00 93/09/27 proven
 *      -Started coding this file.
 *
 *	1.01 93/11/13 proven
 *		-The functions readv() and writev() added.
 */

#ifndef lint
static const char rcsid[] = "$Id: fd_kern.c,v 1.50.2.2 94/10/05 06:47:53 proven Exp $";
#endif

#include <pthread.h>
#include <sys/compat.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <stdarg.h>
#include <signal.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread/posix.h>

/* ==========================================================================
 * Variables used by both fd_kern_poll and fd_kern_wait
 */
/* Heads of the singly linked lists (chained through ->next) of threads
 * blocked waiting to read from / write to a kernel fd. */
static struct pthread *fd_wait_read, *fd_wait_write;

/* ==========================================================================
 * fd_kern_poll()
 *
 * Called only from context_switch(). The kernel must be locked.
 *
 * This function uses a linked list of waiting pthreads, NOT a queue.
 */ 
/* Guards the fd_wait_read / fd_wait_write lists; taken by every function in
 * this file that queues a thread on them. */
static semaphore fd_wait_lock = SEMAPHORE_CLEAR;

/*
 * Non-blocking check of every fd some thread is waiting on.
 *
 * Builds read/write fd_sets from the two wait lists, select()s on them with
 * a zero timeout, and for each waiter whose fd is reported ready sets its
 * state to PS_RUNNING and unlinks it from its wait list.
 *
 * Returns without doing anything if fd_wait_lock is already held, and
 * returns early (after releasing the lock) if select() reports -EINTR.
 * Any other select() failure is a PANIC().
 */
void fd_kern_poll()
{
	/* Zero timeout: select() polls instead of blocking. */
	struct timeval __fd_kern_poll_timeout = { 0, 0 };
	fd_set fd_set_read, fd_set_write;
	/* Points at the link (list head or a ->next field) that refers to the
	 * thread being examined, so a thread can be unlinked in place. */
	struct pthread **pthread;
	semaphore *lock;
	int count;

	FD_ZERO(&fd_set_read);
	FD_ZERO(&fd_set_write);
	/* If someone has the lock then they are in RUNNING state, just return */
	lock = &fd_wait_lock;
	if (SEMAPHORE_TEST_AND_SET(lock)) {
		return;
	}
	if (fd_wait_read || fd_wait_write) {
		/* Collect the fds of all waiting readers and writers. */
		for (pthread = &fd_wait_read; *pthread; pthread = &((*pthread)->next)) {
			FD_SET((*pthread)->fd, &fd_set_read);
		}
		for (pthread = &fd_wait_write; *pthread; pthread = &((*pthread)->next)) {
			FD_SET((*pthread)->fd, &fd_set_write);
		}

		if ((count = machdep_sys_select(dtablesize, &fd_set_read,
		  &fd_set_write, NULL, &__fd_kern_poll_timeout)) < OK) {
			if (count == -EINTR) {
				/* Interrupted: give up for this round. */
				SEMAPHORE_RESET(lock);
				return;
			}
			PANIC();
		}
	
		/* Wake readers. count is decremented once per thread woken, so the
		 * scan stops as soon as it reaches zero. */
		for (pthread = &fd_wait_read; count && *pthread; ) {
			if (FD_ISSET((*pthread)->fd, &fd_set_read)) {
				/* NOTE(review): unlike the write loop below, the thread's
				 * own lock is NOT taken here before its state is changed
				 * -- confirm whether that is intentional. */

				(*pthread)->state = PS_RUNNING;
				/* Unlink: the same link now refers to the next waiter. */
				*pthread = (*pthread)->next;
				count--;
				continue;
			} 
			pthread = &((*pthread)->next);
		}
					
		/* Wake writers. */
		for (pthread = &fd_wait_write; count && *pthread; ) {
			if (FD_ISSET((*pthread)->fd, &fd_set_write)) {
				semaphore *plock;

				/* Get lock on thread */
				plock = &(*pthread)->lock;
				if (!(SEMAPHORE_TEST_AND_SET(plock))) {
					/* Lock acquired: mark runnable, unlink, release. */
					(*pthread)->state = PS_RUNNING;
					*pthread = (*pthread)->next;
					SEMAPHORE_RESET(plock);
				}
				/* NOTE(review): when the thread lock could not be taken the
				 * thread stays queued and the link is not advanced, so the
				 * same thread is re-examined and count is decremented again
				 * on the next iteration. */
				count--;
				continue;
			} 
			pthread = &((*pthread)->next);
		}
	}
	SEMAPHORE_RESET(lock);
}

/* ==========================================================================
 * fd_kern_wait()
 *
 * Called when there is no active thread to run.
 */
/* select() timeout used by fd_kern_wait(); defined outside this file. */
extern struct timeval __fd_kern_wait_timeout;

/*
 * Block in select() until an fd that some thread is waiting on becomes
 * ready, then mark the corresponding waiters PS_RUNNING and unlink them
 * from their wait list.  If no thread is waiting on I/O at all, calls
 * sig_handler_pause() instead.
 *
 * Returns early, without waking anyone, if select() reports -EINTR; any
 * other select() failure is a PANIC().  This function does not take
 * fd_wait_lock.
 */
void fd_kern_wait()
{
	fd_set fd_set_read, fd_set_write;
	/* Points at the link (list head or ->next field) that refers to the
	 * thread being examined, so a thread can be unlinked in place. */
	struct pthread **pthread;
	sigset_t sig_to_block;
	int count;

	if (fd_wait_read || fd_wait_write) {
		FD_ZERO(&fd_set_read);
		FD_ZERO(&fd_set_write);
		/* Collect the fds of all waiting readers and writers. */
		for (pthread = &fd_wait_read; *pthread; pthread = &((*pthread)->next)) {
			FD_SET((*pthread)->fd, &fd_set_read);
		}
		for (pthread = &fd_wait_write; *pthread; pthread = &((*pthread)->next)) {
			FD_SET((*pthread)->fd, &fd_set_write);
		}

		/* Turn off interrupts for real while we set the timer.  */

		sigfillset(&sig_to_block);
		sigprocmask(SIG_BLOCK, &sig_to_block, NULL);

		machdep_unset_thread_timer(); 
		/* Wait for at most one hour. */
		__fd_kern_wait_timeout.tv_usec = 0;
		__fd_kern_wait_timeout.tv_sec = 3600;

		sigprocmask(SIG_UNBLOCK, &sig_to_block, NULL);

		/*
		 * There is a small but finite chance that an interrupt will
		 * occur between the unblock and the select. Because of this
		 * sig_handler_real() sets the value of __fd_kern_wait_timeout
		 * to zero causing the select to do a poll instead of a wait.
		 */

		/* The loop body always returns or PANIC()s, so select() is
		 * attempted exactly once. */
		while ((count = machdep_sys_select(dtablesize, &fd_set_read,
		  &fd_set_write, NULL, &__fd_kern_wait_timeout)) < OK) {
			if (count == -EINTR) {
				return;
			}
			PANIC();
		}
	
		/* Wake readers; count is decremented once per thread woken. */
		for (pthread = &fd_wait_read; count && *pthread; ) {
			if (FD_ISSET((*pthread)->fd, &fd_set_read)) {
				/* NOTE(review): unlike the write loop below, the thread's
				 * own lock is NOT taken here -- confirm intent. */

				(*pthread)->state = PS_RUNNING;
				/* Unlink: the same link now refers to the next waiter. */
				*pthread = (*pthread)->next;
				count--;
				continue;
			} 
			pthread = &((*pthread)->next);
		}
					
		/* Wake writers. */
		for (pthread = &fd_wait_write; count && *pthread; ) {
			if (FD_ISSET((*pthread)->fd, &fd_set_write)) {
				semaphore *plock;

				/* Get lock on thread */
				plock = &(*pthread)->lock;
				if (!(SEMAPHORE_TEST_AND_SET(plock))) {
					/* Lock acquired: mark runnable, unlink, release. */
					(*pthread)->state = PS_RUNNING;
					*pthread = (*pthread)->next;
					SEMAPHORE_RESET(plock);
				}
				/* NOTE(review): if the thread lock was busy the link is not
				 * advanced, so the same thread is examined again and count
				 * is decremented again. */
				count--;
				continue;
			} 
			pthread = &((*pthread)->next);
		}
	} else {
		/* No threads, waiting on I/O, do a sigsuspend */
		sig_handler_pause();
	}
}

/* ==========================================================================
 * Special Note: All operations return the errno as a negative of the errno
 * listed in errno.h
 * ======================================================================= */

/* ==========================================================================
 * read()
 */
ssize_t __fd_kern_read(union fd_data fd_data, int flags, void *buf,
  size_t nbytes)
{
	semaphore *lock, *plock;
	int fd = fd_data.i;
	int ret;

	while ((ret = machdep_sys_read(fd, buf, nbytes)) < OK) { 
		if (!(flags & __FD_NONBLOCK) &&
		  ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
			/* Lock queue */
			lock = &fd_wait_lock;
			while (SEMAPHORE_TEST_AND_SET(lock)) {
				pthread_yield();
			}

			/* Lock pthread */
			plock = &(pthread_run->lock);
			while (SEMAPHORE_TEST_AND_SET(plock)) {
				pthread_yield();
			}

			/* queue pthread for a FDR_WAIT */
			pthread_run->next = fd_wait_read;
			fd_wait_read = pthread_run;
			pthread_run->fd = fd;
			SEMAPHORE_RESET(lock);
			reschedule(PS_FDR_WAIT);
		} else {
			pthread_run->error = -ret;
			ret = NOTOK;
			break;
		}
	}
	return(ret);
}

/* ==========================================================================
 * readv()
 */
int __fd_kern_readv(union fd_data fd_data, int flags, struct iovec *iov,
  int iovcnt)
{
	semaphore *lock, *plock;
	int fd = fd_data.i;
	int ret;

	while ((ret = machdep_sys_readv(fd, iov, iovcnt)) < OK) { 
		if (!(flags & __FD_NONBLOCK) &&
		  ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
			/* Lock queue */
			lock = &fd_wait_lock;
			while (SEMAPHORE_TEST_AND_SET(lock)) {
				pthread_yield();
			}

			/* Lock pthread */
			plock = &(pthread_run->lock);
			while (SEMAPHORE_TEST_AND_SET(plock)) {
				pthread_yield();
			}

			/* queue pthread for a FDR_WAIT */
			pthread_run->next = fd_wait_read;
			fd_wait_read = pthread_run;
			pthread_run->fd = fd;
			SEMAPHORE_RESET(lock);
			reschedule(PS_FDR_WAIT);
		} else {
			pthread_run->error = -ret;
			ret = NOTOK;
			break;
		}
	}
	return(ret);
}

/* ==========================================================================
 * write()
 */
ssize_t __fd_kern_write(union fd_data fd_data, int flags, const void *buf,
  size_t nbytes)
{
	semaphore *lock, *plock;
	int fd = fd_data.i;
	int ret;

    while ((ret = machdep_sys_write(fd, buf, nbytes)) < OK) { 
		if (!(flags & __FD_NONBLOCK) &&
          ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
			/* Lock queue */
			lock = &fd_wait_lock;
			while (SEMAPHORE_TEST_AND_SET(lock)) {
				pthread_yield();
			}

			/* Lock pthread */
			plock = &(pthread_run->lock);
			while (SEMAPHORE_TEST_AND_SET(plock)) {
				pthread_yield();
			}

			/* queue pthread for a FDW_WAIT */
			pthread_run->next = fd_wait_write;
			fd_wait_write = pthread_run;
			pthread_run->fd = fd;
			SEMAPHORE_RESET(lock);
			reschedule(PS_FDW_WAIT);
        } else {
			pthread_run->error = -ret;
            break;
        }
    }
    return(ret);
}

/* ==========================================================================
 * writev()
 */
int __fd_kern_writev(union fd_data fd_data, int flags, struct iovec *iov,
  int iovcnt)
{
	semaphore *lock, *plock;
	int fd = fd_data.i;
	int ret;

    while ((ret = machdep_sys_writev(fd, iov, iovcnt)) < OK) { 
		if (!(flags & __FD_NONBLOCK) &&
          ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
			/* Lock queue */
			lock = &fd_wait_lock;
			while (SEMAPHORE_TEST_AND_SET(lock)) {
				pthread_yield();
			}

			/* Lock pthread */
			plock = &(pthread_run->lock);
			while (SEMAPHORE_TEST_AND_SET(plock)) {
				pthread_yield();
			}

			/* queue pthread for a FDW_WAIT */
			pthread_run->next = fd_wait_write;
			fd_wait_write = pthread_run;
			pthread_run->fd = fd;
			SEMAPHORE_RESET(lock);
			reschedule(PS_FDW_WAIT);
        } else {
			pthread_run->error = -ret;
            break;
        }
    }
    return(ret);
}

/* ==========================================================================
 * For blocking version we really should set an interrupt
 * fcntl()
 */
/*
 * Forward an fcntl request to the kernel descriptor held in fd_data.
 * The flags argument is not used.  Returns whatever machdep_sys_fcntl()
 * returns.
 */
int __fd_kern_fcntl(union fd_data fd_data, int flags, int cmd, int arg)
{
	return(machdep_sys_fcntl(fd_data.i, cmd, arg));
}

/* ==========================================================================
 * close()
 */
/*
 * Close the kernel descriptor held in fd_data.  The flags argument is not
 * used.  Returns whatever machdep_sys_close() returns.
 */
int __fd_kern_close(union fd_data fd_data, int flags)
{
	return(machdep_sys_close(fd_data.i));
}

/* ==========================================================================
 * lseek()
 */
/*
 * Reposition the offset of kernel descriptor fd.  The flags argument is not
 * used.  Returns whatever machdep_sys_lseek() returns.
 *
 * NOTE(review): the other ops in this file take a union fd_data as their
 * first parameter; this one takes a plain int -- confirm against the
 * struct fd_ops declaration.
 */
off_t __fd_kern_lseek(int fd, int flags, off_t offset, int whence)
{
	off_t position = machdep_sys_lseek(fd, offset, whence);

	return(position);
}

/*
 * File descriptor operations
 */
/* NOTE(review): old-style declaration with implicit int return type and no
 * prototype; machdep_sys_close() is already called earlier in this file. */
extern machdep_sys_close();

/* Normal file operations */
/* Operation table installed in fd_table[]->ops for every kernel-backed
 * descriptor set up in this file.  The initializers are positional, so
 * their order must match the member order of struct fd_ops (declared
 * elsewhere -- TODO confirm). */
static struct fd_ops __fd_kern_ops = {
	__fd_kern_write, __fd_kern_read, __fd_kern_close, __fd_kern_fcntl,
	__fd_kern_writev, __fd_kern_readv, __fd_kern_lseek
};

/* NFS file operations */

/* FIFO file operations */

/* Device operations */

/* ==========================================================================
 * open()
 *
 * Because open could potentially block opening a file from a remote
 * system, we want to make sure the call will timeout. We then try and open
 * the file, and stat the file to determine what operations we should
 * associate with the fd.
 *
 * This is not done yet
 *
 * A regular file on the local system needs no special treatment.
 */
int open(const char *path, int flags, ...)
{
	int fd, mode, fd_kern;
	struct stat stat_buf;
	va_list ap;

	/* If pthread scheduling == FIFO set a virtual timer */
	if (flags & O_CREAT) {
		va_start(ap, flags);
		mode = va_arg(ap, int);
		va_end(ap);
	} else {
		mode = 0;
	}

	if (!((fd = fd_allocate()) < OK)) {
		fd_table[fd]->flags = flags;
		flags |= __FD_NONBLOCK;

		if (!((fd_kern = machdep_sys_open(path, flags, mode)) < OK)) {

			/* fstat the file to determine what type it is */
			if (machdep_sys_fstat(fd_kern, &stat_buf)) {
				PANIC();
			}
			if (S_ISREG(stat_buf.st_mode)) {
				fd_table[fd]->ops = &(__fd_kern_ops);
				fd_table[fd]->type = FD_HALF_DUPLEX;
			} else {
				fd_table[fd]->ops = &(__fd_kern_ops);
				fd_table[fd]->type = FD_FULL_DUPLEX;
			}
			fd_table[fd]->fd.i = fd_kern; 
			return(fd);
		}

		pthread_run->error = - fd_kern;
		fd_table[fd]->count = 0;
	}
	return(NOTOK);
}

/* ==========================================================================
 * create()
 */
/*
 * Create (or truncate) a file and return a thread-library descriptor for
 * it.
 *
 * Allocates an fd_table entry with flags O_WRONLY, creates the file in the
 * kernel, switches the new kernel descriptor to non-blocking mode and
 * registers it as an FD_HALF_DUPLEX kernel fd.
 *
 * Returns the table index on success.  Returns NOTOK if no table entry
 * could be allocated or the kernel creat failed; in the latter case the
 * positive errno is stored in pthread_run->error and the entry is released
 * by zeroing its count.
 */
int create(const char *path, mode_t mode)
{
	int fd, fd_kern;

	if (!((fd = fd_allocate()) < OK)) {
		fd_table[fd]->flags = O_WRONLY;

		if (!((fd_kern = machdep_sys_creat(path, mode)) < OK)) {
			/* The fcntl must target the kernel descriptor, not the
			 * fd_table index (the two are unrelated numbers). */
			machdep_sys_fcntl(fd_kern, F_SETFL, fd_table[fd]->flags | __FD_NONBLOCK);
			fd_table[fd]->type = FD_HALF_DUPLEX;
			fd_table[fd]->ops = &(__fd_kern_ops);
			fd_table[fd]->fd.i = fd_kern; 
			return(fd);
		}

		pthread_run->error = - fd_kern;
		fd_table[fd]->count = 0;
	}
	return(NOTOK);
}

/* ==========================================================================
 * creat()
 */
/*
 * Standard-name entry point for create(); returns create()'s result
 * (the new descriptor, or NOTOK on failure).
 */
int creat(const char *path, mode_t mode)
{
	/* The result must be propagated: falling off the end of a non-void
	 * function leaves the caller with an indeterminate value. */
	return(create(path, mode));
}

/* ==========================================================================
 * fchown()
 */
/*
 * Change owner and group of the file behind descriptor fd.
 *
 * The table entry is locked for FD_WRITE around the kernel call.  Returns
 * the fd_lock() result if locking fails, otherwise the result of
 * machdep_sys_fchown(); a negative kernel result is also passed to
 * SET_ERRNO() as a positive errno.
 */
int fchown(int fd, uid_t owner, gid_t group)
{
	int rc = fd_lock(fd, FD_WRITE);

	if (rc != OK) {
		return(rc);
	}

	rc = machdep_sys_fchown(fd_table[fd]->fd.i, owner, group);
	if (rc < OK) {
		SET_ERRNO(-rc);
	}
	fd_unlock(fd, FD_WRITE);
	return(rc);
}

/* ==========================================================================
 * fchmod()
 */
/*
 * Change the permission bits of the file behind descriptor fd.
 *
 * The table entry is locked for FD_WRITE around the kernel call.  Returns
 * the fd_lock() result if locking fails, otherwise the result of
 * machdep_sys_fchmod(); a negative kernel result is also passed to
 * SET_ERRNO() as a positive errno.
 */
int fchmod(int fd, mode_t mode)
{
	int rc = fd_lock(fd, FD_WRITE);

	if (rc != OK) {
		return(rc);
	}

	rc = machdep_sys_fchmod(fd_table[fd]->fd.i, mode);
	if (rc < OK) {
		SET_ERRNO(-rc);
	}
	fd_unlock(fd, FD_WRITE);
	return(rc);
}

/* ==========================================================================
 * pipe()
 */
int pipe(int fds[2])
{
	int kfds[2];
	int ret;

	if ((fds[0] = fd_allocate()) >= OK) {
		if ((fds[1] = fd_allocate()) >= OK) {
			if ((ret = machdep_sys_pipe(kfds)) >= OK) {
				fd_table[fds[0]]->flags = machdep_sys_fcntl(kfds[0], F_GETFL, NULL);
				machdep_sys_fcntl(kfds[0], F_SETFL, fd_table[fds[0]]->flags | __FD_NONBLOCK);
				fd_table[fds[1]]->flags = machdep_sys_fcntl(kfds[1], F_GETFL, NULL);
				machdep_sys_fcntl(kfds[1], F_SETFL, fd_table[fds[1]]->flags | __FD_NONBLOCK);

				fd_table[fds[0]]->ops = &(__fd_kern_ops);
				fd_table[fds[1]]->ops = &(__fd_kern_ops);

				/* Not really full duplex but ... */
				fd_table[fds[0]]->type = FD_FULL_DUPLEX;
				fd_table[fds[1]]->type = FD_FULL_DUPLEX;

				fd_table[fds[0]]->fd.i = kfds[0];
				fd_table[fds[1]]->fd.i = kfds[1];

				return(OK);
			} else {
				 pthread_run->error = - ret;
			}
			fd_table[fds[1]]->count = 0;
		}
		fd_table[fds[0]]->count = 0;
	}
	return(NOTOK);
}

/* ==========================================================================
 * fd_kern_reset()
 * Change the fcntl blocking flag back to NONBLOCKING. This should only
 * be called after a fork.
 */
/*
 * Turn an FD_TEST_HALF_DUPLEX / FD_TEST_FULL_DUPLEX entry back into its
 * FD_HALF_DUPLEX / FD_FULL_DUPLEX form: the kernel descriptor is put back
 * into non-blocking mode (recorded flags | __FD_NONBLOCK) and the entry
 * type is updated.  Entries of any other type are left untouched.
 */
void fd_kern_reset(int fd)
{
	int restored_type;

	if (fd_table[fd]->type == FD_TEST_HALF_DUPLEX) {
		restored_type = FD_HALF_DUPLEX;
	} else if (fd_table[fd]->type == FD_TEST_FULL_DUPLEX) {
		restored_type = FD_FULL_DUPLEX;
	} else {
		return;
	}

	machdep_sys_fcntl(fd_table[fd]->fd.i, F_SETFL,
	  fd_table[fd]->flags | __FD_NONBLOCK);
	fd_table[fd]->type = restored_type;
}

/* ==========================================================================
 * fd_kern_init()
 *
 * Assume the entry is locked before routine is invoked
 *
 * This may change. The problem is setting the fd to nonblocking changes
 * the parents fd too, which may not be the desired result.
 *
 * New added feature: If the fd in question is a tty then we open it again
 * and close the original, this way we don't have to worry about the
 * fd being NONBLOCKING to the outside world.
 */
/*
 * Adopt an already-open kernel descriptor fd into fd_table[fd].
 *
 * Does nothing if F_GETFL fails on fd.  Otherwise the kernel flags are
 * recorded, a tty is re-opened via its name and dup2()ed over fd, and the
 * descriptor is switched to non-blocking mode and registered as an
 * FD_HALF_DUPLEX kernel fd with count 1.
 */
void fd_kern_init(int fd)
{
	int tty_fd;

	fd_table[fd]->flags = machdep_sys_fcntl(fd, F_GETFL, NULL);
	if (fd_table[fd]->flags < OK) {
		/* Not an open descriptor. */
		return;
	}

	if (isatty(fd)) {
		tty_fd = machdep_sys_open(ttyname_basic(fd), O_RDWR);
		if (tty_fd >= OK) {
			/* Should print a warning */
			/* Should also set the flags to that of opened outside of
			process */
			(void) machdep_sys_dup2(tty_fd, fd);
			machdep_sys_close(tty_fd);
		}
	}

	/* We do these things regardless of the above results */
	machdep_sys_fcntl(fd, F_SETFL, fd_table[fd]->flags | __FD_NONBLOCK);
	fd_table[fd]->ops = &(__fd_kern_ops);
	fd_table[fd]->type = FD_HALF_DUPLEX;
	fd_table[fd]->fd.i = fd;
	fd_table[fd]->count = 1;
}

/* ==========================================================================
 * fd_kern_gettableentry()
 *
 * Remember: only return a file descriptor that I will modify later.
 * Don't return file descriptors that aren't owned by the child, or don't
 * have kernel operations.
 */
/*
 * Find the fd_table index whose entry refers to kernel descriptor fd.
 *
 * Only entries using __fd_kern_ops qualify.  When child is non-zero the
 * entry must be of type FD_TEST_HALF_DUPLEX or FD_TEST_FULL_DUPLEX;
 * otherwise entries of type FD_NT or FD_NIU are skipped.
 *
 * Returns the first matching index, or NOTOK if there is none.
 */
static int fd_kern_gettableentry(const int child, int fd)
{
	int slot;

	for (slot = 0; slot < dtablesize; slot++) {
		if (fd_table[slot]->fd.i != fd) {
			continue;
		}

		if (child) {
			if ((fd_table[slot]->type != FD_TEST_HALF_DUPLEX) &&
			  (fd_table[slot]->type != FD_TEST_FULL_DUPLEX)) {
				continue;
			}
		} else if ((fd_table[slot]->type == FD_NT) ||
		  (fd_table[slot]->type == FD_NIU)) {
			continue;
		}

		/* Is it a kernel fd ? */
		if (fd_table[slot]->ops == &(__fd_kern_ops)) {
			return(slot);
		}
	}
	return(NOTOK);
}

/* ==========================================================================
 * fd_kern_exec()
 *
 * Fixup the fd_table such that (fd == fd_table[fd]->fd.i) this way
 * the new image will be OK.
 *
 * Only touch those that won't be used by the parent if we're in a child
 * otherwise fixup all.
 *
 * Returns:
 * 0 no fixup necessary
 * 1 fixup without problems
 * 2 failed fixup on some descriptors, and clobbered them.
 */
/*
 * Renumber kernel descriptors before an exec so that, for every eligible
 * fd_table entry, the table index equals the kernel descriptor number.
 *
 * child: when non-zero only FD_TEST_HALF_DUPLEX / FD_TEST_FULL_DUPLEX
 *        entries are touched; otherwise every entry except FD_NT / FD_NIU.
 *
 * Returns 0 if no fixup was necessary, 1 if fixups were done without
 * problems, 2 if some descriptor could not be moved and was marked FD_NIU.
 */
int fd_kern_exec(const int child)
{
	int ret = 0;
	int fd, i;

	for (fd = 0; fd < dtablesize; fd++) {
		/* Is the fd already in use ? */
		if (child) {
			if ((fd_table[fd]->type != FD_TEST_HALF_DUPLEX) &&
		      (fd_table[fd]->type != FD_TEST_FULL_DUPLEX)) {
				continue;
			}
		} else {
			if ((fd_table[fd]->type == FD_NT) ||
              (fd_table[fd]->type == FD_NIU)) {
				continue;
			}
		}
		/* Is it a kernel fd ? */
		if (fd_table[fd]->ops != &(__fd_kern_ops)) {
			continue;
		}
		/* Does it match ? */
		if (fd_table[fd]->fd.i == fd) {
			continue;
		}
		/* OK, fixup entry: Read comments before changing. This isn't obvious */ 

		/* i is the real file descriptor fd currently represents */
		if (((i = fd_table[fd]->fd.i) >= dtablesize) || (i < 0)) {
			/* This should never happen */
			PANIC();
		}

		/*
		 * if the real file descriptor with the same number as the fake file
		 * descriptor number fd is actually in use by the program, we have
         * to move it out of the way
		 */
		if ((machdep_sys_fcntl(fd, F_GETFL, NULL)) >= OK) {
			/* fd is busy */
			int j;

			/*
			 * j is the fake file descriptor that represents the real file
			 * descriptor that we want to move. This way the fake file
			 * descriptor fd can move its real file descriptor i such that
			 * fd == i.
			 */
			if ((j = fd_kern_gettableentry(child, fd)) >= OK) {

				/*
				 * Since j represents a fake file descriptor and fd represents
				 * a fake file descriptor. If j < fd then a previous pass
				 * should have set fd_table[j]->fd.i == j.
				 */
				if (fd < j) {
					if ((fd_table[j]->fd.i = machdep_sys_dup(fd)) < OK) {
						/* Close j, there is nothing else we can do */
  						fd_table[j]->type = FD_NIU;
						ret = 2;
					}
				} else {
					/* This implies fd_table[j]->fd.i != j */
					PANIC();
				}
			}
		}

		/*
		 * Here the real file descriptor i is set to equal the fake file
		 * descriptor fd
		 */
		machdep_sys_dup2(i, fd);

		/*
		 * Now comes the really complicated part: UNDERSTAND before changing
		 *
		 * Here are the things this routine wants to do ...
		 *
		 * Case 1. The real file descriptor has only one fake file descriptor
		 * representing it. 
		 * fd -> i, fd != i ===>  fd -> fd, close(i)
		 * Example fd = 4, i = 2: then close(2), set fd -> i = 4
		 * 
		 * Case 2. The real file descriptor has more than one fake file
		 * descriptor representing it, and this is the first fake file
		 * descriptor representing the real file descriptor
		 * fd -> i, fd' -> i, fd != i ===> fd -> fd, fd' -> fd, close(i)
		 *
		 * The problem is achieving the above is very messy and difficult,
		 * but I should be able to take a short cut. If fd > i then there
		 * will be no need to ever move i, this is because the fake file
		 * descriptor foo that we would have wanted to represent the real
		 * file descriptor i has already been processed. If fd < i then by
		 * moving i to fd all subsequent fake file descriptors fd' should fall
		 * into the previous case and won't need additional adjusting.
		 *
		 * Does this break the above fd < j check .... It shouldn't because j
		 * is a fake file descriptor and if j < fd then j has already moved 
		 * its real file descriptor foo such that foo <= j therefore foo < fd
		 * and not foo == fd therefore j cannot represent the real 
		 * file descriptor that fd wants to move to and be less than fd
		 */
		if (fd < i) {
			fd_table[fd]->fd.i = fd;
			machdep_sys_close(i);
		}
		if (ret < 1) {
			 ret = 1;
		}
	}
	/* Report the documented 0/1/2 status; previously the function fell off
	 * its end and the caller received an indeterminate value. */
	return(ret);
}

/* ==========================================================================
 * fd_kern_fork()
 */
/*
 * Walk the whole fd_table and, for every kernel fd that is not locked and
 * has neither a read nor a write owner, restore the recorded flags on the
 * kernel descriptor (i.e. without __FD_NONBLOCK added) and change the entry
 * type from FD_HALF_DUPLEX / FD_FULL_DUPLEX to the corresponding FD_TEST_*
 * type.  Entries whose lock cannot be taken are skipped.
 */
void fd_kern_fork()
{
	semaphore *entry_lock;
	int slot;

	for (slot = 0; slot < dtablesize; slot++) {
		entry_lock = & (fd_table[slot]->lock);
		if (SEMAPHORE_TEST_AND_SET(entry_lock)) {
			/* Someone else holds this entry; leave it alone. */
			continue;
		}

		/* Only unowned kernel fds are converted. */
		if ((fd_table[slot]->ops == &(__fd_kern_ops)) &&
		  !(fd_table[slot]->r_owner) && !(fd_table[slot]->w_owner)) {
			if (fd_table[slot]->type == FD_HALF_DUPLEX) {
				machdep_sys_fcntl(fd_table[slot]->fd.i, F_SETFL, fd_table[slot]->flags);
				fd_table[slot]->type = FD_TEST_HALF_DUPLEX;
			} else if (fd_table[slot]->type == FD_FULL_DUPLEX) {
				machdep_sys_fcntl(fd_table[slot]->fd.i, F_SETFL, fd_table[slot]->flags);
				fd_table[slot]->type = FD_TEST_FULL_DUPLEX;
			}
		}
		SEMAPHORE_RESET(entry_lock);
	}
}

/* ==========================================================================
 * Here are the berkeley socket functions. These are not POSIX.
 * ======================================================================= */

/* ==========================================================================
 * socket()
 */
int socket(int af, int type, int protocol)
{
	int fd, fd_kern;

	 if (!((fd = fd_allocate()) < OK)) {

        if (!((fd_kern = machdep_sys_socket(af, type, protocol)) < OK)) {
			machdep_sys_fcntl(fd_kern, F_SETFL, __FD_NONBLOCK);

            /* Should fstat the file to determine what type it is */
            fd_table[fd]->ops 	= & __fd_kern_ops;
            fd_table[fd]->type 	= FD_FULL_DUPLEX;
			fd_table[fd]->fd.i	= fd_kern;
        	fd_table[fd]->flags = 0;
            return(fd);
        }

        pthread_run->error = - fd_kern;
        fd_table[fd]->count = 0;
    }
    return(NOTOK);
}

/* ==========================================================================
 * bind()
 */
int bind(int fd, const struct sockaddr *name, int namelen)
{
	/* Not much to do in bind */
	semaphore *plock;
	int ret;

	if ((ret = fd_lock(fd, FD_RDWR)) == OK) {
        if ((ret = machdep_sys_bind(fd_table[fd]->fd.i, name, namelen)) < OK) { 
			pthread_run->error = - ret;
		}
		fd_unlock(fd, FD_RDWR);
	}
	return(ret);
}

/* ==========================================================================
 * connect()
 */
/*
 * Thread-aware connect().
 *
 * The table entry is locked for FD_RDWR for the duration of the call.  If
 * the kernel reports that the connection is still in progress
 * (EWOULDBLOCK, EINPROGRESS, EALREADY or EAGAIN) and the descriptor's
 * recorded flags do not contain __FD_NONBLOCK, the running thread is
 * queued on fd_wait_write and rescheduled in state PS_FDW_WAIT.  Once it
 * runs again, getpeername() is used to find out whether the connection was
 * established; if it reports ENOTCONN the real error is fetched with
 * SO_ERROR into pthread_run->error.
 *
 * Returns the fd_lock() result if locking fails, OK on success, otherwise
 * a negative errno (with the positive errno in pthread_run->error).
 */
int connect(int fd, const struct sockaddr *name, int namelen)
{
	semaphore *lock, *plock;
	struct sockaddr tmpname;
	int ret, tmpnamelen;

	if ((ret = fd_lock(fd, FD_RDWR)) == OK) {
		if ((ret = machdep_sys_connect(fd_table[fd]->fd.i, name, namelen)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
              ((ret == -EWOULDBLOCK) || (ret == -EINPROGRESS) ||
		      (ret == -EALREADY) || (ret == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDW_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_write;
				fd_wait_write = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDW_WAIT);

				/* OK now lets see if it really worked.  The length is a
				 * value-result argument: it must hold the buffer size on
				 * entry. */
				tmpnamelen = sizeof(tmpname);
				if ((ret = machdep_sys_getpeername(fd_table[fd]->fd.i,
				  &tmpname, &tmpnamelen)) < OK) {
					if (ret == -ENOTCONN) {
						/* Get the error, this function should not fail */
						tmpnamelen = sizeof(pthread_run->error);
						machdep_sys_getsockopt(fd_table[fd]->fd.i, SOL_SOCKET,
						  SO_ERROR, &pthread_run->error, &tmpnamelen); 
					} else {
						/* Any other failure is reported like every other
						 * error in this file. */
						pthread_run->error = -ret;
					}
				}
            } else {
				pthread_run->error = -ret;
			}
		}
		fd_unlock(fd, FD_RDWR);
	}
	return(ret);
}

/* ==========================================================================
 * accept()
 */
/*
 * Thread-aware accept().
 *
 * The listening entry is locked for FD_RDWR while waiting.  If the kernel
 * reports EWOULDBLOCK/EAGAIN and the descriptor's recorded flags do not
 * contain __FD_NONBLOCK, the running thread is queued on fd_wait_read and
 * rescheduled in state PS_FDR_WAIT, then the accept is retried.
 *
 * On success a new fd_table entry is allocated for the accepted kernel
 * socket (non-blocking, FD_FULL_DUPLEX, recorded flags 0) and its index is
 * returned.
 *
 * On failure returns the fd_lock() result, the negative errno from the
 * kernel accept (positive errno in pthread_run->error), or the negative
 * fd_allocate() result; in the last case the accepted kernel socket is
 * closed so it does not leak.
 */
int accept(int fd, struct sockaddr *name, int *namelen)
{
	semaphore *lock, *plock;
	int ret, fd_kern;

	if ((ret = fd_lock(fd, FD_RDWR)) == OK) {
		while ((fd_kern = machdep_sys_accept(fd_table[fd]->fd.i, name, namelen)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
              ((fd_kern == -EWOULDBLOCK) || (fd_kern == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDR_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_read;
				fd_wait_read = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDR_WAIT);
            } else {
				/* Record the errno like the other socket calls do. */
				pthread_run->error = -fd_kern;
				fd_unlock(fd, FD_RDWR);
				return(fd_kern);
			}
		}
		fd_unlock(fd, FD_RDWR);

	 	if (!((ret = fd_allocate()) < OK)) {

			/* This may be unnecessary */
			machdep_sys_fcntl(fd_kern, F_SETFL, __FD_NONBLOCK);

            /* Should fstat the file to determine what type it is */
            fd_table[ret]->ops 		= & __fd_kern_ops;
            fd_table[ret]->type 	= FD_FULL_DUPLEX;
			fd_table[ret]->fd.i		= fd_kern;
        	fd_table[ret]->flags 	= 0;
		} else {
			/* No table entry available: nobody would ever be able to
			 * reach the accepted socket, so close it. */
			machdep_sys_close(fd_kern);
		}
	}
	return(ret);
}

/* ==========================================================================
 * listen()
 */
/*
 * Mark the socket behind descriptor fd as accepting connections.
 *
 * The table entry is locked for FD_RDWR around the kernel call.  Returns
 * the fd_lock() result if locking fails, otherwise the result of
 * machdep_sys_listen(); a negative kernel result is also passed to
 * SET_ERRNO() as a positive errno.
 */
int listen(int fd, int backlog) 
{
	int rc = fd_lock(fd, FD_RDWR);

	if (rc != OK) {
		return(rc);
	}

	rc = machdep_sys_listen(fd_table[fd]->fd.i, backlog);
	if (rc < OK) {
		SET_ERRNO(-rc);
	}
	fd_unlock(fd, FD_RDWR);
	return(rc);
}

/* ==========================================================================
 * send()
 */
/* int send(int fd, char* msg, int len, int flags) */
ssize_t send(int fd, const void * msg, size_t len, int flags)
{
	semaphore *lock, *plock;
	int ret;

	if ((ret = fd_lock(fd, FD_WRITE)) == OK) {
		while ((ret = machdep_sys_send(fd_table[fd]->fd.i,
          msg,  len, flags)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
              ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDW_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_write;
				fd_wait_write = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDW_WAIT);

            } else {
				pthread_run->error = -ret;
				break;
			}
		}
		fd_unlock(fd, FD_WRITE);
	}
	return(ret);
}

/* ==========================================================================
 * sendto()
 */
/* int sendto(int fd, char* msg, int len, int flags, struct sockaddr *to,
  int to_len) */
ssize_t sendto(int fd, const void * msg, size_t len, int flags,
  const struct sockaddr *to, int to_len)
{
	semaphore *lock, *plock;
	int ret;

	if ((ret = fd_lock(fd, FD_WRITE)) == OK) {
		while ((ret = machdep_sys_sendto(fd_table[fd]->fd.i,
          msg, len, flags, to, to_len)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
              ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDW_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_write;
				fd_wait_write = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDW_WAIT);

            } else {
				pthread_run->error = -ret;
				break;
			}
		}
		fd_unlock(fd, FD_WRITE);
	}
	return(ret);
}

/* ==========================================================================
 * sendmsg()
 */
ssize_t sendmsg(int fd, const struct msghdr *msg, int flags)
{
	semaphore *lock, *plock;
	int passed_fd, ret, i;

	/* Handle getting the real file descriptor */
	for (i = 0; i < (((struct omsghdr *)msg)->msg_accrightslen / sizeof(i)); i++) {
		passed_fd = *(((int *)((struct omsghdr *)msg)->msg_accrights) + i);
		if ((ret = fd_lock(passed_fd, FD_RDWR)) == OK) {
			*(((int *)((struct omsghdr *)msg)->msg_accrights) + i)
			  = fd_table[passed_fd]->fd.i;
			machdep_sys_fcntl(fd_table[passed_fd]->fd.i, F_SETFL, 
			  fd_table[passed_fd]->flags);
			switch(fd_table[passed_fd]->type) {
			case FD_TEST_FULL_DUPLEX:
			case FD_TEST_HALF_DUPLEX:
				break;
			case FD_FULL_DUPLEX:
				fd_table[passed_fd]->type =  FD_TEST_FULL_DUPLEX;
				break;
			case FD_HALF_DUPLEX:
				fd_table[passed_fd]->type =  FD_TEST_HALF_DUPLEX;
				break;
			default:
				PANIC();
			}
		} else {
			fd_unlock(fd, FD_RDWR);
			return(EBADF);
		}
		fd_unlock(fd, FD_RDWR);
	}

	if ((ret = fd_lock(fd, FD_WRITE)) == OK) {
		while ((ret = machdep_sys_sendmsg(fd_table[fd]->fd.i, msg, flags)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
			  ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDR_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_read;
				fd_wait_write = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDW_WAIT);
            } else {
				pthread_run->error = -ret;
				break;
			}
		}
		fd_unlock(fd, FD_READ);
	}
	return(ret);
}

/* ==========================================================================
 * recv()
 */
/* int recv(int fd, char * buf, int len, int flags) */
ssize_t recv(int fd, void * buf, size_t len, int flags)
{
	semaphore *lock, *plock;
	int ret;

	if ((ret = fd_lock(fd, FD_READ)) == OK) {
		while ((ret = machdep_sys_recv(fd_table[fd]->fd.i,
		  buf, len, flags)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
              ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDR_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_write;
				pthread_run->next = fd_wait_read;
				fd_wait_read = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDR_WAIT);
            } else {
				pthread_run->error = -ret;
				break;
			}
		}
		fd_unlock(fd, FD_READ);
	}
	return(ret);
}

/* ==========================================================================
 * recvfrom()
 */
/* int recvfrom(int fd, char * buf, int len, int flags,
  struct sockaddr *from, int * from_len) */
ssize_t recvfrom(int fd, void * buf, size_t len, int flags,
  struct sockaddr * from, int * from_len)
{
	semaphore *lock, *plock;
	int ret;

	if ((ret = fd_lock(fd, FD_READ)) == OK) {
		while ((ret = machdep_sys_recvfrom(fd_table[fd]->fd.i,
		  buf, len, flags, from, from_len)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
			  ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDR_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_read;
				fd_wait_read = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDR_WAIT);
            } else {
				pthread_run->error = -ret;
				break;
			}
		}
		fd_unlock(fd, FD_READ);
	}
	return(ret);
}

/* ==========================================================================
 * recvmsg()
 */
/*
 * Thread-aware recvmsg().
 *
 * The socket entry is locked for FD_READ while receiving.  If the kernel
 * reports EWOULDBLOCK/EAGAIN and the descriptor's recorded flags do not
 * contain __FD_NONBLOCK, the running thread is queued on fd_wait_read and
 * rescheduled in state PS_FDR_WAIT, then the receive is retried.
 *
 * After a successful receive, every kernel descriptor delivered in the
 * message's access rights gets its own fd_table entry (kernel flags
 * recorded, descriptor switched to non-blocking, type chosen by fstat) and
 * is replaced in place by that entry's index.
 *
 * Returns the fd_lock() result if locking fails, the kernel's result on
 * success, the negative errno if the receive failed (positive errno in
 * pthread_run->error), or -EBADF (EBADF in pthread_run->error) if a table
 * entry could not be allocated for a passed descriptor.
 */
ssize_t recvmsg(int fd, struct msghdr *msg, int flags) 
{
	semaphore *lock, *plock;
	struct stat stat_buf;
	int passed_fd, new_fd, ret, i;

	if ((ret = fd_lock(fd, FD_READ)) == OK) {
		while ((ret = machdep_sys_recvmsg(fd_table[fd]->fd.i, msg, flags)) < OK) {
            if (!(fd_table[fd]->flags & __FD_NONBLOCK) &&
			  ((ret == -EWOULDBLOCK) || (ret == -EAGAIN))) {
				/* Lock queue */
				lock = &fd_wait_lock;
				while (SEMAPHORE_TEST_AND_SET(lock)) {
					pthread_yield();
				}

				/* Lock pthread */
				plock = &(pthread_run->lock);
				while (SEMAPHORE_TEST_AND_SET(plock)) {
					pthread_yield();
				}

				/* queue pthread for a FDR_WAIT */
				pthread_run->fd = fd_table[fd]->fd.i;
				pthread_run->next = fd_wait_read;
				fd_wait_read = pthread_run;
				SEMAPHORE_RESET(lock);
				reschedule(PS_FDR_WAIT);
            } else {
				pthread_run->error = -ret;
				break;
			}
		}
		/* The socket entry is released exactly once, here. */
		fd_unlock(fd, FD_READ);

		/* A failed receive delivered no access rights; the message
		 * contents are not meaningful, so do not interpret them. */
		if (ret < OK) {
			return(ret);
		}

		/* Handle getting the real file descriptor */
		for (i = 0; i < (((struct omsghdr *)msg)->msg_accrightslen / sizeof(i)); i++) {
			passed_fd = *(((int *)((struct omsghdr *)msg)->msg_accrights) + i);

			/* Use a separate variable so the caller's fd is not lost. */
			if ((new_fd = fd_allocate()) < OK) {
				/* NOTE(review): the kernel descriptors received in this
				 * message that have not been given a table entry remain
				 * open in the kernel. */
				pthread_run->error = EBADF;
				return(-EBADF);
			}

			fd_table[new_fd]->flags = machdep_sys_fcntl(passed_fd, F_GETFL);

			if (!( fd_table[new_fd]->flags & __FD_NONBLOCK)) {
				machdep_sys_fcntl(passed_fd, F_SETFL,  
				  fd_table[new_fd]->flags | __FD_NONBLOCK);
			}

			/* fstat the file to determine what type it is; if fstat
			 * fails stat_buf is not valid, so fall back to full duplex. */
			if ((machdep_sys_fstat(passed_fd, &stat_buf) == OK) &&
			  S_ISREG(stat_buf.st_mode)) {
				fd_table[new_fd]->type = FD_HALF_DUPLEX;
			} else {
				fd_table[new_fd]->type = FD_FULL_DUPLEX;
			}
			*(((int *)((struct omsghdr *)msg)->msg_accrights) + i) = new_fd;
			fd_table[new_fd]->ops = &(__fd_kern_ops);
			fd_table[new_fd]->fd.i = passed_fd;
		}
	}
	return(ret);
}

/* ==========================================================================
 * shutdown()
 */
/*
 * Thread-aware shutdown().
 *
 * how: 0 shuts down the read side (entry locked FD_READ), 1 the write side
 *      (FD_WRITE), 2 both (FD_RDWR).  Any other value stores EBADF in
 *      pthread_run->error and returns -EBADF.
 *
 * Returns the fd_lock() result if locking fails, otherwise the result of
 * machdep_sys_shutdown(); on a negative kernel result the positive errno
 * is also stored in pthread_run->error.
 */
int shutdown(int fd, int how)
{
	int ret;

	/* Each case must end in break: without it every valid request fell
	 * through into the following cases and finally into default, so the
	 * call always reported EBADF. */
	switch(how) {
	case 0: /* Read */
		if ((ret = fd_lock(fd, FD_READ)) == OK) {
			if ((ret = machdep_sys_shutdown(fd_table[fd]->fd.i, how)) < OK) {
				pthread_run->error = -ret;
			}
			fd_unlock(fd, FD_READ);
		}
		break;
	case 1: /* Write */
		if ((ret = fd_lock(fd, FD_WRITE)) == OK) {
			if ((ret = machdep_sys_shutdown(fd_table[fd]->fd.i, how)) < OK) {
				pthread_run->error = -ret;
			}
			fd_unlock(fd, FD_WRITE);
		}
		break;
	case 2: /* Read-Write */
		if ((ret = fd_lock(fd, FD_RDWR)) == OK) {
			if ((ret = machdep_sys_shutdown(fd_table[fd]->fd.i, how)) < OK) {
				pthread_run->error = -ret;
			}
			fd_unlock(fd, FD_RDWR);
		}
		break;
	default:
		pthread_run->error = EBADF;
		ret = -EBADF;
		break;
	}
	return(ret);
}
