/*
 * (Enhanced) Network block device - make block devices work over TCP
 *
 * Original NBD Copyright 1997 Pavel Machek <pavel@elf.mj.gts.cz>
 * Further ENBD Copyrights 1998, 1999, 2000 Peter Breuer <ptb@it.uc3m.es>
 *
 * Development of the ENBD software has been supported by grants and
 * contributions from Realm Information Technologies, Inc. of 5555
 * Oakbrook Parkway, NW Norcross, GA and iNsu Innovations Inc.  of
 * 3465, Boulevard Thimens, Saint-Laurent, Quebec, Canada.
 * 
 * ------------ Pavel's history notes ----------------------------------
 * 97-3-25 compiled 0-th version, not yet tested it 
 *   (it did not work, BTW) (later that day) HEY! it works!
 *   (bit later) hmm, not that much... 2:00am next day:
 *   yes, it works, but it gives something like 50kB/sec
 * 97-3-28 it's completely strange - when using 1024 byte "packets"
 *   it gives 50kB/sec and CPU idle; with 2048 bytes it gives
 *   500kB/sec (and CPU loaded 100% as it should be) (all done
 *   against localhost)
 * 97-4-1 complete rewrite to make it possible for many requests at 
 *   once to be processed
 * 97-4-1 23:57 rewrite once again to make it work :-(
 * 97-4-3 00:02 hmm, it does not work.
 * 97-4-3 23:06 hmm, it will need one more rewrite :-)
 * 97-4-10 It looks like it's working and stable. But I still do not
 *  have any recovery from lost connection...
 * (setq tab-width 4)
 * 97-4-11 Making protocol independent of endianity etc.
 * 97-4-15 Probably one more rewrite, since it loses requests under
 *  heavy loads
 * 97-9-13 Cosmetic changes
 *
 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
 * why not: would need verify_area and friends, would share yet another 
 *          structure with userland
 *
 * FIXME: not module-safe
 *
 * ------------ Peter's history notes ----------------------------------
 * 98-12-18 modules now OK ptb@it.uc3m.es (Peter Breuer) ported to
 * 2.0.*. + better debugging. Still possible lockup in connection with APM
 * and spurious interrupt - only on write. Error treatment should
 * be improved. After 100 errors from end_request the kernel can
 * do anything. We should catch it ourselves.
 * 99-1-sometime fixed lockup by extending semaphore - ptb v1.0.
 * 99-3-sometime reconnect protocol (client mod agreed by pavel) - ptb v1.1
 * 99-4-25 add /proc/nbdinfo - ptb v1.1.1
 * 99-4-sometime add multiplex - ptb v1.2
 * 99-4-26 fix multiplex and redundancy - ptb v1.2.1
 * 99-4-29 reentrant client threads - ptb v1.2.2
 * 99-4-29 socket related stuff placed in user space - amarin v1.3.0
 * 99-5-3  fix all, all writes had to be before all reads - ptb v1.2.4
 * 99-5-5  fix out-of-order, async - ptb v1.2.5
 * 99-5-7  semaphores removed (still works!), fail cases corrected - ptb v1.2.6
 * 99-5-12 signals unblocked in xmit, blksize != 1024 fixed, ioctls
 *         added  - ptb v1.2.7
 * 99-6-1  interaction with client split into two functions - amarin v1.3.0
 * 99-6-3  reintegrated fully, mem manager fixed, accounts fixed - ptb v1.2.8.3
 * 99-6-3  extra queue removed, mem manager removed  - ptb v1.2.8.4
 * 99-7-3  buffer registration introduced - ptb v1.2.8.5
 * 99-7-3  some client redundancy reestablished - ptb v2.1.1
 * 99-7-10 encapsulated queue calls. One element rollback buffer - ptb v2.1.2
 * 99-7-20 timestamp and rollback old abandoned request - ptb v2.1.3
 * 99-7-24 64bit file sizes and offsets accepted - ptb v2.1.9
 * 99-7-26 experimental request coalesces - ptb v2.1.10
 * 99-7-27 partitioning scheme - ptb v2.2.1
 * 99-8-3  nbd_clr_sock bug in invalidate_device fixed? - ptb v2.2.4
 * 99-8-5  reverse replace of block_fsync, add sig ioctls - ptb v2.2.5
 *         reverse bug introduced about v2.2.3 for compound reqs - ptb v2.2.5
 *         fix clear_que bug (didn't rollback first) from 2.1.3 - ptb v2.2.5
 * 99-8-22 workaround strange nr_sectors bug - ptb v2.2.6
 * 99-8-11 fix MY_NBD_SYNC bug. Never sync'ed all - ptb v2.2.7
 * 99-8-12 wakeups all moved to enqueue - ptb v2.2.7
 * 99-8-23 remove slot->cli_age - ptb v2.2.7
 * 99-8-24 first 8 bytes of signature embedded in packets - ptb v2.2.8
 *         fix SET_SIG define buglet, remove hardcoded constants - ptb v2.2.8
 *         fix huge bug. Missing copy_fromfs in my_nbd_ack - ptb v2.2.8     
 *         removed signature embedding and all other decorations - ptb v2.2.8
 * 99-8-25 recast fix in my_nbd_ack to avoid align. bug - ptb v2.2.9
 *         put in MKDEVs and put back some hardcode const fixes - ptb v2.2.10
 * 99-9-29 fix BLKGETSIZE bug - ptb v2.2.14
 * 99-10-2 run with interrupts on throughout. Think we lose some - ptb v2.2.15
 * 99-10-8 trim dead code, kernel 2.2 ifdef's - ptb v2.2.17
 * 99-12-18 further o-o - ptb v2.2.19
 * 99-12-28 queue account cleanup. endio on queue reqs at reset - ptb v2.2.20
 *          interruptible semaphores for better client recovery - ptb v2.2.20
 * 00-1-2   debugging cleanups. Fix race in end_request - ptb v2.2.21
 * 00-1-4   semaphores simplified. - ptb v2.2.22
 * 00-6-8   emergency control by write to proc - ptb v2.2.24
 * 00-7-20  ported to 2.4.0-test1. Possible minor bugs found/fixed - ptb v2.2.24
 * 00-7-27  changed proc i/f to read_proc from get_info in 2.2/2.4 - ptb v2.2.25
 * 00-7-30  fixed reads before writes under 2.4 by disabling merge - ptb v2.2.25
 * 00-7-30  and fixed merge_reqs for 2.4, now that I understand! - ptb v2.2.25
 * 00-7-30  fixed/introduced possible bug in end_io  for 2.2/2.4 - ptb v2.2.25
 * 00-7-30 added timeval/zone field in requests and replies - ptb v2.4.0
 * 00-7-30 fixed hitherto masked bug in read_stat in nbd_client - ptb v2.4.0
 * 00-7-30 added timeout to net writes in nbd_client - ptb v2.4.0
 * 00-8-20 display fix for devices over 2GB - ptb v2.4.5
 * 00-8-23 more 64 bit fixes + error out overrange requests- ptb v2.4.6/2.2.27
 * 00-8-31 add NBD_ERR ioctl to error out slot request- ptb v2.4.9
 * 00-8-31 soften NBD_SOFT_RESET so doesn't wreck protocol - ptb v2.4.9
 * 00-9-1  remove %L's from printfs. Kernel 2.2. doesn't - ptb v2.4.10/2.2.27
 * 00-9-6  add various state flags to help init order - ptb v2.4.11
 * 00-9-8  add checks for device initialised to set_sock - ptb v2.4.12
 * 00-9-17 en/disable device as aslot count goes through 0 - ptb v2.4.13/2.2.28
 * 00-9-21 split read/write dev req counts for accounting - ptb v2.4.14
 * 00-9-21 renamed sync_intvl to req_timeo - ptb v2.4.14
 * 00-9-21 made sync_intvl count write blocks - ptb v2.4.14
 * 00-9-22 repair enable after delayed disable when disabled - ptb v2.4.14
 * 00-9-22 include sync (nonblocking) after sync_intvl reqs - ptb v2.4.14
 * 00-9-25 disable sync (nonblocking) after sync_intvl reqs - ptb v2.4.14
 * 00-9-25 bundle invalidate_buffers in clr_sock - ptb v2.4.14
 * 00-10-20 implement req_timeo per device + ioctl (Wang Gang) - ptb v2.4.15
 * 00-10-20 add raid mode (Wang Gang) - ptb v2.4.15
 * 00-10-26 throttle in do_req  - ptb v2.4.15
 * 00-10-28 do set_sock on first open and clr_sock on last close - ptb v2.4.15
 * 00-11-01 make sync_intvl really sync - ptb v2.4.15
 * 00-11-14 rename throttle to plug, nbd_sync takes arg - ptb v2.4.17
 * 00-11-19 clr_sock errs req not rollback if show_errs & !aslot - ptb v2.4.17
 * 00-11-20 removed autodeadlock when disabled in do_req end_req - ptb v2.4.17
 * 00-11-21 make MY_NBD_SYNC only sync when sync_intvl > 0 - ptb v2.4.17
 * 00-12-24 make MY_NBD_GET_REQ use a timeout arg - ptb v2.4.18
 * 01-02-12 ported to 2.4.0 (works). do_nbd_request rewritten - ptb v2.4.20
 * 01-02-20 managed to get plugging and clustered read/writes OK - ptb v2.4.21
 * 01-02-21 eliminated slot->buflen for the time being - ptb v2.4.21
 * 01-02-27 added proper devfs support - ptb v2.4.22
 * 01-03-15 allowed more devices/in devfs, cleaned up endio - ptb v2.4.23
 * 01-03-15 added device letter (<= 3 chars) to struct-  - ptb v2.4.23
 * 01-03-15 added request size check to do_nbd_req - ptb v2.4.23
 * 01-03-15 increased MAX_SECTORS to 512 by default - ptb v2.4.23
 * 01-03-15 made major number a module parameter - ptb v2.4.23
 * 01-03-18 added max_sectors array - ptb v2.4.23
 * 01-03-23 added devfs links - ptb v2.4.23
 * 01-04-17 plugging always enabled for 2.4 kernels - ptb v2.4.24
 * 01-04-17 made SET_RO set_device_ro as well as set local flags - ptb v2.4.25
 * 01-04-28 impl SET_MD5SUM ioctl and proc support for md5sum - ptb v2.4.25
 * 01-04-29 added accounting for md5'd reqs - ptb v2.4.25
 * 01-07-29 added atomic protections for accounting - ptb v2.4.25
 * 01-08-01 fixed 2.4 smp bugs. Interrupts off in spinlocks - ptb v2.4.25
 * 01-08-01 removed all semaphores for spinlocks - ptb v2.4.25
 * 01-08-01 invalidate_buffers in clr_sock (req'd Rogier Wolff) - ptb v2.4.25
 * 01-08-02 fixed smp deadlock - end_that_request_first slept! ptb v2.4.26
 * 01-10-16 provisionally added error in device open when notenabled ptb v2.4.27
 * 01-10-18 added DIRTY flag to save on repeated invalidate_buffers ptb v2.4.27
 * 01-10-31 increment seqno_out before delivery, so really starts at 1  v2.4.27
 * 01-11-01 move zeroing of seqno in cmd field to nbe_end_req* ptb v2.4.27
 * 01-11-18 add speed calculation, dev fields, display in proc ptb v2.4.27
 * 01-11-20 modifications for compiling into monolithic kernel ptb v2.4.27
 * 01-12-06 clr requests before reenabling, not after, in nbd_enable ptb 2.4.27
 * 02-02-21 make nbd_rollback modal, absorbing nbd_error ptb 2.4.27
 * 02-03-10 path for ioctls included ptb 2.4.27
 * 02-04-02 ioctls extended to arbitrary length ptb 2.4.28
 * 02-05-01 restructured init to get a per individual device init ptb 2.4.29
 * 02-05-12 add PF_MEMALLOC for tcp to win contention for buffs ptb 2.4.29
 * 02-08-03 erase syncs from last _release, daemons are usually dead ptb 2.4.29
 * 02-08-07 added partition support ptb 2.4.30
 * 02-08-08 added local BLKSSZGET and related ioctl treatments ptb 2.4.30
 * 02-08-12 make nbd_ack not ruin req when its rolled back already ptb 2.4.30
 * 02-09-18 always allow daemon death even with reqs waiting ptb 2.4.30
 * 02-09-18 eliminate SYNC_REQD, RLSE_REQD ptb 2.4.30
 * 02-09-18 eliminate speed_lim ptb 2.4.30
 * 02-09-18 fix countq accounting ptb 2.4.30
 * 02-09-18 IOCTLACTIVE flag instead of RO_ACTIVE status for ioctl ptb 2.4.30
 * 02-09-18 eliminated ctldta use (too much tricky logic) ptb 2.4.30
 * 02-10-01 eliminated IOCTLACTIVE, use req_sem semaphore instead ptb 2.4.30
 * 02-10-10 introduce DIRECT flag ptb 2.4.30
 * 02-10-13 rollback pushes reqs to local queue, not queues them! ptb 2.4.30
 * 02-10-13 add hooks for separate ioctl module  ptb 2.4.30
 * 02-10-16 take set_sock out of open. Put pid check in handshake  ptb 2.4.30
 * 02-10-16 define MY_NBD_GET_NPORT ioctl ptb 2.4.30
 * 02-10-18 remove wait from MY_NBD_SYNC ioctl ptb 2.4.30
 * 02-10-20 rollback adds requests to queue in seqno order ptb 2.4.30
 */

/* PTB defaults to off; exported as a module parameter in the MODULE_PARM list further down. */
static int paranoia = 0;

#include <linux/major.h>
/*
 * Fallback definitions, used only when the <linux/major.h> included
 * just above does not define UNIX98_PTY_MAJOR_COUNT.
 *
 * Note the whitespace between macro name and replacement: writing
 * "NAME=VALUE" in a #define would make the replacement text "=VALUE".
 */
#ifndef UNIX98_PTY_MAJOR_COUNT
  #define UNIX98_PTY_MAJOR_COUNT 8
  #ifndef UNIX98_NR_MAJORS
    #define UNIX98_NR_MAJORS UNIX98_PTY_MAJOR_COUNT
  #endif
#endif

#include <linux/module.h>

#if defined(__GNUC__) && __GNUC__ >= 2
#define _LOOSE_KERNEL_NAMES
#endif

#include <linux/version.h>
#ifndef KERNEL_VERSION
#define KERNEL_VERSION(a,b,c)  (((a) << 16) + ((b) << 8) + (c))
#endif

#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>

#include <asm/segment.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include <linux/wrapper.h>

#define MAJOR_NR NBD_MAJOR
static int major = MAJOR_NR;

#include <linux/proc_fs.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/file.h>
#include <linux/smp_lock.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/iobuf.h>
#include <linux/delay.h>
#include <linux/locks.h>
#include <linux/config.h>

#define ENDREQ_NOCURRENT
#define LOCAL_END_REQUEST
#include <linux/blk.h>
#include <linux/enbd.h>
#include "enbd_ioctl.h"



/* PTB --------------- compatibility ------------------- */
/* Refuse to build when an SMP compile flag is set but CONFIG_SMP is not. */
#if defined(__SMP__) || defined(SMP)
  #if ! defined(CONFIG_SMP)
    #error CONFIG_SMP not defined
  #endif
#endif
/*
 * Low bit of a request's cmd field. The code below uses it as the
 * read/write index into per-direction counters and compares it
 * against WRITE.
 */
#define rq_data_dir(req) ((req)->cmd & 0x01)
/* PTB --------------- end compatibility --------------- */

/*
 * Logging helpers. Each one expands to a braced block that issues two
 * printk calls: first the NBD_ID prefix (source line, a per-call-site
 * counter kept in a static int, and the function name) at the given
 * log level, then the caller's printf-style arguments.
 *
 * The expansion is a bare { ... } block rather than do { } while (0),
 * so an invocation followed by ';' cannot sit directly before an
 * "else".
 */
#define NBD_ID "ENBD #%d[%d]: %s "
#define NBD_ERROR( s...) { static int icnt; printk( KERN_ERR   NBD_ID, __LINE__, icnt++, __FUNCTION__); printk(s);}
#define NBD_ALERT( s...) { static int icnt; printk( KERN_ALERT NBD_ID, __LINE__, icnt++, __FUNCTION__); printk(s);}
#define NBD_INFO( s...)  { static int icnt; printk( KERN_INFO  NBD_ID, __LINE__, icnt++, __FUNCTION__); printk(s);}

/* The LINUX_VERSION_CODE this file was compiled against. */
int linux_version_code = LINUX_VERSION_CODE;

// PTB forward declaration; the sized definition (MAX_NBD entries)
// appears further down, after the end_request helpers that index it.
static struct nbd_device nbd_dev[];

/*
 * PTB Stuff that used to be in enbd.h, mostly end_request, and is static
 */
/*
 * Wait until the completion is signalled or the timeout runs out.
 *
 * @x       - completion to wait on
 * @timeout - how long to wait, in the units schedule_timeout() takes
 *
 * Returns the time left over. If the completion was found done, one
 * "done" count is consumed and the return value is at least 1, even
 * when the timeout had already run out. If it was not done, the
 * return value is whatever remains of the timeout (<= 0 after a real
 * wait that expired).
 *
 * The sleep is TASK_UNINTERRUPTIBLE, so signals do not cut it short.
 */
static long
wait_for_completion_timeout (struct completion *x, long timeout)
{
	spin_lock_irq (&x->wait.lock);
	if (!x->done && timeout > 0) {
		DECLARE_WAITQUEUE (wait, current);

		/* queue ourselves as an exclusive waiter at the tail */
		wait.flags |= WQ_FLAG_EXCLUSIVE;
		__add_wait_queue_tail (&x->wait, &wait);
		do {
			/*
			 * Set the task state while still holding the lock,
			 * then drop the lock only for the sleep itself.
			 */
			__set_current_state (TASK_UNINTERRUPTIBLE);
			spin_unlock_irq (&x->wait.lock);
			timeout = schedule_timeout (timeout);
			spin_lock_irq (&x->wait.lock);
		} while (!x->done && timeout > 0);
		__remove_wait_queue (&x->wait, &wait);
	}
	if (x->done) {
		/* consume one completion; never report 0 on success */
		x->done--;
		if (timeout <= 0)
			timeout = 1;
	}
	spin_unlock_irq (&x->wait.lock);
	return timeout;
}

/*
 * Finish off a request without doing any locking here (see
 * end_request_lock for the variant that takes io_request_lock).
 *
 * @req      - request to finish
 * @uptodate - passed straight through to end_that_request_first
 *
 * If the request is the device's own embedded lo->req, nothing is
 * returned to the block layer: whoever waits on req->waiting is
 * simply woken via complete().
 */
static void
end_request (struct request *req, int uptodate)
{
	struct nbd_device *lo = &nbd_dev[MINOR (req->rq_dev) >> NBD_SHIFT];

	if (req == &lo->req) {
		complete (req->waiting);
		return;
	}

	/* anything beyond the direction bit is stripped, with a complaint */
	if (req->cmd != rq_data_dir(req)) {
		printk (KERN_ERR
			"nbd: request %0x has dirty cmd field ..  repairing\n",
			(unsigned) req);
		req->cmd = rq_data_dir(req);
	}

	/*
	 * Keep calling end_that_request_first for as long as sectors
	 * remain and it returns non-zero.
	 */
	while (req->nr_sectors > 0
	       && end_that_request_first (req, uptodate, DEVICE_NAME))
		;

	end_that_request_last (req);
}

/*
 * PTB Same job as end_request, but the calls into the block layer are
 * made under io_request_lock, which is taken (irqsave) and released
 * here. So call it with the io spinlock not held.
 *
 * @req      - request to finish
 * @uptodate - passed straight through to end_that_request_first
 *
 * The embedded lo->req shortcut and the cmd field repair both happen
 * before the lock is taken.
 */
static void
end_request_lock (struct request *req, int uptodate)
{
	struct nbd_device *lo = &nbd_dev[MINOR (req->rq_dev) >> NBD_SHIFT];
	unsigned long flags;

	if (req == &lo->req) {
		/* the device's own request: just wake the waiter */
		complete (req->waiting);
		return;
	}

	if (req->cmd != rq_data_dir(req)) {
		printk (KERN_ERR
			"nbd: request %0x has dirty cmd field ..  repairing\n",
			(unsigned) req);
		req->cmd = rq_data_dir(req);
	}

	spin_lock_irqsave (&io_request_lock, flags);
	while (req->nr_sectors > 0
	       && end_that_request_first (req, uptodate, DEVICE_NAME))
		;
	end_that_request_last (req);
	spin_unlock_irqrestore (&io_request_lock, flags);
}

/*
 * PTB Call this only with the io spinlock held.
 *
 * Reduces req->cmd to its direction bit and ends the request,
 * reporting it up to date exactly when req->errors is zero.
 */
static void
nbd_end_request (struct request *req)
{
	int uptodate;

	req->cmd = rq_data_dir(req);
	uptodate = !req->errors;
	end_request (req, uptodate);
}

/*
 * PTB This takes the io spinlock itself (inside end_request_lock), so
 * call it with the io spinlock not held.
 *
 * Reduces req->cmd to its direction bit and ends the request,
 * reporting it up to date exactly when req->errors is zero.
 */
static void
nbd_end_request_lock (struct request *req)
{
	int uptodate;

	req->cmd = rq_data_dir(req);
	uptodate = !req->errors;
	end_request_lock (req, uptodate);
}

/*
 * PTB kernel data - 4KB worth
 * We need space for nda, nda1, .. nda15, ndb, ndb1, ..
 * The index is exactly the minor number.
 */
static int    nbd_blksizes[MAX_NBD * NBD_MAXCONN];
static int    nbd_sizes[MAX_NBD * NBD_MAXCONN];
static u64    nbd_bytesizes[MAX_NBD * NBD_MAXCONN];
static int    nbd_max_sectors[MAX_NBD * NBD_MAXCONN];
static struct gendisk nbd_gendisk;
static struct hd_struct nbd_hd_struct[MAX_NBD * NBD_MAXCONN];

/* 
 * PTB our data   - about 3KB
 * These are nda, ndb, ndc, ...
 * Divide the minor by NBD_MAXCONN to get this index.
 * Or shift it right by NBD_SHIFT.
 */
static struct nbd_device nbd_dev[MAX_NBD];

/* The currently registered remote ioctl handler; NULL when the hook is free. */
static struct nbd_ioctl *remote_ioctl;

/*
 * PTB this is the hook for the nbd_ioctl module.
 *
 * @x - handler to install
 *
 * Returns 0 when the hook was free and @x has been installed, and
 * -EINVAL when a handler is already registered (it is left in place).
 */
int
nbd_register_remote_ioctl (struct nbd_ioctl *x)
{
	if (remote_ioctl != NULL)
		return -EINVAL;

	remote_ioctl = x;
	return 0;
}

/*
 * Remove a handler installed with nbd_register_remote_ioctl.
 *
 * @x - the handler to remove
 *
 * Returns 0 and clears the hook when @x is the handler currently
 * registered; otherwise leaves the hook alone and returns -EINVAL.
 */
int
nbd_unregister_remote_ioctl (struct nbd_ioctl *x)
{
	if (remote_ioctl != x)
		return -EINVAL;

	remote_ioctl = NULL;
	return 0;
}

/*
 * PTB device parameters
 *
 * File-wide defaults. Most are exported as integer module parameters
 * in the MODULE_PARM list below, together with "major" and "paranoia"
 * which are declared near the top of the file.
 */
static int rahead = NBD_RAHEAD_DFLT;
static int sync_intvl = NBD_SYNC_INTVL;
static int merge_requests = NBD_MERGE_REQ_DFLT;
static int buf_sectors = NBD_MAX_SECTORS;
static int debug = 0;
static int show_errs = 1;
static int direct = 0;
static int plug = NBD_PLUG_DFLT;

static int buffer_writes = 0;

static int md5sum = 0;
/* nbd_commit sets a device's NBD_MD5SUM flag once its wrequests_5no counter exceeds this */
static int md5_on_threshold = 1000;
/* nbd_commit clears a device's NBD_MD5SUM flag once its wrequests_5co counter exceeds this */
static int md5_off_threshold = 10;
/* NOTE(review): "enable" has no MODULE_PARM entry below - confirm that is intended */
static int enable = 0;

#if defined(MODULE) 
MODULE_PARM (rahead, "i");
MODULE_PARM (sync_intvl, "i");
MODULE_PARM (merge_requests, "i");
MODULE_PARM (buf_sectors, "i");
MODULE_PARM (debug, "i");
MODULE_PARM (show_errs, "i");
MODULE_PARM (direct, "i");
MODULE_PARM (plug, "i");

MODULE_PARM (buffer_writes, "i");

MODULE_PARM (major, "i");
MODULE_PARM (paranoia, "i");
MODULE_PARM (md5sum, "i");
MODULE_PARM (md5_on_threshold, "i");
MODULE_PARM (md5_off_threshold, "i");
#endif

/*
 * PTB count the number of blocks in a request, rounding up, so the
 * result is an overestimate when the sector count is not an exact
 * multiple of the block size. It seems to happen. We guarantee to
 * return -ve only if the request is invalid.
 *
 * @req - request we want to count
 *
 * Returns -EINVAL for a NULL request, else the block count. The block
 * size is taken from the logblksize of the device that the request's
 * minor number maps to; the "- 9" converts it to a shift in sectors
 * (presumably 512-byte sectors - TODO confirm).
 */
inline long
nr_blks (struct request *req)
{
	struct nbd_device *lo;
	unsigned shift;
	unsigned per_blk;
	int nsect;
	int nblks;

	if (!req)
		return -EINVAL;

	lo = &nbd_dev[MINOR (req->rq_dev) >> NBD_SHIFT];

	shift = lo->logblksize - 9;
	per_blk = 1 << shift;

	/* round up to whole blocks */
	nsect = req->nr_sectors;
	nblks = (nsect + per_blk - 1) >> shift;

	return nblks;
}

/*
 * Return a temporary buffer containing the (1 or 2 char) device
 * letter for index i. The buffer is zero terminated.
 *
 * 0..25 give "a".."z". From 26 upwards the result is two letters, the
 * first being 'a' + i / 26 and the second 'a' + i % 26, so the
 * sequence continues "ba", "bb", ... ("aa".."az" are never produced)
 * and stays within the alphabet only for i below 26 * 26.
 *
 * The buffer is a single static one shared by all callers: the result
 * is overwritten by the next call with a different index. The last
 * index is remembered so that a repeated call returns straight away.
 *
 * @i number to be translated to x[y] alphabetical form.
 */
static char *
device_letter (int i)
{
	static char name[3];
	static int last_i = -1;

	if (i != last_i) {
		char *p = name;

		last_i = i;
		if (i >= 26)
			*p++ = 'a' + i / 26;
		*p++ = 'a' + (i < 26 ? i : i % 26);
		*p = 0;
	}
	return name;
}

/*
 *  PTB sync the device. Modes:
 *  @arg = 0:        calls sync_dev on the whole device
 *  @arg = nonzero:  calls fsync_dev on the whole device and then
 *                   invalidate_buffers
 *
 *  @lo - the device to sync. Nothing is done unless it is flagged
 *        NBD_INITIALISED and has an inode recorded.
 *
 *  The device acted on is always the whole-disk minor (partition 0)
 *  of the nbd device that lo->inode belongs to.
 *
 *  We can't call sync_dev outside a process context. I don't know why.
 *  Death results from a scheduled attempt.
 *
 *  NOTE(review): older comments asked for this to be called without
 *  the semaphore held; this function takes no lock of its own.
 */
static void
nbd_sync (struct nbd_device *lo, long arg)
{
	struct inode *inode = lo->inode;
	int nbd;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED) || !inode)
		return;

	nbd = MINOR (inode->i_rdev) >> NBD_SHIFT;

	switch (arg) {
	  case 0:
		sync_dev (MKDEV (major, nbd << NBD_SHIFT));
		break;
	  default:
		fsync_dev (MKDEV (major, nbd << NBD_SHIFT));
		invalidate_buffers (MKDEV (major, nbd << NBD_SHIFT));
		break;
	}
}

/* Sync the device in mode 0 of nbd_sync. */
static void
nbd_async_sync (struct nbd_device *lo)
{
	const long mode = 0;

	nbd_sync (lo, mode);
}
/* Sync the device in mode 1 of nbd_sync. */
static void
nbd_sync_sync (struct nbd_device *lo)
{
	const long mode = 1;

	nbd_sync (lo, mode);
}

/*
 *  Do it async if the device is flagged both NBD_ENABLED and
 *  NBD_VALIDATED, sync if either flag is missing.
 */
static void
nbd_maybe_sync_sync (struct nbd_device *lo)
{
	if (!(atomic_read (&lo->flags) & NBD_ENABLED)
	    || !(atomic_read (&lo->flags) & NBD_VALIDATED)) {
		nbd_sync_sync (lo);
		return;
	}
	nbd_async_sync (lo);
}

/*
 * PTB - put a request onto the head of a nbd device's queue
 *     - presumably having taken it off the kernel's queue first!
 *     - We cannot take the io_spinlock since we are called with it on!
 *     - and we cannot take the semaphore as we may not sleep!
 *
 *     @lo      = the device we are on (could we get it from the req?)
 *     @req     = the request we shift
 *     @irqsave = save and restore irqmask when taking our queue spinlock
 *
 *     Updates the per-direction accounting (blocks in, queue length
 *     and its high-water mark, request-size histogram, largest
 *     request seen), links the request in under lo->queue_lock and
 *     wakes anything sleeping on lo->wq.
 *
 *     An invalid request (nr_blks() < 0) is reported and not touched.
 */
static void
nbd_enqueue (struct nbd_device *lo, struct request *req, int irqsave)
{
	/* signed, so that the error return of nr_blks() can be seen */
	long req_blks = nr_blks (req);
	short islot = atomic_read (&lo->islot);
	int countq;
	int cmd;

	if (req_blks < 0) {
		NBD_ERROR ("(%d): invalid req %#x. Not touching!\n",
			   islot, (unsigned) req);
		return;
	}

	cmd = rq_data_dir(req);

	atomic_add (req_blks, &lo->requests_in[cmd]);
	atomic_inc (&lo->countq[cmd]);
	countq = atomic_read (&lo->countq[cmd]);

	if (atomic_read (&lo->maxq[cmd]) < countq)
		atomic_set (&lo->maxq[cmd], countq);

	/* NOTE(review): req_blks is not range checked against req_in[] */
	atomic_inc (&lo->req_in[cmd][req_blks]);

	if (atomic_read (&lo->maxreqblks) < req_blks)
		atomic_set (&lo->maxreqblks, req_blks);

	if (irqsave) {
		unsigned long flags;

		write_lock_irqsave (&lo->queue_lock, flags);
		list_add (&req->queue, &lo->queue);
		write_unlock_irqrestore (&lo->queue_lock, flags);
	}
	else {
		write_lock (&lo->queue_lock);
		list_add (&req->queue, &lo->queue);
		write_unlock (&lo->queue_lock);
	}

	wake_up_interruptible (&lo->wq);
}

/*
 * PTB - unlink a request from whatever list its queue field is on
 *       and drop the device's queued-request count for the
 *       request's direction
 *     - return 0 for success, -2 for a NULL request
 *
 *     The caller is expected to hold the queue semaphore and the
 *     queue spinlock; nothing is locked here.
 */
static int
nbd_remove (struct nbd_device *lo, struct request *req)
{
	if (!req)
		return -2;

	list_del (&req->queue);
	atomic_dec (&lo->countq[rq_data_dir(req)]);

	return 0;
}

/*
 *  PTB - Open the device
 */
int
nbd_open (struct inode *inode, struct file *file)
{
	int dev;
	struct nbd_device *lo;
	int nbd;
	int part;
	int islot;
	char *devnam;


	if (1 && !inode && file) {

		inode = file->f_dentry->d_inode;

	}
	if (!inode) {
		NBD_ERROR ("null inode.\n");
		return -EINVAL;
	}

	dev = MINOR (inode->i_rdev);
	nbd = dev >> NBD_SHIFT;
	part = dev - (nbd << NBD_SHIFT);
	islot = part - 1;


	if (nbd >= MAX_NBD) {
		NBD_ERROR ("too many (%d) whole devices open\n", nbd);
		return -ENODEV;
	}

	lo = &nbd_dev[nbd];
	devnam = lo->devnam;

	if (file) {
		if ((atomic_read (&lo->flags) & NBD_DIRECT)
		    && !(file->f_flags & O_DIRECT)) {
			int err = alloc_kiovec (1, &file->f_iobuf);
			if (err == 0) {

				file->f_flags |= O_DIRECT | O_NOFOLLOW;
				NBD_INFO
				 ("converted open of nd%s%d to O_DIRECT\n",
				  lo->devnam, part);
			}
		}

	}


	if (part == 0) {
		if (!lo->file || lo->file != file) {


			lo->file = file;
			atomic_set (&lo->frstj, jiffies);
		}
		if (!lo->inode || lo->inode != inode) {

			lo->inode = inode;
		}
	}

	atomic_inc (&lo->refcnt);


#ifdef MODULE
	MOD_INC_USE_COUNT;
#endif

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED)) {

		rwlock_init (&lo->queue_lock);
		init_MUTEX (&lo->req_sem);
		atomic_set_mask (NBD_INITIALISED, &lo->flags);

	}

	if (lo->aslot > 0 && !(atomic_read (&lo->flags) & NBD_VALIDATED)) {
		atomic_set_mask (NBD_VALIDATED, &lo->flags);
		NBD_INFO ("partition check on device %s\n", lo->devnam);
		if (nbd_gendisk.nr_real <= nbd)
			nbd_gendisk.nr_real = nbd + 1;
		grok_partitions (&nbd_gendisk, nbd, NBD_MAXCONN,
				 lo->sectors);
	}

	return 0;
}

/*
 * PTB - complete a transaction irrefutably by taking it out of the
 *     - slot pending position it is in, and reporting end_request to kernel
 *
 *       We are called without the spinlock held, and without the io
 *       lock held, because our call to end request will take the io
 *       lock momentarily.
 *
 *     @slot - the slot the request is pending on
 *     @req  - the request being completed
 *
 *     After ending the request the slot and device counters are
 *     updated: an errored request (req->errors < 0) is booked as
 *     errored, anything else as done. For successful writes the md5
 *     statistics are updated from the slot's NBD_SLOT_MD5SUM and
 *     NBD_SLOT_MD5_OK flags, the device's NBD_MD5SUM flag is switched
 *     on or off when the matching threshold is crossed, and the two
 *     slot flags are cleared.
 *
 *     An invalid request (nr_blks() < 0) is reported and not touched.
 */
void
nbd_commit (struct nbd_slot *slot, struct request *req)
{
	struct nbd_device *lo = slot->lo;
	/* signed, so that the error return of nr_blks() can be seen */
	long req_blks = nr_blks (req);
	int errors, cmd;

	if (req_blks < 0) {
		NBD_ERROR
		 ("corrupted req %#x. Not touching with bargepole.\n",
		  (unsigned) req);
		return;
	}

	/*
	 * Take everything we need out of the request now: once it has
	 * been ended it is no longer ours to look at.
	 */
	errors = req->errors;
	cmd = rq_data_dir(req);

	list_del (&req->queue);

	nbd_end_request_lock (req);

	slot->req_age = 0;
	slot->req -= req_blks;

	atomic_sub (req_blks, &lo->requests_req[cmd]);
	if (errors < 0) {
		atomic_add (req_blks, &lo->requests_err);
		slot->err += req_blks;
		return;
	}

	atomic_add (req_blks, &lo->requests_out[cmd]);
	slot->out += req_blks;

	/* the md5 accounting below concerns writes only */
	if (cmd != WRITE)
		return;

	switch (slot->flags & (NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK)) {

	  case NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK:

		atomic_add (req_blks, &lo->wrequests_5to);
		atomic_add (req_blks, &lo->wrequests_5so);

		atomic_set (&lo->wrequests_5co, 0);
		break;

	  case NBD_SLOT_MD5SUM:

		atomic_add (req_blks, &lo->wrequests_5to);
		atomic_add (req_blks, &lo->wrequests_5wo);
		atomic_inc (&lo->wrequests_5co);
		if (atomic_read (&lo->wrequests_5co) > md5_off_threshold) {
			atomic_set (&lo->wrequests_5co, 0);

			atomic_clear_mask (NBD_MD5SUM, &lo->flags);
		}
		break;

	  case NBD_SLOT_MD5_OK:

		atomic_add (req_blks, &lo->wrequests_5to);
		atomic_add (req_blks, &lo->wrequests_5eo);
		atomic_inc (&lo->wrequests_5co);
		if (atomic_read (&lo->wrequests_5co) > md5_off_threshold) {
			atomic_set (&lo->wrequests_5co, 0);

			atomic_clear_mask (NBD_MD5SUM, &lo->flags);
		}
		break;

	  default:
	  case 0:

		atomic_inc (&lo->wrequests_5no);
		if (atomic_read (&lo->wrequests_5no) > md5_on_threshold) {
			atomic_set (&lo->wrequests_5no, 0);

			atomic_set_mask (NBD_MD5SUM, &lo->flags);
		}
		break;
	}

	slot->flags &= ~(NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);

}

/*
 * PTB - error out a transaction irrefutably by taking it out of the
 *     - slot pending position it is in, and reporting end_request to kernel
 *
 *     We must be called without the io spinlock held, as we take it
 *
 *     @slot - the slot the request is pending on
 *     @req  - the request to fail
 *
 *     The request's error count is bumped before it is ended, so it
 *     is reported as not up to date. The slot and device counters are
 *     then adjusted and the blocks booked as errored.
 *
 *     An invalid request (nr_blks() < 0) is reported and not touched.
 */
void
nbd_error (struct nbd_slot *slot, struct request *req)
{
	struct nbd_device *lo = slot->lo;
	/* signed, so that the error return of nr_blks() can be seen */
	long req_blks = nr_blks (req);
	int cmd;

	if (req_blks < 0) {
		NBD_ERROR ("passed illegal request %#x\n", (unsigned) req);
		return;
	}

	/* read the direction now: the request is gone once it is ended */
	cmd = rq_data_dir(req);

	req->errors++;

	list_del (&req->queue);

	NBD_ALERT ("error out req %x from slot %d!\n", (unsigned) req,
		   slot->i);

	nbd_end_request_lock (req);

	atomic_sub (req_blks, &lo->requests_req[cmd]);

	slot->in -= req_blks;
	slot->req -= req_blks;

	slot->req_age = 0;
	slot->err += req_blks;
	atomic_add (req_blks, &lo->requests_err);
}

/*
 * Take a request out of a slot. This must not hold the i/o lock on
 * entry as we may take it in order to kill the request (end_request)
 * or we may take the queue lock in order to play with the devices
 * queue.
 *
 * @slot - the slot the request is pending on
 * @req  - the request to take back
 *
 * If the device is flagged NBD_SHOW_ERRS the request is errored out
 * with nbd_error instead. Otherwise it is unlinked from the slot, the
 * slot counters are wound back, and the request is put back on the
 * device queue at a position chosen by comparing sequence numbers.
 *
 * An invalid request (nr_blks() < 0) is reported and not touched.
 */
static void
nbd_rollback (struct nbd_slot *slot, struct request *req)
{

	struct nbd_device *lo = slot->lo;
	/* signed, so that the error return of nr_blks() can be seen */
	long req_blks;
	int seqno;
	unsigned long flags;
	struct list_head *pos;
	struct request *xreq;

	if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {
		nbd_error (slot, req);
		return;
	}

	req_blks = nr_blks (req);

	if (req_blks < 0) {
		NBD_ERROR ("passed illegal request %#x\n", (unsigned) req);
		return;
	}

	/* the sequence number of the request we are putting back */
	seqno = rq_seqno(req);

	list_del (&req->queue);

	NBD_ALERT ("rollback req %x from slot %d!\n", (unsigned) req,
		   slot->i);

	slot->in -= req_blks;
	slot->req -= req_blks;

	write_lock_irqsave (&lo->queue_lock, flags);

	/*
	 * Walk the device queue backwards from its tail and stop at the
	 * first entry with a larger sequence number than ours. If there
	 * is none, pos ends up as the list head itself.
	 *
	 * NOTE(review): the request is inserted immediately before pos.
	 * Confirm this gives the intended seqno order for the direction
	 * in which the queue is consumed.
	 */
	list_for_each_prev (pos, &lo->queue) {
		xreq = list_entry (pos, struct request, queue);
		if (rq_seqno(xreq) > seqno) {
			break;
		}
	}
	list_add_tail (&req->queue, pos);

	write_unlock_irqrestore (&lo->queue_lock, flags);

}

/*
 * PTB - undo transactions by taking them out of the slot pending
 *     - position and replacing them on the generic device queue
 *     - NB we do not hold the io request lock or queue sem when
 *     -    calling this as we take it internally
 *
 *     Requests are taken from the front of the slot queue one at a
 *     time and handed to nbd_rollback. As a guard against a queue
 *     that never empties, at most 1001 requests are processed.
 */
static void
nbd_rollback_all (struct nbd_slot *slot)
{
	int done;

	for (done = 0; done <= 1000 && !list_empty (&slot->queue); done++) {
		struct list_head *first =
		 ((struct list_head *) &slot->queue)->next;

		nbd_rollback (slot,
			      list_entry (first, struct request, queue));
	}
}

/*     PTB error out all the requests on a slot
 *
 *     We must be called without the io spinlock held, as we take it in
 *     nbd_error().
 *
 *     Requests are taken from the front of the slot queue one at a
 *     time and handed to nbd_error. As a guard against a queue that
 *     never empties, at most 1001 requests are processed.
 */
static void
nbd_error_all (struct nbd_slot *slot)
{
	int done;

	for (done = 0; done <= 1000 && !list_empty (&slot->queue); done++) {
		struct list_head *first =
		 ((struct list_head *) &slot->queue)->next;

		nbd_error (slot, list_entry (first, struct request, queue));
	}
}

/*
 * PTB - let a request onto the slot pending position
 *     - Can be called without the spinlock and doesn't take the
 *       spinlock as we only deal with our unique slot. If there
 *       were more than one client per slot this would be a problem
 *       but there aren't so it isn't.
 *
 *     @slot - the slot taking the request
 *     @req  - the request being handed to it
 *
 *     Books the request's blocks as "with a slot" on the device,
 *     links the request onto the head of the slot queue, stamps the
 *     slot with the current jiffies and bumps the slot counters.
 *
 *     An invalid request (nr_blks() < 0) is silently ignored.
 */
void
nbd_accept (struct nbd_slot *slot, struct request *req)
{

	struct nbd_device *lo = slot->lo;
	/* signed, so that the error return of nr_blks() can be seen */
	long req_blks = nr_blks (req);
	int cmd;

	if (req_blks < 0)
		return;

	cmd = rq_data_dir(req);
	atomic_add (req_blks, &lo->requests_req[cmd]);

	list_add (&req->queue, &slot->queue);

	slot->req_age = jiffies;
	slot->in += req_blks;
	slot->req += req_blks;
}

/*
 * PTB - read from userspace to a request buffer. Do it piecewise
 *     - to cope with clustered requests.
 *     - return number of bytes read
 *
 *     @req  - request whose chain of buffer heads receives the data
 *     @user - start of the user space source buffer
 *     @len  - number of bytes expected
 *
 *     Each buffer head is filled whole, in chain order, until at
 *     least @len bytes have been copied or the chain ends. The return
 *     value differs from @len, and an alert is logged, when the
 *     receiving req does not have the right amount of buffer space or
 *     when the copy from user space faults part way. In the fault
 *     case the bytes copied before the fault are counted and the copy
 *     stops there. A negative @len copies nothing.
 */
static int
copy_from_user_to_req (struct request *req, char *user, int len)
{
	unsigned size = 0;
	/* compare in unsigned only once a negative length is ruled out */
	unsigned want = len > 0 ? (unsigned) len : 0;
	struct buffer_head *bh;

	for (bh = req->bh; bh && size < want; bh = bh->b_reqnext) {
		unsigned current_size = bh->b_size;
		unsigned long uncopied;

		/* copy_from_user returns the number of bytes NOT copied */
		uncopied = copy_from_user (bh->b_data, user + size,
					   current_size);
		size += current_size - uncopied;
		if (uncopied)
			break;
	}

	if (len < 0 || size != want) {
		NBD_ALERT
		 ("requested %d and could only read %d bytes to req #%x\n",
		  len, size, (unsigned) req);
		NBD_ALERT ("request %#x has first bh %#x size %ld\n",
			   (unsigned) req, (unsigned) req->bh,
			   (long) (req->bh ? req->bh->b_size : 0));
		NBD_ALERT
		 ("request %#x wanted to read user space buffer  %#x\n",
		  (unsigned) req, (unsigned) user);
	}
	return size;
}

/*
 * PTB - auxiliary function
 */
static int
rq_type (struct request *req)
{
	/* keep only the low NBD_CMDBITS bits of cmd */
	const int type_mask = (1 << NBD_CMDBITS) - 1;

	return req->cmd & type_mask;
}

/*
 * PTB - andres' kernel half of the user-space network handshake, used
 *     - to complete a transaction.
 *     - return 0 for success and -ve for fail.
 */
int
nbd_ack (struct nbd_slot *slot, char *buffer)
{
	struct nbd_reply reply;
	struct request *req, *xreq;
	int result = 0;

	void *user;
	unsigned long req_blks = 1;
	struct nbd_device *lo = slot->lo;
	unsigned buflen = 0;
	unsigned reqlen;


	current->flags &= ~PF_MEMALLOC;

	if (lo->magic != NBD_DEV_MAGIC) {
		if (slot->nerrs++ < 3) {
			NBD_ALERT
			 ("nbd_dev[] corrupted: Not enough magic\n");
		}
		result = -ENODEV;
		goto error_out;

	}

	if (!(slot->flags & NBD_SLOT_BUFFERED)) {

		return -ENOMEM;
	}
	if (slot->buffer != buffer) {
		if (slot->nerrs++ < 3)
			NBD_ALERT ("(%d): user buffer changed\n", slot->i);

		return -EINVAL;
	}

	atomic_inc (&lo->cthreads);
	slot->flags |= NBD_SLOT_RUNNING;
	slot->cli_age = jiffies;

	user = slot->buffer;
	copy_from_user ((char *) &reply, (char *) user,
			sizeof (struct nbd_reply));

	buflen += NBD_BUFFER_DATA_OFFSET;

	memcpy ((char *) &req, reply.handle, sizeof (req));

	if (reply.flags & NBD_REPLY_IOCTL) ;

	if (!req) {
		if (slot->nerrs++ < 3)
			NBD_ALERT
			 ("(%d): null handle in reply from userspace\n",
			  slot->i);
		NBD_ALERT ("exited malformed or invalid reply\n");

		return -EINVAL;
	}

	do {

		struct list_head *pos;

		int count = 0;
		xreq = NULL;
		list_for_each (pos, &slot->queue) {
			xreq = list_entry (pos, struct request, queue);
			if (reply.flags & NBD_REPLY_IOCTL) ;

			if (count++ > 1000)
				break;
			if (xreq == req)

				break;
		}
	} while (0);

	if (xreq != req) {
		if (xreq && slot->nerrs++ < 3) {
			NBD_ALERT
			 ("fatal: Bad handle (given) %x != %x (found)!\n",
			  (unsigned) req, (unsigned) xreq);
		}
		atomic_dec (&lo->cthreads);
		slot->flags &= ~NBD_SLOT_RUNNING;
		NBD_ALERT
		 ("ignoring ack of req %x which slot does not have\n",
		  (unsigned) req);

		return 0;
	}

	if (rq_type (req) == IOCTL) {
	}

	if (reply.magic != NBD_REPLY_MAGIC) {

		if (slot->nerrs++ < 3) {
			NBD_ALERT ("Not enough reply magic in "
				   __FUNCTION__ "\n");
		}

		return -EAGAIN;
	}

	if (reply.error > 0 || req->errors > 0) {

		if (slot->nerrs++ < 3) {
			NBD_ALERT ("exited with reply error\n");
		}

		goto error_out;
	}

	req_blks = nr_blks (req);

	reqlen = req->nr_sectors;
	reqlen <<= 9;

	if (rq_type (req) != IOCTL && req->nr_sectors > lo->max_sectors) {
		if (slot->nerrs++ < 3) {
			NBD_ALERT
			 ("req %x nr_sectors %ld bigger than buf %d, cutting it!\n",
			  (unsigned) req, req->nr_sectors,
			  lo->max_sectors);
		}
		reqlen = lo->max_sectors;
		reqlen <<= 9;
	}

	switch (rq_type (req)) {
		int size;

	  case READ:

		size =
		 copy_from_user_to_req (req, ((char *) user) + buflen,
					reqlen);
		if (size < reqlen) {
			NBD_ALERT
			 ("(%d): copy %dB from user to req %#x failed (%d)\n",
			  slot->i, reqlen, (unsigned) req, size);

			goto error_out;

		}

		buflen += size;

		break;
	  case WRITE:

		switch (reply.
			flags & (NBD_REPLY_MD5SUM | NBD_REPLY_MD5_OK)) {

		  case NBD_REPLY_MD5SUM | NBD_REPLY_MD5_OK:

			slot->flags |= (NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);
			break;

		  case NBD_REPLY_MD5SUM:

			slot->flags |= NBD_SLOT_MD5SUM;
			slot->flags &= ~NBD_SLOT_MD5_OK;
			break;

		  case NBD_REPLY_MD5_OK:

			slot->flags &= ~NBD_SLOT_MD5SUM;
			slot->flags |= NBD_SLOT_MD5_OK;
			break;

		  default:
		  case 0:

			slot->flags &=
			 ~(NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);
			break;
		}

		break;

	  case IOCTL:

		if (!(reply.flags & NBD_REPLY_IOCTL))
			NBD_ALERT
			 ("ioctl reply to req %#x has no ioctl flag set\n",
			  (unsigned) req);

		do {

			unsigned cmd = (unsigned) req->special;
			char *arg = req->buffer;

			if (cmd == -1) {
				result = -EINVAL;
				goto error_out;
			}

			if (_IOC_DIR (cmd) & _IOC_READ) {

				int sectors = req->nr_sectors;

				if (sectors > 0) {

					int size = sectors << 9;
					copy_from_user (arg,
							(char *) user +
							buflen, size);

					buflen += size;
				}
			}
		} while (0);
		break;
	}
	goto success;

      success:
	slot->nerrs = 0;

	nbd_commit (slot, req);
	atomic_dec (&lo->cthreads);
	slot->flags &= ~NBD_SLOT_RUNNING;
	if (reply.flags & NBD_REPLY_IOCTL) ;
	return 0;

      error_out:

	if (result != -EAGAIN && result != 0) {
		req->errors += req_blks;
		slot->err += req_blks;
	}
	result = result < 0 ? result : -ENODEV;
	atomic_dec (&lo->cthreads);
	slot->flags &= ~NBD_SLOT_RUNNING;
	if (reply.flags & NBD_REPLY_IOCTL) ;
	return result;
}

/*
 * PTB - write to userspace from a request buffer. Do it piecewise
 *     - to cope with clustered requests.
 *     - return number of bytes written
 */
static int
copy_to_user_from_req (struct request *req, char *user, int len)
{
	/*
	 * Copy the data held in the request's buffer_head chain (req->bh,
	 * linked through b_reqnext) out to the userspace address "user",
	 * one buffer head at a time, until len bytes have been copied.
	 *
	 * Returns the number of bytes actually copied. The result is
	 * short when:
	 *  - the chain runs out before len bytes were produced; the
	 *    request's nr_sectors is then trimmed to what was available;
	 *  - copy_to_user reports bytes it could not write; those bytes
	 *    are not counted and the walk stops.
	 */

	unsigned size = 0;
	struct buffer_head *bh = req->bh;

	while (size < len) {

		unsigned current_size;
		unsigned long left;
		char *buffer;

		if (!bh) {

			NBD_ALERT
			 ("req %#x wanted to offer %d, could only give %d bytes\n",
			  (unsigned) req, len, size);
			NBD_ALERT
			 ("request %#x has first bh %#x size %ld\n",
			  (unsigned) req, (unsigned) req->bh,
			  (long) (req->bh ? req->bh->b_size : 0));
			NBD_ALERT
			 ("request %#x wanted to write to user space buffer %#x\n",
			  (unsigned) req, (unsigned) user);
			NBD_ALERT ("request %#x is aimed at sector %d\n",
				   (unsigned) req, (unsigned) req->sector);
			NBD_ALERT ("request %#x has %d segments,\n",
				   (unsigned) req,
				   (unsigned) req->nr_segments);
			NBD_ALERT ("request %#x has major %d minor %d\n",
				   (unsigned) req, MAJOR (req->rq_dev),
				   MINOR (req->rq_dev));
			NBD_ALERT
			 ("request %#x has %d sectors, first set of %d,\n",
			  (unsigned) req, (unsigned) req->nr_sectors,
			  (unsigned) req->current_nr_sectors);

			NBD_ALERT
			 ("request %#x is being patched up. Kernel lost %ld sectors\n",
			  (unsigned) req, req->nr_sectors - (size >> 9));
			/* shrink the request to the data we really have */
			req->nr_sectors = size >> 9;
			/* bh is NULL here, so this only matches a request
			 * that had no buffer heads at all */
			if (bh == req->bh) {
				req->current_nr_sectors = req->nr_sectors;
			}
			break;
		}

		current_size = bh->b_size;
		buffer = bh->b_data;

		/* copy_to_user returns the number of bytes NOT copied */
		left = copy_to_user (user + size, buffer, current_size);

		size += current_size - left;
		if (left)
			break;

		bh = bh->b_reqnext;
	}
	return size;
}

static void
nbd_speed (struct nbd_speed *spd)
{

	struct nbd_device *lo = spd->lo;

	int lastjiffy = atomic_read (&spd->jiffy);

	int djiffy = jiffies - lastjiffy;

	int distance = spd->getdistance ? spd->getdistance (lo) : 0;

	int lastdist = atomic_read (&spd->distance);

	int ddistance = distance - lastdist;

	if (djiffy > HZ) {

		int tjiffy = jiffies - atomic_read (&spd->lo->frstj);

		int speedmax = atomic_read (&spd->speedmax);

		int lastspeed = atomic_read (&spd->speed);

		int speed = djiffy ? (ddistance * HZ) / djiffy : 0;

		int speedsmoothed =
		 (djiffy * speed + HZ * lastspeed) / (djiffy + HZ);

		int speedav = tjiffy ? (distance * HZ) / tjiffy : 0;

		int speedhi =
		 (speedav > speedsmoothed) ? speedav : speedsmoothed;

		atomic_set (&spd->speed, speedsmoothed);
		if (speedhi > speedmax)
			atomic_set (&spd->speedmax, speedhi);
		atomic_set (&spd->distance, distance);
		atomic_set (&spd->speedav, speedav);
		atomic_set (&spd->jiffy, jiffies);
	}
}

static void
nbd_set_speed (struct nbd_device *lo)
{
	/* refresh the write, read and total meters, in that order */
	struct nbd_speed *meters[] = {
		&lo->wspeed,
		&lo->rspeed,
		&lo->tspeed,
	};
	unsigned k;

	for (k = 0; k < sizeof (meters) / sizeof (meters[0]); k++)
		nbd_speed (meters[k]);
}
static int
gettdistance (struct nbd_device *lo)
{
	/* total distance: sum of the WRITE and READ requests_in counters */
	int written = atomic_read (&lo->requests_in[WRITE]);
	int read = atomic_read (&lo->requests_in[READ]);

	return written + read;
}
static int
getrdistance (struct nbd_device *lo)
{
	/* read distance: the READ requests_in counter */
	int read = atomic_read (&lo->requests_in[READ]);

	return read;
}
static int
getwdistance (struct nbd_device *lo)
{
	/* write distance: the WRITE requests_in counter */
	int written = atomic_read (&lo->requests_in[WRITE]);

	return written;
}

/*
 * PTB auxiliary functions for manipulating the sequence number 
 */
static int
rq_seqno (struct request *req)
{
	/* the sequence number is stored above the NBD_CMDBITS type bits */
	int seqno = req->cmd >> NBD_CMDBITS;

	return seqno;
}
static void
rq_set_seqno (struct request *req, int val)
{
	/* keep the low NBD_CMDBITS type bits, replace everything above */
	int type_bits = req->cmd & ((1 << NBD_CMDBITS) - 1);

	req->cmd = type_bits | (val << NBD_CMDBITS);
}

/*
 * PTB - andres' kernel half of the userspace networking. This part
 *     - initiates the transaction by taking a request off the generic
 *     - device queue and placing it in the slots pending position.
 *     - I believe we return 0 for success and -ve for fail.
 *     - timeo is the number of jiffies we are prepared to wait
 */
int
nbd_get_req (struct nbd_slot *slot, char *buffer)
{
	/*
	 * Start a transaction for this slot.
	 *
	 * Waits (up to lo->req_timeo seconds) for a request on the device
	 * queue, removes it, writes a struct nbd_request describing it to
	 * the slot's registered user buffer (followed by the data for
	 * WRITE and ioctl requests), and finally parks the request on the
	 * slot with nbd_accept().
	 *
	 * slot   - the slot asking for work
	 * buffer - user buffer; must be the one registered on the slot
	 *
	 * Returns 0 on success and a negative errno on failure (-ENODEV
	 * when no more specific code was set).
	 *
	 * NOTE(review): once nbd_remove() has taken the request off the
	 * device queue, the access_ok failure path below returns without
	 * ending or re-queueing it - confirm whether that is intended.
	 */
	struct nbd_request request;
	struct request *req;
	int result = 0;
	static atomic_t count;
	unsigned start_time = jiffies;
	struct nbd_device *lo = slot->lo;
	unsigned timeout = lo->req_timeo * HZ;
	int islot = slot->i;

	extern struct timezone sys_tz;
	unsigned long flags;


	atomic_inc (&lo->cthreads);
	slot->flags |= NBD_SLOT_RUNNING;
	slot->cli_age = jiffies;

	if (!(slot->flags & NBD_SLOT_BUFFERED)) {
		goto error_out;
	}
	if (slot->buffer != buffer) {
		goto error_out;
	}

	atomic_set (&lo->islot, islot);

	/* the slot must not already hold a pending request */
	if (!list_empty (&slot->queue)) {
		goto error_out;

	}
	if (!slot->file) {
		result = -EBADF;
		goto error_out;
	}
	if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
		result = -ENODEV;
		goto error_out;
	}

	/* cwaiters / NBD_SLOT_WAITING are raised while we wait for the
	 * queue lock and again while we sleep */
	atomic_inc (&lo->cwaiters);
	slot->flags |= NBD_SLOT_WAITING;

	read_lock_irqsave (&lo->queue_lock, flags);
	atomic_dec (&lo->cwaiters);
	slot->flags &= ~NBD_SLOT_WAITING;

	while (slot->file && list_empty (&lo->queue)) {

		read_unlock_irqrestore (&lo->queue_lock, flags);


		atomic_inc (&lo->cwaiters);
		slot->flags |= NBD_SLOT_WAITING;

		interruptible_sleep_on_timeout (&lo->wq,
						start_time + timeout -
						jiffies);

		slot->flags &= ~NBD_SLOT_WAITING;
		atomic_dec (&lo->cwaiters);
		atomic_inc (&count);

		atomic_inc (&lo->cwaiters);
		slot->flags |= NBD_SLOT_WAITING;

		read_lock_irqsave (&lo->queue_lock, flags);
		atomic_dec (&lo->cwaiters);
		slot->flags &= ~NBD_SLOT_WAITING;

		if (jiffies >= start_time + timeout
		    && list_empty (&lo->queue)) {

			/* timed out with nothing to do */
			result = -ETIME;

			read_unlock_irqrestore (&lo->queue_lock, flags);

			if (1) {
				int siz =
				 lo->blksize + sizeof (struct nbd_request);

				/* if the user buffer is no longer writable,
				 * clear the slot's socket */
				if (!access_ok
				    (VERIFY_WRITE, slot->buffer, siz)) {
					static int nbd_clr_sock (struct
								 nbd_slot
								 *slot);
					result = -EINVAL;

					nbd_clr_sock (slot);
					goto error_out;
				}
			}

			goto error_out;
		}

	}

	/* re-check state now that the queue has something (or the slot
	 * lost its file) */
	if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
		read_unlock_irqrestore (&lo->queue_lock, flags);
		result = -ENODEV;
		goto error_out;
	}
	if (!slot->file) {
		read_unlock_irqrestore (&lo->queue_lock, flags);
		result = -EBADF;
		goto error_out;
	}

	if (!list_empty (&slot->queue)) {
		read_unlock_irqrestore (&lo->queue_lock, flags);
		result = -EINVAL;
		goto error_out;

	}

	/* swap the read lock for the write lock; the queue may have been
	 * emptied in between, so it is tested again */
	read_unlock_irqrestore (&lo->queue_lock, flags);

	write_lock_irqsave (&lo->queue_lock, flags);

	if (list_empty (&lo->queue)) {
		write_unlock_irqrestore (&lo->queue_lock, flags);

		result = -EINVAL;
		goto error_out;

	}

	/* requests are taken from the ->prev end of the device queue */
	req =
	 (list_empty (&lo->queue) ? NULL :
	  list_entry (((struct list_head *) &lo->queue)->prev,
		      struct request, queue));

	if (!req) {
		write_unlock_irqrestore (&lo->queue_lock, flags);
		result = -EINVAL;
		goto error_out;
	}

	result = nbd_remove (lo, req);


	/* range check is compiled out (0 &&) */
	if (0 && req->sector + req->nr_sectors > lo->sectors) {

		write_unlock_irqrestore (&lo->queue_lock, flags);
		result = -EINVAL;
		nbd_error (slot, req);
		goto error_out;
	}

	write_unlock_irqrestore (&lo->queue_lock, flags);

	/* build the request header handed to userspace */
	request.magic = NBD_REQUEST_MAGIC;
	request.flags = 0;

	switch (rq_type (req)) {

	  case IOCTL:


		request.type = IOCTL;
		request.len = 0;

		if (1) {

			/* ioctl number is in req->special, its argument
			 * buffer in req->buffer */
			unsigned cmd = (unsigned) req->special;
			char *arg = req->buffer;

			if (_IOC_DIR (cmd) & _IOC_READ) {

				size_t size = req->nr_sectors << 9;
				if (size <= 0) {
					request.len = 0;
				}
				else {

					request.len = size;
				}
			}

			/* cmd in the high 32 bits, arg in the low bits */
			request.from = (((u64) cmd) << 32)

			 | ((u64) (unsigned long) arg);

		}
		break;

	  case READ:
	  case WRITE:


		request.type = rq_data_dir(req);
		/* sector offset plus the start_sect of this minor,
		 * then converted from sectors to bytes */
		request.from = req->sector
		 + nbd_hd_struct[MINOR (req->rq_dev)].start_sect;

		request.from <<= 9;
		request.len = req->nr_sectors;
		request.len <<= 9;
		if (atomic_read (&lo->flags) & NBD_MD5SUM) {

			request.flags |= NBD_REQUEST_MD5SUM;

		}
		break;

	  default:
		/* NOTE(review): request.type/from/len stay unset here */
		NBD_ALERT ("received unknown req %#x type %d\n",
			   (unsigned) req, req->cmd);
		break;
	}

	request.seqno = rq_seqno (req);


	do_gettimeofday (&request.time);
	request.zone = sys_tz;

	/* the handle is the request pointer itself; nbd_ack reads it
	 * back from the reply */
	memcpy (request.handle, (char *) &req, sizeof (req));



	if (!access_ok
	    (VERIFY_WRITE, slot->buffer, sizeof (struct nbd_request))) {
		result = -EINVAL;
		goto error_out;

	}


	copy_to_user (slot->buffer, (char *) &request, sizeof (request));

	switch (request.type) {
		int err;
	  case READ:
		/* nothing but the header goes out for a read */
		break;

	  case IOCTL:
		if (request.len > 0) {
			char *arg =
			 (char *) slot->buffer + NBD_BUFFER_DATA_OFFSET;
			copy_to_user (arg, req->buffer, request.len);
		}
		break;

	  case WRITE:
		if (req->nr_sectors << 9 != request.len) {
			NBD_ALERT
			 ("request and buffer lens %ld and %d differ\n",
			  req->nr_sectors << 9, request.len);
		}

		/* err holds the number of bytes actually copied */
		err = copy_to_user_from_req (req,
					     (char *) slot->buffer +
					     NBD_BUFFER_DATA_OFFSET,
					     request.len);
		if (err < request.len) {

			/* report the short byte count (err) */
			NBD_ERROR
			 ("req %#x only offered %d bytes of %d for copy to user\n",
			  (unsigned) req, err, request.len);

			if (req) {
				if (req->errors == 0) {
					req->errors++;
					nbd_end_request_lock (req);
				}
			}
			goto error_out;
		}
		break;
	}


	nbd_accept (slot, req);

	/* make sure PF_MEMALLOC is set on the calling task; nbd_ack
	 * clears it again */
	if (!(current->flags & PF_MEMALLOC)) {
		current->flags |= PF_MEMALLOC;

	}

	atomic_dec (&lo->cthreads);
	slot->flags &= ~NBD_SLOT_RUNNING;

	return 0;

      error_out:

	atomic_dec (&lo->cthreads);
	slot->flags &= ~NBD_SLOT_RUNNING;
	/* never report success from the error path */
	result = result < 0 ? result : -ENODEV;

	return result;
}

/*
 * PTB error out the pending requests on the kernel queue
 * We have to be called WITHOUT the io request lock held.
 * We sleep in between clearing each request, for "safety".
 */
static int
nbd_clr_kernel_queue ()
{
	/*
	 * Take requests off the kernel queue one at a time and end each
	 * with an error. io_request_lock is dropped around the
	 * schedule() / nbd_end_request_lock() pair and re-taken
	 * afterwards. At most 1000 requests are processed per call.
	 * Returns the loop counter, which is also what gets logged.
	 */
	unsigned long flags;
	int count = 0;

	spin_lock_irqsave (&io_request_lock, flags);

	for (;;) {
		struct request *req;

		if (QUEUE_EMPTY)
			break;
		/* counter is bumped even on the pass that hits the limit */
		if (count++ >= 1000)
			break;

		req = CURRENT;
		if (!req) {
			spin_unlock_irqrestore (&io_request_lock, flags);
			NBD_ALERT
			 ("impossible! kernel queue empty after tested nonemty!\n");
			NBD_ALERT ("removed %d requests\n", count);
			return count;
		}

		blkdev_dequeue_request (req);
		spin_unlock_irqrestore (&io_request_lock, flags);

		req->errors++;
		schedule ();
		nbd_end_request_lock (req);

		spin_lock_irqsave (&io_request_lock, flags);
	}

	spin_unlock_irqrestore (&io_request_lock, flags);

	NBD_ALERT ("removed %d requests\n", count);
	return count;
}

/*
 * PTB error out the pending requests on the nbd queue and kernel queue
 * Note that we take the queue spinlock for this
 */
int
nbd_clr_queue (struct nbd_device *lo)
{
	/*
	 * Empty the device's own queue, marking every request as failed.
	 * The queue write lock is taken once per request and released
	 * before the request is ended. Returns the number of requests
	 * taken off the queue.
	 */
	unsigned long flags;
	int cleared = 0;

	for (;;) {
		struct request *req;
		unsigned long blks;
		int foreign = 0;

		write_lock_irqsave (&lo->queue_lock, flags);

		if (list_empty (&lo->queue)) {
			write_unlock_irqrestore (&lo->queue_lock, flags);
			break;
		}

		/* queue is known non-empty: take the ->next entry */
		req = list_entry (((struct list_head *) &lo->queue)->next,
				  struct request, queue);

		blks = nr_blks (req);

		/* a request whose minor maps to another device is still
		 * unqueued below, but is not ended */
		if (lo != &nbd_dev[MINOR (req->rq_dev) >> NBD_SHIFT]) {
			NBD_ALERT ("request corrupted when clearing!\n");
			foreign = 1;
		}

		if (lo->magic != NBD_DEV_MAGIC) {
			NBD_ERROR ("Not enough magic when clearing!\n");
			write_unlock_irqrestore (&lo->queue_lock, flags);
			break;
		}

		req->errors += blks + 1;
		atomic_add (blks, &lo->requests_err);

		nbd_remove (lo, req);

		write_unlock_irqrestore (&lo->queue_lock, flags);
		cleared++;

		if (foreign)
			continue;

		nbd_end_request_lock (req);
	}

	NBD_ALERT ("unqueued %d reqs\n", cleared);
	return cleared;
}

/*
 * We always wait for result of write, for now. It would be nice to make it
 * optional in future
 * if (rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) 
 *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
 */

/* optional hook used by do_nbd_request for buffered writes */
static int
 (*buffered_write) (struct nbd_device *, struct request * req);

/*
 * Install f as the buffered-write hook. Only one hook may be
 * registered at a time: returns 0 on success, -EINVAL if a hook is
 * already installed.
 */
int
nbd_register_bufferwr (int (*f) (struct nbd_device *, struct request *))
{
	if (buffered_write != NULL)
		return -EINVAL;

	buffered_write = f;
	return 0;
}

int
nbd_unregister_bufferwr (int (*f) (struct nbd_device *, struct request *))
{
	/*
	 * Remove the buffered-write hook, but only if f is the hook
	 * currently installed. Returns 0 on success, -EINVAL otherwise.
	 */
	if (buffered_write != f)
		return -EINVAL;

	buffered_write = NULL;
	return 0;
}

/*
 * Request function for the block queue: drains the kernel queue.
 *
 * Each request is validated against the state of the device its minor
 * maps to. Valid requests are dequeued and either handed to the
 * registered buffered_write hook (WRITEs only, when NBD_BUFFERWR is
 * set) or put on the device's own queue with nbd_enqueue(). Invalid
 * requests are dequeued and ended with an error.
 *
 * lo->kthreads is raised while a request is being handled and lo->kmax
 * tracks the highest value seen.
 */
static void
do_nbd_request (request_queue_t * q)
{
	struct request *req;
	int dev, nbd;
	unsigned long flags;


	while (!QUEUE_EMPTY) {

		struct nbd_device *lo = NULL;
		int req_blks = 0;

		req = CURRENT;

		if (!req) {
			NBD_ALERT ("queue not empty but no request?");
			return;
		}
		dev = MINOR (req->rq_dev);
		nbd = dev >> NBD_SHIFT;


		/* lo stays NULL on this path, so error_out skips the
		 * per-device accounting */
		if (nbd >= MAX_NBD) {
			NBD_ERROR ("minor too big in do_nbd_request.");
			goto error_out;
		}
		lo = &nbd_dev[nbd];

		atomic_inc (&lo->kthreads);
		if (atomic_read (&lo->kthreads) > atomic_read (&lo->kmax))
			atomic_set (&lo->kmax,
				    atomic_read (&lo->kthreads));

		if (rq_data_dir(req) == WRITE
		    && (atomic_read (&lo->flags) & NBD_READ_ONLY)) {
			NBD_ERROR ("write on read-only device\n");
			goto error_out;
		}

		/* here "flags" holds a snapshot of the device flags */
		flags = atomic_read (&lo->flags);
		if (!(flags & NBD_INITIALISED)) {
			NBD_ERROR ("device not initialised.\n");
			goto error_out;
		}
		if (!(flags & NBD_ENABLED)) {
			NBD_ERROR ("device not enabled.\n");
			goto error_out;
		}
		if (flags & NBD_INVALID) {
			NBD_ERROR ("device invalidated.\n");
			goto error_out;
		}

		if (lo->magic != NBD_DEV_MAGIC) {
			NBD_ERROR ("nbd[] is not magical!\n");
			goto error_out;
		}
		if (req->nr_sectors > lo->max_sectors) {
			NBD_ERROR ("oversize request\n");
			goto error_out;
		}
		if (req->sector + req->nr_sectors >
		    nbd_hd_struct[dev].nr_sects) {
			NBD_ERROR ("overrange request\n");
			goto error_out;
		}
		if (req->sector < 0) {
			NBD_ERROR ("underrange request\n");
			goto error_out;
		}

		/* the request is acceptable: take it off the kernel queue */
		req->errors = 0;
		blkdev_dequeue_request (req);
		if (CURRENT == req) {
			NBD_ALERT ("CURRENT did not move from %x\n",
				   (unsigned) req);
		}

		/* give a WRITE a sequence number if it has none yet */
		if (rq_data_dir(req) == WRITE && rq_seqno (req) == 0) {

			atomic_inc (&lo->seqno_out);

			rq_set_seqno (req, atomic_read (&lo->seqno_out));
		}

		if (buffered_write != NULL
		    && (atomic_read (&lo->flags) & NBD_BUFFERWR)
		    && rq_data_dir(req) == WRITE) {

			/* buffered write: the hook deals with the data and
			 * the request is ended here */
			if (buffered_write (lo, req) < 0) {
				if (req->errors < 0)
					req->errors = 0;
				req->errors++;
			}

			nbd_end_request (req);
		}
		else {
			nbd_enqueue (lo, req, 0);
		}

		atomic_dec (&lo->kthreads);
		continue;

	      error_out:

		req->errors++;
		blkdev_dequeue_request (req);
		NBD_ALERT ("ending req %x with prejudice\n",
			   (unsigned) req);

		/* take the block count before the request is ended; it
		 * must not be read once nbd_end_request has completed it */
		if (lo)
			req_blks = nr_blks (req);

		nbd_end_request (req);

		if (lo) {
			atomic_add (req_blks, &lo->requests_err);
			atomic_dec (&lo->kthreads);
		}
	}

	return;
}

/*
 * PTB rollback all requests on a given slot and then invalidate it
 * (so the requests can't go back until somebody reactivates the slot)
 * At least rollback (which we call) takes both the io spinlock and our
 * spinlock, so we can hold neither when we are called. Soft_reset
 * (which we call) also calls rollback, so has the same problem.
 */
static int
nbd_clr_sock (struct nbd_slot *slot)
{
	/*
	 * Roll back everything pending on the slot, wipe the slot, then
	 * (under the queue write lock) recount the active slots and move
	 * the device's current-slot index on if it pointed at this slot.
	 * Always returns 0.
	 */
	struct nbd_device *lo = slot->lo;
	const int islot = slot->i;
	unsigned long flags;
	int n;

	nbd_rollback_all (slot);

	slot->file = NULL;
	slot->bufsiz = 0;
	slot->flags = 0;
	slot->buffer = NULL;

	write_lock_irqsave (&lo->queue_lock, flags);

	if (lo->aslot > 0) {

		/* recount the slots that still have a file */
		lo->aslot = 0;
		for (n = 0; n < lo->nslot; n++) {
			if (lo->slots[n].file)
				lo->aslot++;
		}

		if (lo->aslot > 0) {

			/* some slot is still live: make sure the device is
			 * enabled */
			if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {

				atomic_set_mask (NBD_ENABLED, &lo->flags);
				lo->lives++;
				NBD_ALERT ("enabled device nd%s\n",
					   lo->devnam);
			}
		}
		else if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {
			static int nbd_soft_reset (struct nbd_device *lo);

			/* last slot gone and NBD_SHOW_ERRS set: disable and
			 * soft-reset, with the lock dropped around the
			 * reset */
			atomic_clear_mask (NBD_ENABLED, &lo->flags);

			write_unlock_irqrestore (&lo->queue_lock, flags);
			nbd_soft_reset (lo);
			write_lock_irqsave (&lo->queue_lock, flags);
		}
	}

	if (atomic_read (&lo->islot) == islot) {
		/* advance islot, wrapping at nslot, until it lands on a
		 * slot with a file or nslot steps have been taken */
		for (n = 0; n < lo->nslot; n++) {
			atomic_inc (&lo->islot);
			if (atomic_read (&lo->islot) >= lo->nslot)
				atomic_set (&lo->islot, 0);
			if (lo->slots[atomic_read (&lo->islot)].file)
				break;
		}
	}

	lo->harderror = 0;

	write_unlock_irqrestore (&lo->queue_lock, flags);

	return 0;
}

/*
 * PTB - check all slots for old requests and roll them back. 
 * At least rollback (which we call) takes both the io spinlock and our
 * spinlock, so we can hold neither when we are called.
 */
static void
nbd_rollback_old (struct nbd_device *lo)
{
	/*
	 * Walk every slot of the device and roll back those whose
	 * req_age is set and lies more than req_timeo seconds in the
	 * past.
	 */
	int n = 0;

	while (n < lo->nslot) {
		struct nbd_slot *slot = &lo->slots[n++];

		/* req_age not set: nothing to judge */
		if (!(slot->req_age > 0))
			continue;

		if (slot->req_age < jiffies - lo->req_timeo * HZ)
			nbd_rollback_all (slot);
	}
}

/*
 * PTB - finally register a socket to a slot.
 *     - Return 0 for success and -ve for failure.
 *       Nowadays this doesn't do very much! Just finalizes things.
 */
static int
nbd_set_sock (struct nbd_slot *slot, int arg)
{

	struct nbd_device *lo = slot->lo;
	int islot = slot->i;
	unsigned long flags;


	if (!(atomic_read (&lo->flags) & NBD_INITIALISED)) {
		NBD_ALERT ("(%d) device nd%s not initialised yet!\n",
			   islot, lo->devnam);
		return -ENODEV;
	}
	if (!(atomic_read (&lo->flags) & NBD_SIZED)) {
		NBD_ALERT ("(%d) device nd%s not sized yet!\n", islot,
			   lo->devnam);
		return -EINVAL;
	}
	if (!(atomic_read (&lo->flags) & NBD_BLKSIZED)) {
		NBD_ALERT ("(%d) device nd%s not blksized yet!\n", islot,
			   lo->devnam);
		return -EINVAL;
	}
	if (!(atomic_read (&lo->flags) & NBD_SIGNED)) {
		NBD_ALERT
		 ("(%d) setting unsigned device nd%s! But harmless.\n",
		  islot, lo->devnam);
		return -EINVAL;
	}

	down (&lo->req_sem);
	if (slot->pid == 0) {
		slot->pid = current->pid;

	}

	if (slot->pid != current->pid) {
		slot->pid = 0;
		up (&lo->req_sem);
		nbd_alert
		 ("(%d) other process %d is signing device nd%s!\n", islot,
		  slot->pid, lo->devnam);
		return -einval;
	}
	up (&lo->req_sem);



	slot = &lo->slots[islot];

	if (slot->file) {
		static int my_nbd_set_sig (struct nbd_slot *slot,
					   char *sig);
		int error;

		if (arg && my_nbd_set_sig (slot, (char *) arg) >= 0) {
			NBD_ALERT
			 ("did clr_sock in set_sock nd%s%d just after signing\n",
			  lo->devnam, islot + 1);

			nbd_clr_sock (slot);
			if (slot->file) {
				error = -EBUSY;
				goto error_out;
			}
			error = 0;
			goto success_out;
		}

	      error_out:
		error = -EBUSY;

		return error;

	      success_out:
	}

	write_lock_irqsave (&lo->queue_lock, flags);

	slot->file = (void *) 1;

	if (++lo->aslot > 0) {

		if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
			atomic_set_mask (NBD_ENABLED, &lo->flags);
			lo->lives++;
			NBD_ALERT ("(%d) enabled device nd%s\n", islot,
				   lo->devnam);

		}
	}
	if (islot >= lo->nslot) {
		lo->nslot = islot + 1;
		NBD_INFO ("increased socket count to %d\n", lo->nslot);
	}

	lo->harderror = 0;

	write_unlock_irqrestore (&lo->queue_lock, flags);

	return 0;
}

/*
 * PTB - return the index i of 2^i + j, 0 <= j < 2^i
 */
static inline unsigned
log2 (unsigned arg)
{
	/* count how many times arg can be halved before it drops to 1
	 * or below; both 0 and 1 give 0 */
	unsigned bits;

	for (bits = 0; arg > 1; arg >>= 1)
		bits++;

	return bits;
}

/*
 * PTB - set the blksize in bytes of the block device. Return 0 for
 *     - success and -ve for failure.
 */
static int
nbd_set_blksize (struct nbd_device *lo, unsigned int arg)
{
	/*
	 * Accept a block size between 512 and PAGE_SIZE that is a power
	 * of two; record it in the blksize table and on the device,
	 * together with its log, and flag the device NBD_BLKSIZED.
	 */
	const int idx = lo->nbd << NBD_SHIFT;

	if (arg > PAGE_SIZE) {
		NBD_ERROR ("blksize (%u) too big\n", arg);
		return -EINVAL;
	}
	if (arg < 512) {
		NBD_ERROR ("blksize (%u) too small\n", arg);
		return -EINVAL;
	}
	/* a power of two has exactly one bit set */
	if ((arg & (arg - 1)) != 0) {
		NBD_ERROR ("blksize (%u) not power of 2\n", arg);
		return -EINVAL;
	}

	/* table first, then the device takes the value as stored */
	nbd_blksizes[idx] = arg;
	lo->blksize = nbd_blksizes[idx];
	lo->logblksize = log2 (lo->blksize);

	atomic_set_mask (NBD_BLKSIZED, &lo->flags);
	return 0;
}

/*
 * PTB - set the size in bytes of the block device. Return 0 for
 *     - success and -ve for failure.
 */
static int
nbd_set_size (struct nbd_device *lo, u64 arg)
{
	/*
	 * Record the device size given in bytes: as bytes, as arg >> 10,
	 * and as twice that figure in sectors. Each value is written to
	 * its table entry first and the device field takes the value as
	 * stored there. Always returns 0.
	 */
	const int idx = lo->nbd << NBD_SHIFT;

	nbd_bytesizes[idx] = arg;
	lo->bytesize = nbd_bytesizes[idx];

	nbd_sizes[idx] = arg >> 10;
	lo->size = nbd_sizes[idx];

	nbd_hd_struct[idx].nr_sects = lo->size << 1;
	lo->sectors = nbd_hd_struct[idx].nr_sects;

	atomic_set_mask (NBD_SIZED, &lo->flags);
	return 0;
}

static int
my_nbd_set_intvl (struct nbd_device *lo, int arg)
{
	/* set the request timeout; only positive values are accepted */
	if (arg > 0) {
		lo->req_timeo = arg;
		return 0;
	}

	NBD_ERROR ("bad pulse interval/req timeout value (%d)\n",
		   arg);
	return -EINVAL;
}

static int
my_nbd_set_spid (struct nbd_slot *slot, int arg)
{
	/*
	 * Store arg as the slot's spid. arg must be positive and below
	 * 2^(bits in a short); it is narrowed to short before being
	 * stored.
	 */
	const int limit = 1 << (sizeof (short) * 8);

	if (arg > 0 && arg < limit) {
		slot->spid = (short) arg;
		return 0;
	}

	NBD_ERROR ("bad spid value (%d)\n", arg);
	return -EINVAL;
}

static int
my_nbd_set_bufferwr (struct nbd_device *lo, int arg)
{
	/* non-zero arg sets NBD_BUFFERWR on the device, zero clears it */
	if (!arg) {
		atomic_clear_mask (NBD_BUFFERWR, &lo->flags);
		return 0;
	}

	atomic_set_mask (NBD_BUFFERWR, &lo->flags);
	return 0;
}

static int
my_nbd_set_invalid (struct nbd_device *lo, int arg)
{
	/*
	 * arg == 0 clears NBD_INVALID. Any other value sets it and, the
	 * first time only, destroys the buffers of the device's base
	 * minor. Always returns 0.
	 */
	kdev_t dev;

	if (arg == 0) {
		atomic_clear_mask (NBD_INVALID, &lo->flags);
		return 0;
	}

	/* already invalid: nothing more to do */
	if (atomic_read (&lo->flags) & NBD_INVALID)
		return 0;

	dev = MKDEV (major, lo->nbd << NBD_SHIFT);
	atomic_set_mask (NBD_INVALID, &lo->flags);
	destroy_buffers (dev);

	return 0;
}

/*
 * Return the first slot index free when asking for n new ones.
 * If there is no such gap, then NBD_MAXCONN will be returned.
 * The return is always in the same argument address.
 */
static int
nbd_get_nport (struct nbd_device *lo, int *arg)
{
	int err, nslot, i;

	if (arg == NULL) {
		return -EINVAL;
	}

	nslot = *arg;
	err = copy_from_user ((char *) &nslot, arg, sizeof (int));
	if (err < 0) {
		return err;
	}

	for (i = 0; i < NBD_MAXCONN; i++) {
		struct nbd_slot *sloti = &lo->slots[i];
		int j;
		if (sloti->file) {
			continue;
		}

		for (j = i; j < NBD_MAXCONN && j < i + nslot; j++) {
			if (sloti->file)
				break;
		}
		if (j == i + nslot) {

			break;
		}
	}

	err = copy_to_user (arg, (char *) &i, sizeof (int));
	return err;
}

/*
 * PTB - if we're not signed, accept new sig and return success.
 *     - if we are signed, compare the offer and return success if equal,
 *     - and -ve for failure.
 */
static int
my_nbd_set_sig (struct nbd_slot *slot, char *sig)
{
	int err = 0;
	char buf[NBD_SIGLEN];
	int islot = slot->i;
	struct nbd_device *lo = slot->lo;


	if (!access_ok (VERIFY_READ, sig, NBD_SIGLEN)) {
		err = -EINVAL;
		return err;
	}

	down (&lo->req_sem);

	if (slot->pid == 0) {
		slot->pid = current->pid;
	}
	if (slot->pid != current->pid) {
		up (&lo->req_sem);
		NBD_ALERT
		 ("(%d): failed to set sig because process %d is trying\n",
		  islot, slot->pid);
		return -EINVAL;
	}

	if (!(atomic_read (&lo->flags) & NBD_SIGNED)) {

		copy_from_user ((char *) lo->signature, sig, NBD_SIGLEN);
		atomic_set_mask (NBD_SIGNED, &lo->flags);
		up (&lo->req_sem);
		return 0;
	}

	copy_from_user (buf, sig, NBD_SIGLEN);

	if (memcmp (buf, (char *) &lo->signature[0], NBD_SIGLEN) != 0) {

		slot->pid = 0;
		err = -EINVAL;
		up (&lo->req_sem);
		NBD_ALERT ("(%d): failed sigcheck wth %d\n", islot, err);
		return err;
	}
	up (&lo->req_sem);
	err = 0;
	return err;
}

/*
 * PTB set the max_sectors for a device
 */
static void
set_max_sectors (struct nbd_device *lo, int sectors)
{
	/* record the limit on the device and in the NBD_MAXCONN table
	 * entries that start at the device's base minor */
	const int base = lo->nbd << NBD_SHIFT;
	int k = 0;

	lo->max_sectors = sectors;

	while (k < NBD_MAXCONN) {
		nbd_max_sectors[base + k] = sectors;
		k++;
	}
}

/*
 * PTB - register a userspace buffer to a slot. Return 0 for success
 *     - and -ve for failure. Null arg acts as erase.
 */
static int
my_nbd_reg_buf (struct nbd_slot *slot, char *buffer)
{
	/*
	 * Attach a userspace buffer to the slot, or detach the current
	 * one when buffer is NULL. The buffer must be writable for
	 * max_sectors * 512 bytes. Returns 0 on success, -EINVAL if the
	 * buffer fails the access check.
	 */
	struct nbd_device *lo = slot->lo;
	int bufsiz;

	if (buffer == NULL) {
		/* erase */
		slot->flags &= ~NBD_SLOT_BUFFERED;
		slot->buffer = NULL;
		slot->bufsiz = 0;
		return 0;
	}

	bufsiz = lo->max_sectors << 9;

	if (!access_ok (VERIFY_WRITE, buffer, bufsiz))
		return -EINVAL;

	slot->buffer = buffer;
	slot->bufsiz = bufsiz;

	/* the device keeps the smallest buffer size seen (0 = unset) */
	if (!lo->bufsiz || lo->bufsiz > bufsiz)
		lo->bufsiz = bufsiz;

	/* raise max_sectors if the device buffer allows more */
	if (lo->max_sectors < (lo->bufsiz >> 9))
		set_max_sectors (lo, lo->bufsiz >> 9);

	slot->flags |= NBD_SLOT_BUFFERED;

	return 0;
}

static struct timer_list reenable_timer;
/*
 * PTB - if ! ENABLED, clear all queues and then reset ENABLED flag
 * ( call without the spinlock held ) 
 */
static void
nbd_restart (struct nbd_device *lo)
{
	/*
	 * Re-enable a disabled device. Both the device queue and the
	 * kernel queue are cleared first; if anything was found on them
	 * the re-enable is put off for a second via reenable_timer,
	 * otherwise NBD_ENABLED is set under the queue write lock.
	 */
	unsigned long flags;
	int ours, kernels;

	/* nothing to do unless initialised, with active slots, and
	 * currently disabled */
	if (!(atomic_read (&lo->flags) & NBD_INITIALISED)
	    || lo->aslot <= 0
	    || (atomic_read (&lo->flags) & NBD_ENABLED))
		return;

	ours = nbd_clr_queue (lo);
	kernels = nbd_clr_kernel_queue ();

	if (ours + kernels > 0) {
		NBD_ALERT
		 ("cleared %d+%d kernel requests, rescheduling enable\n",
		  ours, kernels);
		reenable_timer.data = (unsigned long) lo;
		reenable_timer.expires = jiffies + 1 * HZ;
		add_timer (&reenable_timer);
		return;
	}

	write_lock_irqsave (&lo->queue_lock, flags);
	/* somebody else may have enabled the device meanwhile */
	if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
		atomic_set_mask (NBD_ENABLED, &lo->flags);
		lo->lives++;
	}
	write_unlock_irqrestore (&lo->queue_lock, flags);
}
/*
 * Timer used by nbd_restart to retry the re-enable later. The
 * initialiser is positional; the last member is the callback, which is
 * nbd_restart itself cast to the (unsigned long) timer signature.
 * nbd_restart fills in .data (the device pointer) and .expires before
 * calling add_timer.
 * NOTE(review): the first three initialisers presumably correspond to
 * the list links, expires and data members of struct timer_list -
 * confirm against the kernel headers in use.
 */
static struct timer_list reenable_timer = {

	{NULL, NULL},

	0,
	0,
	(void (*)(unsigned long)) nbd_restart,
};

/*
 * PTB - this unsets the enabled flag on the device and then clears the
 *     - queue for the device.
 */
static void
nbd_disable (struct nbd_device *lo)
{
	/*
	 * Clear NBD_ENABLED and then keep clearing the device queue
	 * until a pass finds nothing, for at most 100 passes. Does
	 * nothing on an uninitialised device.
	 */
	int passes = 0;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED))
		return;

	atomic_clear_mask (NBD_ENABLED, &lo->flags);

	while (passes < 100 && nbd_clr_queue (lo) > 0)
		passes++;
}

/*
 * PTB - drains device queue. Disables device.
 * At least rollback (which we call) takes both the io spinlock and our
 * spinlock, so we can hold neither when we are called. Also
 * invalidate buffers, on request of Rogier Wolff.
 */
static int
nbd_soft_reset (struct nbd_device *lo)
{
	/*
	 * Roll back every slot, disable the device (which drains its
	 * queue) and invalidate the buffers of nslot minors starting at
	 * the device's base minor. Returns -EINVAL if the device is not
	 * initialised or has no slots, 0 otherwise.
	 */
	int k;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED)
	    || lo->nslot <= 0)
		return -EINVAL;

	for (k = 0; k < lo->nslot; k++)
		nbd_rollback_all (&lo->slots[k]);

	nbd_disable (lo);

	for (k = 0; k < lo->nslot; k++) {
		int minor = (lo->nbd << NBD_SHIFT) + k;

		invalidate_buffers (MKDEV (major, minor));
	}

	return 0;
}

/*
 * PTB - added a device/module reset for tidyness in face of rampant hacking
 *     - this does a soft_reset of all devices, followed by a clr sock
 *     - on each, and then clears the kernel queue. It unsets the
 *     - enabled flag on each device.
 *       We have to be called without either the io spinlock or our
 *       queue spinlock held, as we call soft_reset which takes both,
 *       as does clr_sock
 */
int
nbd_hard_reset ()
{
	/*
	 * Soft-reset every device and clear the socket on each of its
	 * slots, then clear the kernel queue and drop the module use
	 * count until the module is no longer in use. Always returns 0.
	 */
	int n;

	for (n = 0; n < MAX_NBD; n++) {
		struct nbd_device *lo = &nbd_dev[n];
		int s;

		nbd_soft_reset (lo);

		for (s = 0; s < lo->nslot; s++)
			nbd_clr_sock (&lo->slots[s]);
	}

	nbd_clr_kernel_queue ();

	while (MOD_IN_USE)
		MOD_DEC_USE_COUNT;

	return 0;
}

static int
indirect_ioctl_load (struct request *req, int cmd, char *buf)
{
	/*
	 * Give the ioctl request a kernel buffer sized for the argument
	 * of cmd. The size comes from remote_ioctl->size_user(); the
	 * buffer is rounded up to whole 512-byte sectors and, for ioctls
	 * with _IOC_WRITE set, filled through
	 * remote_ioctl->cp_from_user().
	 *
	 * Returns the argument size (possibly 0, in which case no buffer
	 * is allocated) or a negative errno. On any failure req->buffer
	 * is NULL and req->nr_sectors is 0.
	 */
	int size = remote_ioctl->size_user (cmd, buf);
	int err;

	if (size < 0) {
		req->buffer = NULL;
		req->nr_sectors = 0;
		return -EINVAL;
	}

	if (size == 0) {
		req->nr_sectors = 0;
		req->buffer = NULL;
		return size;
	}

	/* round up to whole sectors */
	req->nr_sectors = (size + 511) >> 9;
	req->buffer = kmalloc (req->nr_sectors << 9, GFP_KERNEL);

	if (!req->buffer) {
		req->buffer = NULL;
		req->nr_sectors = 0;
		return -ENOMEM;
	}

	if (!(_IOC_DIR (cmd) & _IOC_WRITE))
		return size;

	err = remote_ioctl->cp_from_user (cmd, req->buffer, buf, size);
	if (err < 0) {
		kfree (req->buffer);
		req->buffer = NULL;
		req->nr_sectors = 0;
		return err;
	}

	return size;
}

static int
indirect_ioctl_store (struct request *req, int cmd, char *buf, int size)
{
	int err;


	if (size > 0) {

		if (!req->buffer)
			return -ENOMEM;
		err =
		 remote_ioctl->cp_to_user (cmd, buf, req->buffer, size);
		kfree (req->buffer);
		if (err < size) {
			return -ENOMEM;
		}
	}
	return size;
}

static int
nbd_remote_ioctl (struct nbd_device *lo, int minor, int cmd,
		  unsigned long arg)
{

	unsigned start_time, timeout;
	size_t size;
	int err;
	struct request *req;

	struct completion x;

	timeout = lo->req_timeo * HZ;
	start_time = jiffies;

	while (down_trylock (&lo->req_sem) != 0) {

		if (jiffies >= start_time + timeout) {

			NBD_ALERT
			 ("took too long to get a spare ioctl: TIMEOUT\n");
			return -ETIME;
		}
		err = interruptible_sleep_on_timeout (&lo->req_wq,
						      start_time +
						      timeout - jiffies);
	}

	req = &lo->req;

	memset (req, 0, sizeof (struct request));
	req->cmd = IOCTL;

	req->errors = 0;


	req->special = (typeof (req->special)) cmd;

	if (_IOC_DIR (cmd) & _IOC_READ) {

		size = indirect_ioctl_load (req, cmd, (char *) arg);
		if (size < 0) {
			up (&lo->req_sem);
			return size;
		}

	}
	else {

		size = 0;
		req->buffer = (char *) arg;
	}

	req->rq_dev = MKDEV (major, minor);

	init_completion (&x);
	req->waiting = &x;

	req->rq_status = RQ_ACTIVE;
	nbd_enqueue (lo, req, 0);


	while (1) {
		if (jiffies >= start_time + timeout) {

			struct list_head *pos;

			static void delete_req (struct request *req) {

				atomic_dec (&lo->countq[rq_data_dir(req)]);

				list_del (&req->queue);

				req->errors = -ETIME;

				if (req->nr_sectors > 0 && req->buffer) {
					kfree (req->buffer);
					req->buffer = NULL;
			}};

			write_lock (&lo->queue_lock);
			list_for_each (pos, &lo->queue) {
				struct request *xreq =
				 list_entry (pos, struct request, queue);
				if (req == xreq) {
					delete_req (req);
					write_unlock (&lo->queue_lock);
					NBD_ALERT
					 ("took too long to treat queued ioctl: TIMEOUT\n");
					err = -ETIME;
					goto end;
				}
			}
			write_unlock (&lo->queue_lock);
		}

		err = wait_for_completion_timeout (&x, 1);
		if (err > 0)

			break;

	}
	req->rq_status = 0;

	if (_IOC_DIR (cmd) & _IOC_READ) {
		err = indirect_ioctl_store (req, cmd, (char *) arg, size);
		if (err < 0) {
			goto end;
		}
	}

	if (req->errors != 0) {
		err = req->errors;
		err = err < 0 ? err : -EINVAL;
	}
	else {
		err = 0;
	}
      end:
	if (err < 0) {
	}
	else {
	}
	up (&lo->req_sem);
	return err;

}


/*
 * PTB - generic ioctl handling
 *
 * The caller must be root and the inode must carry our major. The minor
 * selects the device (minor >> NBD_SHIFT) and, through
 * minor % NBD_MAXCONN - 1, a slot; a slot index of -1 means the ioctl
 * was issued on the whole device. Every call clears lo->harderror.
 *
 * Driver-private commands are tried first, then the standard block
 * device ioctls. Anything else is offered to the remote_ioctl table
 * and, if accepted there, forwarded with nbd_remote_ioctl.
 *
 * Returns 0 or a negative errno.
 */
static int
nbd_ioctl (struct inode *inode, struct file *file,
	   unsigned int cmd, unsigned long arg)
{
	struct nbd_device *lo = NULL;
	int minor = -1;
	int islot = -1;
	int nbd   = -1;
	struct nbd_slot *slot = NULL;
	int err;
	int intval;


	if (!suser ()) {
		NBD_ERROR ("caller must be root.\n");
		return -EACCES;
	}
	if (!inode) {
		NBD_ERROR ("given bad inode.\n");
		return -EINVAL;
	}
	if (MAJOR (inode->i_rdev) != major) {
		NBD_ERROR ("pseudo-major %d != %d\n",
			   MAJOR (inode->i_rdev), major);
		return -ENODEV;
	}

	minor = MINOR (inode->i_rdev);
	nbd = minor >> NBD_SHIFT;

	if (nbd >= MAX_NBD) {
		NBD_ERROR ("tried to open too many devices, %d\n", minor);
		return -ENODEV;
	}

	lo = &nbd_dev[nbd];
	lo->harderror = 0;
	islot = minor % NBD_MAXCONN - 1;

	/* PTB slot stays NULL when we were called on the whole device */
	if (islot >= 0 && islot < NBD_MAXCONN)
		slot = &lo->slots[islot];


	/* PTB first the driver-private commands */
	switch (cmd) {

	  case NBD_CLEAR_SOCK:
		if (islot < 0) {
			NBD_ALERT ("CLR_SOCK called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		err = nbd_clr_sock (slot);
		return err;

	  case NBD_SET_SOCK:
		if (islot < 0) {
			NBD_ALERT ("SET_SOCK called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		err = nbd_set_sock (slot, arg);
		return err;

	  case BLKBSZGET: // PTB fall through
	  case NBD_GET_BLKSIZE:
		if (!(atomic_read (&lo->flags) & NBD_BLKSIZED)) {
			return -EINVAL;
		}
		err = put_user (lo->blksize, (long *) arg);
		return err;

	  case BLKBSZSET: // PTB fall through
	  case NBD_SET_BLKSIZE:
		if (!arg)
			return -EINVAL;
		intval = -1;
		if (get_user (intval, (int *) arg))
			return -EFAULT;
		if (intval == -1) {
			NBD_ALERT ("BLKBSZSET got %d from user\n", intval);
		}
		err = nbd_set_blksize (lo, intval);
		return err;

	  case NBD_SET_SIZE:
		err = nbd_set_size (lo, (u64) arg);
		return err;

	  case NBD_SET_SECTORS:
		err = nbd_set_size (lo, ((u64) arg) << 9);
		return err;

	  case MY_NBD_SET_INTVL:
		err = my_nbd_set_intvl (lo, arg);
		return err;

	  case MY_NBD_SET_SPID:
		if (islot < 0) {
			NBD_ALERT ("SET_SPID called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		err = my_nbd_set_spid (slot, arg);
		return err;

	  case MY_NBD_SET_BUFFERWR:
		err = my_nbd_set_bufferwr (lo, arg);
		return err;

	  case MY_NBD_REG_BUF:
		/* PTB a null buffer address is silently accepted */
		if (!arg) {

			return 0;
		}
		if (islot < 0) {
			NBD_ALERT ("REG_BUF called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		err = my_nbd_reg_buf (slot, (char *) arg);
		return err;

	  case MY_NBD_SET_SIG:
		if (islot < 0) {
			NBD_ALERT ("SET_SIG called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		err = my_nbd_set_sig (slot, (char *) arg);
		return err;

	  case MY_NBD_GET_REQ:
		if (islot < 0) {
			NBD_ALERT ("GET_REQ called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		err = nbd_get_req (slot, (char *) arg);
		return err;

	  case MY_NBD_GET_NPORT:
		err = nbd_get_nport (lo, (int *) arg);
		return err;

	  case MY_NBD_CLR_REQ:
		if (islot < 0) {
			NBD_ALERT ("CLR_REQ called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		nbd_rollback_all (slot);
		err = 0;
		return err;

	  case MY_NBD_ERR_REQ:
		if (islot < 0) {
			NBD_ALERT ("ERR_REQ called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		nbd_error_all (slot);
		err = 0;
		return err;

	  case MY_NBD_SYNC:
		err = 0;

		nbd_rollback_old (lo);

		nbd_set_speed (lo);

		return err;

	  case MY_NBD_ACK:
		if (islot < 0) {
			NBD_ALERT ("NBD_ACK called on full device nd%s\n",
				   lo->devnam);
			return -EINVAL;
		}
		err = nbd_ack (slot, (char *) arg);
		return err;

	  case NBD_PRINT_DEBUG:
		NBD_INFO
		 ("device %d: head = %x, tail = %x, in = %d, out = %d\n",
		  minor,
		  (int) (list_empty (&lo->queue) ? NULL :
			 list_entry (((struct list_head *) &lo->queue)->
				     next, struct request, queue)),
		  (int) (list_empty (&lo->queue) ? NULL :
			 list_entry (((struct list_head *) &lo->queue)->
				     prev, struct request, queue)),
		  atomic_read (&lo->requests_in[READ]) +
		  atomic_read (&lo->requests_in[WRITE]),
		  atomic_read (&lo->requests_out[READ]) +
		  atomic_read (&lo->requests_out[WRITE]));
		err = 0;
		return err;
	  case NBD_HARD_RESET:
		err = nbd_hard_reset ();
		return err;

	  case NBD_RESET:
		err = nbd_soft_reset (lo);

		/* PTB arm the reenable timer for this device, 5s from now */
		reenable_timer.data = (unsigned long) lo;
		reenable_timer.expires = jiffies + 5 * HZ;
		add_timer (&reenable_timer);
		return err;

	  case NBD_SET_MD5SUM:
		if (arg) {
			atomic_set_mask (NBD_MD5SUM, &lo->flags);
		}
		else {
			atomic_clear_mask (NBD_MD5SUM, &lo->flags);
		}
		err = 0;
		return err;

	  case MY_NBD_SET_SHOW_ERRS:
		if (arg) {
			atomic_set_mask (NBD_SHOW_ERRS, &lo->flags);
		}
		else {
			atomic_clear_mask (NBD_SHOW_ERRS, &lo->flags);
		}
		return 0;

	  case MY_NBD_SET_DIRECT:
		if (arg) {
			atomic_set_mask (NBD_DIRECT, &lo->flags);
		}
		else {
			atomic_clear_mask (NBD_DIRECT, &lo->flags);
		}
		return 0;

	  case MY_NBD_INVALIDATE:
		err = my_nbd_set_invalid (lo, (int) arg);
		return err;

	  case NBD_SET_PF_MEMALLOC:
		/* PTB this acts on the calling process, not on the device */
		if (arg) {
			current->flags |= PF_MEMALLOC;
		}
		else {
			current->flags &= ~PF_MEMALLOC;
		}
		return 0;
	}

	/* PTB now the standard block device ioctls */
	switch (cmd) {

	  case BLKROSET:
		if (get_user (intval, (int *) arg))
			return -EFAULT;

		if (intval) {
			atomic_set_mask (NBD_READ_ONLY, &lo->flags);
		}
		else {
			atomic_clear_mask (NBD_READ_ONLY, &lo->flags);
		}

		set_device_ro (MKDEV (major, minor), intval);
		return 0;

	  case BLKROGET:
		intval = (atomic_read (&lo->flags) & NBD_READ_ONLY) != 0;
		return put_user (intval, (int *) arg);

	  case BLKFLSBUF:
		nbd_maybe_sync_sync (lo);

		invalidate_buffers (inode->i_rdev);

		if (atomic_read (&lo->flags) & NBD_BUFFERWR) {


			destroy_buffers (inode->i_rdev);
		}

		return 0;

	  case BLKRAGET:
		err = put_user (read_ahead[major], (long *) arg);
		return err;

	  case BLKRASET:
		if (arg > 0xff) {
			return -EINVAL;
		}
		rahead = read_ahead[major] = arg;
		return 0;

	  case BLKSSZGET:
		err = put_user (512, (int *) arg);
		return err;

	  case BLKSECTSET:
		if (arg <= 0 || arg > (lo->bufsiz >> 9)) {
			return -EINVAL;
		}
		set_max_sectors (lo, arg);
		return 0;

	  case BLKSECTGET:
		err = put_user (lo->max_sectors, (int *) arg);
		return err;

	  case HDIO_GETGEO:
		if (!arg) {
			return -EINVAL;
		}

		if (1) {
			struct hd_geometry *geo =
			 (struct hd_geometry *) arg;
			int sectors = nbd_sizes[nbd << NBD_SHIFT] << 1;
			unsigned short c;
			unsigned char h, s;
			/*
			 * PTB the start sector used to be filled in with
			 * the head count by mistake. Report 0 for the whole
			 * device and the recorded start for a slot minor,
			 * in the same way that BLKGETSIZE picks its size.
			 */
			unsigned long start = 0;

			if (islot >= 0)
				start = nbd_hd_struct[minor].start_sect;

			/* PTB fake a 4x16 geometry up to 2^22 sectors */
			if (sectors < (1 << 22)) {
				h = 4;
				s = 16;
				c = sectors >> 6;
			}
			else {
				h = 255;
				s = 63;
				c = (sectors / h) / s;
			}

			/* PTB stop at the first field that fails to copy */
			err = put_user (c, &geo->cylinders);
			if (err >= 0)
				err = put_user (h, &geo->heads);
			if (err >= 0)
				err = put_user (s, &geo->sectors);
			if (err >= 0)
				err = put_user (start, &geo->start);
			if (err < 0)
				return err;
		}

		return 0;

	  case BLKRRPART:
		return 0;

	  case BLKGETSIZE:

		if (!(atomic_read (&lo->flags) & NBD_SIZED)) {
			return -ENODEV;
		}

		if (islot < 0) {
			err = put_user ((unsigned long) lo->sectors,
					(unsigned long *) arg);
		}
		else {
			err =
			 put_user ((unsigned long) nbd_hd_struct[minor].
				   nr_sects, (unsigned long *) arg);
		}

		/* PTB sanity check only, the result is not affected */
		if (lo->size != 0
		    && (u32) (lo->bytesize >> 10) != lo->size) {
			NBD_ALERT
			 ("bytes %luKB mismatch with KB %u in BLKGETSIZE\n",
			  (unsigned long) (lo->bytesize >> 10), lo->size);
		}
		return err;

	  case BLKGETSIZE64:

		if (!(atomic_read (&lo->flags) & NBD_SIZED)) {
			return -ENODEV;
		}
		if (islot < 0) {
			err = put_user (lo->bytesize, (u64 *) arg);
		}
		else {
			u64 size64 = nbd_hd_struct[minor].nr_sects;
			size64 <<= 9;
			err = put_user (size64, (u64 *) arg);
		}
		return err;

	}

	/* PTB not one of ours: try to pass it to the other end */
	if (remote_ioctl == NULL)
		return -EINVAL;


	if (remote_ioctl->convert_inplace (&cmd) < 0) {
		NBD_ALERT ("unauthorized ioctl %#x\n", cmd);
		return -EINVAL;
	}


	err = nbd_remote_ioctl (lo, minor, cmd, arg);

	return err;
}

/*
 * PTB - release the device. This happens when the last process closes
 * or dies.
 *
 * For a slot minor the slot refcount is dropped, and if the caller is
 * the process recorded as the slot owner the socket is cleared and the
 * owner pid reset. The device refcount and the module use count are
 * then dropped. Once nobody holds the device (or the module) any more,
 * bufsiz and seqno_out are reset, and the buffers of all the device's
 * minors are invalidated and destroyed if NBD_SHOW_ERRS is set.
 *
 * Returns 0, or -ENODEV when called without an inode.
 */
static int
nbd_release (struct inode *inode, struct file *file)
{
	struct nbd_device *lo;
	int dev;
	int nbd;
	int islot;


	if (!inode) {
		NBD_ALERT ("null inode.\n");
		return (-ENODEV);
	}
	dev = MINOR (inode->i_rdev);
	nbd = dev >> NBD_SHIFT;

	lo = &nbd_dev[nbd];

	/* PTB -1 here means the whole device rather than a slot */
	islot = dev % NBD_MAXCONN - 1;


	if (islot >= 0) {

		struct nbd_slot *slot = &lo->slots[islot];

		--slot->refcnt;
		if (slot->pid == current->pid) {

			/* PTB the result of clr_sock is deliberately ignored */
			nbd_clr_sock (slot);
			slot->pid = 0;

			if (slot->refcnt > 0) {
				/*
				 * PTB report the owner via current->pid, as
				 * slot->pid has just been zeroed above
				 */
				NBD_ALERT
				 ("slot owner process %d released slot nd%s%d while not last\n",
				  current->pid, lo->devnam, islot + 1);
			}
		}


	}

	atomic_dec (&lo->refcnt);
	if (MOD_IN_USE)
		MOD_DEC_USE_COUNT;

	if (atomic_read (&lo->refcnt) <= 0 || !MOD_IN_USE) {
		if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {

			int j;
			for (j = 0; j < NBD_MAXCONN; j++) {
				int minor = (nbd << NBD_SHIFT) + j;
				invalidate_buffers (MKDEV (major, minor));
			}
			for (j = 0; j < NBD_MAXCONN; j++) {
				int minor = (nbd << NBD_SHIFT) + j;
				destroy_buffers (MKDEV (major, minor));
			}
		}

		lo->bufsiz = 0;
		atomic_set (&lo->seqno_out, 0);
	}

	/*
	 * PTB free the kiovec attached to this file.
	 * NOTE(review): O_NOFOLLOW looks like a private marker set together
	 * with O_DIRECT when the kiovec was allocated - confirm in nbd_open.
	 */
	if (file && file->f_iobuf && (file->f_flags & O_DIRECT)

	    && (file->f_flags & O_NOFOLLOW)) {
		free_kiovec (1, &file->f_iobuf);

		file->f_flags &= ~(O_DIRECT | O_NOFOLLOW);

	}


	return (0);
}

/*
 * PTB - the block device entry points of this driver. We supply no
 * media change or revalidate methods.
 */
static struct block_device_operations nbd_blkops = {
	.open			= nbd_open,
	.release		= nbd_release,
	.ioctl			= nbd_ioctl,
	.check_media_change	= NULL,
	.revalidate		= NULL,
};

/*
 * And here should be modules and kernel interface 
 *  (Just smiley confuses emacs :-)
 */

/*
 * This is just to get a nice limited width integer printout in proc!
 *
 * n is scaled down by factors of 1024 until it is below 1024 and is
 * printed with a K, M, G or T suffix to match. When the scaled number
 * leaves room, a decimal point and enough fractional digits are added
 * to bring the text up to endpos characters (e.g. 1536 at width 7
 * gives "1.5000K"). Unscaled numbers are printed as they are.
 *
 * endpos is the field width, and is clamped to 1..8 because wider
 * fields would run off the end of the 16 byte buffer.
 *
 * The result points into a static buffer, so it is overwritten by the
 * next call and must be copied if it is to be kept.
 */
char *
display (unsigned n, int endpos)
{

	static char buf[16];
	int units = 0;
	int decimals = 0;
	int decpos;

	/* PTB keep every index used below inside buf[] */
	if (endpos < 1)
		endpos = 1;
	else if (endpos > 8)
		endpos = 8;

	decpos = endpos;
	buf[endpos--] = 0;

	/* PTB decimals ends up as the 10 bits shifted out last */
	while (n >= 1 << 10) {
		decimals = n & ((1 << 10) - 1);
		n >>= 10;
		units++;
	}
	switch (units) {
	  case 0:
		break;
	  case 1:
		buf[endpos--] = 'K';
		break;
	  case 2:
		buf[endpos--] = 'M';
		break;
	  case 3:
		buf[endpos--] = 'G';
		break;
	  case 4:
		buf[endpos--] = 'T';
		break;
	}

	/* PTB write the integer part right to left */
	if (n == 0) {
		buf[endpos--] = '0';
	}
	else {
		while (endpos >= 0 && n > 0) {
			buf[endpos--] = '0' + n % 10;
			n /= 10;
		}
	}

	/*
	 * PTB there is room to the left that we did not use, so spend it
	 * on decimals instead: overwrite the unit letter with a point,
	 * add one fractional digit per spare column and then put the unit
	 * letter and the terminator back at the end.
	 */
	if (endpos >= 1 && units > 0) {
		int k = 0;
		char unitchar = buf[--decpos];
		buf[decpos + k++] = '.';
		while (endpos >= k) {
			int digit = (decimals * 10) >> 10;
			buf[decpos + k++] = '0' + digit;
			decimals -= (digit << 10) / 10;
			decimals *= 10;
		}
		buf[decpos + k++] = unitchar;
		buf[decpos + k] = 0;
	}

	return buf + endpos + 1;
}

/*
 * PTB - proc read handler: print the state of every device into buf.
 *
 * Devices with nslot <= 0 are only reported as "Closed" (singly or as a
 * range); each device with slots gets a block of "[name] ..." lines.
 *
 * The output can exceed what fits in one call, so the function is
 * resumable. The device index, the last device printed, the running
 * total and the address of the next section label are kept in static
 * variables. Each NBD_PROC_LABEL(n) is a checkpoint: it records its own
 * label as the place to resume and, if more than `limit' bytes have
 * been written in this call, returns what there is so far. A later call
 * with offset > 0 jumps straight back to the recorded label. A call
 * with offset <= 0 starts again from the first device.
 *
 * Because the resume state is static it is shared by all callers.
 *
 * Side effects seen below: lo->countq[] is overwritten with the counts
 * found by walking the queue when they disagree, and nbd_set_speed is
 * called for each device printed.
 *
 * Returns the number of bytes placed in buf by this call.
 */
int

nbd_read_proc (char *buf, char **start, off_t offset, int len, int *eof,
	       void *data)
 {

	/* PTB leave 80 bytes of slack below the page or request size */
	const int limit = min((int)PAGE_SIZE,len) - 80;
	static int i;
	struct nbd_device *lo;
	static int last;
	static void *next_label;
	static char *next_label_name;
	static int total;
	unsigned long flags;


	/* PTB a continuation call with nowhere to resume: we are done */
	if (offset > 0 && !next_label) {

		*eof = 1;

		*start = buf;
		return 0;
	}

	/* PTB a read from the start resets all the resume state */
	if (offset <= 0) {

		last = -1;
		i = 0;
		next_label = NULL;
		next_label_name = NULL;
		total = 0;
	}

	len = 0;

	/*
	 * PTB checkpoint. Remember label_n as the resume point and return
	 * the bytes written so far if we are over the limit. Note *start is
	 * set to the byte count and not to an address.
	 * NOTE(review): presumably the procfs convention of the time for
	 * advancing the file position - confirm against fs/proc.
	 */
#define NBD_PROC_LABEL(n) \
        next_label = &&label_##n; \
        next_label_name = "label_" #n; \
        if (len > limit) { \
            *start = (char *) len; \
            total += len; \
            return len;\
        } \
        label_##n:

	for (; i < MAX_NBD; i++) {

		char *devnam;

		lo = &nbd_dev[i];
		devnam = lo->devnam;
		/* PTB no slots: skip it, it is reported as Closed later */
		if (lo->nslot <= 0) {
			next_label = NULL;
			continue;
		}

		/* PTB resuming: jump to where the previous call stopped */
		if (next_label) {
			void *label = next_label;

			next_label = NULL;
			next_label_name = NULL;
			len = 0;
			goto *label;
		}

		NBD_PROC_LABEL (1);

		/* PTB report the skipped devices since the last one printed */
		if (last == i - 2) {
			char *prevdevnam = device_letter (i - 1);
			len +=
			 sprintf (buf + len, "Device %s:\tClosed\n",
				  prevdevnam);
		}
		if (last < i - 2) {
			char lastdevnam[3];
			char prevdevnam[3];
			strncpy (lastdevnam, device_letter (last + 1), 3);
			strncpy (prevdevnam, device_letter (i - 1), 3);
			len +=
			 sprintf (buf + len, "Device %s-%s:\tClosed\n",
				  lastdevnam, prevdevnam);
		}

		NBD_PROC_LABEL (2);

		len +=
		 sprintf (buf + len, "Device %s:\tOpen " "\n", devnam);

		NBD_PROC_LABEL (3);

		/* PTB one word per flag bit, plus the global options */
		len += sprintf (buf + len,
				"[%s] State:\t%s%s%s%s%s%s%s%s%s%s%s%s%slast error %d, lives %d, bp %d\n",
				devnam, atomic_read (&lo->flags)
				& NBD_INITIALISED ? "" : "uninitialized, ",
				atomic_read (&lo->flags)
				& NBD_WRITE_NOCHK ? "noverify, " :
				"verify, ", atomic_read (&lo->flags)
				& NBD_SIGNED ? "signed, " : "unsigned, ",
				atomic_read (&lo->flags)
				& NBD_READ_ONLY ? "ro, " : "rw, ",
				merge_requests ? "merge requests, " : "",
				atomic_read (&lo->flags)
				& NBD_BUFFERWR ? "buffer writes, " : "",
				atomic_read (&lo->flags)
				& NBD_ENABLED ? "enabled, " : "disabled, ",
				atomic_read (&lo->flags)
				& NBD_INVALID ? "invalid, " : "",
				atomic_read (&lo->flags)
				& NBD_SHOW_ERRS ? "show_errs, " : "",
				atomic_read (&lo->flags)
				& NBD_DIRECT ? "direct, " : "",
				plug ? "plug, " : "",
				atomic_read (&lo->
					     flags) & NBD_SYNC ? "sync, " :
				"", atomic_read (&lo->flags)
				& NBD_MD5SUM ? "md5sum, " : "",
				lo->harderror,
				lo->lives -
				((atomic_read (&lo->flags) & NBD_ENABLED) ?
				 1 : 0), 0);

		NBD_PROC_LABEL (4);

		/* PTB count the queued requests by direction ourselves */
		do {
			int countq[2] = { 0, 0 };
			int cmd;

			struct list_head *pos;

			read_lock_irqsave (&lo->queue_lock, flags);


			list_for_each (pos, &lo->queue) {
				struct request *req =
				 list_entry (pos, struct request, queue);

				/* PTB give up counting on very long queues */
				if (countq[READ] + countq[WRITE] > 1000)
					break;

				cmd = rq_data_dir(req);
				countq[cmd]++;
			}

			read_unlock_irqrestore (&lo->queue_lock, flags);

			len += sprintf (buf + len,
					"[%s] Queued:\t+%dR/%dW curr (check %dR/%dW) +%dR/%dW max\n",
					devnam,
					atomic_read (&lo->countq[READ]),
					atomic_read (&lo->countq[WRITE]),
					countq[READ], countq[WRITE],
					atomic_read (&lo->maxq[READ]),
					atomic_read (&lo->maxq[WRITE]));
			/* PTB the counters disagree: trust what we counted */
			if (countq[READ] != atomic_read (&lo->countq[READ])
			    || countq[WRITE] !=
			    atomic_read (&lo->countq[WRITE])) {


				atomic_set (&lo->countq[READ],
					    countq[READ]);
				atomic_set (&lo->countq[WRITE],
					    countq[WRITE]);
			}
		} while (0);

		NBD_PROC_LABEL (5);

		len += sprintf (buf + len,
				"[%s] Buffersize:\t%d\t(sectors=%d, blocks=%d)\n",
				devnam, lo->bufsiz, lo->max_sectors,
				lo->max_sectors / (lo->blksize >> 9));
		len +=
		 sprintf (buf + len, "[%s] Blocksize:\t%d\t(log=%d)\n",
			  devnam, lo->blksize, lo->logblksize);
		len +=
		 sprintf (buf + len, "[%s] Size:\t%luKB\n", devnam,
			  (unsigned long) (lo->bytesize >> 10));
		len +=
		 sprintf (buf + len, "[%s] Blocks:\t%u\n", devnam,
			  lo->size >> (lo->logblksize - 10));

		NBD_PROC_LABEL (6);

		len +=
		 sprintf (buf + len, "[%s] Sockets:\t%d", devnam,
			  lo->nslot);

		NBD_PROC_LABEL (7);

		/* PTB +/- for slots with/without a file, * or . at lo->islot */
		do {
			int j;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				if (j != atomic_read (&lo->islot))
					len +=
					 sprintf (buf + len, "\t(%s)",
						  slotj->file ? "+" : "-");
				else
					len +=
					 sprintf (buf + len, "\t(%s)",
						  slotj->file ? "*" : ".");
			}
		} while (0);

		len += sprintf (buf + len, "\n");

		NBD_PROC_LABEL (8);

		len += sprintf (buf + len, "[%s] Requested:\t%s", devnam,
				display (atomic_read
					 (&lo->requests_in[READ]) +
					 atomic_read (&lo->
						      requests_in[WRITE]),
					 7));

		NBD_PROC_LABEL (9);

		do {
			int j;
			/* PTB display() reuses one static buffer, so copy */
			char buff[2][8];
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%s)",
					  display (slotj->in, 5));
			}
			strncpy (buff[0],
				 display (atomic_read
					  (&lo->requests_in[READ]), 6), 7);
			strncpy (buff[1],
				 display (atomic_read
					  (&lo->requests_in[WRITE]), 6),
				 7);
			len +=
			 sprintf (buf + len, "\t%sR/%sW", buff[0],
				  buff[1]);
			nbd_set_speed (lo);
			len += sprintf (buf + len, "\tmax %d",
					atomic_read (&lo->maxreqblks));
		} while (0);

		len += sprintf (buf + len, "\n");
		len += sprintf (buf + len, "[%s] Despatched:\t%s", devnam,
				display (atomic_read
					 (&lo->requests_out[READ]) +
					 atomic_read (&lo->
						      requests_out[WRITE]),
					 7));

		NBD_PROC_LABEL (10);

		do {
			int j;
			char buff[2][8];
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%s)",
					  display (slotj->out, 5));
			}
			strncpy (buff[0],
				 display (atomic_read
					  (&lo->requests_out[READ]), 6),
				 7);
			strncpy (buff[1],
				 display (atomic_read
					  (&lo->requests_out[WRITE]), 6),
				 7);
			len +=
			 sprintf (buf + len, "\t%sR/%sW", buff[0],
				  buff[1]);
			len +=
			 sprintf (buf + len, "\tmd5 %sW",
				  display (atomic_read
					   (&lo->wrequests_5to), 5));
			len +=
			 sprintf (buf + len, " (%s eq,",
				  display (atomic_read
					   (&lo->wrequests_5so), 5));
			len +=
			 sprintf (buf + len, " %s ne,",
				  display (atomic_read
					   (&lo->wrequests_5wo), 5));
			len +=
			 sprintf (buf + len, " %s dn)",
				  display (atomic_read
					   (&lo->wrequests_5eo), 5));
		} while (0);

		len += sprintf (buf + len, "\n");
		len += sprintf (buf + len, "[%s] Errored:\t%s", devnam,
				display (atomic_read (&lo->requests_err),
					 7));

		NBD_PROC_LABEL (11);

		/* PTB per slot errors, then slot total + the remainder */
		do {
			int j;
			char buff[2][8];
			int toterrs = 0;

			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%s)",
					  display (slotj->err, 5));
				toterrs += slotj->err;
			}
			strncpy (buff[0], display (toterrs, 6), 7);
			strncpy (buff[1],
				 display (atomic_read (&lo->requests_err) -
					  toterrs, 6), 7);
			len +=
			 sprintf (buf + len, "\t%s+%s\n", buff[0],
				  buff[1]);
		} while (0);

		NBD_PROC_LABEL (12);

		/* PTB add up the blocks of the requests still on the queue */
		do {
			int pending_rblks = 0;
			int pending_wblks = 0;
			int blks = 0;
			struct list_head *pos;
			int count = 0;
			struct request *req;

			read_lock_irqsave (&lo->queue_lock, flags);

			list_for_each (pos, &lo->queue) {

				req = list_entry (pos, struct request, queue);

				/* PTB again, do not walk a huge queue */
				if (count++ > 1000)
					break;

				blks = nr_blks (req);
				if (blks > 0) {
					switch (rq_data_dir(req)) {
					  case READ: pending_rblks += blks;
						break;
					  case WRITE:
						pending_wblks += blks;
						break;
					}
				}
			}

			read_unlock_irqrestore (&lo->queue_lock, flags);
			len +=
			 sprintf (buf + len, "[%s] Pending:\t%d", devnam,
				  atomic_read (&lo->requests_req[READ]) +
				  atomic_read (&lo->requests_req[WRITE]));

			do {
				int j;
				for (j = 0; j < lo->nslot; j++) {
					struct nbd_slot *slotj =
					 &lo->slots[j];
					len +=
					 sprintf (buf + len, "\t(%d)",
						  slotj->req);
				}
			} while (0);

			len += sprintf (buf + len,
					"\t%dR/%dW+%dR/%dW\n",
					atomic_read (&lo->
						     requests_req[READ]),
					atomic_read (&lo->
						     requests_req[WRITE]),
					pending_rblks, pending_wblks);

		} while (0);

		NBD_PROC_LABEL (13);

		/*
		 * PTB speeds are shifted left by logblksize for printing.
		 * buff[0..2] write, buff[3..5] read, buff[6..8] total, each
		 * as now/average/max.
		 */
		do {
			char buff[10][8];
			int shift = lo->logblksize;

			strncpy (buff[0],
				 display (atomic_read (&lo->wspeed.speed)
					  << shift, 5), 7);
			strncpy (buff[1],
				 display (atomic_read (&lo->wspeed.speedav)
					  << shift, 5), 7);
			strncpy (buff[2],
				 display (atomic_read
					  (&lo->wspeed.speedmax) << shift,
					  5), 7);

			strncpy (buff[3],
				 display (atomic_read (&lo->rspeed.speed)
					  << shift, 5), 7);
			strncpy (buff[4],
				 display (atomic_read (&lo->rspeed.speedav)
					  << shift, 5), 7);
			strncpy (buff[5],
				 display (atomic_read
					  (&lo->rspeed.speedmax) << shift,
					  5), 7);

			strncpy (buff[6],
				 display (atomic_read (&lo->tspeed.speed)
					  << shift, 5), 7);
			strncpy (buff[7],
				 display (atomic_read (&lo->tspeed.speedav)
					  << shift, 5), 7);
			strncpy (buff[8],
				 display (atomic_read
					  (&lo->tspeed.speedmax) << shift,
					  5), 7);

			len +=
			 sprintf (buf + len, "[%s] B/s now:", devnam);
			len +=
			 sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[6],
				  buff[3], buff[0]);
			len +=
			 sprintf (buf + len, "[%s] B/s ave:", devnam);
			len +=
			 sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[7],
				  buff[4], buff[1]);
			len +=
			 sprintf (buf + len, "[%s] B/s max:", devnam);
			len +=
			 sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[8],
				  buff[5], buff[2]);
		} while (0);

		/* PTB percentage of requests seen at each size in blocks */
		do {
			int blks;
			int tot_reqs = 0;

			len +=
			 sprintf (buf + len, "[%s] Spectrum:", devnam);
			for (blks = 0;
			     blks <= atomic_read (&lo->maxreqblks); blks++) {
				tot_reqs +=
				 atomic_read (&lo->req_in[READ][blks]) +
				 atomic_read (&lo->req_in[WRITE][blks]);
			}

			for (blks = 0;
			     blks <= atomic_read (&lo->maxreqblks); blks++) {
				int req_blks =
				 atomic_read (&lo->req_in[READ][blks])
				 + atomic_read (&lo->req_in[WRITE][blks]);
				int percent =
				 tot_reqs >
				 0 ? (100 * req_blks) / tot_reqs : 0;
				/* PTB sizes that round to 0% are left out */
				if (percent <= 0)
					continue;
				len +=
				 sprintf (buf + len, "\t%u%%%d", percent,
					  blks);
			}
			len += sprintf (buf + len, "\n");
		} while (0);

		NBD_PROC_LABEL (14);

		len += sprintf (buf + len, "[%s] Kthreads:\t%d", devnam,
				atomic_read (&lo->kthreads));
		len +=
		 sprintf (buf + len, "\t(%d waiting/%d running/%d max)\n",
			  atomic_read (&lo->kwaiters),
			  atomic_read (&lo->kthreads) -
			  atomic_read (&lo->kwaiters),
			  atomic_read (&lo->kmax));

		NBD_PROC_LABEL (15);

		len += sprintf (buf + len, "[%s] Cthreads:\t%d", devnam,
				atomic_read (&lo->cthreads));

		NBD_PROC_LABEL (16);

		/*
		 * PTB slot state from the RUNNING (1) and WAITING (2) flag
		 * bits: neither "-", running "*", waiting only "?",
		 * both "+"
		 */
		do {
			int j;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				int state =
				 ((slotj->
				   flags & NBD_SLOT_RUNNING) ? 1 : 0) +
				 ((slotj->
				   flags & NBD_SLOT_WAITING) ? 2 : 0);
				char *desc = "?";
				switch (state) {
				  case 0:
					desc = "-";
					break;
				  case 1:
					desc = "*";
					break;
				  case 2:
					desc = "?";
					break;
				  case 3:
					desc = "+";
					break;
				}
				len += sprintf (buf + len, "\t(%s)", desc);
			}
		} while (0);

		len += sprintf (buf + len, "\n");

		NBD_PROC_LABEL (17);

		/* PTB this device now counts as the last one printed */
		last = i;
		len += sprintf (buf + len, "[%s] Cpids:\t%d", devnam,
				atomic_read (&lo->cthreads));

		do {
			int j;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%u)", slotj->pid);
			}
			len += sprintf (buf + len, "\n");
		} while (0);

		/* PTB the Kpids line only appears if some slot has a spid */
		do {
			int j, k;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				if (slotj->spid != 0)
					break;
			}
			if (j < lo->nslot) {
				len +=
				 sprintf (buf + len, "[%s] Kpids:\t%d",
					  devnam,
					  atomic_read (&lo->cthreads));
				for (k = 0; k < lo->nslot; k++) {
					struct nbd_slot *slotk =
					 &lo->slots[k];
					len +=
					 sprintf (buf + len, "\t(%u)",
						  slotk->spid);
				}
				len += sprintf (buf + len, "\n");
			}
		} while (0);

		NBD_PROC_LABEL (18);

		NBD_PROC_LABEL (19);

		/* PTB device complete, the next one starts from the top */
		next_label = NULL;
		next_label_name = NULL;
	}

	NBD_PROC_LABEL (20);

	/* PTB report any closed devices after the last one printed */
	if (last == i - 2) {
		char *prevnam = device_letter (i - 1);
		len +=
		 sprintf (buf + len, "Device %s:\tClosed\n", prevnam);
	}

	if (last < i - 2) {
		char lastnam[3];
		char prevnam[3];
		strncpy (lastnam, device_letter (last + 1), 3);
		strncpy (prevnam, device_letter (i - 1), 3);
		len += sprintf (buf + len, "Device %s-%s:\tClosed\n",
				lastnam, prevnam);
	}

	NBD_PROC_LABEL (21);

	/* PTB all done: clear the resume point and signal end of file */
	next_label = NULL;
	next_label_name = NULL;

	*eof = 1;

	*start = buf;
	total += len;

	return len;
}

/*
 * PTB read an int from a string. Return number of ints read (0 or 1).
 *
 * Leading spaces and tabs are skipped. At most one sign ('+' or '-')
 * may come before the digits and it must be followed directly by a
 * digit. Conversion stops at the first non-digit or after len
 * characters. *n is only written when 1 is returned.
 */
static int
sscani (char *buf, int len, int *n)
{

	int i, a = 0;
	short has_digits = 0;
	/* PTB 0 is no sign seen yet, -1 is '-' and +1 is '+' */
	short is_signed = 0;

	for (i = 0; i < len; i++) {
		char c = buf[i];
		if (c == ' ' || c == '\t') {
			/* PTB no white space between sign and digits */
			if (is_signed)
				return 0;
		}
		else if (c == '-' || c == '+') {
			if (is_signed)
				return 0;
			is_signed = (c == '-') ? -1 : 1;
		}
		else if (c >= '0' && c <= '9') {
			/*
			 * PTB leave is_signed alone here. Setting it used
			 * to throw away a leading minus sign.
			 */
			has_digits = 1;
			break;
		}
		else {
			return 0;
		}
	}

	if (!has_digits)
		return 0;
	for (; i < len; i++) {
		char c = buf[i];
		if (c < '0' || c > '9')
			break;
		a *= 10;
		a += c - '0';
	}
	if (is_signed >= 0)
		*n = a;
	else
		*n = -a;
	return 1;
}

/*
 * Look for a lower case letter code and save the number it stands for.
 * Leading spaces (but not tabs) are skipped. The run of letters that
 * follows is read as a base 26 number with 'a' as 0, so "b" is 1 and
 * "ba" is 26 (and both "a" and "aa" give 0). Reading stops at the
 * first character that is not a lower case letter, or after len
 * characters.
 *
 * Return number of letter codes found (0 or 1). *n is only written
 * when 1 is returned.
 */
static int
sscana (char *buf, int len, int *n)
{
	int pos = 0;
	int value = 0;

	while (pos < len && buf[pos] == ' ')
		pos++;

	/* the first thing after the blanks has to be a letter */
	if (pos >= len || buf[pos] < 'a' || buf[pos] > 'z')
		return 0;

	while (pos < len && buf[pos] >= 'a' && buf[pos] <= 'z') {
		value *= 26;
		value += buf[pos] - 'a';
		pos++;
	}

	*n = value;
	return 1;
}

/*
 * PTB step over spaces and tabs without ever passing the buffer end
 */
static void
getarg_skip_ws (const char **buffer, int *buflen)
{
	while (*buflen > 0 && (**buffer == ' ' || **buffer == '\t')) {
		(*buffer)++;
		(*buflen)--;
	}
}

/*
 * Read an integer (or letter code) arg into an int. The accepted
 * forms are
 *
 *      key = value
 *      key [ index ] = value
 *
 * where value and index are each either an integer (see sscani) or a
 * lower case letter code (see sscana).
 *
 * Returns -1 for no key match or bad syntax (missing ']' or '='),
 * 0 if the key matched but an index or value could not be read, and
 * 1 if a value was read.
 *
 * @buffer is the text, which need not be nul terminated
 * @buflen is the number of characters of it that may be looked at
 * @key is the (nul terminated) name that must come first
 * @i is the integer value that results
 * @j is an index if one is supplied (foo[j] = i ), else -1
 *
 * The buffer itself is never written to.
 */
static int
getarg (const char *buffer, int buflen, const char *key, int *i, int *j)
{

	int keylen;

	getarg_skip_ws (&buffer, &buflen);

	keylen = strlen (key);
	if (buflen < keylen || strncmp (buffer, key, keylen))
		return -1;

	buffer += keylen;
	buflen -= keylen;

	getarg_skip_ws (&buffer, &buflen);

	*j = -1;
	if (buflen > 0 && *buffer == '[') {
		int indexlen;

		buffer++;
		buflen--;

		getarg_skip_ws (&buffer, &buflen);

		/* PTB find the closing bracket inside what we were given */
		for (indexlen = 0; indexlen < buflen; indexlen++) {
			if (buffer[indexlen] == ']')
				break;
		}
		if (indexlen >= buflen)
			return -1;

		/* PTB the index is a number or else a letter code */
		if (sscani ((char *) buffer, indexlen, j) < 1
		    && sscana ((char *) buffer, indexlen, j) < 1)
			return 0;

		/* PTB move on past the index and the bracket */
		buffer += indexlen + 1;
		buflen -= indexlen + 1;

		getarg_skip_ws (&buffer, &buflen);
	}

	if (buflen <= 0 || *buffer != '=')
		return -1;

	buffer++;
	buflen--;

	getarg_skip_ws (&buffer, &buflen);

	/* PTB the value is a number or else a letter code */
	if (sscani ((char *) buffer, buflen, i) < 1
	    && sscana ((char *) buffer, buflen, i) < 1)
		return 0;
	return 1;
}


/*
 * PTB - set (x != 0) or clear (x == 0) the flag bits X on device i,
 * or on every device if i is not a valid device index.
 */
static void
set_generic (int x, int i, int X)
{
	int first = 0;
	int last = MAX_NBD - 1;
	int nbd;

	/* a valid index narrows the range down to that one device */
	if (i >= 0 && i < MAX_NBD)
		first = last = i;

	for (nbd = first; nbd <= last; nbd++) {
		struct nbd_device *lo = &nbd_dev[nbd];

		if (x != 0)
			atomic_set_mask (X, &lo->flags);
		else
			atomic_clear_mask (X, &lo->flags);
	}
}

/*
 * PTB - set the NBD_SYNC flag on device i (or on all devices if i is
 * out of range) when sync_intvl is nonzero, and clear it otherwise.
 */
static void
set_sync_intvl (int sync_intvl, int i)
{
	set_generic (sync_intvl, i, NBD_SYNC);
}

/*
 * PTB - set the NBD_SHOW_ERRS flag on device i (or on all devices if i
 * is out of range) when show_errs is nonzero, and clear it otherwise.
 */
static void
set_show_errs (int show_errs, int i)
{
	set_generic (show_errs, i, NBD_SHOW_ERRS);
}

/*
 * PTB - set the NBD_MD5SUM flag on device i (or on all devices if i is
 * out of range) when md5sum is nonzero, and clear it otherwise.
 */
static void
set_md5sum (int md5sum, int i)
{
	set_generic (md5sum, i, NBD_MD5SUM);
}

/*
 * PTB - enable or disable device i, or every device if i is not a
 * valid device index. Enabling calls nbd_restart on the device and
 * then sets NBD_ENABLED; disabling only clears the flag.
 */
static void
set_enable (int enable, int i)
{
	int first = 0;
	int last = MAX_NBD - 1;
	int nbd;

	/* a valid index narrows the range down to that one device */
	if (i >= 0 && i < MAX_NBD)
		first = last = i;

	for (nbd = first; nbd <= last; nbd++) {
		struct nbd_device *lo = &nbd_dev[nbd];

		if (enable == 0) {
			atomic_clear_mask (NBD_ENABLED, &lo->flags);
			continue;
		}
		nbd_restart (lo);
		atomic_set_mask (NBD_ENABLED, &lo->flags);
	}
}

/*
 * PTB - set the NBD_DIRECT flag on device i (or on all devices if i is
 * out of range) when direct is nonzero, and clear it otherwise.
 */
static void
set_direct (int direct, int i)
{
	set_generic (direct, i, NBD_DIRECT);
}

/*
 * PTB - zero the per-size request counters (req_in[READ][] and
 * req_in[WRITE][], entries 0 to maxreqblks inclusive) of device i, or
 * of every device if i is not a valid device index. Nothing is done
 * when zs is 0.
 */
static void
zero_counters (int zs, int i)
{
	int first = 0;
	int last = MAX_NBD - 1;
	int nbd;

	if (zs == 0)
		return;

	/* a valid index narrows the range down to that one device */
	if (i >= 0 && i < MAX_NBD)
		first = last = i;

	for (nbd = first; nbd <= last; nbd++) {
		struct nbd_device *lo = &nbd_dev[nbd];
		int blks = 0;

		while (blks <= atomic_read (&lo->maxreqblks)) {
			atomic_set (&lo->req_in[READ][blks], 0);
			atomic_set (&lo->req_in[WRITE][blks], 0);
			blks++;
		}
	}
}

/*
 * PTB - set the NBD_BUFFERWR flag on device i (or on all devices if i
 * is out of range) when buffer_writes is nonzero, and clear it
 * otherwise.
 */
static void
set_buffer_writes (int buffer_writes, int i)
{
	set_generic (buffer_writes, i, NBD_BUFFERWR);
}

/*  
 * PTB - write a 0 with echo -n 0 to /proc/nbdinfo to do a hard reset.
 */
static int
nbd_write_proc (struct file *file, const char *buffer, unsigned long count,
		void *data)
{


	switch (count) {

		int i;

	  case 2:
		if (buffer[1] != '\n')
			break;

	  case 1:
		switch (*buffer) {
		  case '1':
			nbd_hard_reset ();
			break;
		  case '0':
			for (i = 0; i < MAX_NBD; i++) {

				struct nbd_device *lo = &nbd_dev[i];
				nbd_soft_reset (lo);
				reenable_timer.data = (unsigned long) lo;
				reenable_timer.expires = jiffies + 5 * HZ;
				add_timer (&reenable_timer);
			}
			break;
		}
		break;
	  default:
		do {
			int index, intval;

			if (getarg (buffer, count, "merge_requests",
				    &merge_requests, &index) >= 0) {

				break;
			}
			if (getarg (buffer, count, "sync_intvl",
				    &sync_intvl, &index) >= 0
			    || getarg (buffer, count, "sync",
				       &sync_intvl, &index) >= 0) {

				set_sync_intvl (sync_intvl, index);
				break;
			}
			if (getarg (buffer, count, "show_errs",
				    &show_errs, &index) >= 0) {

				set_show_errs (show_errs, index);
				break;
			}
			if (getarg (buffer, count, "plug",
				    &plug, &index) >= 0) {

				break;
			}
			if (getarg (buffer, count, "md5sum",
				    &md5sum, &index) >= 0) {

				set_md5sum (md5sum, index);
				break;
			}
			if (getarg (buffer, count, "rahead",
				    &rahead, &index) >= 0) {

				read_ahead[major] = rahead;
				break;
			}

			if (getarg (buffer, count, "buffer_writes",
				    &buffer_writes, &index) >= 0) {

				set_buffer_writes (buffer_writes, index);
				break;
			}

			if (getarg (buffer, count, "enable",
				    &enable, &i) >= 0) {

				set_enable (enable, index);
				break;
			}
			if (getarg (buffer, count, "direct",
				    &direct, &i) >= 0) {

				set_direct (direct, index);
				break;
			}
			if (getarg (buffer, count, "zero",
				    &intval, &index) >= 0) {
				zero_counters (intval, index);
			}
			NBD_ERROR ("illegal %ld character command\n",
				   count);
			return -EINVAL;
		} while (0);
		break;
	}
	return count;
}

/* Module metadata, only emitted when building as a loadable module. */
#ifdef MODULE
        MODULE_AUTHOR ("Peter T. Breuer, Andres Marin");
        MODULE_DESCRIPTION ("Enhanced Network Block Device " NBD_VERSION);
        MODULE_LICENSE ("GPL");
#endif		/* MODULE */

/*
 * PTB we steal these from the queue struct at init.
 * They keep the default queue's original merge callbacks, saved by
 * nbd_init() before it installs the nbd_*_merge_fn wrappers, which
 * hand over to them once their own checks pass.
 */
static merge_requests_fn *ll_merge_requests_fn;
static merge_request_fn *ll_front_merge_fn;
static merge_request_fn *ll_back_merge_fn;

/*
 * PTB -
 * The merge wrappers below are needed when the kernel does request
 * merging, to stop it building requests that are bigger than our
 * buffer.
 *
 * To turn OFF merging (once these functions are in place), set
 * merge_requests=0.
 *
 * nbd_merge_requests_fn decides whether req and req2 may be merged.
 * It returns 0 (refuse) when merging is switched off, when no original
 * callback was saved, or when the combined sector count would exceed
 * either the device's max_sectors or (merge_requests + 1) blocks
 * expressed in 512 byte sectors. Otherwise the saved callback decides.
 */
static int
nbd_merge_requests_fn (request_queue_t * q, struct request *req,
		       struct request *req2, int max_segments)
{
	/* the device is found from the minor number of the first request */
	struct nbd_device *lo = &nbd_dev[MINOR (req->rq_dev) >> NBD_SHIFT];

	if (!merge_requests || !ll_merge_requests_fn)
		return 0;

	if (req->nr_sectors + req2->nr_sectors > lo->max_sectors
	    || req->nr_sectors + req2->nr_sectors >
	       ((merge_requests + 1) << (lo->logblksize - 9)))
		return 0;

	return ll_merge_requests_fn (q, req, req2, max_segments);
}
/*
 * Wrapper installed as the queue's front_merge_fn.
 *
 * Returns 0 (refuse) when merging is switched off, when no original
 * callback was saved, or when req already holds more sectors than the
 * device's max_sectors or than (merge_requests + 1) blocks expressed in
 * 512 byte sectors. Only req's current size is tested; bh is passed on
 * untouched to the saved callback, which makes the final decision.
 */
static int
nbd_front_merge_fn (request_queue_t * q, struct request *req,
		    struct buffer_head *bh, int max_segments)
{
	struct nbd_device *lo = &nbd_dev[MINOR (req->rq_dev) >> NBD_SHIFT];

	if (!merge_requests || !ll_front_merge_fn)
		return 0;

	if (req->nr_sectors > lo->max_sectors
	    || req->nr_sectors >
	       ((merge_requests + 1) << (lo->logblksize - 9)))
		return 0;

	return ll_front_merge_fn (q, req, bh, max_segments);
}
/*
 * Wrapper installed as the queue's back_merge_fn.
 *
 * Returns 0 (refuse) when merging is switched off, when no original
 * callback was saved, or when req already holds more sectors than the
 * device's max_sectors or than (merge_requests + 1) blocks expressed in
 * 512 byte sectors. Only req's current size is tested; bh is passed on
 * untouched to the saved callback, which makes the final decision.
 */
static int
nbd_back_merge_fn (request_queue_t * q, struct request *req,
		   struct buffer_head *bh, int max_segments)
{
	struct nbd_device *lo = &nbd_dev[MINOR (req->rq_dev) >> NBD_SHIFT];

	if (!merge_requests || !ll_back_merge_fn)
		return 0;

	if (req->nr_sectors > lo->max_sectors
	    || req->nr_sectors >
	       ((merge_requests + 1) << (lo->logblksize - 9)))
		return 0;

	return ll_back_merge_fn (q, req, bh, max_segments);
}

/* Handle returned by register_sysctl_table() in nbd_init(). */
static struct ctl_table_header *nbd_table_header;

/*
 * Leaf sysctl entries. Each one exposes a single int module variable
 * with mode 0644 through proc_dointvec. The list ends with a zeroed
 * entry.
 */
static ctl_table nbd_table[] = {
	{1, "rahead",
	 &rahead, sizeof (int), 0644, NULL, &proc_dointvec},
	{2, "plug",
	 &plug, sizeof (int), 0644, NULL, &proc_dointvec},
	{3, "sync_intvl",
	 &sync_intvl, sizeof (int), 0644, NULL, &proc_dointvec},
	{4, "merge_requests",
	 &merge_requests, sizeof (int), 0644, NULL, &proc_dointvec},
	{5, "md5sum",
	 &md5sum, sizeof (int), 0644, NULL, &proc_dointvec},
	{6, "debug",
	 &debug, sizeof (int), 0644, NULL, &proc_dointvec},
	{7, "paranoia",
	 &paranoia, sizeof (int), 0644, NULL, &proc_dointvec},
	{8, "md5_on_threshold",
	 &md5_on_threshold, sizeof (int), 0644, NULL, &proc_dointvec},
	{9, "md5_off_threshold",
	 &md5_off_threshold, sizeof (int), 0644, NULL, &proc_dointvec},
	{0}
};
/*
 * The "enbd" directory holding the entries above.
 * NOTE(review): the numeric id 6 is hard coded - confirm it does not
 * clash with an id already in use under CTL_DEV.
 */
static ctl_table nbd_dir_table[] = {
	{6, "enbd", NULL, 0, 0555, nbd_table},
	{0}
};
/* Root of the tree handed to register_sysctl_table(): dev/enbd/<name>. */
static ctl_table nbd_root_table[] = {
	{CTL_DEV, "dev", NULL, 0, 0555, nbd_dir_table},
	{0}
};

/* devfs handle of the top level "nd" directory, set in nbd_init(). */
static devfs_handle_t devfs_handle;
/*
 * devfs handles of the per-device directories below "nd", one per
 * device index; also published as nbd_gendisk.de_arr.
 */
static devfs_handle_t devfs_handles[MAX_NBD];

static void
nbd_reset (struct nbd_device *lo, int i)
{
	int j;

	memset (lo, 0, sizeof (struct nbd_device));
	lo->magic = NBD_DEV_MAGIC;

	strncpy (lo->devnam, device_letter (i), 4);
	for (j = 0; j < NBD_MAXCONN; j++) {
		struct nbd_slot *slot = &lo->slots[j];
		slot->lo = lo;
		slot->i = j;
		INIT_LIST_HEAD (&slot->queue);
	}
	lo->blksize = 1024;
	lo->logblksize = 10;
	lo->bytesize = 0x7fffffff00000;
	lo->size = 0x7fffffff;
	lo->sectors = 0xfffffffe;
	lo->nbd = i;
	lo->req_timeo = NBD_REQ_TIMEO;
	lo->max_sectors = buf_sectors;

	lo->wspeed.getdistance = getwdistance;
	lo->rspeed.getdistance = getrdistance;
	lo->tspeed.getdistance = gettdistance;
	lo->wspeed.lo = lo;
	lo->rspeed.lo = lo;
	lo->tspeed.lo = lo;

	INIT_LIST_HEAD (&lo->queue);
	init_waitqueue_head (&lo->wq);

	INIT_LIST_HEAD (&lo->req.queue);

	init_waitqueue_head (&lo->req_wq);

	for (j = 0; j < NBD_MAXCONN; j++) {
		nbd_blksizes[i * NBD_MAXCONN + j] = lo->blksize;
		nbd_bytesizes[i * NBD_MAXCONN + j] = lo->bytesize;
		nbd_sizes[i * NBD_MAXCONN + j] = lo->size;
		nbd_max_sectors[i * NBD_MAXCONN + j] = lo->max_sectors;
	}
	if (md5sum) {
		atomic_set_mask (NBD_MD5SUM, &lo->flags);
	}
	if (sync_intvl) {
		atomic_set_mask (NBD_SYNC, &lo->flags);
	}
	if (show_errs) {
		atomic_set_mask (NBD_SHOW_ERRS, &lo->flags);
	}
	if (direct) {
		atomic_set_mask (NBD_DIRECT, &lo->flags);
	}
	if (buffer_writes) {
		atomic_set_mask (NBD_BUFFERWR, &lo->flags);
	}

}

/*
 * nbd_init - module entry point.
 *
 * Registers the block major, hooks the per-major size arrays and the
 * default request queue, wraps the queue's merge callbacks with the
 * nbd_*_merge_fn functions (saving the originals in ll_*), resets every
 * device, registers the gendisk, creates the nbdinfo proc entry, builds
 * the devfs tree under "nd" and registers the sysctl table.
 *
 * Returns 0 on success, -EIO when the major cannot be registered, and
 * -EINVAL when the proc entry cannot be created. In the latter case
 * everything registered up to that point is undone again, so a failed
 * load does not leave the major, queue or gendisk pointing at us.
 */
int __init
nbd_init (void)
{
	int i;
	int err = 0;
	struct proc_dir_entry *res;

	NBD_INFO ("Network Block Device support by pavel@elf.mj.gts.cz\n");
	NBD_INFO ("Network Block Device port to 2.0 by ptb@it.uc3m.es\n");
	NBD_INFO ("Network Block Device move networking to user space by "
		  "amarin@it.uc3m.es\n");
	NBD_INFO ("Enhanced Network Block Device " NBD_VERSION " by "
		  "ptb@it.uc3m.es\n");

	if (register_blkdev (major, "nbd", &nbd_blkops)) {

		NBD_ERROR ("Unable to register major number %d for NBD\n",
			   major);
		return -EIO;
	}

#ifdef MODULE
	NBD_INFO ("registered device at major %d\n", major);
#endif
	blksize_size[major] = nbd_blksizes;
	blk_size[major] = nbd_sizes;
	max_sectors[major] = nbd_max_sectors;

	blk_init_queue (BLK_DEFAULT_QUEUE (major), do_nbd_request);

	blk_queue_headactive (BLK_DEFAULT_QUEUE (major), 0);

	/* remember the stock merge callbacks, then put ours in front */
	ll_merge_requests_fn =
	 (BLK_DEFAULT_QUEUE (major))->merge_requests_fn;
	ll_front_merge_fn = (BLK_DEFAULT_QUEUE (major))->front_merge_fn;
	ll_back_merge_fn = (BLK_DEFAULT_QUEUE (major))->back_merge_fn;

	(BLK_DEFAULT_QUEUE (major))->merge_requests_fn = &nbd_merge_requests_fn;
	(BLK_DEFAULT_QUEUE (major))->front_merge_fn = &nbd_front_merge_fn;
	(BLK_DEFAULT_QUEUE (major))->back_merge_fn = &nbd_back_merge_fn;

	read_ahead[major] = rahead;

	for (i = 0; i < MAX_NBD; i++) {
		struct nbd_device *lo = &nbd_dev[i];
		nbd_reset (lo, i);
	}

	nbd_gendisk.major = major;
	nbd_gendisk.major_name = "nd";
	nbd_gendisk.minor_shift = NBD_SHIFT;
	nbd_gendisk.max_p = NBD_MAXCONN;

	nbd_gendisk.part = nbd_hd_struct;
	nbd_gendisk.sizes = nbd_sizes;
	nbd_gendisk.nr_real = 0;
	nbd_gendisk.real_devices = NULL;
	nbd_gendisk.next = NULL;

	nbd_gendisk.fops = &nbd_blkops;

	nbd_gendisk.de_arr = devfs_handles;

	add_gendisk (&nbd_gendisk);

	res = create_proc_read_entry ("nbdinfo", 0, NULL,
				      &nbd_read_proc, NULL);
	if (!res) {
		NBD_ALERT ("creation of proc entry failed\n");
		err = -EINVAL;
		goto fail_proc;
	}

	res->write_proc = &nbd_write_proc;

	devfs_handle = devfs_mk_dir (NULL, "nd", NULL);
	if (devfs_handle) {
		for (i = 0; i < MAX_NBD; i++) {
			struct nbd_device *lo = &nbd_dev[i];
			int j;

			devfs_handles[i] =
			 devfs_mk_dir (devfs_handle, lo->devnam, NULL);

			/*
			 * Without a directory there is nowhere to put the
			 * nodes or the links; do not hand a NULL parent to
			 * devfs_mk_symlink.
			 */
			if (!devfs_handles[i])
				continue;

			devfs_register_series
			 (devfs_handles[i], "%u",
			  NBD_MAXCONN, DEVFS_FL_DEFAULT,
			  major, i * NBD_MAXCONN,
			  S_IFBLK | S_IRUSR | S_IWUSR,
			  &nbd_blkops, NULL);

			devfs_mk_symlink (devfs_handles[i], "disc",
					  DEVFS_FL_DEFAULT, "0",
					  NULL, NULL);

			/*
			 * partN -> N for every node registered above except
			 * 0. The series holds NBD_MAXCONN nodes, so that is
			 * the bound here (it used to be MAX_NBD).
			 */
			for (j = 1; j < NBD_MAXCONN; j++) {
				/* large enough for any unsigned value */
				char link[12];
				char name[16];
				sprintf (link, "%u", j);
				sprintf (name, "part%u", j);
				devfs_mk_symlink (devfs_handles[i],
						  name,
						  DEVFS_FL_DEFAULT,
						  link, NULL,
						  NULL);
			}
		}
	}

	nbd_table_header = register_sysctl_table (nbd_root_table, 1);

	return err;

fail_proc:
	/* undo, in reverse order, what was set up before the proc entry */
	del_gendisk (&nbd_gendisk);
	blk_cleanup_queue (BLK_DEFAULT_QUEUE (major));
	blksize_size[major] = NULL;
	blk_size[major] = NULL;
	max_sectors[major] = NULL;
	read_ahead[major] = 0;
	if (unregister_blkdev (major, "nbd") != 0) {
		NBD_ALERT ("could not unregister major number %d\n", major);
	}
	return err;
}

/*
 * nbd_cleanup - module exit point.
 *
 * Invalidates and destroys the buffers of every device flagged
 * NBD_INITIALISED, then removes the sysctl table, the devfs tree, the
 * proc entry and the gendisk, disables every device (freeing its
 * blockmap, syncing it and stopping its run_queue timer), tears down
 * the request queue, unhooks the per-major arrays and finally gives the
 * major number back.
 */
void __exit
nbd_cleanup (void)
{
	int i;

	for (i = 0; i < MAX_NBD; i++) {

		struct nbd_device *lo = &nbd_dev[i];
		int j;

		if (!(atomic_read (&lo->flags) & NBD_INITIALISED))
			continue;

		NBD_INFO ("invalidating buffers on device nd%s%d-%d\n",
			  lo->devnam, 0, NBD_MAXCONN);

		for (j = 0; j < NBD_MAXCONN; j++) {
			int minor = i * NBD_MAXCONN + j;
			invalidate_buffers (MKDEV (major, minor));
		}

		NBD_INFO ("destroying buffers on device nd%s%d-%d\n",
			  lo->devnam, 0, NBD_MAXCONN);

		for (j = 0; j < NBD_MAXCONN; j++) {
			int minor = i * NBD_MAXCONN + j;
			destroy_buffers (MKDEV (major, minor));
		}
	}

	/* nbd_init does not check the registration, so it may be NULL */
	if (nbd_table_header)
		unregister_sysctl_table (nbd_table_header);

	if (devfs_handle) {
		for (i = 0; i < MAX_NBD; i++) {
			int j;

			if (!devfs_handles[i])
				continue;

			for (j = 0; j < NBD_MAXCONN; j++) {
				devfs_handle_t x;
				/* large enough for any unsigned value */
				char s[12];

				/*
				 * Same "%u" naming as the series registered
				 * in nbd_init; the old hand-built name was
				 * only right for j below 20.
				 */
				sprintf (s, "%u", j);
				x = devfs_find_handle (devfs_handles[i], s,
						       major,
						       i * NBD_MAXCONN + j,
						       DEVFS_SPECIAL_BLK, 0);
				if (x)
					devfs_unregister (x);
			}

			devfs_unregister (devfs_handles[i]);
		}
		devfs_unregister (devfs_handle);
	}

	remove_proc_entry ("nbdinfo", &proc_root);

	del_gendisk (&nbd_gendisk);

	for (i = 0; i < MAX_NBD; i++) {
		struct nbd_device *lo = &nbd_dev[i];
		atomic_clear_mask (NBD_ENABLED, &lo->flags);
		if (lo->blockmap) {
			kfree (lo->blockmap);
			lo->blockmap = NULL;
		}
		nbd_sync_sync (lo);
		del_timer (&lo->run_queue);
	}

	blk_cleanup_queue (BLK_DEFAULT_QUEUE (major));
	/*
	 * All three arrays were pointed at our own data in nbd_init;
	 * clear every one of them so nothing is left dangling.
	 */
	blksize_size[major] = NULL;
	blk_size[major] = NULL;
	max_sectors[major] = NULL;
	read_ahead[major] = 0;

	if (unregister_blkdev (major, "nbd") != 0) {
		NBD_ALERT ("cleanup_module failed\n");
	}
	else {
		NBD_INFO ("module cleaned up.\n");
	}
}

/* Register nbd_init and nbd_cleanup as the module entry and exit points. */
module_init (nbd_init);
module_exit (nbd_cleanup);

/* Compile line:

 *  gcc -O2 -D__KERNEL__ -DMODULE -xc -c nbd.c -o nbd.o
 *
 *  (possibly with -DMODVERSIONS also). PTB
 */
