/*
 * (Enhanced) Network block device - make block devices work over TCP
 *
 * Original NBD Copyright 1997 Pavel Machek <pavel@elf.mj.gts.cz>
 * Further ENBD Copyrights 1998, 1999, 2000 Peter Breuer <ptb@it.uc3m.es>
 *
 *
 *
 * ATTENTION: You need the userspace daemons available from
 *            ftp://oboe.it.uc3m.es/pub/Programs/nbd-2.4.*.tgz
 *            and the ENBD project on http://freshmeat.net
 *
 *
 *
 * Development of the ENBD software has been supported by grants and
 * contributions from Realm Information Technologies, Inc. of 5555
 * Oakbrook Parkway, NW Norcross, GA and iNsu Innovations Inc.  of
 * 3465, Boulevard Thimens, Saint-Laurent, Quebec, Canada.
 * 
 * ------------ Pavel's history notes ----------------------------------
 * 97-3-25 compiled 0-th version, not yet tested it 
 *   (it did not work, BTW) (later that day) HEY! it works!
 *   (bit later) hmm, not that much... 2:00am next day:
 *   yes, it works, but it gives something like 50kB/sec
 * 97-3-28 it's completely strange - when using 1024 byte "packets"
 *   it gives 50kB/sec and CPU idle; with 2048 bytes it gives
 *   500kB/sec (and CPU loaded 100% as it should be) (all done
 *   against localhost)
 * 97-4-1 complete rewrite to make it possible for many requests at 
 *   once to be processed
 * 97-4-1 23:57 rewrite once again to make it work :-(
 * 97-4-3 00:02 hmm, it does not work.
 * 97-4-3 23:06 hmm, it will need one more rewrite :-)
 * 97-4-10 It looks like it's working and stable. But I still do not
 *  have any recovery from lost connection...
 * (setq tab-width 4)
 * 97-4-11 Making protocol independent of endianity etc.
 * 97-4-15 Probably one more rewrite, since it loses requests under
 *  heavy loads
 * 97-9-13 Cosmetic changes
 *
 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
 * why not: would need verify_area and friends, would share yet another 
 *          structure with userland
 *
 * FIXME: not module-safe
 *
 * ------------ Peter's history notes ----------------------------------
 * 98-12-18 modules now OK ptb@it.uc3m.es (Peter Breuer) ported to
 * 2.0.*. + better debugging. Still possible lockup in connection with APM
 * and spurious interrupt - only on write. Error treatment should
 * be improved. After 100 errors from end_request the kernel can
 * do anything. We should catch it ourselves.
 * 99-1-sometime fixed lockup by extending semaphore - ptb v1.0.
 * 99-3-sometime reconnect protocol (client mod agreed by pavel) - ptb v1.1
 * 99-4-25 add /proc/nbdinfo - ptb v1.1.1
 * 99-4-sometime add multiplex - ptb v1.2
 * 99-4-26 fix multiplex and redundancy - ptb v1.2.1
 * 99-4-29 reentrant client threads - ptb v1.2.2
 * 99-4-29 socket related stuff placed in user space - amarin v1.3.0
 * 99-5-3  fix all, all writes had to be before all reads - ptb v1.2.4
 * 99-5-5  fix out-of-order, async - ptb v1.2.5
 * 99-5-7  semaphores removed (still works!), fail cases corrected - ptb v1.2.6
 * 99-5-12 signals unblocked in xmit, blksize != 1024 fixed, ioctls
 *         added  - ptb v1.2.7
 * 99-6-1  interaction with client split into two functions - amarin v1.3.0
 * 99-6-3  reintegrated fully, mem manager fixed, accounts fixed - ptb v1.2.8.3
 * 99-6-3  extra queue removed, mem manager removed  - ptb v1.2.8.4
 * 99-7-3  buffer registration introduced - ptb v1.2.8.5
 * 99-7-3  some client redundancy reestablished - ptb v2.1.1
 * 99-7-10 encapsulated queue calls. One element rollback buffer - ptb v2.1.2
 * 99-7-20 timestamp and rollback old abandoned request - ptb v2.1.3
 * 99-7-24 64bit file sizes and offsets accepted - ptb v2.1.9
 * 99-7-26 experimental request coalesces - ptb v2.1.10
 * 99-7-27 partitioning scheme - ptb v2.2.1
 * 99-8-3  nbd_clr_sock bug in invalidate_device fixed? - ptb v2.2.4
 * 99-8-5  reverse replace of block_fsync, add sig ioctls - ptb v2.2.5
 *         reverse bug introduced about v2.2.3 for compound reqs - ptb v2.2.5
 *         fix clear_que bug (didn't rollback first) from 2.1.3 - ptb v2.2.5
 * 99-8-22 workaround strange nr_sectors bug - ptb v2.2.6
 * 99-8-11 fix MY_NBD_SYNC bug. Never sync'ed all - ptb v2.2.7
 * 99-8-12 wakeups all moved to enqueue - ptb v2.2.7
 * 99-8-23 remove slot->cli_age - ptb v2.2.7
 * 99-8-24 first 8 bytes of signature embedded in packets - ptb v2.2.8
 *         fix SET_SIG define buglet, remove hardcoded constants - ptb v2.2.8
 *         fix huge bug. Missing copy_fromfs in my_nbd_ack - ptb v2.2.8     
 *         removed signature embedding and all other decorations - ptb v2.2.8
 * 99-8-25 recast fix in my_nbd_ack to avoid align. bug - ptb v2.2.9
 *         put in MKDEVs and put back some hardcode const fixes - ptb v2.2.10
 * 99-9-29 fix BLKGETSIZE bug - ptb v2.2.14
 * 99-10-2 run with interrupts on throughout. Think we lose some - ptb v2.2.15
 * 99-10-8 trim dead code, kernel 2.2 ifdef's - ptb v2.2.17
 * 99-12-18 further o-o - ptb v2.2.19
 * 99-12-28 queue account cleanup. endio on queue reqs at reset - ptb v2.2.20
 *          interruptible semaphores for better client recovery - ptb v2.2.20
 * 00-1-2   debugging cleanups. Fix race in end_request - ptb v2.2.21
 * 00-1-4   semaphores simplified. - ptb v2.2.22
 * 00-6-8   emergency control by write to proc - ptb v2.2.24
 * 00-7-20  ported to 2.4.0-test1. Possible minor bugs found/fixed - ptb v2.2.24
 * 00-7-27  changed proc i/f to read_proc from get_info in 2.2/2.4 - ptb v2.2.25
 * 00-7-30  fixed reads before writes under 2.4 by disabling merge - ptb v2.2.25
 * 00-7-30  and fixed merge_reqs for 2.4, now that I understand! - ptb v2.2.25
 * 00-7-30  fixed/introduced possible bug in end_io  for 2.2/2.4 - ptb v2.2.25
 * 00-7-30 added timeval/zone field in requests and replies - ptb v2.4.0
 * 00-7-30 fixed hitherto masked bug in read_stat in nbd_client - ptb v2.4.0
 * 00-7-30 added timeout to net writes in nbd_client - ptb v2.4.0
 * 00-8-20 display fix for devices over 2GB - ptb v2.4.5
 * 00-8-23 more 64 bit fixes + error out overrange requests- ptb v2.4.6/2.2.27
 * 00-8-31 add NBD_ERR ioctl to error out slot request- ptb v2.4.9
 * 00-8-31 soften NBD_SOFT_RESET so doesn't wreck protocol - ptb v2.4.9
 * 00-9-1  remove %L's from printfs. Kernel 2.2. doesn't - ptb v2.4.10/2.2.27
 * 00-9-6  add various state flags to help init order - ptb v2.4.11
 * 00-9-8  add checks for device initialised to set_sock - ptb v2.4.12
 * 00-9-17 en/disable device as aslot count goes through 0 - ptb v2.4.13/2.2.28
 * 00-9-21 split read/write dev req counts for accounting - ptb v2.4.14
 * 00-9-21 renamed sync_intvl to req_timeo - ptb v2.4.14
 * 00-9-21 made sync_intvl count write blocks - ptb v2.4.14
 * 00-9-22 repair enable after delayed disable when disabled - ptb v2.4.14
 * 00-9-22 include sync (nonblocking) after sync_intvl reqs - ptb v2.4.14
 * 00-9-25 disable sync (nonblocking) after sync_intvl reqs - ptb v2.4.14
 * 00-9-25 bundle invalidate_buffers in clr_sock - ptb v2.4.14
 * 00-10-20 implement req_timeo per device + ioctl (Wang Gang) - ptb v2.4.15
 * 00-10-20 add raid mode (Wang Gang) - ptb v2.4.15
 * 00-10-26 throttle in do_req  - ptb v2.4.15
 * 00-10-28 do set_sock on first open and clr_sock on last close - ptb v2.4.15
 * 00-11-01 make sync_intvl really sync - ptb v2.4.15
 * 00-11-14 rename throttle to plug, nbd_sync takes arg - ptb v2.4.17
 * 00-11-19 clr_sock errs req not rollback if show_errs & !aslot - ptb v2.4.17
 * 00-11-20 removed autodeadlock when disabled in do_req end_req - ptb v2.4.17
 * 00-11-21 make MY_NBD_SYNC only sync when sync_intvl > 0 - ptb v2.4.17
 * 00-12-24 make MY_NBD_GET_REQ use a timeout arg - ptb v2.4.18
 * 01-02-12 ported to 2.4.0 (works). do_nbd_request rewritten - ptb v2.4.20
 * 01-02-20 managed to get plugging and clustered read/writes OK - ptb v2.4.21
 * 01-02-21 eliminated slot->buflen for the time being - ptb v2.4.21
 * 01-02-27 added proper devfs support - ptb v2.4.22
 * 01-03-15 allowed more devices/in devfs, cleaned up endio - ptb v2.4.23
 * 01-03-15 added device letter (<= 3 chars) to struct-  - ptb v2.4.23
 * 01-03-15 added request size check to do_nbd_req - ptb v2.4.23
 * 01-03-15 increased MAX_SECTORS to 512 by default - ptb v2.4.23
 * 01-03-15 made major number a module parameter - ptb v2.4.23
 * 01-03-18 added max_sectors array - ptb v2.4.23
 * 01-03-23 added devfs links - ptb v2.4.23
 * 01-04-17 plugging always enabled for 2.4 kernels - ptb v2.4.24
 * 01-04-17 made SET_RO set_device_ro as well as set local flags - ptb v2.4.25
 * 01-04-28 impl SET_MD5SUM ioctl and proc support for md5sum - ptb v2.4.25
 * 01-04-29 added accounting for md5'd reqs - ptb v2.4.25
 * 01-07-29 added atomic protections for accounting - ptb v2.4.25
 * 01-08-01 fixed 2.4 smp bugs. Interrupts off in spinlocks - ptb v2.4.25
 * 01-08-01 removed all semaphores for spinlocks - ptb v2.4.25
 * 01-08-01 invalidate_buffers in clr_sock (req'd Rogier Wolff) - ptb v2.4.25
 * 01-08-02 fixed smp deadlock - end_that_request_first slept! ptb v2.4.26
 * 01-10-16 provisionally added error in device open when notenabled ptb v2.4.27
 * 01-10-18 added DIRTY flag to save on repeated invalidate_buffers ptb v2.4.27
 * 01-10-31 increment seqno_out before delivery, so really starts at 1  v2.4.27
 * 01-11-01 move zeroing of seqno in cmd field to nbd_end_req* ptb v2.4.27
 * 01-11-18 add speed calculation, dev fields, display in proc ptb v2.4.27
 * 01-11-20 modifications for compiling into monolithic kernel ptb v2.4.27
 * 01-12-06 clr requests before reenabling, not after, in nbd_enable ptb 2.4.27
 * 02-02-21 make nbd_rollback modal, absorbing nbd_error ptb 2.4.27
 */

#include <linux/major.h>
/*
 * Fallback definitions for builds in which UNIX98_PTY_MAJOR_COUNT has not
 * been defined by the time we get here.
 *
 * UNIX98_NR_MAJORS must expand to a plain number, so there is whitespace
 * (not '=') between the macro name and its replacement text.
 */
#ifndef UNIX98_PTY_MAJOR_COUNT
  #define UNIX98_PTY_MAJOR_COUNT 8
  #ifndef UNIX98_NR_MAJORS
    #define UNIX98_NR_MAJORS UNIX98_PTY_MAJOR_COUNT
  #endif
#endif

#include <linux/module.h>

#if defined(__GNUC__) && __GNUC__ >= 2
#define _LOOSE_KERNEL_NAMES
#endif

#include <linux/version.h>

#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <asm/segment.h>

#include <asm/uaccess.h>	/* PTB - when did this arrive in kernel? */
#include <asm/byteorder.h>
#include <linux/wrapper.h>

/* Default major number for the driver (NBD_MAJOR; presumably from <linux/major.h> - confirm). */
#define MAJOR_NR NBD_MAJOR
/* Major number actually in use; also exported as a module parameter (see MODULE_PARM (major, "i")). */
static int major = MAJOR_NR;

#include <linux/proc_fs.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>

#include <linux/file.h>		/* PTB - when did this arrive in kernel? */

#include <linux/smp_lock.h>

#include <linux/devfs_fs_kernel.h>

#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/kdev_t.h>

/*                                                       *
 * PTB --------------- compatibility ------------------- *
 *                   layer starts here.                  *
 */

  // BH_Protected disappeared somewhere around 2.4.10
  // Replacement helper: marks the buffer dirty, marks it up to date, then
  // refiles it.
  // NOTE(review): this expands to a bare { } block, not do { } while (0), so
  // "if (x) mark_buffer_protected (bh); else ..." will not parse. Callers
  // are not visible here, so the form is left alone.
  #define mark_buffer_protected(rbh) \
      { \
  	mark_buffer_dirty (rbh); \
  	mark_buffer_uptodate (rbh, 1); \
  	refile_buffer (rbh); \
       }

  // Build sanity check: stop the compile if the compiler flags announce an
  // SMP build (__SMP__ or SMP) while CONFIG_SMP is not defined.
  #if defined(__SMP__) || defined(SMP)
    #if ! defined(CONFIG_SMP)
      #error CONFIG_SMP not defined
    #endif
  #endif

  /* PTB list interface extensions */
  /*
   * list_head(ptr, type, member) - first entry on the list, or NULL if empty
   * list_tail(ptr, type, member) - last entry on the list, or NULL if empty
   *
   * @ptr    - pointer to the list's struct list_head
   * @type   - type of the structure the entries are embedded in
   * @member - name of the struct list_head field inside @type
   *
   * @ptr is parenthesised before being cast, so an expression argument
   * (e.g. base + n) is cast as a whole rather than only its first operand.
   * NB @ptr is evaluated twice; do not pass an expression with side effects.
   */
  #define list_head(ptr, type, member) \
  (list_empty(ptr) ? NULL : \
   list_entry(((struct list_head *)(ptr))->next, type, member))
  #define list_tail(ptr, type, member) \
  (list_empty(ptr) ? NULL : \
   list_entry(((struct list_head *)(ptr))->prev, type, member))

/* Value of LINUX_VERSION_CODE at the time this file was compiled. */
int linux_version_code = LINUX_VERSION_CODE;

/* NOTE(review): not referenced in the part of the file visible here - purpose to be confirmed. */
int warning_old_kernel = 0;

/*                                                       *
 * PTB --------------- compatibility ------------------- *
 *                   layer ENDS here.                    *
 */

#include <linux/enbd.h>
#include <linux/enbd_ioctl.h>

/*
 * PTB kernel data - 4KB worth
 * We need space for nda, nda1, .. nda15, ndb, ndb1, ..
 * The index is exactly the minor number.
 *
 * Each table has MAX_NBD * NBD_MAXCONN entries, i.e. one per minor.
 * NOTE(review): presumably these are the per-minor block size, device
 * size and max-sectors tables handed to the block layer; the
 * registration and the units are not visible here - confirm.
 */
  static int nbd_blksizes[MAX_NBD * NBD_MAXCONN];
  static int nbd_sizes[MAX_NBD * NBD_MAXCONN];
  static u64 nbd_bytesizes[MAX_NBD * NBD_MAXCONN];
  static int nbd_max_sectors[MAX_NBD * NBD_MAXCONN];

/*
 * PTB our data   - about 3KB
 * These are nda, ndb, ndc, ...
 * Divide the minor by NBD_MAXCONN to get this index.
 * (The code does this as minor >> NBD_SHIFT.)
 */
  static struct nbd_device nbd_dev[MAX_NBD];
  /* NOTE(review): not taken anywhere in the part of the file visible here - confirm what it protects. */
  static spinlock_t nbd_lock = SPIN_LOCK_UNLOCKED;

/*
 * PTB User messaging defs.
 */

  // Common message prefix "NBD #<line>[<count>]: <function> ". Its two %d
  // conversions are filled with __LINE__ and a per-call-site counter by the
  // macros below.
  // NOTE(review): pasting __FUNCTION__ between string literals requires the
  // compiler to treat it as a string literal; newer GCC releases do not -
  // confirm the supported compiler range.
  #define NBD_ID "NBD #%d[%d]: " __FUNCTION__ " "

  // Logging helpers, one per kernel log level. Each expansion is a bare
  // { } block with its own static hit counter (icnt) and issues two printk
  // calls: first the NBD_ID prefix, then the caller's format and arguments.
  // NBD_DEBUG accepts a level argument but does not use it.
  // NOTE(review): being a bare block rather than do { } while (0), these
  // cannot be used as the unbraced body of an if that has an else.
  #define NBD_DEBUG(level, s...) \
  { static int icnt; printk( KERN_DEBUG NBD_ID, __LINE__, icnt++); printk(s);}
  #define NBD_ERROR( s...) \
  { static int icnt; printk( KERN_ERR   NBD_ID, __LINE__, icnt++); printk(s);}
  #define NBD_ALERT( s...) \
  { static int icnt; printk( KERN_ALERT NBD_ID, __LINE__, icnt++); printk(s);}
  #define NBD_INFO( s...)  \
  { static int icnt; printk( KERN_INFO  NBD_ID, __LINE__, icnt++); printk(s);}

  // Log the message s with the current value of the local "result" at debug
  // level and jump to the "error_out" label. The calling function must
  // provide both; s must be a string literal since it is concatenated.
  #define NBD_FAIL( s ) { \
    NBD_DEBUG(1, s " (result %d).\n" , result ); \
    goto error_out; \
  }
  // As NBD_FAIL, but logs at error level, records result in lo->harderror
  // and jumps to "hard_error_out". The calling function must provide
  // "result", "lo" and that label.
  #define NBD_HARDFAIL( s ) { \
    NBD_ERROR( s " (result %d).\n" , result ); \
    lo->harderror = result; \
    goto hard_error_out; \
  }

/*
 * PTB device parameters. These are module parameters too.
 *
 * All are plain ints. Those listed in the MODULE_PARM block below can be
 * set at load time when the driver is built as a module; "enable" is not
 * in that list.
 */

  static int rahead     = NBD_RAHEAD_DFLT;/* PTB - read ahead blocks  */
  static int sync_intvl = NBD_SYNC_INTVL; /* PTB - sync every n secs/Kreqs */
  static int speed_lim  = NBD_SPEED_LIM;  /* PTB - for throttling, in Kw */
  static int merge_requests               /* PTB - bool, do request coalesce */
                        = NBD_MERGE_REQ_DFLT;
  static int buf_sectors = NBD_MAX_SECTORS;
                                          /* PTB - user bufsize required */
  static int show_errs = 1;	          /* PTB - RAID mode? not usually */
  static int plug = NBD_PLUG_DFLT;

  static int md5sum = 0;		  /* PTB - use md5summing write proto */
  static int md5_on_threshold = 1000;	  /* PTB - reqs reqd to turn md5 on */
  static int md5_off_threshold = 10;	  /* PTB - errs reqd to turn md5 off */
  static int enable = 0;		  /* PTB global on/off, not used */

#ifndef NO_BUFFERED_WRITES
  static int buffer_writes = 0;	          /* PTB - act like ramd on write */
#endif		/* NO_BUFFERED_WRITES */

/* Load-time parameters; "i" declares each one as an integer. */
#if defined(MODULE)
  MODULE_PARM (rahead, "i");
  MODULE_PARM (sync_intvl, "i");
  MODULE_PARM (speed_lim, "i");
  MODULE_PARM (merge_requests, "i");
  MODULE_PARM (buf_sectors, "i");
  MODULE_PARM (show_errs, "i");
  MODULE_PARM (plug, "i");
  #ifndef NO_BUFFERED_WRITES
    MODULE_PARM (buffer_writes, "i");
  #endif		/* NO_BUFFERED_WRITES */
  MODULE_PARM (major, "i");
  MODULE_PARM (md5sum, "i");
  MODULE_PARM (md5_on_threshold, "i");
  MODULE_PARM (md5_off_threshold, "i");
#endif

/*
 * NOTE(review): this definition comes AFTER the two #ifndef
 * NO_BUFFERED_WRITES tests above, so unless the build defines the symbol
 * earlier, buffer_writes is still declared and exported; only tests
 * further down the file see the symbol as defined. Confirm this ordering
 * is intended.
 */
#define NO_BUFFERED_WRITES 1

/*                                                       *
 * PTB --------------- functions ----------------------- *
 */

/*
 * PTB count number of blocks in a request. This will be an overestimate
 * if the number is not an exact multiple. It seems to happen. We 
 * guarrantee to return -ve only if the request is invalid.
 *
 * @req - request we want to count
 */
inline long
nr_blks (struct request *req)
{

	unsigned log_sectors_per_blk;
	unsigned sectors_per_blk;
	int size, minor, nbd;
	int sectors;
	struct nbd_device *lo;

	if (!req)
		return -EINVAL;

	if (major (req->rq_dev) != major) {

		NBD_ERROR ("req %#x has major %d instead of %d,"
			   " curr sectors %d, nr sectors %ld\n, buffer %#x",
			   (unsigned) req, major (req->rq_dev),
                           major, req->current_nr_sectors, req->nr_sectors,
			   (unsigned) req->buffer);

		return -EINVAL;
	}

	minor = minor (req->rq_dev);
	nbd = minor >> NBD_SHIFT;
	lo = &nbd_dev[nbd];

	log_sectors_per_blk = lo->logblksize - 9;
	sectors_per_blk = 1 << log_sectors_per_blk;

	sectors = req->nr_sectors;
	size = (sectors + sectors_per_blk - 1) >> log_sectors_per_blk;

	return size;
}

/*
 * Translate a device index into its alphabetical suffix.
 *
 * 0..25 give the single letters "a".."z". From 26 upwards two letters
 * are produced, the first from i / 26 and the second from i % 26, so
 * 26 is "ba", 27 is "bb" and 675 (26*26 - 1) is "zz". Indices outside
 * 0..675 are not checked.
 *
 * The result lives in a static, zero terminated buffer which is reused
 * (and only rewritten when the index differs from the previous call),
 * so the caller must consume it before the next call.
 *
 * @i number to be translated to x[y] alphabetical form.
 */
static char *
device_letter (int i)
{
	static char name[3];
	static int last_i = -1;
	int pos = 0;

	if (i != last_i) {
		last_i = i;

		/* leading letter only exists for two-letter names */
		if (i >= 26)
			name[pos++] = 'a' + i / 26;

		name[pos++] = 'a' + (i >= 26 ? i % 26 : i);
		name[pos] = 0;
	}

	return name;
}

/*
 *  PTB sync the device. Modes:
 *  @arg = 1:  Do it sync
 *  @arg = 0:  Do it async
 *
 *  We can't call sync_dev outside a process context. I don't know why.
 *  Death results from a scheduled attempt.
 *
 *  Nothing is done unless the device is flagged NBD_INITIALISED and has
 *  an inode recorded. Otherwise, for any nonzero @arg the whole device
 *  (minor nbd << NBD_SHIFT) is fsync'ed and its buffers invalidated; for
 *  @arg == 0 no sync is issued at all (see FIXME below). In both cases
 *  the NBD_SYNC_REQD flag is then cleared.
 *
 *  No lock is taken in here.
 *
 *  NOTE(review): the async case clears NBD_SYNC_REQD although nothing
 *  was synced - confirm that is acceptable.
 */
static void
nbd_sync (struct nbd_device *lo, long arg)
{
	struct inode *inode = lo->inode;
	short minor, nbd;

	/* PTB nothing to sync on a device that was never set up */
	if (!(atomic_read (&lo->flags) & NBD_INITIALISED) || !inode)
		return;

	minor = minor (inode->i_rdev);
	nbd = minor >> NBD_SHIFT;

	// PTB sync_dev is async. fsync_dev is sync.
	switch (arg) {
	  case 0:
		// async
	        // PTB 2.5.7 doesn't have async sync!	 FIXME
                // PTB we want sync_buffers(..., 0):
		break;
	  default:
		// sync
		fsync_dev (mk_kdev (major, nbd << NBD_SHIFT));
		invalidate_buffers (mk_kdev (major, nbd << NBD_SHIFT));
		break;
	}

	atomic_clear_mask (NBD_SYNC_REQD, &lo->flags);
}

/*
 * PTB - convenience wrapper: nbd_sync in async mode (arg 0).
 */
static void
nbd_async_sync (struct nbd_device *lo)
{
	nbd_sync (lo, 0L);
}
/*
 * PTB - convenience wrapper: nbd_sync in sync mode (arg 1).
 */
static void
nbd_sync_sync (struct nbd_device *lo)
{
	nbd_sync (lo, 1L);
}

/*
 *  Pick the sync mode from the device state: async while the device is
 *  flagged NBD_ENABLED, sync otherwise.
 */
static void
nbd_maybe_sync_sync (struct nbd_device *lo)
{
	const int enabled = (atomic_read (&lo->flags) & NBD_ENABLED) != 0;

	if (enabled)
		nbd_async_sync (lo);
	else
		nbd_sync_sync (lo);
}

/*
 * PTB - put a request onto the head of a nbd device's queue
 *     - presumably having taken it off the kernel's queue first!
 *     - We cannot take the io_spinlock since we are called with it on!
 *     - and we cannot take the semaphore as we may not sleep!
 *
 *     @lo      = the device we are on (could we get it from the req?)
 *     @req     = the request we shift
 *     @irqsave = save and restore irqmask when taking our queue spinlock
 *
 *     A request for which nr_blks() reports an error (NULL request,
 *     foreign major) is logged and left untouched. Otherwise the
 *     accounting counters are updated, the request is added at the head
 *     of lo->queue under the queue write lock, and sleepers on lo->wq
 *     are woken.
 *
 *     NOTE(review): req_in[cmd][] is indexed by the block count with no
 *     upper bound check here; its dimension is not visible - confirm.
 */
static void
nbd_enqueue (struct nbd_device *lo, struct request *req, int irqsave)
{
	/* PTB signed, as nr_blks signals an invalid request with -ve */
	long req_blks = nr_blks (req);
	short islot = atomic_read (&lo->islot);
	int cmd;

	islot = islot;		// PTB stops compiler complaints

	if (req_blks < 0) {
		NBD_ERROR ("(%d): invalid req %#x. Not touching!\n",
			   islot, (unsigned) req);
		return;
	}

	// PTB don't want anybody mucking with the request queue but we're
	// called from the kernel request loop so we can't use the spinlock
	// and I believe we can't use the semaphore as that may cause us to
	// sleep. We need a special spinlock here if anything.
	// PTB experiment since we are called from the kernel loop when the
	// req is already off the kernel queue, we can't be affected by
	// other alterations to the kernel queue, and our own alter-egos
	// never fire on interrupts, so they can't interfere either even
	// if irqs are allowed.

	/* PTB accounting and nothing more */
	cmd = rq_data_dir (req);

	atomic_add (req_blks, &lo->requests_in[cmd]);
	atomic_inc (&lo->countq[cmd]);
	atomic_inc (&lo->req_in[cmd][req_blks]);

	/* PTB track the high water marks of queue length and request size */
	if (atomic_read (&lo->maxq[cmd]) < atomic_read (&lo->countq[cmd]))
		atomic_set (&lo->maxq[cmd], atomic_read (&lo->countq[cmd]));

	if (atomic_read (&lo->maxreqblks) < req_blks)
		atomic_set (&lo->maxreqblks, req_blks);

	// PTB don't need irqsave because no irq uses our spinlock
	if (irqsave) {
		unsigned long flags;
		write_lock_irqsave (&lo->queue_spinlock, flags);
		list_add (&req->queuelist, &lo->queue);
		write_unlock_irqrestore (&lo->queue_spinlock, flags);
	}
	else {
		write_lock (&lo->queue_spinlock);
		list_add (&req->queuelist, &lo->queue);
		write_unlock (&lo->queue_spinlock);
	}

	wake_up_interruptible (&lo->wq);

}

/*
 * PTB - remove a request from anywhere in the nbd device general queue 
 *     - return 0 for success, -ve for fail
 *
 *     We need to hold the queue semaphore when calling this routine
 *     and the queue spinlock too! It walks the queue.
 */
static int
nbd_remove (struct nbd_device *lo, struct request *req)
{
	int cmd;

	if (!req)
		return -2;

	/* PTB How can I check that the handle is valid first? */
	// PTB don't want anybody mucking with the request quete
	// so we have the semaphore at this point. We don't want to take
	// the io lock as we're not dealing with the kernel queue but with
	// our own queue and that only needs a semaphore for protection.

	list_del (&req->queuelist);
	goto success;

      success:
        /* PTB accounting and nothing more */
	cmd = rq_data_dir (req);
	atomic_dec (&lo->countq[cmd]);
	return 0;
}

/*
 * PTB - defined further down the file. Declared here at file scope since
 *       a block scope function declaration may not carry "static".
 */
static int nbd_set_sock (struct nbd_slot *slot, int arg);

/*
 *  PTB - Open the device
 *
 *  @inode - inode of the device node (taken from @file if NULL)
 *  @file  - the file being opened
 *
 *  The minor number splits into the device index (minor >> NBD_SHIFT)
 *  and a partition number (the remainder). Partition 0 is the whole
 *  device; partition n > 0 is client slot n - 1.
 *
 *  Opening a slot bumps its reference count and records the opener's
 *  pid. A slot that is already open may only be reopened by the same
 *  pid (-EBUSY otherwise). The first open of a slot calls
 *  nbd_set_sock (slot, 0).
 *
 *  Opening the whole device records @file and @inode in the device.
 *
 *  The first open of any minor of a device initialises the queue lock,
 *  and flags the device initialised and enabled.
 *
 *  Returns 0, or -EINVAL for no inode, -ENODEV for a device index out of
 *  range, -EBUSY as above.
 */
int
nbd_open (struct inode *inode, struct file *file)
{
	int dev;
	struct nbd_device *lo;
	int nbd;
	int part;
	int islot;

	if (!inode && file) {	/* added by ptb for 2.0.35. Necessary? */
		inode = file->f_dentry->d_inode;
	}
	if (!inode) {
		NBD_ERROR ("null inode.\n");
		return -EINVAL;
	}

	dev = minor (inode->i_rdev);
	nbd = dev >> NBD_SHIFT;
	part = dev - (nbd << NBD_SHIFT);
	islot = part - 1;

	if (nbd >= MAX_NBD) {
		NBD_ERROR ("too many (%d) whole devices open\n", nbd);
		return -ENODEV;
	}

	lo = &nbd_dev[nbd];

    /* PTB try and stop opens before there's a client available but
     * after setup has occured. This might be a nuisance, as we open
     * the device just to send it an ioctl saying we are here!
     */

	// PTB it's a daemon opening the slot? Assume yes.
	if (islot >= 0) {

		struct nbd_slot *slot = &lo->slots[islot];
		int refcnt = slot->refcnt++;

		// PTB only one exclusive open allowed on slots
		if (refcnt > 0) {
			if (slot->pid != current->pid) {
				slot->refcnt--;
				return -EBUSY;
			}
		}

		// PTB first time do init on slot
		if (refcnt <= 0) {
			nbd_set_sock (slot, 0);
		}
		slot->pid = current->pid;
	}

	if (part == 0) {
		/* PTB we have got the whole dev's file or inode for 1st time */
		if (!lo->file || lo->file != file) {
			lo->file = file;
			atomic_set (&lo->frstj, jiffies);
		}
		if (!lo->inode || lo->inode != inode) {
			lo->inode = inode;
		}
	}

	atomic_inc (&lo->refcnt);

	MOD_INC_USE_COUNT;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED)) {	/* PTB 132 */
		rwlock_init (&lo->queue_spinlock);
		atomic_set_mask (NBD_INITIALISED, &lo->flags);
		if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
			atomic_set_mask (NBD_ENABLED, &lo->flags);
			lo->lives++;
		}
	}

	return 0;
}

/*
 * PTB - complete a transaction irrefutably by taking it out of the
 *     - slot pending position it is in, and reporting end_request to kernel
 *
 *       We are called without the spinlock held, and without the io
 *       lock held, because our call to end request will take the io
 *       lock momentarily.
 *
 *     @slot = the slot the request is pending on
 *     @req  = the request being completed
 *
 *     A request for which nr_blks() reports an error is logged and left
 *     alone. Otherwise it is unlinked from the slot queue and ended, and
 *     the slot and device counters are updated. For successful writes
 *     the md5sum statistics are updated too, which may switch the
 *     device's NBD_MD5SUM flag on or off once a threshold is passed.
 *
 *     NOTE(review): the error branch tests req->errors < 0, whereas
 *     nbd_error() signals failure by incrementing req->errors - confirm
 *     the sign convention.
 */
void
nbd_commit (struct nbd_slot *slot, struct request *req)
{

	struct nbd_device *lo = slot->lo;
	/* PTB signed, as nr_blks signals an invalid request with -ve */
	long req_blks = nr_blks (req);
	int errors, cmd;

	if (req_blks < 0) {
		NBD_ERROR
		 ("corrupted req %#x. Not touching with bargepole.\n",
		  (unsigned) req);
		return;
	}

	// PTB take what we need from the request now. Once it has been
	// ended it is no longer ours to look at.
	errors = req->errors;
	cmd = rq_data_dir (req);

	// PTB We're the only daemon who can touch this slot so we don't need
	// no steenking spinlock. And our slot queue doesn't need the
	// kernels io lock protection either, so no steenking io lock.
	list_del (&req->queuelist);
	// PTB NB nbd_end_req_lock needs to be called without the io spinlock on
	nbd_end_request_lock (req);

	slot->req_age = 0;
	slot->req -= req_blks;

        /* PTB accounting and nothing more */
	atomic_sub (req_blks, &lo->requests_req[cmd]);
	if (errors < 0) {
		atomic_add (req_blks, &lo->requests_err);
		slot->err += req_blks;
		return;
	}

	atomic_add (req_blks, &lo->requests_out[cmd]);
	slot->out += req_blks;

	if (cmd != WRITE)
		return;

	// PTB non error case writes

	// PTB account the 4 cases for a md5sum'd transaction
	switch (slot->flags & (NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK)) {

	  case NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK:
		atomic_add (req_blks, &lo->wrequests_5to);	// 11
		atomic_add (req_blks, &lo->wrequests_5so);
		// zero the countdown to turning off md5 as it works
		atomic_set (&lo->wrequests_5co, 0);
		break;

	  case NBD_SLOT_MD5SUM:
		atomic_add (req_blks, &lo->wrequests_5to);	// 10
		atomic_add (req_blks, &lo->wrequests_5wo);
		atomic_inc (&lo->wrequests_5co);
		if (atomic_read (&lo->wrequests_5co) > md5_off_threshold) {
			atomic_set (&lo->wrequests_5co, 0);
			// turn off md5summing as it's not successful
			atomic_clear_mask (NBD_MD5SUM, &lo->flags);
		}
		break;

	  case NBD_SLOT_MD5_OK:
		atomic_add (req_blks, &lo->wrequests_5to);	// 01
		atomic_add (req_blks, &lo->wrequests_5eo);
		atomic_inc (&lo->wrequests_5co);
		if (atomic_read (&lo->wrequests_5co) > md5_off_threshold) {
			atomic_set (&lo->wrequests_5co, 0);
			// turn off md5summing as it's errored
			atomic_clear_mask (NBD_MD5SUM, &lo->flags);
		}
		break;

	  default:
	  case 0:
		// PTB nobody asked for a md5 and nobdy gave one back
		atomic_inc (&lo->wrequests_5no);
		if (atomic_read (&lo->wrequests_5no) > md5_on_threshold) {
			atomic_set (&lo->wrequests_5no, 0);
			// turn on md5summing every so often
			atomic_set_mask (NBD_MD5SUM, &lo->flags);
		}
		break;
	}

	// PTB clear the md5sum indicators from the slot afterwards!
	slot->flags &= ~(NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);

}

/*
 * PTB - error out a transaction irrefutably by taking it out of the
 *     - slot pending position it is in, and reporting end_request to kernel
 *
 *     We must be called without the io spinlock held, as we take it
 *
 *     @slot = the slot the request is pending on
 *     @req  = the request to fail
 *
 *     The request's error count is bumped, it is unlinked from the slot
 *     queue and ended, and the slot and device counters are adjusted.
 *
 *     If nr_blks() cannot size the request this is logged. A NULL
 *     request is then dropped, since there is nothing to end. Any other
 *     unsizeable request is still errored out, but counted as zero
 *     blocks so that the accounts are not corrupted.
 */
void
nbd_error (struct nbd_slot *slot, struct request *req)
{
	struct nbd_device *lo = slot->lo;
	/* PTB signed, as nr_blks signals an invalid request with -ve */
	long req_blks = nr_blks (req);
	int cmd;

	if (req_blks < 0) {
		NBD_ERROR ("passed illegal request %#x\n", (unsigned) req);
		if (!req)
			return;
		req_blks = 0;
	}

	req->errors++;

	// PTB take the direction now. Once the request has been ended it
	// is no longer ours to look at.
	cmd = rq_data_dir (req);

	// PTB We don't need the spinlock since we don't touch our queue,
	// and we're the only ones working on this slot.
	list_del (&req->queuelist);

	NBD_ALERT ("error out req %x from slot %d!\n", (unsigned) req,
		    slot->i);

	// PTB nbd_end_req_lock needs to be called without the spinlock on
	nbd_end_request_lock (req);

	/* PTB accounting and nothing more */
	atomic_sub (req_blks, &lo->requests_req[cmd]);

	slot->in -= req_blks;
	slot->req -= req_blks;

	slot->req_age = 0;
	slot->err += req_blks;
	atomic_add (req_blks, &lo->requests_err);
}

/*
 * Values for the irqsave argument of nbd_enqueue(): nbd_rollback()
 * passes NBD_INTERRUPTIBLE, which (being nonzero) selects the
 * irqsave/irqrestore flavour of the queue lock.
 */
#define NBD_UNINTERRUPTIBLE 0
#define NBD_INTERRUPTIBLE   1

/*
 * Take a request out of a slot. This must not hold the i/o lock on
 * entry as we may take it in order to kill the request (end_request)
 * or we may take the queue lock in order to play with the devices
 * queue (nbd_enqueue).
 *
 * @slot = the slot the request is pending on
 * @req  = the request to take back
 *
 * If the device is flagged NBD_SHOW_ERRS the request is errored out via
 * nbd_error() instead. Otherwise it is unlinked from the slot queue,
 * the accounting done when it was accepted and enqueued is undone, and
 * it is put back on the device queue with nbd_enqueue().
 *
 * A request for which nr_blks() reports an error is logged and left
 * where it is.
 */
static void
nbd_rollback (struct nbd_slot *slot, struct request *req)
{

	struct nbd_device *lo = slot->lo;
	/* PTB signed, as nr_blks signals an invalid request with -ve */
	long req_blks;
	int cmd;

	if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {
		nbd_error (slot, req);
		return;
	}

	req_blks = nr_blks (req);

	if (req_blks < 0) {
		NBD_ERROR ("passed illegal request %#x\n", (unsigned) req);
		return;
	}

	// PTB don't want anybody mucking with the request queue
	// but classically this was never held so I'll try doing without it
	// PTB the reason is that we here modify only the slot queue, to
	// which we have unique access anyway.
	list_del (&req->queuelist);

	NBD_ALERT ("rollback req %x from slot %d!\n", (unsigned) req, slot->i);

	/* PTB accounting */
	slot->in -= req_blks;
	slot->req -= req_blks;

	/* PTB - pre-decrement count for enqueue */
	cmd = rq_data_dir (req);
	atomic_sub (req_blks, &lo->requests_in[cmd]);
	atomic_dec (&lo->req_in[cmd][req_blks]);
	atomic_sub (req_blks, &lo->requests_req[cmd]);

	// PTB don't want anybody mucking with the request queue
	// but classically this was never held 
	// PTB we have to take the spinlock to protect nbd_enqueue from
	// the request function's competition. We also HAVE to take the
	// spinlock as we need to protect ourselves from competing alter
	// egos
	// PTB Don't think the io lock is necessary, but it gives enqueue
	// its normal env back, so I don't have to think about what it does
	// PTB yeah, enqueue takes a write lock anyway, so no sweat

	nbd_enqueue (lo, req, NBD_INTERRUPTIBLE);

}

/*
 * PTB - undo transactions by taking them out of the slot pending
 *     - position and replacing them on the generic device queue
 *     - NB we do not hold the io request lock or queue sem when
 *     -    calling this as we take it internally
 */
static void
nbd_rollback_all (struct nbd_slot *slot)
{

	struct request *req;
	short count = 0;

	while (!list_empty (&slot->queue)) {

		if (count++ > 1000)
			break;

		req = list_head (&slot->queue, struct request, queuelist);

		if (!req)
			break;

		nbd_rollback (slot, req);
	}

}

/*     PTB error out all the requests on a slot
 *     
 *     We must be called without the io spinlock held, as we take it in
 *     nbd_error().
 */
static void
nbd_error_all (struct nbd_slot *slot)
{

	struct request *req;
	short count = 0;

	while (!list_empty (&slot->queue)) {
		if (count++ > 1000)
			break;
		req = list_head (&slot->queue, struct request, queuelist);
		if (!req)
			break;
		nbd_error (slot, req);
	}
}

/*
 * PTB - let a request onto the slot pending position
 *     - Can be called without the spinlock and doesn't take the
 *       spinlock as we only deal with our unique slot. If there
 *       were more than one client per slot this would be a problem
 *       but there aren't so it isn't.
 *
 *     @slot = the slot taking the request
 *     @req  = the request to accept
 *
 *     A request for which nr_blks() reports an error is silently
 *     refused. Otherwise it is added at the head of the slot queue, the
 *     slot is timestamped, and the slot and device counters are bumped
 *     by the request's block count.
 */
void
nbd_accept (struct nbd_slot *slot, struct request *req)
{

	struct nbd_device *lo = slot->lo;
	/* PTB signed, as nr_blks signals an invalid request with -ve */
	long req_blks = nr_blks (req);
	int cmd;

	if (req_blks < 0)
		return;

        /* PTB accounting and nothing more */
	cmd = rq_data_dir (req);
	atomic_add (req_blks, &lo->requests_req[cmd]);
	/* PTB - Note that this really is slot and not lo.
	 */
	list_add (&req->queuelist, &slot->queue);

	slot->req_age = jiffies;
	slot->in += req_blks;
	slot->req += req_blks;
}

/*
 * PTB - read from userspace to a request buffer. Do it piecewise
 *     - to cope with clustered requests.
 *     - return number of bytes read
 *
 *     @req  = request whose bio segments receive the data
 *     @user = userspace source buffer (assumed already verified)
 *     @len  = number of bytes the caller expects to be transferred
 *
 *     The segments of every bio in the request are filled in order from
 *     consecutive offsets of @user. We stop at the first segment that
 *     copy_from_user cannot fill completely, so the value returned is
 *     the number of bytes really copied. A total different from @len is
 *     logged; it is up to the caller to compare and act.
 *
 *     NOTE(review): @len does not bound the copy. A request with more
 *     segment bytes than @len reads beyond @len in @user, and is only
 *     reported afterwards through the size mismatch.
 *     NOTE(review): page_address() is used directly on the segment
 *     pages - confirm they are always permanently mapped.
 */
static int
copy_from_user_to_req (struct request *req, char *user, int len)
{

	unsigned size = 0;
        struct bio *bio /* = req->bio */;

	/* PTB assume user verified */

        rq_for_each_bio(bio, req) {

            int i;
            struct bio_vec * bvl;

            bio_for_each_segment(bvl, bio, i) {

                struct page *page       = bvl->bv_page;
                int offset              = bvl->bv_offset;
                const unsigned current_size
                                    = bvl->bv_len;
	        char *buffer;
		unsigned long uncopied;

                buffer = page_address(page) + offset;

		/* PTB copy_from_user returns the count it could NOT copy */
		uncopied = copy_from_user (buffer, user + size, current_size);

		size += current_size - uncopied;

		if (uncopied)
			goto done;
	    }
	}

      done:
	if (size != len) {
		NBD_ALERT
		 ("requested %d and could only read %d bytes to req #%x\n",
		  len, size, (unsigned) req);
		NBD_ALERT
		 ("request %#x wanted to read user space buffer  %#x\n",
		  (unsigned) req, (unsigned) user);
	}
	return size;
}

/*
 * PTB - private request flag used by rq_type() / set_rq_type() alongside
 * REQ_RW. NOTE(review): presumably __REQ_NR_BITS is the first bit number
 * not used by the block layer's own request flags - confirm.
 */
# define REQ_NBD (1 << __REQ_NR_BITS)

/*
 * PTB - auxiliary function
 *       Decode the type of a request from two bits of its flags:
 *
 *         REQ_NBD  REQ_RW   type
 *            0        0     READ
 *            0        1     WRITE
 *            1        0     IOCTL
 *            1        1     MD5SUM
 *
 *       All four combinations are covered, so one of those values is
 *       always returned.
 */
static int
rq_type (struct request *req)
{
	const int is_write = (req->flags & REQ_RW) ? 1 : 0;
	const int is_nbd = (req->flags & REQ_NBD) ? 1 : 0;

	if (!is_nbd)
		return is_write ? WRITE : READ;

	return is_write ? MD5SUM : IOCTL;
}
/*
 * PTB - auxiliary function, the inverse of rq_type()
 *       Rewrite the REQ_RW and REQ_NBD bits of the request flags so
 *       that they encode the given type. All other flag bits are left
 *       alone. A type that is none of READ, WRITE, IOCTL or MD5SUM
 *       leaves the request untouched.
 */
static void
set_rq_type (struct request *req, int type)
{
        unsigned long wanted;

        if (type == READ)
                wanted = 0;
        else if (type == WRITE)
                wanted = REQ_RW;
        else if (type == IOCTL)
                wanted = REQ_NBD;
        else if (type == MD5SUM)
                wanted = REQ_RW | REQ_NBD;
        else
                return;

        // PTB wipe both type bits, then raise the ones this type needs
        req->flags &= ~(REQ_RW | REQ_NBD);
        req->flags |= wanted;
}

/*
 * PTB - andres' kernel half of the user-space network handshake, used
 *     - to complete a transaction.
 *     - return 0 for success and -ve for fail.
 */
int
nbd_ack (struct nbd_slot *slot, char *buffer)
{
	struct nbd_reply reply;
	struct request *req, *xreq;
	int result = 0;

	void *user;
	unsigned long req_blks = 1;
	struct nbd_device *lo = slot->lo;
	unsigned buflen = 0;
	unsigned reqlen;
        int cmd;

	if (!(slot->flags & NBD_SLOT_BUFFERED)) {
		return -EINVAL;
	}
	if (slot->buffer != buffer) {
		if (slot->nerrs++ < 3)
			NBD_ALERT ("(%d): user buffer changed\n", slot->i);
		return -EINVAL;
	}

	atomic_inc (&lo->cthreads);
	slot->flags |= NBD_SLOT_RUNNING;
	slot->cli_age = jiffies;

	user = slot->buffer;
	copy_from_user ((char *) &reply, (char *) user,
			sizeof (struct nbd_reply));

	// PTB we keep tracking the write position in the input buffer
	buflen += NBD_BUFFER_DATA_OFFSET;

	// PTB save the reply handle (which is an address) as our req
	memcpy ((char *) &req, reply.handle, sizeof (req));

	do {
		struct list_head *pos;
		int count = 0;
		xreq = NULL;
		list_for_each (pos, &slot->queue) {
			xreq = list_entry (pos, struct request, queuelist);
			if (count++ > 1000)
				break;
			if (xreq == req)
				/* PTB found it */
				break;
		}
	} while (0);

	if (xreq != req) {
		if (slot->nerrs++ < 3)
			NBD_ALERT
			 ("fatal: Bad handle (given) %x != %x (found)!\n",
			  (unsigned) req, (unsigned) xreq);
		result = -EAGAIN;
		NBD_FAIL ("exited wrong request\n");
		/* PTB we will roll back requests */
	}

	if (reply.magic != NBD_REPLY_MAGIC) {
		if (slot->nerrs++ < 3)
			NBD_ALERT ("Not enough reply magic in "
				   __FUNCTION__ "\n");
		result = -EAGAIN;
		NBD_FAIL ("Not enough reply magic in " __FUNCTION__ "\n");
		/* PTB we will roll back requests */
	}

	if (reply.error > 0 || req->errors > 0) {
		/* PTB wasn't error++'ed before */
		req->errors++;
		if (slot->nerrs++ < 3)
			NBD_ALERT ("exited with reply error\n");
		/* PTB we handle this internally */
		goto success;
	}

	req_blks = nr_blks (req);

	reqlen = req->nr_sectors;
	reqlen <<= 9;

	cmd = rq_type (req);
        switch (cmd) {

		int size;

	  case READ:

		// PTB We have to copy the buffer bit by bit in
		// case the request is clustered.

		size =
		 copy_from_user_to_req (req, ((char *) user) + buflen,
					reqlen);
		if (size < reqlen) {
			NBD_ALERT
			 ("(%d): copy %dB from user to req %#x failed (%d)\n",
			  slot->i, reqlen, (unsigned) req, size);
			// PTB we could try again? We should investigate.
			NBD_FAIL
			 ("exited because of bad copy from user\n");
		}

		// PTB we keep tracking the write position in the buffer
		buflen += size;

		break;
	  case WRITE:
		// PTB we want to know if the reply is md5summed, and if it is
		//     whether the md5sum is the same as the one on the
		//     request. But that's not something we can presently see
		//     from here as we don't make an md5sum in the kernel.
		//     So we have to rely on the reply flag from userspace.
		//     We transmit the information to the slot, as we can't
		//     keep it on the request.

		switch (reply.flags &
			(NBD_REPLY_MD5SUM | NBD_REPLY_MD5_OK)) {

		  case NBD_REPLY_MD5SUM | NBD_REPLY_MD5_OK:
			// PTB we asked for an md5sum comparison
			// PTB the two matched, so we skipped writing the request
			slot->flags |= (NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);	// 11
			break;
		  case NBD_REPLY_MD5SUM:
			// PTB the two differed, so we wrote the request
			slot->flags |= NBD_SLOT_MD5SUM;
			slot->flags &= ~NBD_SLOT_MD5_OK;	// 10
			break;
		  case NBD_REPLY_MD5_OK:
			// PTB the server refused the md5 request
			slot->flags &= ~NBD_SLOT_MD5SUM;
			slot->flags &= NBD_SLOT_MD5_OK;	// 01
			break;
		  default:
		  case 0:
			// PTB mobody asked for an md5sum comparison
			slot->flags &= ~(NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);	// 00
			break;
		}
		// PTB now we're all set up to do the accounting in commit etc.

		break;

	  case IOCTL:

		if (!(reply.flags & NBD_REPLY_IOCTL))
			NBD_ALERT
			 ("ioctl reply to req %#x has no ioctl flag set\n",
			  (unsigned) req);
		// PTB the commit should emit the request notification
		do {

			unsigned cmd = (unsigned) req->special;
			char *arg = req->buffer;	// PTB saved local address or direct val

			if (cmd == -1) {
				result = -EINVAL;
				NBD_FAIL ("unauthorized remote ioctl\n");
			}

			if (_IOC_DIR (cmd) & _IOC_READ) {
				// PTB it would have been nice to save size in req and
				// in fact we did .. but only approximately, as nr_sectors.
				// size_t  size = nbd_ioctl_size(cmd, arg);
				int sectors = req->nr_sectors;
				// PTB if we are reading, it should be to the local buffer
				// PTB arg, which points at lo->ctldata or other buffer
				if (sectors <= 0) {
					memcpy (arg, &reply.data.ctldta[0],
						NBD_CTLDTA_LENGTH);
				}
				else {
					// PTB sectors is an overestimate. SHould be OK
					// PTB as we are reading from the client buffer
					// PTB which has plenty of room to spare
					int size = sectors << 9;
					copy_from_user (arg,
							(char *) user +
							buflen, size);
					buflen += size;
				}
			}
		} while(0);
		break;
	}			// PTB eswitch

      success:
	slot->nerrs = 0;
	// PTB - completion (or erroring) of transaction 
	// nbd_commit will take the io lock to do end_req
	nbd_commit (slot, req);
	atomic_dec (&lo->cthreads);
	slot->flags &= ~NBD_SLOT_RUNNING;
	return 0;

      error_out:
	// PTB we will next do a client rollback on the slot from userspace.
	//     Right here we just skip the request. 
	req->errors += req_blks;
	slot->err += req_blks;
	atomic_dec (&lo->cthreads);
	slot->flags &= ~NBD_SLOT_RUNNING;
	result = result < 0 ? result : -ENODEV;
	return result;
}

/*
 * PTB - write to userspace from a request buffer. Do it piecewise
 *     - to cope with clustered requests.
 *     - return number of bytes written
 *
 *     req  - request whose bio segments supply the data
 *     user - start of the user space destination area
 *     len  - number of bytes the caller expects; it is not used to
 *            limit the copy, the caller compares it with the result
 *
 *     The segments of every bio on the request are written in order to
 *     consecutive offsets of the user area. copy_to_user() returns the
 *     number of bytes it could NOT copy, so a faulting user area stops
 *     the walk at once and only the bytes really delivered are counted.
 */
static int
copy_to_user_from_req (struct request *req, char *user, int len)
{

	unsigned size = 0;
	struct bio *bio;

	/* PTB assume user verified */

	rq_for_each_bio(bio, req) {

		int i;
		struct bio_vec *bvl;

		bio_for_each_segment(bvl, bio, i) {

			const unsigned current_size = bvl->bv_len;
			char *buffer = page_address(bvl->bv_page) + bvl->bv_offset;
			unsigned long missed;

			missed = copy_to_user (user + size, buffer,
					       current_size);

			/* count only what actually reached the user area */
			size += current_size - missed;

			if (missed)
				/* the user area faulted: report the short count */
				goto out;
		}

	}

      out:
	return size;
}

/*
 * PTB - update speed counters if 5s has passed
 */
static void
nbd_speed (struct nbd_speed *spd)
{

	struct nbd_device *lo = spd->lo;

	// last time we measured
	int lastjiffy = atomic_read (&spd->jiffy);
	// jiffies since last time
	int djiffy = jiffies - lastjiffy;

	// tot blocks since first time
	int distance = spd->getdistance ? spd->getdistance (lo) : 0;
	// previous no we measured
	int lastdist = atomic_read (&spd->distance);
	// blocks since last time
	int ddistance = distance - lastdist;

	// write every 5 second in time
	if (djiffy > 5 * HZ) {

		// jiffies since first time
		int tjiffy = jiffies - atomic_read (&spd->lo->frstj);

		// max tot speed measured so far
		int speedmax = atomic_read (&spd->speedmax);

		// last instantaneous speed we measured
		int lastspeed = atomic_read (&spd->speed);

		// instantaneous read blocks/s
		int speed = djiffy ? (ddistance * HZ) / djiffy : 0;

		// smoothed KB/s
		int speedsmoothed =
		 (djiffy * speed + HZ * lastspeed) / (djiffy + HZ);

		// average speed to now in KB/s
		int speedav = tjiffy ? (distance * HZ) / tjiffy : 0;

		// smoothing count for max
		int speedhi =
		 (speedav > speedsmoothed) ? speedav : speedsmoothed;

		// doing settings
		atomic_set (&spd->speed, speedsmoothed);
		if (speedhi > speedmax)
			atomic_set (&spd->speedmax, speedhi);
		atomic_set (&spd->distance, distance);
		atomic_set (&spd->speedav, speedav);
		atomic_set (&spd->jiffy, jiffies);
	}
}

/*
 * PTB refresh the device's three speed records, in the order
 *     write (wspeed), read (rspeed), total (tspeed)
 */
static void
nbd_set_speed (struct nbd_device *lo)
{
	struct nbd_speed *const records[] = {
		&lo->wspeed,
		&lo->rspeed,
		&lo->tspeed,
	};
	unsigned k;

	for (k = 0; k < sizeof (records) / sizeof (records[0]); k++)
		nbd_speed (records[k]);
}
/*
 * PTB total distance: the device's requests_in counters for WRITE and
 *     READ added together
 */
static int
gettdistance (struct nbd_device *lo)
{
	int total = atomic_read (&lo->requests_in[WRITE]);

	total += atomic_read (&lo->requests_in[READ]);
	return total;
}
/*
 * PTB read distance: the device's requests_in counter for READ
 */
static int
getrdistance (struct nbd_device *lo)
{
	const int reads = atomic_read (&lo->requests_in[READ]);

	return reads;
}
/*
 * PTB write distance: the device's requests_in counter for WRITE
 */
static int
getwdistance (struct nbd_device *lo)
{
	const int writes = atomic_read (&lo->requests_in[WRITE]);

	return writes;
}

/*
 * PTB auxiliary functions for manipulating the sequence number 
 *
 *     The sequence number is kept in the request flags, in the bits
 *     above REQ_NBD (i.e. from bit __REQ_NR_BITS + 1 upwards).
 *     This one reads it back.
 */
static int
rq_seqno (struct request *req)
{
	const int seqno_shift = __REQ_NR_BITS + 1;

	return req->flags >> seqno_shift;
}
/*
 * PTB store val as the request's sequence number: everything in the
 *     flags from bit __REQ_NR_BITS + 1 upwards is replaced, the bits
 *     below that are kept as they are
 */
static void
rq_set_seqno (struct request *req, int val)
{
	const int seqno_shift = __REQ_NR_BITS + 1;

	// PTB drop the old sequence number, keeping the low flag bits
	req->flags &= (1 << seqno_shift) - 1;
	// PTB and put the new one in its place
	req->flags |= val << seqno_shift;
}

/*
 * PTB - andres' kernel half of the userspace networking. This part
 *     - initiates the transaction by taking a request off the generic
 *     - device queue and placing it in the slots pending position.
 *     - We return 0 for success and -ve for fail.
 *
 *     slot   - the client slot asking for work
 *     buffer - user space buffer; must be the one recorded on the slot
 *
 *     We wait, for at most lo->req_timeo seconds, for a request to
 *     appear on the device queue, take the request at the tail of that
 *     queue, describe it in a struct nbd_request written to the start
 *     of the user buffer, follow it with the data for a WRITE (or a
 *     sized IOCTL), and finally park the request on the slot with
 *     nbd_accept().
 *
 *     Failures leave through error_out and return the recorded result,
 *     or -ENODEV if none was recorded. -ETIME is returned both when we
 *     are throttling and when the wait timed out with nothing to do.
 */
int
nbd_get_req (struct nbd_slot *slot, char *buffer)
{
	struct nbd_request request;
	struct request *req;
	int result = 0;
	static atomic_t count;
	unsigned start_time = jiffies;
	struct nbd_device *lo = slot->lo;
	unsigned timeout = lo->req_timeo * HZ;
	int islot = slot->i;
	// PTB for the new timezone field in requests 
	extern struct timezone sys_tz;
	unsigned long flags;

	atomic_inc (&lo->cthreads);	// PTB - client thread enters
	slot->flags |= NBD_SLOT_RUNNING;
	slot->cli_age = jiffies;

	if (!(slot->flags & NBD_SLOT_BUFFERED)) {
		NBD_FAIL ("Our slot has no buffer");
	}
	if (slot->buffer != buffer) {
		NBD_FAIL ("Our slot has changed its buffer!");
	}

	atomic_set (&lo->islot, islot);

	if (!list_empty (&slot->queue)) {
		NBD_FAIL ("impossible! already treating one request");
		// PTB we do a nontrivial rollback from the user daemon 
	}
	if (!slot->file) {
		result = -EBADF;
		NBD_FAIL ("Our slot has been nofiled");
	}
	if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
		result = -ENODEV;
		NBD_FAIL ("Our slot has been vamooshed");
	}

	// PTB don't even bother to look if we're throttling
	if (lo->speed_lim > 0
	    && atomic_read (&lo->wspeed.speed) << (lo->logblksize - 10) >
	    lo->speed_lim) {
		int delay_jiffies = 1;
		// PTB delay a jiffy to pretend we did something
		NBD_ALERT
		 ("throttling on nd%s at jiffies %lu speed %dKB/s\n",
		  lo->devnam, jiffies,
		  atomic_read (&lo->wspeed.speed) << (lo->logblksize - 10));
		interruptible_sleep_on_timeout (&lo->wq, delay_jiffies);
		nbd_set_speed (lo);
		result = -ETIME;
		goto error_out;
	}

	// PTB take spinlock in order to examine queue
	atomic_inc (&lo->cwaiters);
	slot->flags |= NBD_SLOT_WAITING;

	// we need to protect ourselves against the request fn too
	read_lock_irqsave (&lo->queue_spinlock, flags);
	atomic_dec (&lo->cwaiters);
	slot->flags &= ~NBD_SLOT_WAITING;

	// PTB - now spin until request arrives to treat 
	while (slot->file && list_empty (&lo->queue)) {

		read_unlock_irqrestore (&lo->queue_spinlock, flags);

		atomic_inc (&lo->cwaiters);
		slot->flags |= NBD_SLOT_WAITING;

		interruptible_sleep_on_timeout (&lo->wq,
						start_time + timeout - jiffies);

		slot->flags &= ~NBD_SLOT_WAITING;
		atomic_dec (&lo->cwaiters);
		atomic_inc (&count);

		// PTB Have to take the spinlock again to check at the queue
		atomic_inc (&lo->cwaiters);
		slot->flags |= NBD_SLOT_WAITING;
		// we need to protect ourselves against the request fn too
		read_lock_irqsave (&lo->queue_spinlock, flags);
		atomic_dec (&lo->cwaiters);
		slot->flags &= ~NBD_SLOT_WAITING;

		// PTB fail for recheck if we're inactive too long 

		if (jiffies >= start_time + timeout
		    && list_empty (&lo->queue)) {

			result = -ETIME;

			// PTB we will exit with one code or another, so up
			// spinlock
			read_unlock_irqrestore (&lo->queue_spinlock, flags);

			do {
				static int
                                      nbd_clr_sock (struct nbd_slot *slot);
				int siz =
				 lo->blksize + sizeof (struct nbd_request);
				// PTB verify the buffer is still OK - holds one block 
				if (access_ok(VERIFY_WRITE,slot->buffer,siz))
                                    break;

				result = -EINVAL;

				// PTB clr_sock takes both the io lock and the spinlock
				nbd_clr_sock (slot);	// PTB TEST
				NBD_FAIL ("Our process has died or lost its buffer");
			} while (0);

			// PTB we may do a rollback from the user daemon here
			// but it'll be trivial - without effect - as we don't
			// have a request in our slot to treat.

			goto error_out;
		}

	}

	// PTB we still have the (read) spinlock here

	if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
		read_unlock_irqrestore (&lo->queue_spinlock, flags);
		result = -ENODEV;
		NBD_FAIL ("Our slot vaporized while we slept!");
	}
	if (!slot->file) {
		read_unlock_irqrestore (&lo->queue_spinlock, flags);
		result = -EBADF;
		NBD_FAIL ("Our slot nofiled itself while we slept!");
	}

	if (!list_empty (&slot->queue)) {
		read_unlock_irqrestore (&lo->queue_spinlock, flags);
		result = -EINVAL;
		NBD_FAIL ("impossible! already treating one request");
		// PTB we do a nontrivial rollback from the user daemon 
	}

	// PTB now let's relinquish the read lock and try for the write lock
	read_unlock_irqrestore (&lo->queue_spinlock, flags);

	write_lock_irqsave (&lo->queue_spinlock, flags);
	// PTB got the write lock

	if (list_empty (&lo->queue)) {
		write_unlock_irqrestore (&lo->queue_spinlock, flags);
		// PTB - somebody else did it while we waited on spinlock. OK 
		result = -EINVAL;
		NBD_FAIL ("ho hum beaten to the punch");
		// PTB we may do a trivial rollback from the user daemon 
	}

	// PTB cli/sti here looks unnec. hardware interrupts return here 
	// AMARIN begin uninterruptible code 

	// PTB we have the (write) spinlock

	// PTB oldest=last element in queue 
	req = list_tail (&lo->queue, struct request, queuelist);

	// PTB this is where we free the req from our queue. We need to be
	// holding our spinlock at this point

	// PTB - must succeed as have the spinlock 
	result = nbd_remove (lo, req);
	// PTB now holding irqs off in nbd_remove 

	// AMARIN end uninterruptable code 
	// PTB uh - maybe cli/sti is needed? interrupts can muck the queue?
	//        - Nah! I've left them enabled so we can see any errors.

	write_unlock_irqrestore (&lo->queue_spinlock, flags);

	// PTB the whole struct is copied out to userspace below, so start
	// from zero. Otherwise the fields a particular request type does
	// not fill in would hand stale kernel stack to the daemon.
	memset (&request, 0, sizeof (request));

	request.magic = NBD_REQUEST_MAGIC;
	request.flags = 0;

	switch (rq_type (req)) {

	  case IOCTL:

		request.type = IOCTL;
		request.len = 0;

		do {
			// PTB this is our special ioctl kernel request

			unsigned cmd = (unsigned) req->special;
			char *arg = req->buffer;

			// PTB the arg was (either a literal or) the lo->ctldta buffer

			if (_IOC_DIR (cmd) & _IOC_READ) {
				// PTB we're in get_req, transferring stored ioctl
				size_t size = req->nr_sectors << 9;
				if (size <= 0) {
					// PTB we copy the data during the request copy
					if (_IOC_DIR (cmd) & _IOC_WRITE) {
						memcpy (&request.data.
							ctldta[0], arg,
							NBD_CTLDTA_LENGTH);
					}
					request.len = 0;
				}
				else {
					// PTB we copy to the user buffer later
					request.len = size;
				}
			}
			// PTB we store the weirded ioctl id
			// PTB this composition is our private invention
			request.from = (((u64) cmd) << 32)
			 // PTB really want this to go to a 64 bit request.special
			 | ((u64) (unsigned long) arg);
			// PTB note that we recorded the "original" buffer,
			// which is the lo->ctldata stuff, so we can put it
			// back there later
		} while (0);
		break;

	  case READ:
	  case WRITE:

		// PTB we might check first if it's actually our own ack
		do {
			int minor = minor (req->rq_dev);
			int nbd = minor >> NBD_SHIFT;
			int islot = minor - (nbd << NBD_SHIFT) - 1;
			if (islot != -1) {
				NBD_ALERT
				 ("received  req type %d directed at slot %d\n",
				  rq_type (req), islot);
				// We have to go looking at other peoples slots.
			}
		} while (0);

		request.type = rq_data_dir (req);
		request.from = req->sector;
		request.from <<= 9;
		request.len = req->nr_sectors;
		request.len <<= 9;
		if (atomic_read (&lo->flags) & NBD_MD5SUM) {
			// PTB set the please do md5sum flag on the request until we
			// learn to do it ourselves in-kernel during the memcopy below.
			request.flags |= NBD_REQUEST_MD5SUM;
		}
		break;
          case MD5SUM:
		// PTB the type used to be left unset here, although the
		// switch on request.type below has an MD5SUM case.
		// NOTE(review): confirm the daemon expects type MD5SUM
		// with zero from/len for these requests.
		request.type = MD5SUM;
                break;

	  default:
		NBD_ALERT ("received unknown req %#x flags %#x\n",
			   (unsigned) req, rq_type (req));
		break;
	}

	request.seqno = rq_seqno (req);
	// PTB we need to erase the extra seqno info so that on error or on ack
	// the kernel can use the right internal array, but I'll try and
	// catch this in the ack function instead

	do_gettimeofday (&request.time);
	request.zone = sys_tz;

	// PTB I don't know how to fill tz_minuteswest otherwise (tz_dsttime = 0
	// always)

	// PTB the handle we give the daemon is the address of the req
	memcpy (request.handle, (char *) &req, sizeof (req));

	copy_to_user (slot->buffer, (char *) &request, sizeof (request));

	// PTB the type is always 2bit since it's been created by a mask
	switch (request.type) {
		int err;
	  case READ:
		break;

	  case IOCTL:
		if (request.len > 0) {
			char *arg =
			 (char *) slot->buffer + NBD_BUFFER_DATA_OFFSET;
			copy_to_user (arg, req->buffer, request.len);
		}
		break;

	  case WRITE:
		err = copy_to_user_from_req (req,
					     (char *) slot->buffer +
					     NBD_BUFFER_DATA_OFFSET,
					     request.len);
		if (err < request.len) {
			// PTB buffer had missing BHS's
			// PTB report the short byte count, which is in err
			NBD_ERROR
			 ("req %#x only offered %d bytes of %d for copy to user\n",
			  (unsigned) req, err, request.len);
			// PTB this request is badly damaged. We'd better shoot it.
			if (req) {
				if (req->errors == 0) {
					req->errors++;
					nbd_end_request_lock (req);
				}
			}
			NBD_FAIL
			 ("kernel failed to keep req while we copied from it");
		}
		break;
          case MD5SUM:
                break;
          default:
		NBD_ERROR
		 ("req %#x was type %#x\n", (unsigned)req, rq_type(req));
		NBD_FAIL ("unknown req type");
                break;
	}

	// PTB nbd_accept does not take spinlock and does not need to as
	// the req is already free of the shared queue and only needs
	// to be placed on the unique slot queue.

	nbd_accept (slot, req);
        NBD_DEBUG(1,
                "slotted OK request %#x type %d for sector %ld-%ld slot %d\n",
                        (unsigned)req, rq_type(req), req->sector,
                        req->sector + req->nr_sectors - 1, slot->i);

	atomic_dec (&lo->cthreads);	// PTB - client thread leaves normally 
	slot->flags &= ~NBD_SLOT_RUNNING;

	return 0;

      error_out:
	// PTB accounting - a fail to get a request is not an errored request 
	atomic_dec (&lo->cthreads);	// PTB - client thread leaves abnormally 
	slot->flags &= ~NBD_SLOT_RUNNING;
	result = result < 0 ? result : -ENODEV;

	return result;
}

/*
 * PTB error out the pending requests on the kernel queue
 * We have to be called WITHOUT the io request lock held.
 * We sleep in between clearing each request, for "safety".
 *
 * At most 1000 requests are removed in one call. Each is dequeued
 * under the queue lock, marked errored and ended with nbd_end_request
 * (again under the queue lock) after a schedule() with the lock
 * released.
 *
 * Returns the number of requests actually taken off the queue.
 */
static int
nbd_clr_kernel_queue (void)
{

	int count = 0;
	unsigned long flags;
        request_queue_t *q = BLK_DEFAULT_QUEUE(major);

	spin_lock_irqsave (q->queue_lock, flags);

	// PTB count is only advanced once a request has really been
	// dequeued, so the total reported is exact even when we stop
	// at the limit or hit the impossible case below
	while (!QUEUE_EMPTY && count < 1000) {
		struct request *req;
		req = CURRENT;
		if (!req) {	// PTB impossible
			spin_unlock_irqrestore (q->queue_lock, flags);
			NBD_ALERT
			 ("impossible! kernel queue empty after tested nonemty!\n");
			goto out;
		}
		blkdev_dequeue_request (req);
		count++;
		spin_unlock_irqrestore (q->queue_lock, flags);
		req->errors++;
		schedule ();
		// nbd_end_request should be called with the spinlock held
		spin_lock_irqsave (q->queue_lock, flags);
		nbd_end_request (req);
	}
	spin_unlock_irqrestore (q->queue_lock, flags);

      out:
	NBD_ALERT ("removed %d requests\n", count);
	return count;

}

/*
 * PTB error out the pending requests on the nbd queue and kernel queue
 * Note that we take the queue spinlock for this
 *
 * lo - the device whose private queue is to be emptied
 *
 * Requests are taken one at a time from the head of lo->queue. Each
 * has its error count raised by its block count plus one, is added to
 * the device's requests_err total, is removed from the queue under the
 * write lock and is then ended with nbd_end_request_lock() once the
 * lock has been dropped.
 *
 * The lock is taken with interrupts saved and disabled, as at the
 * other places where this lock is write-held, so that nothing running
 * from an interrupt on this cpu can try for it while we hold it.
 *
 * Returns the number of requests removed.
 */
int
nbd_clr_queue (struct nbd_device *lo)
{
	int count = 0;
	unsigned long flags;

	while (1) {

		struct request *req;
		unsigned long req_blks = 1;

                // PTB can't allow new requests via interrupts
		write_lock_irqsave (&lo->queue_spinlock, flags);
		if (list_empty (&lo->queue)) {
			write_unlock_irqrestore (&lo->queue_spinlock, flags);
			break;
		}
		req = list_head (&lo->queue, struct request, queuelist);

		req_blks = nr_blks (req);

		req->errors += req_blks + 1;
		atomic_add (req_blks, &lo->requests_err);

		/* PTB - must succeed as have the spinlock */
		nbd_remove (lo, req);
		/* PTB now hold irqs off in nbd_remove */
		write_unlock_irqrestore (&lo->queue_spinlock, flags);
		count++;

		// PTB nbd_end_req_lock needs to be called without the io spinlock
		nbd_end_request_lock (req);

	}
	NBD_ALERT ("unqueued %d reqs\n", count);
	return count;
}

/*
 * PTB - redefine the failure macro for the code that follows: report
 *       the message through NBD_ERROR, print a newline, and jump to the
 *       error_out label of the enclosing function (which must therefore
 *       have one).
 *       Note the expansion is a bare brace block, not do { } while (0),
 *       so an unbraced "if (x) NBD_FAIL (...); else ..." will not
 *       compile. Keep the calls inside braces.
 */
#undef NBD_FAIL
#define NBD_FAIL( s... ) { \
  NBD_ERROR( s); printk("\n"); \
  goto error_out; \
}

#ifndef NO_BUFFERED_WRITES
  /*
   * Magic function from rd.c that we hope saves a buffer head
   * permanently somewhere in the kernel VM system.
   *
   * Copies the data of buffer head sbh into the page cache pages of
   * the mapping belonging to device number nbd's saved inode
   * (nbd_dev[nbd].inode), creating pages as needed, and marks the
   * pages dirty.
   *
   * sbh - buffer head holding the data; b_rsector says where it goes
   * nbd - index of the device in nbd_dev[]
   *
   * Returns 0 on success, -ENODEV if the device has no saved inode,
   * and -ENOMEM if a new page cache page could not be obtained.
   */
static int
buffered_write_pagecache_IO (struct buffer_head *sbh, int nbd)
{
	struct address_space *mapping;
	unsigned long index;
	int offset, size, err;
	struct nbd_device *lo = &nbd_dev[nbd];
	err = 0;

	// PTB we need to save the /dev/nda inode
	if (!lo->inode) {
		err = -ENODEV;
		goto out;
	}
	mapping = lo->inode->i_mapping;

	// PTB index appears to be the page number
	index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9);
	// PTB offset is in bytes, and says where in the page the sector starts
	offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK;
	// PTB well, an abbreviation for the buffer size, in bytes
	size = sbh->b_size;

	// PTB one pass per page touched. size counts down the bytes still to go
	do {
		// PTB we mark each page that we should write to Uptodate

		int count;
		struct page **hash;
		struct page *page;
		char *src, *dst;

		// PTB set only when we made the page ourselves (it comes locked)
		int unlock = 0;

		// PTB ummm, how much of the page is left to traverse
		count = PAGE_CACHE_SIZE - offset;
		// PTB reduce it to how much we actually need to traverse
		if (count > size)
			count = size;
		// PTB say NOW? that we have traversed what we want of the page
		size -= count;

		// PTB look for the page in the cache first
		hash = page_hash (mapping, index);
		page = __find_get_page (mapping, index, hash);

		if (!page) {
			// PTB we get to make a new page
			page = grab_cache_page (mapping, index);
			if (!page) {
				// PTB failed to get new page
				err = -ENOMEM;
				goto out;
			}
			// PTB magic
			// PTB a page not yet uptodate is zeroed and declared uptodate
			if (!Page_Uptodate (page)) {
				memset (kmap (page), 0, PAGE_CACHE_SIZE);
				kunmap (page);
				SetPageUptodate (page);
			}
			// PTB the new page is locked. We need to unlock it later
			unlock = 1;
		}

		// PTB prepare already for next page
		index++;

		// PTB set up for copy
		dst = kmap (page);
		dst += offset;
		// NOTE(review): src restarts at the beginning of the buffer on
		// every pass and is not advanced by what was already copied.
		// Presumably a buffer never straddles two pages, so there is
		// only ever one pass - confirm.
		src = bh_kmap (sbh);

		// PTB prepare for next round
		offset = 0;

		// PTB do a copy
		memcpy (dst, src, count);

		kunmap (page);
		bh_kunmap (sbh);

		if (unlock) {
			UnlockPage (page);
		}
		SetPageDirty (page);
		// PTB presumably drops the reference taken by the lookup or
		// grab above rather than really freeing the page - confirm
		__free_page (page);

	} while (size > 0);

      out:
	return err;

}
/*
 * PTB - keep a copy of everything a write request carries.
 *
 * req - the write request; its buffer heads are walked via b_reqnext
 *
 * For each buffer head on the request we get the buffer cache's own
 * buffer for the same device and block (getblk), copy the data across
 * if that turns out to be a different buffer, mark it protected, and
 * also copy the data into the page cache of the device with
 * buffered_write_pagecache_IO().
 *
 * The reference that getblk gave us is dropped on every pass, also
 * the one on which we give up, and the buffer is only unmapped if we
 * were the ones to map it.
 *
 * Returns 0 on success, or the first negative error reported by
 * buffered_write_pagecache_IO(), at which point the walk stops.
 */
static int
buffered_write (struct request *req)
{

	struct buffer_head *bh;
	int dev = minor (req->rq_dev);
	int nbd = dev >> NBD_SHIFT;
	int err = 0;

	// PTB go through and copy and protect the written buffers
	for (bh = req->bh; bh; bh = bh->b_reqnext) {
		struct buffer_head *rbh;
		rbh =
		 getblk (bh->b_rdev, bh->b_rsector / (bh->b_size >> 9),
			 bh->b_size);
		if (bh != rbh) {
			char *bdata = bh_kmap (bh);
			memcpy (rbh->b_data, bdata, rbh->b_size);
			// PTB undo the mapping here, where we know we made it
			bh_kunmap (bh);
			NBD_ALERT ("got new bh sector %lu on write\n",
				   bh->b_rsector);
		}
		mark_buffer_protected (rbh);	// PTB equals dirty, uptodate
		err = buffered_write_pagecache_IO (bh, nbd);
		// PTB release before looking at err, so a failure does not
		// leave us holding the buffer for ever
		brelse (rbh);
		if (err < 0) {
			break;
		}
	}
	return err;
}

#endif		/* NO_BUFFERED_WRITES */

/*
 * PTB - kernel function to take reqs off the kernel queue. Runs with
 * io lock held.
 *
 * q - the request queue we were called for; its queue_lock is the io
 *     lock that we hold on entry and must still hold on return
 *
 * Each request at the head of the queue is checked against the state
 * of its device. A request that passes is dequeued, given a sequence
 * number if it is a write that has none yet, and put on the device's
 * own queue with nbd_enqueue(); the io lock is dropped only for that
 * last part and retaken before looking at the next request.
 *
 * A request that fails a check is dequeued, errored and ended on the
 * spot. The io lock is never released on that path (nbd_end_request
 * wants it held), so it must not be taken again there.
 */
static void
do_nbd_request (request_queue_t * q)
{
	struct request *req;
	int dev, nbd;
	unsigned long flags;	// PTB copy of the device flags, not irq flags

	while (!QUEUE_EMPTY) {

		struct nbd_device *lo = NULL;
		int req_blks;

		req = CURRENT;

		dev = minor (req->rq_dev);
		nbd = dev >> NBD_SHIFT;

		if (nbd >= MAX_NBD) {
			NBD_FAIL ("minor too big in do_nbd_request.");
		}
		lo = &nbd_dev[nbd];

		atomic_inc (&lo->kthreads);	/* PTB - one kernel thread enters */
		if (atomic_read (&lo->kthreads) > atomic_read (&lo->kmax))
			atomic_set (&lo->kmax, atomic_read (&lo->kthreads));

		if (!lo->file) {
			NBD_FAIL ("Request when device not ready.");
		}

                if (!(req->flags & REQ_CMD)) {
			NBD_FAIL ("Request doesn't have the CMD bit set.");
                }

		// PTB - should use is_read_only(MKDEV(major,nbd<<NBD_SHIFT))
		// FIXME run over all the partitions too
		if (rq_data_dir (req) == WRITE
		    && (atomic_read (&lo->flags) & NBD_READ_ONLY)) {
			NBD_FAIL ("write on read-only device");
		}
		flags = atomic_read (&lo->flags);
		if (!(flags & NBD_INITIALISED)) {
			NBD_FAIL ("device not initialised.");
		}
		if (!(flags & NBD_ENABLED)) {
			NBD_FAIL ("device not enabled.");
		}
		if (flags & NBD_INVALID) {
			NBD_FAIL ("device invalidated.");
		}
		if (lo->nslot <= 0) {
			NBD_ERROR
			 ("device nd%s has no (%d) clients registered\n",
			  lo->devnam, lo->nslot);
			NBD_FAIL ("device has no clients registered yet.");
		}
		if (lo->aslot <= 0 && (flags & NBD_SHOW_ERRS)) {
			NBD_FAIL ("device presently has no active clients.");
		}
		if (req->sector + req->nr_sectors > lo->sectors) {
			NBD_FAIL ("overrange request");
		}
		if (req->sector > ~(1 << (sizeof (int) * 8 - 1))) {
			NBD_FAIL ("overrange request");
		}
		if (req->sector < 0) {
			NBD_FAIL ("underrange request");
		}
		req->errors = 0;
		blkdev_dequeue_request (req);
                // PTB in 2.5 we can release the iolock briefly here
                spin_unlock_irq(q->queue_lock);
		// PTB we are the only reader and writer of lo->seqno
		if (rq_data_dir (req) == WRITE && rq_seqno (req) == 0) {
			// PTB it's a new request never seen before
			atomic_inc (&lo->seqno_out);
			// PTB we have to be careful to change this back before
			// giving it back to the kernel, as the kernel uses it.
			// We patch it back again in nbd_end_request.
			rq_set_seqno (req, atomic_read (&lo->seqno_out));
		}

		// PTB normal sequence is to queue request locally
		nbd_enqueue (lo, req, NBD_UNINTERRUPTIBLE);
                NBD_DEBUG(1,
                        "queued OK request %#x flags %#lx for sector %ld-%ld\n",
                         (unsigned)req, req->flags, req->sector,
                         req->sector + req->nr_sectors - 1);

		// PTB accounting
		atomic_dec (&lo->kthreads);
                // PTB regain the iolock for another turn
                spin_lock_irq(q->queue_lock);
		continue;	// PTB next request

	      error_out:
		// PTB can rely on req being nonnull here
		req->errors++;
		blkdev_dequeue_request (req);
		NBD_ALERT ("ending req %x with prejudice\n",
			   (unsigned) req);
		// PTB size it up now. It is not ours to look at once ended
		req_blks = nr_blks (req);
		nbd_end_request (req);
                // PTB more accounting
		if (lo) {
			atomic_add (req_blks, &lo->requests_err);
			atomic_dec (&lo->kthreads);
		} else {
                        NBD_ALERT("failed to account one orphan errored req\n");
                }
                // PTB we never let go of the iolock on this path, so there
                // is nothing to regain. Taking it again would deadlock.
                continue;
	}
        return;
}

/*
 * PTB rollback all requests on a given slot and then invalidate it
 * (so the requests can't go back until somebody reactivates the slot)
 * At least rollback (which we call) takes both the io spinlock and our
 * spinlock, so we can hold neither when we are called. Soft_reset
 * (which we call) also calls rollback, so has the same problem.
 *
 * slot - the slot to clear; its file, buffer, bufsiz and flags are
 *        all reset
 *
 * Afterwards the device's count of active slots is redone, the device
 * is disabled if that count has dropped to zero, and the device's
 * current slot index is moved on if it was pointing at us.
 *
 * Always returns 0.
 */
static int
   /* introduced by PTB for better modularity */
nbd_clr_sock (struct nbd_slot *slot)
   /* PTB arg introduced for multiplexing */
   /* PTB the nbd arg if -ve, means don't invalidate buffers also */
   /* NOTE(review): there is no nbd arg any more - the line above looks stale */
{
	int i = 0;
	struct nbd_device *lo = slot->lo;
	int islot = slot->i;
	unsigned long flags;

	// PTB done first, while we hold no lock
	nbd_rollback_all (slot);

	// PTB a slot with no file counts as inactive from here on
	slot->file = NULL;
	slot->bufsiz = 0;
	slot->flags = 0;
	slot->buffer = NULL;

	write_lock_irqsave (&lo->queue_spinlock, flags);

	/* PTB reset lo->aslot */

	if (lo->aslot > 0) {

		/* PTB grr .. do this the hard way since I don't seem to count right */
		// PTB i.e. recount the slots that still have a file
		lo->aslot = 0;
		for (i = 0; i < lo->nslot; i++) {
			struct nbd_slot *sloti = &lo->slots[i];
			if (sloti->file)
				lo->aslot++;
		}

		if (lo->aslot <= 0) {
			// PTB if we are the last client alive, disable device as we die
			atomic_clear_mask (NBD_ENABLED, &lo->flags);
			if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {
				// PTB soft_reset will invalidate_buffers
				static int nbd_soft_reset (struct nbd_device *lo);
				// PTB soft reset will take the io spinlock and our spinlock
				// so we must release our spinlock and never
				// have the io spinlock at this point
				write_unlock_irqrestore (&lo->queue_spinlock, flags);
				nbd_soft_reset (lo);
				write_lock_irqsave (&lo->queue_spinlock, flags);
			}
		}
                else if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
			// PTB we must not call reenable as that clears the queue
			// PTB this is silly. We're already enabled or should have been!
			atomic_set_mask (NBD_ENABLED, &lo->flags);
			lo->lives++;
			NBD_ALERT ("enabled device nd%s\n", lo->devnam);
		}
	}

	/* PTB reset lo->islot, for no good reason */

	// PTB step islot round the slots (wrapping at nslot) until it lands
	// on one with a file, or we have made nslot steps
	if (atomic_read (&lo->islot) == islot) {
		for (i = 0; i++ < lo->nslot;) {
			atomic_inc (&lo->islot);
			if (atomic_read (&lo->islot) >= lo->nslot)
				atomic_set (&lo->islot, 0);
			if (lo->slots[atomic_read (&lo->islot)].file)
				break;
		}
	}

	lo->harderror = 0;

	write_unlock_irqrestore (&lo->queue_spinlock, flags);

	/* PTB don't clear whole device queue as we might still be open */

	return 0;
}

/*
 * PTB - check all slots for old requests and roll them back. 
 * At least rollback (which we call) takes both the io spinlock and our
 * spinlock, so we can hold neither when we are called.
 *
 * A slot qualifies when its req_age is set (greater than zero) and
 * lies more than lo->req_timeo seconds before the present jiffies.
 */
static void
nbd_rollback_old (struct nbd_device *lo)
{
	int n;

	for (n = 0; n < lo->nslot; n++) {
		struct nbd_slot *candidate = lo->slots + n;

		// PTB nothing pending on this slot
		if (candidate->req_age <= 0)
			continue;

		// PTB still young enough to be left alone
		if (candidate->req_age >= jiffies - lo->req_timeo * HZ)
			continue;

		nbd_rollback_all (candidate);
	}
}

/*
 * PTB - register a socket to a slot.
 *     - Return 0 for success and -ve for failure.
 *       Nowadays this doesn't do very much!
 *
 * The device must already be initialised, sized, blksized and signed,
 * otherwise -ENODEV (not initialised) or -EINVAL is returned.
 *
 * If the slot is already marked in use, the caller may take it over
 * only by presenting (via arg, treated as a user pointer) a signature
 * that my_nbd_set_sig accepts. In that case the slot is cleared with
 * nbd_clr_sock first. If no signature is given, it is rejected, or
 * the slot is still in use after the clear, -EBUSY is returned.
 *
 * On success the slot is marked in use, the active slot count is
 * bumped, the device is enabled if it was not, and lo->nslot is grown
 * to cover this slot index.
 */

// PTB file-scope prototype: a block-scope "static" function declaration
// is not valid C and is rejected by newer compilers
static int my_nbd_set_sig (struct nbd_slot *slot, int *sig);

static int
nbd_set_sock (struct nbd_slot *slot, int arg)
{

	struct nbd_device *lo = slot->lo;
	int islot = slot->i;
	unsigned long flags;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED)) {
		NBD_ALERT ("device nd%s not initialised yet!\n",
			   lo->devnam);
		return -ENODEV;
	}
	if (!(atomic_read (&lo->flags) & NBD_SIZED)) {
		NBD_ALERT ("device nd%s not sized yet!\n", lo->devnam);
		return -EINVAL;
	}
	if (!(atomic_read (&lo->flags) & NBD_BLKSIZED)) {
		NBD_ALERT ("device nd%s not blksized yet!\n", lo->devnam);
		return -EINVAL;
	}
	if (!(atomic_read (&lo->flags) & NBD_SIGNED)) {
		NBD_ALERT ("setting unsigned device nd%s! But harmless.\n",
			   lo->devnam);
		return -EINVAL;
	}

	slot = &lo->slots[islot];

	if (slot->file) {
		// PTB it could be that the last daemon got kill -9 and that's all
		// PTB the device is signed at this point
		if (!arg || my_nbd_set_sig (slot, (int *) arg) < 0) {
			// PTB no matching signature offered, so the slot stays
			// with its current owner. It sure would mess up the
			// counts if we didn't exit now
			return -EBUSY;
		}
		// PTB signature matches, so error pending requests to let
		// last accessor release, then try again
		nbd_clr_sock (slot);
		if (slot->file) {
			// PTB still held after the clear, give up
			return -EBUSY;
		}
	}

	// PTB this is a queue critical code region for the flags business
	write_lock_irqsave (&lo->queue_spinlock, flags);

	// PTB file has to be nonzero to indicate we're all set up. 
	slot->file = (void *) 1;

	if (++lo->aslot > 0) {
		// PTB if this is the first slot, we might call reenable and
		// thus clr queue too, but reenable takes the spinlock
		if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
			atomic_set_mask (NBD_ENABLED, &lo->flags);
			lo->lives++;
		}
	}
	if (islot >= lo->nslot) {
		lo->nslot = islot + 1;
		NBD_INFO ("increased socket count to %d\n", lo->nslot);
	}

	lo->harderror = 0;

	write_unlock_irqrestore (&lo->queue_spinlock, flags);
	// PTB end of queue critical region 

	return 0;
}

/*
 * PTB - integer base-2 logarithm, rounded down: for arg = 2^i + j
 *       with 0 <= j < 2^i the result is i. Both 0 and 1 give 0.
 */
static inline unsigned
log2 (unsigned arg)
{
	unsigned bits;

	// PTB count how many halvings it takes to get down to 1 (or 0)
	for (bits = 0; arg > 1; arg >>= 1)
		bits++;
	return bits;
}

/*
 * PTB - set the blksize in bytes of the block device. Return 0 for
 *     - success and -ve for failure.
 *
 * arg must be a power of two no smaller than 512 and no larger than
 * PAGE_SIZE, otherwise -EINVAL is returned and nothing is changed.
 * On success the value is stored in the device and in the
 * nbd_blksizes entry for the first minor of the group, its log2 is
 * cached, and the device is flagged NBD_BLKSIZED.
 */
static int
nbd_set_blksize (struct nbd_device *lo, unsigned int arg)
{
	int nbd = lo->nbd;
	// PTB (arg & (arg - 1)) is nonzero exactly when arg is not a power of 2
	if (arg > PAGE_SIZE || arg < 512 || (arg & (arg - 1))) {
		// PTB the test rejects small and non power of 2 values too,
		// so do not claim the value was "too big"
		NBD_ERROR
		 ("bad blksize (%u): need a power of 2 from 512 to PAGE_SIZE\n",
		  arg);
		return -EINVAL;
	}
	lo->blksize = nbd_blksizes[nbd << NBD_SHIFT] = arg;
	lo->logblksize = log2 (lo->blksize);
	atomic_set_mask (NBD_BLKSIZED, &lo->flags);
	return 0;
}

/*
 * PTB - set the size in bytes of the block device. Always returns 0.
 *
 * Records the byte size and the size in KB (arg >> 10), both in the
 * device and in the per-minor tables at the first minor of the group,
 * derives the sector count as twice the KB size, and flags the device
 * NBD_SIZED.
 */
static int
nbd_set_size (struct nbd_device *lo, u64 arg)
{
	const int group = lo->nbd;
	const int first_minor = group << NBD_SHIFT;

	// PTB the device fields take whatever value the table entry
	// ended up holding after the store
	nbd_bytesizes[first_minor] = arg;
	lo->bytesize = nbd_bytesizes[first_minor];

	nbd_sizes[first_minor] = arg >> 10;
	lo->size = nbd_sizes[first_minor];

	// PTB two 512 byte sectors per KB
	lo->sectors = lo->size << 1;

	atomic_set_mask (NBD_SIZED, &lo->flags);
	return 0;
}

/*
 * WG - set the pulse interval / request timeout (lo->req_timeo) of the
 * device. Only strictly positive values are accepted. Returns 0 on
 * success and -EINVAL otherwise.
 */
static int
my_nbd_set_intvl (struct nbd_device *lo, int arg)
{
	if (arg > 0) {
		lo->req_timeo = arg;
		return 0;
	}

	NBD_ERROR ("bad pulse interval/req timeout value (%d)\n", arg);
	return -EINVAL;
}

/*
 * Record arg in slot->spid. Values that are negative or do not fit in
 * the bit width of a short are rejected with -EINVAL. Returns 0 on
 * success.
 */
static int
my_nbd_set_spid (struct nbd_slot *slot, int arg)
{
	// first value that no longer fits in the bits of a short
	const int limit = 1 << (sizeof (short) * 8);

	if (arg >= 0 && arg < limit) {
		slot->spid = (short) arg;
		return 0;
	}

	NBD_ERROR ("bad spid value (%d)\n", arg);
	return -EINVAL;
}

/*
 * Switch the NBD_BUFFERWR flag of the device: a zero arg clears it,
 * anything else sets it. Always returns 0.
 */
static int
my_nbd_set_bufferwr (struct nbd_device *lo, int arg)
{
	if (!arg)
		atomic_clear_mask (NBD_BUFFERWR, &lo->flags);
	else
		atomic_set_mask (NBD_BUFFERWR, &lo->flags);

	return 0;
}

/*
 * Set or clear the NBD_INVALID flag of the device. Always returns 0.
 *
 * PTB we handle the event ourself exactly when it happens instead of
 * letting the kernel have check_media defined and doing it there (and
 * reporting 0 to the kernel). So on the transition from valid to
 * invalid the buffers of the group's first minor are destroyed here.
 */
static int
my_nbd_set_invalid (struct nbd_device *lo, int arg)
{
	if (arg == 0) {
		atomic_clear_mask (NBD_INVALID, &lo->flags);
		return 0;
	}

	// PTB already invalid, the buffers went the first time round
	if (atomic_read (&lo->flags) & NBD_INVALID)
		return 0;

	// PTB - clear buffers now instead of waiting for kernel
	atomic_set_mask (NBD_INVALID, &lo->flags);
	destroy_buffers (mk_kdev (major, lo->nbd << NBD_SHIFT));
	return 0;
}

/*
 * PTB - if we're not signed, accept new sig and return success.
 *     - if we are signed, compare the offer and return success if equal,
 *     - and -ve for failure.
 */
static int
my_nbd_set_sig (struct nbd_slot *slot, int *sig)
{
	int err = 0;
	int buf[NBD_SIGLEN / sizeof (int)];
	int islot = slot->i;
	struct nbd_device *lo = slot->lo;

	if (!access_ok (VERIFY_READ, (char *) sig, NBD_SIGLEN)) {
		err = -EINVAL;
		return err;
	}
	if (!(atomic_read (&lo->flags) & NBD_SIGNED)) {
		/* PTB first time grab sig */
		copy_from_user ((char *) lo->signature, (char *) sig,
				NBD_SIGLEN);
		atomic_set_mask (NBD_SIGNED, &lo->flags);
		return 0;
	}
	copy_from_user ((char *) buf, (char *) sig, NBD_SIGLEN);

	/* PTB test for equality */

	if (memcmp (&buf[0], &lo->signature[0], NBD_SIGLEN / sizeof (int))
	    != 0) {
		err = -EINVAL;
		NBD_ALERT ("(%d): failed sigcheck wth %d\n", islot, err);
		return err;
	}
	err = 0;
	return err;
}

/*
 * PTB - register a userspace buffer to a slot. Return 0 for success
 *     - and -ve for failure. Null arg acts as erase.
 *
 * The buffer is assumed to be lo->max_sectors << 9 bytes long. Only
 * that range is checked with access_ok (-EINVAL on failure); the real
 * size of the userspace buffer is never verified. The device-wide
 * lo->bufsiz tracks the smallest size registered so far.
 */
static int
my_nbd_reg_buf (struct nbd_slot *slot, char *buffer)
{
	struct nbd_device *lo = slot->lo;
	int nbytes;

	// PTB erase request
	if (buffer == NULL) {
		slot->bufsiz = 0;
		slot->buffer = NULL;
		slot->flags &= ~NBD_SLOT_BUFFERED;
		return 0;
	}

	nbytes = lo->max_sectors << 9;

	/* verify the buffer is in the process space */
	if (!access_ok (VERIFY_WRITE, buffer, nbytes))
		return -EINVAL;

	/* PTB hope the buffer is as big as it should be - FIXME */
	slot->buffer = buffer;
	slot->bufsiz = nbytes;

	/* PTB let the device bufsiz be min of registered nonzero bufsizes */
	if (!lo->bufsiz || lo->bufsiz > nbytes)
		lo->bufsiz = nbytes;

	// PTB just in case the buffer really is small, we reset all the
	//     kernels request maxima if we have to adjust the device max
	// NOTE(review): lo->bufsiz was just capped at max_sectors << 9, so
	// this "<" test looks like it can never fire; ">" may have been
	// intended. Behaviour is left as it was - confirm.
	if (lo->max_sectors < (lo->bufsiz >> 9)) {
		int idx = lo->nbd << NBD_SHIFT;
		const int end = idx + NBD_MAXCONN;

		lo->max_sectors = lo->bufsiz >> 9;
		for (; idx < end; idx++)
			nbd_max_sectors[idx] = lo->max_sectors;
	}

	slot->flags |= NBD_SLOT_BUFFERED;
	return 0;
}

// PTB forward declaration; the timer is initialised after nbd_reenable
// because its initialiser needs the address of that function
static struct timer_list reenable_timer;
/*
 * PTB - clear the enabled flag on a device, clear all queues, and then
 * set the enabled flag again
 * ( call without the spinlock held ) 
 *
 * Does nothing if the device is not initialised or has no active
 * slots (lo->aslot <= 0).
 *
 * If clearing the queues removed any requests, the device is left
 * disabled and reenable_timer is armed to call us again one second
 * later. Only a pass that finds nothing to clear sets NBD_ENABLED and
 * bumps lo->lives.
 *
 * NOTE(review): a single timer is shared by every device and armed
 * with add_timer here and in the NBD_RESET ioctl; presumably it must
 * not be pending at that point - confirm.
 */
static void
nbd_reenable (struct nbd_device *lo)
{

	int m, n;
	unsigned long flags;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED))
		return;
	// PTB read without the queue lock
	if (lo->aslot <= 0)
		return;

	// PTB disable part
	write_lock_irqsave (&lo->queue_spinlock, flags);
	if ((atomic_read (&lo->flags) & NBD_ENABLED)) {
		atomic_clear_mask (NBD_ENABLED, &lo->flags);
	}
	write_unlock_irqrestore (&lo->queue_spinlock, flags);

	// PTB clear queue part
	// PTB m and n are the counts reported by the two clear calls
	m = nbd_clr_queue (lo);
	// PTB - have to call clr_kernel_queue without the io_spinlock held
	n = nbd_clr_kernel_queue ();
	if (m + n > 0) {
		// PTB - schedule ourselves to try and reenable again later
		NBD_ALERT
		 ("cleared %d+%d kernel requests, rescheduling enable\n",
		  m, n);
		// PTB the timer hands the device back to us through .data
		reenable_timer.data = (unsigned long) lo;
		reenable_timer.expires = jiffies + 1 * HZ;
		add_timer (&reenable_timer);
		return;
	}

	// PTB reenable part
	write_lock_irqsave (&lo->queue_spinlock, flags);
	if ((atomic_read (&lo->flags) & NBD_ENABLED)) {
		// PTB somebody else enabled the device meanwhile, so do
		// not count another life
		write_unlock_irqrestore (&lo->queue_spinlock, flags);
		return;
	}
	atomic_set_mask (NBD_ENABLED, &lo->flags);
	lo->lives++;
	write_unlock_irqrestore (&lo->queue_spinlock, flags);

}
/*
 * PTB - the one timer used to retry nbd_reenable later. Users set
 * .data to the device pointer and .expires to the due time before
 * calling add_timer.
 *
 * NOTE(review): the initialiser is positional, so it depends on the
 * field order of struct timer_list - presumably list, expires, data,
 * function; confirm against the kernel headers in use.
 */
static struct timer_list reenable_timer = {
	{NULL, NULL},
	0,
	0,
	// PTB nbd_reenable takes a device pointer; it is cast to the
	// timer callback type and gets the pointer back via .data
	(void (*)(unsigned long)) nbd_reenable,
};

/*
 * PTB - this unsets the enabled flag on the device and then clears the
 *     - queue for the device, retrying for as long as the clear call
 *     - keeps reporting a positive count, up to 100 passes.
 *       Does nothing on a device that is not initialised.
 */
static void
nbd_disable (struct nbd_device *lo)
{
	int passes_left = 100;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED))
		return;

	atomic_clear_mask (NBD_ENABLED, &lo->flags);

	// PTB keep sweeping while the sweep still finds something
	while (passes_left-- > 0 && nbd_clr_queue (lo) > 0)
		continue;
}

/*
 * PTB - drains device queue. Disables device.
 * The rollback that we call takes both the io spinlock and our own
 * spinlock, so we can hold neither when we are called. Also
 * invalidates buffers, on request of Rogier Wolff.
 *
 * Returns -EINVAL if the device is not initialised or has no slots,
 * and 0 otherwise. The device is left disabled; nothing here enables
 * it again.
 */
static int
nbd_soft_reset (struct nbd_device *lo)
{
	int n;

	if (!(atomic_read (&lo->flags) & NBD_INITIALISED)
	    || lo->nslot <= 0)
		return -EINVAL;

	// We push back the requests in the slot, in order to be able to
	// vamoosh them in a moment. This is a race, surely? We ought to
	// do this atomically or disable the slots first.
	n = 0;
	while (n < lo->nslot) {
		nbd_rollback_all (lo->slots + n);
		n++;
	}

	// disable unsets the enabled flag and clears the queue
	nbd_disable (lo);

	// PTB we used to reenable in 5s but I couldn't see how that was
	// triggered, so I got rid of the code (temporarily). The device
	// also stays signed.

	// PTB put back invalidate buffers for use when called from
	// clr_sock from nbd_release on request of Rogier Wolff.
	n = 0;
	while (n < lo->nslot) {
		invalidate_buffers (mk_kdev (major, (lo->nbd << NBD_SHIFT) + n));
		n++;
	}

	return 0;
}

/*
 * PTB - added a device/module reset for tidyness in face of rampant hacking
 *     - this does a soft_reset of each device, followed by a clr sock
 *     - on each of its slots, and then clears the kernel queue. It
 *     - unsets the enabled flag on each device.
 *       We have to be called without either the io spinlock or our
 *       spinlock held, as we call soft_reset which takes both, as
 *       does clr_sock
 *
 * When built as a module the use count is also dropped until the
 * module no longer reports itself in use. Always returns 0.
 */
int
nbd_hard_reset (void)
{
	int dev;

	for (dev = 0; dev < MAX_NBD; dev++) {
		struct nbd_device *lo = nbd_dev + dev;
		int s = 0;

		nbd_soft_reset (lo);

		//  PTB clr_sock takes the io spinlock and our spinlock.
		while (s < lo->nslot) {
			nbd_clr_sock (lo->slots + s);
			s++;
		}
	}

	// PTB - have to call clr_kernel_queue without the io_spinlock held
	nbd_clr_kernel_queue ();

#ifdef MODULE
	// PTB I'd alert if it were negative, if only I had access to count!
	while (MOD_IN_USE)
		MOD_DEC_USE_COUNT;
#endif

	return 0;
}

/*
 * PTB - generic ioctl handling
 *
 * Three stages:
 *  1. driver-local ioctls (the first switch), always served here;
 *  2. the standard block ioctls (the second switch), served here too;
 *  3. anything else is passed through nbd_ioctl_convert and, if
 *     accepted, shipped to the daemon as a fake request built in
 *     lo->req. We then wait until the request is released or
 *     lo->req_timeo seconds have gone by.
 *
 * Only one remote ioctl can be in flight per device: lo->req counts as
 * taken for as long as its rq_status is RQ_ACTIVE. Every exit taken
 * after the request has been claimed therefore resets rq_status and
 * drops the queue lock, otherwise all later remote ioctls on the
 * device would time out.
 *
 * Returns 0 or a negative errno: -EACCES for non root callers,
 * -ENODEV for a wrong major or device group, -ETIME when the remote
 * side takes too long, and -EINVAL, -EFAULT or -ENOMEM from the
 * individual handlers.
 *
 * NOTE(review): the slot ioctls are served even on the whole-device
 * minor, where islot is -1 and slot points before lo->slots[0] -
 * confirm whether callers ever do that.
 * NOTE(review): kmalloc (GFP_KERNEL) and the user copies of the remote
 * path happen with the queue spinlock held, although they presumably
 * can sleep - confirm.
 */
static int
nbd_ioctl (struct inode *inode, struct file *file,
	   unsigned int cmd, unsigned long arg)
{
	struct nbd_device *lo = 0;	// PTB device pointer
	int minor = -1;		// PTB minor on which we got the ioctl
	int islot = -1;		// PTB slot number 0, 1, ...
	int nbd = -1;		// PTB the count for the device group
	struct nbd_slot *slot = 0;	// PTB slot pointer
	// PTB as wide as jiffies, so that nothing gets truncated
	unsigned long start_time, timeout;
	size_t size;		// PTB for use in ioctls

	if (!suser ()) {
		NBD_ERROR ("caller must be root.\n");
		return -EACCES;
	}
	if (!inode) {
		NBD_ERROR ("given bad inode.\n");
		return -EINVAL;
	}
	if (major (inode->i_rdev) != major) {
		NBD_ERROR ("pseudo-major %d != %d\n",
			   major (inode->i_rdev), major);
		return -ENODEV;
	}
	minor = minor (inode->i_rdev);
	nbd = minor >> NBD_SHIFT;
	if (nbd >= MAX_NBD) {
		NBD_ERROR ("tried to open too many devices, %d\n", minor);
		return -ENODEV;
	}
	lo = &nbd_dev[nbd];
	lo->harderror = 0;
	// PTB minor 0 of the group is the whole device (islot -1), the
	// following minors are slots 0, 1, ...
	islot = minor % NBD_MAXCONN - 1;
	slot = &lo->slots[islot];

	// PTB these are all always local ioctls
	switch (cmd) {
		int err;
		int intval;

	  case NBD_CLEAR_SOCK:
		err = nbd_clr_sock (slot);
		return err;

	  case NBD_SET_SOCK:
		err = nbd_set_sock (slot, arg);
		return err;

	  case BLKBSZGET:
		if (!(atomic_read (&lo->flags) & NBD_BLKSIZED)) {
			return -EINVAL;
		}
		err = put_user (lo->blksize, (long *) arg);
		return err;

	  case BLKBSZSET:
		if (!arg)
			return -EINVAL;
		if (get_user (intval, (int *) arg))
			return -EFAULT;
		err = nbd_set_blksize (lo, intval);
		return err;

	  case NBD_SET_SIZE:
		err = nbd_set_size (lo, (u64) arg);
		return err;

	  case NBD_SET_SECTORS:
		err = nbd_set_size (lo, ((u64) arg) << 9);
		return err;

	  case MY_NBD_SET_INTVL:	/* WG */
		err = my_nbd_set_intvl (lo, arg);
		return err;

	  case MY_NBD_SET_SPID:
		err = my_nbd_set_spid (slot, arg);
		return err;

	  case MY_NBD_SET_BUFFERWR:
		err = my_nbd_set_bufferwr (lo, arg);
		return err;

	  case MY_NBD_REG_BUF:	/* PTB register your buffer per socket here */
		if (!arg) {
			/* PTB serves as existence check for this ioctl */
			return 0;
		}
		err = my_nbd_reg_buf (slot, (char *) arg);
		return err;

	  case MY_NBD_SET_SIG:
		err = my_nbd_set_sig (slot, (int *) arg);
		return err;

	  case MY_NBD_GET_REQ:
		err = nbd_get_req (slot, (char *) arg);
		return err;

	  case MY_NBD_CLR_REQ:
		nbd_rollback_all (slot);
		err = 0;
		return err;

	  case MY_NBD_ERR_REQ:
		nbd_error_all (slot);
		err = 0;
		return err;

	  case MY_NBD_SYNC:
		err = 0;

		// PTB error too old reqs if show_errs is set, else roll them back

		nbd_rollback_old (lo);

		nbd_set_speed (lo);

		// PTB wait   max(sync_intvl,1) s,
		if (atomic_read (&lo->flags) & NBD_ENABLED) {
			// we stay here for 1s or more
			int intvl = (sync_intvl > 0) ? sync_intvl : 1;
			// PTB jiffies we're prepared to throttle for
			unsigned long timeout = intvl * HZ;
			// ptb jiffies at which we abort
			unsigned long timedue = jiffies + timeout;

			while (1) {
				if (jiffies >= timedue)
					break;
				interruptible_sleep_on_timeout (&lo->wq, 1);
			}
		}
		else {
			err = -EINVAL;
		}

		return err;

	  case MY_NBD_ACK:
		err = nbd_ack (slot, (char *) arg);
		return err;

		/* let this be compiled in always - it's useful. PTB */
	  case NBD_PRINT_DEBUG:
		NBD_INFO
		 ("device %d: head = %x, tail = %x, in = %d, out = %d\n",
		  minor, (int) list_head (&lo->queue, struct request,
					  queuelist),
		  (int) list_tail (&lo->queue, struct request, queuelist),
		  atomic_read (&lo->requests_in[READ]) +
		  atomic_read (&lo->requests_in[WRITE]),
		  atomic_read (&lo->requests_out[READ]) +
		  atomic_read (&lo->requests_out[WRITE]));
		err = 0;
		return err;
	  case NBD_HARD_RESET:	/* PTB - debugging */
		err = nbd_hard_reset ();
		return err;

	  case NBD_RESET:	/* PTB - debugging */
		err = nbd_soft_reset (lo);
		// PTB we reenable in 5s
		reenable_timer.data = (unsigned long) lo;
		reenable_timer.expires = jiffies + 5 * HZ;
		add_timer (&reenable_timer);
		return err;

	  case NBD_SET_MD5SUM:	/* PTB - change to do/plead md5summing */
		if (arg) {
			atomic_set_mask (NBD_MD5SUM, &lo->flags);
		}
		else {
			atomic_clear_mask (NBD_MD5SUM, &lo->flags);
		}
		err = 0;
		return err;

	  case MY_NBD_SET_SHOW_ERRS:	/* PTB/WG - change show error status */
		if (arg) {
			atomic_set_mask (NBD_SHOW_ERRS, &lo->flags);
		}
		else {
			atomic_clear_mask (NBD_SHOW_ERRS, &lo->flags);
		}
		return 0;

	  case MY_NBD_INVALIDATE:
		err = my_nbd_set_invalid (lo, (int) arg);
		return err;
	}

	// PTB these are the standard ioctls, and we might get them from
	// the other side

	switch (cmd) {
		int err;
		int intval;

	  case BLKROSET:	/* PTB - change ro status */
		if (get_user (intval, (int *) arg))
			return -EFAULT;
		// PTB local flags
		if (intval) {
			atomic_set_mask (NBD_READ_ONLY, &lo->flags);
		}
		else {
			atomic_clear_mask (NBD_READ_ONLY, &lo->flags);
		}
		// PTB which device really doesn't matter. We do the checking.
		set_device_ro (mk_kdev (major, nbd << NBD_SHIFT), intval);
		return 0;

	  case BLKROGET:
		intval = (atomic_read (&lo->flags) & NBD_READ_ONLY) != 0;
		return put_user (intval, (int *) arg);

	  case BLKFLSBUF:
		nbd_maybe_sync_sync (lo);	// PTB normally fsync_dev
		// PTB device likely has buffers or caches in kernel
		invalidate_buffers (inode->i_rdev);
#ifndef NO_BUFFERED_WRITES
		if (atomic_read (&lo->flags) & NBD_BUFFERWR) {
			// PTB got this from rd.c
			destroy_buffers (inode->i_rdev);
		}
#endif		/* NO_BUFFERED_WRITES */
		return 0;

	  case HDIO_GETGEO:
		if (!arg) {
			return -EINVAL;
		}
		do {
			struct hd_geometry *geo =
			 (struct hd_geometry *) arg;
			int sectors = nbd_sizes[nbd << NBD_SHIFT] << 1;
			unsigned short c;
			unsigned char h, s;
			// PTB invent a geometry to fit the size
			if (sectors < (1 << 22)) {
				h = 4;
				s = 16;
				c = sectors >> 6;
			}
			else {
				h = 255;
				s = 63;
				c = (sectors / h) / s;
			}
			err = 0;
			if ((err = put_user (c, &geo->cylinders), err < 0)
			    || (err = put_user (h, &geo->heads), err < 0)
			    || (err = put_user (s, &geo->sectors), err < 0)
			    || (err = put_user (h, &geo->start), err < 0)) {
				return err;
			}
		} while (0);
		return 0;

	  case BLKGETSIZE:	/* PTB 132 */
		/* PTB return nr sectors */
		if (!(atomic_read (&lo->flags) & NBD_SIZED)) {
			return -ENODEV;
		}
		err = put_user ((unsigned long) lo->sectors,
			   (unsigned long *) arg);
		// PTB this check is silly here and seems to trigger!
		if (lo->size != 0
		    && (u32) (lo->bytesize >> 10) != lo->size) {
			NBD_ALERT
			 ("bytes %luKB mismatch with KB %u in BLKGETSIZE\n",
			  (unsigned long) (lo->bytesize >> 10), lo->size);
		}
		return err;

#ifdef BLKGETSIZE64
	  case BLKGETSIZE64:
		/* PTB return real size in bytes */
		if (!(atomic_read (&lo->flags) & NBD_SIZED)) {
			return -ENODEV;
		}
		err = put_user (lo->bytesize, (u64 *) arg);
		return err;
#endif

	}

	cmd = nbd_ioctl_convert (cmd);
	if (cmd == -1) {
		NBD_ALERT ("unauthorized ioctl\n");
		return -EINVAL;
	}

	// PTB here we have to treat remote ioctls. We should probably make
	// a request and put it on the local queue, but where can we get
	// the request from? We might have to keep one in reserve.
	// That's not a bad idea, because
	// we generate it here and we delete it here, and the daemon code
	// is all set up to read that sort of thing. So that's what we do ...

	timeout = lo->req_timeo * HZ;
	start_time = jiffies;

	// PTB we'll spinlock on our queue just to have any old lock. 
	// PTB we're waiting to be able to write the req safely
	write_lock (&lo->queue_spinlock);
	while (lo->req.rq_status == RQ_ACTIVE) {
		int err;
		if (jiffies >= start_time + timeout) {
			// PTB it takes too long to get the ioctl lock
			NBD_ALERT
			 ("took too long to get a spare ioctl: TIMEOUT\n");
			write_unlock (&lo->queue_spinlock);
			return -ETIME;
		}
		write_unlock (&lo->queue_spinlock);
		err = interruptible_sleep_on_timeout (&lo->req_wq,
						      start_time +
						      timeout - jiffies);
		write_lock (&lo->queue_spinlock);
	}

	// PTB lock is now held, prepare the fake request for our queue

	memset (&lo->req, 0, sizeof (struct request));
	lo->req.rq_status = RQ_ACTIVE;
	set_rq_type(&lo->req, IOCTL);

	// PTB 1 block announced for accounting visibility
	//lo->req.nr_sectors = lo->logblksize - 9;

	lo->req.errors = 0;

	// PTB this is the fixed-up command
	lo->req.special = (void *) cmd;

	// PTB this is (arg if it is direct, else) the address of a local buffer
	// PTB we need to store the arg or its dereference somewhere local
	// for a while until the cnb-client thread can enter and pick it
	// up. The alternative is to block the ioctl here until it is
	// picked up, which IS possible.
	if (_IOC_DIR (cmd) & _IOC_READ) {
		// PTB indirect

		int err;
		int nbytes;
		char *buf;

		// PTB take the answer as signed first. Stored straight into
		// the unsigned size, a negative answer could never be seen
		nbytes = nbd_ioctl_size_user (cmd, (char *) arg);

		if (nbytes < 0) {
			// PTB unauthorized ioctl. Hand the req back and
			// let go of the lock before leaving
			lo->req.rq_status = 0;
			write_unlock (&lo->queue_spinlock);
			return -EINVAL;
		}
		size = nbytes;

		if (size > NBD_CTLDTA_LENGTH) {
			// PTB we have to use an extra buffer or else block
			// here and rendezvous directly with the get_req call
			buf = kmalloc (size, GFP_KERNEL);
			if (!buf) {
				// PTB hand the req back, else it stays
				// active for ever
				lo->req.rq_status = 0;
				write_unlock (&lo->queue_spinlock);
				return -ENOMEM;
			}
			// PTB nr_sectors is at least one iff we kmalloc'ed
			lo->req.nr_sectors = (size + 511) >> 9;
		}
		else {
			buf = &lo->ctldta[0];
			lo->req.nr_sectors = 0;
		}

		if (_IOC_DIR (cmd) & _IOC_WRITE) {
			err =
			 nbd_ioctl_copy_from_user (cmd, buf, (char *) arg,
						   size);
			if (err < 0) {
				// PTB decide about the free while the req
				// is still ours, then hand it back
				int kmalloced = lo->req.nr_sectors > 0;
				lo->req.rq_status = 0;
				write_unlock (&lo->queue_spinlock);
				if (kmalloced)
					kfree (buf);
				return err;
			}
		}

		// PTB always record which buffer we are using
		lo->req.buffer = buf;

	}
	else {
		// PTB direct - we just need to remember the value
		size = 0;
		lo->req.buffer = (char *) arg;
	}

	// PTB point the request buffer vaguely in the direction of where
	// the data is, but it doesn't matter.
	lo->req.rq_dev = mk_kdev (major, minor);

	write_unlock (&lo->queue_spinlock);
	// PTB Lock released. Now everyone knows this ioctl req is being used.

	// PTB we queue the request for treatment and wait till treated
	nbd_enqueue (lo, &lo->req, NBD_UNINTERRUPTIBLE);

	// PTB gain the lock again to watch for when req released in nbd_commit
	write_lock (&lo->queue_spinlock);

	while (lo->req.rq_status == RQ_ACTIVE) {
		int err;
		if (jiffies >= start_time + timeout) {
			// PTB it takes too long 
			// PTB note any kmalloc'ed buffer while the req is
			// still ours. Once rq_status is reset and the lock
			// dropped, another ioctl may take the req over
			char *stale = NULL;
			if (lo->req.nr_sectors > 0)
				stale = lo->req.buffer;
			NBD_ALERT
			 ("deleting queued ioctl from queue for timeout!\n");
			// PTB FIXME really need a spinlock per slot
			list_del (&lo->req.queuelist);
			lo->req.rq_status = 0;
			lo->req.errors++;
			write_unlock (&lo->queue_spinlock);
			// PTB FIXME want some kind of error accounting increment
			NBD_ALERT
			 ("took too long to treat queued ioctl: TIMEOUT\n");
			if (stale) {
				kfree (stale);
			}
			return -ETIME;
		}
		write_unlock (&lo->queue_spinlock);
		err = interruptible_sleep_on_timeout (&lo->req_wq, 1);
		write_lock (&lo->queue_spinlock);
	}

	// PTB lock is now held for normal exit
	if (_IOC_DIR (cmd) & _IOC_READ) {
		int err;
		// PTB if we are reading, it should be to the local buffer
		// PTB the buffer points at lo->ctldta or kmalloced area
		err =
		 nbd_ioctl_copy_to_user (cmd, (char *) arg, lo->req.buffer,
					 size);
		if (lo->req.nr_sectors > 0) {
			kfree (lo->req.buffer);
		}
		// PTB test the sign separately. Compared directly with the
		// unsigned size, a negative result would look huge and
		// pass for success
		if (err < 0 || (size_t) err < size) {
			// PTB hand the req back and unlock on this exit too
			lo->req.rq_status = 0;
			write_unlock (&lo->queue_spinlock);
			return -ENOMEM;
		}
	}
	// PTB inspect error status and then release lock
	lo->req.rq_status = 0;
	if (lo->req.errors != 0) {
		write_unlock (&lo->queue_spinlock);
		return -EINVAL;
	}
	write_unlock (&lo->queue_spinlock);
	return 0;

}

/*
 * PTB - release the device. This is the release method of nbd_blkops.
 *
 * If the minor names a slot (rather than the whole device), the slot
 * refcount is dropped and the slot is cleared when that was its last
 * reference. The device refcount and the module use count are then
 * dropped, unless this would be the last reference while requests are
 * still queued or in progress. In that case NBD_RLSE_REQD is set,
 * -EBUSY is returned, and the drop is made up for by a later call.
 *
 * Returns -ENODEV for a null inode or a bad device group, -EBUSY as
 * described above, and 0 otherwise.
 */
static int
nbd_release (struct inode *inode, struct file *file)
{
	struct nbd_device *lo;
	struct nbd_slot *slot;
	int dev;
	int nbd;
	int islot;

	if (!inode) {
		NBD_ALERT ("null inode.\n");
		return -ENODEV;
	}

	dev = minor (inode->i_rdev);
	nbd = dev >> NBD_SHIFT;
	if (nbd >= MAX_NBD) {
		NBD_ALERT ("too many open devices.\n");
		return -ENODEV;
	}

	lo = &nbd_dev[nbd];
	islot = dev % NBD_MAXCONN - 1;

	// PTB it's a daemon closing the slot?
	if (islot >= 0) {
		slot = &lo->slots[islot];
		if (--slot->refcnt <= 0) {
			// PTB this was the last ref, so it's certain the daemon died
			nbd_clr_sock (slot);
			slot->pid = 0;
		}
	}

	for (;;) {
		// PTB with other holders about we could try an async flush,
		// since we will get another chance later, but 2.5.7 doesn't
		// have async sync! FIXME (we want sync_buffers(..., 0) or
		// write_unlocked_buffers). So we only flush, for real, when
		// we are the last holder and need to really turn off.
		if (atomic_read (&lo->refcnt) <= 1)
			fsync_dev (inode->i_rdev);

		if (!(atomic_read (&lo->flags) & NBD_RLSE_REQD))
			break;

		// delayed release attempt followed up now
		atomic_clear_mask (NBD_RLSE_REQD, &lo->flags);
		atomic_dec (&lo->refcnt);
		if (MOD_IN_USE)
			MOD_DEC_USE_COUNT;
	}

	if (atomic_read (&lo->refcnt) <= 0) {
		NBD_ALERT ("refcount(%d) <= 0\n",
			   atomic_read (&lo->refcnt));
		return 0;
	}

	if (!list_empty (&lo->queue) && atomic_read (&lo->refcnt) < 2) {
		NBD_ALERT
		 ("Some requests are waiting queued -> cannot turn off.\n");
		// PTB signal that we want to release but can't
		atomic_set_mask (NBD_RLSE_REQD, &lo->flags);
		return -EBUSY;
	}

	if (atomic_read (&lo->requests_req[READ]) +
	    atomic_read (&lo->requests_req[WRITE]) > 0
	    && atomic_read (&lo->refcnt) < 2) {
		NBD_ALERT
		 ("Some requests are still in progress -> cannot turn off.\n");
		// PTB signal that we want to release but can't
		atomic_set_mask (NBD_RLSE_REQD, &lo->flags);
		return -EBUSY;
	}

	/* POSSIBLE change socket here PTB */

	atomic_dec (&lo->refcnt);
	if (MOD_IN_USE)
		MOD_DEC_USE_COUNT;

	// PTB on last close forget the registered buffer size and restart
	// the outgoing sequence numbers. Buffers are not invalidated here.
	if (atomic_read (&lo->refcnt) <= 0 || !MOD_IN_USE) {
		lo->bufsiz = 0;
		atomic_set (&lo->seqno_out, 0);
	}

	return 0;
}

/*
 * PTB - the block device methods of the driver. The media change and
 * revalidate methods are explicitly left unset.
 */
static struct block_device_operations nbd_blkops = {
	.owner = THIS_MODULE,
	.open = nbd_open,
	.release = nbd_release,
	.ioctl = nbd_ioctl,
	.check_media_change = NULL,
	.revalidate = NULL,
};

/*
 * Pavel - And here should be modules and kernel interface 
 *  (Just smiley confuses emacs :-)
 */

/*
 * PTB This is just to get a nice limited width integer printout in proc!
 *
 * Formats n in at most endpos characters, scaled by powers of 1024
 * with a K, M, G or T suffix. When the number was scaled and there is
 * room left over, decimal digits are added to fill the width, so
 * display (1536, 8) gives "1.50000K".
 *
 * endpos is the field width and is clamped to 1..8, which keeps every
 * write inside the static buffer of size 16. If the width is too
 * small for the whole digits, the leading ones are dropped.
 *
 * Returns a pointer into a static buffer. The text is overwritten by
 * the next call, and the function is not reentrant.
 */
char *
display (unsigned n, int endpos)
{
	static const char unit_names[] = "KMGT";
	static char buf[16];
	int units = 0;
	int decimals = 0;
	int decpos;

	// PTB  use endpos (<= 8) spaces at most. Anything else would walk
	// off one end of the buffer
	if (endpos < 1)
		endpos = 1;
	if (endpos > 8)
		endpos = 8;

	decpos = endpos;
	buf[endpos--] = 0;

	// PTB  find the right units to display. U or K or M or G.
	// decimals keeps the 10 bits dropped by the last shift, that is
	// the fractional part in units of 1/1024
	while (n >= (1u << 10)) {
		decimals = n & ((1 << 10) - 1);
		n >>= 10;
		units++;
	}
	if (units > 0 && units <= 4)
		buf[endpos--] = unit_names[units - 1];

	// PTB  write the whole digits (something between 0 and 1023 inclusive)
	if (n == 0) {
		buf[endpos--] = '0';
	}
	else {
		while (endpos >= 0 && n > 0) {
			buf[endpos--] = '0' + n % 10;
			n /= 10;
		}
	}

	// PTB if there is space and cause, add decimal digits. They go on
	// the right, over the unit letter, which is put back after them.
	// The text then starts where the whole digits start, with as many
	// decimals as there were unused places on the left
	if (endpos >= 1 && units > 0) {
		int k = 0;
		char unitchar = buf[--decpos];
		buf[decpos + k++] = '.';
		while (endpos >= k) {
			// PTB exact long multiplication of decimals/1024 by
			// ten. The remainder stays below 1024, so the digit
			// is always 0..9. Subtracting a rounded tenth
			// instead let the error grow until a ':' came out
			decimals *= 10;
			buf[decpos + k++] = '0' + (decimals >> 10);
			decimals &= (1 << 10) - 1;
		}
		buf[decpos + k++] = unitchar;
		buf[decpos + k] = 0;
	}
	// PTB report the start position
	return buf + endpos + 1;
}

int
nbd_read_proc (char *buf, char **start, off_t offset, int len, int *eof,
	       void *data)
{

#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif

	const int limit = MIN (PAGE_SIZE, len) - 80;
	static int i;
	struct nbd_device *lo;
	static int last;
	static void *next_label;
	static char *next_label_name;
	static int total;
	unsigned long flags;

	if (offset > 0 && !next_label) {
		*eof = 1;
		*start = buf;
		return 0;
	}

	if (offset <= 0) {
		// PTB do static inits first time through
		last = -1;
		i = 0;
		next_label = NULL;
		next_label_name = NULL;
		total = 0;
	}

	// PTB  start this bytecount
	len = 0;

#define NBD_PROC_LABEL(n) \
        next_label = &&label_##n; \
        next_label_name = "label_" #n; \
        if (len > limit) { \
            *start = (char *) len; \
            total += len; \
            return len;\
        } \
        label_##n:

	for ( /* static init */ ; i < MAX_NBD; i++) {

		char *devnam;

		lo = &nbd_dev[i];
		devnam = lo->devnam;
		if (lo->nslot <= 0) {
			next_label = NULL;
			continue;
		}

		// PTB computed goto next not-done
		if (next_label) {
			void *label = next_label;
			next_label = NULL;
			next_label_name = NULL;
			len = 0;
			goto *label;
		}

		NBD_PROC_LABEL (1);

		if (last == i - 2) {
			char *prevdevnam = device_letter (i - 1);
			len +=
			 sprintf (buf + len, "Device %s:\tClosed\n",
				  prevdevnam);
		}
		if (last < i - 2) {
			char lastdevnam[3];
			char prevdevnam[3];
			strncpy (lastdevnam, device_letter (last + 1), 3);
			strncpy (prevdevnam, device_letter (i - 1), 3);
			len +=
			 sprintf (buf + len, "Device %s-%s:\tClosed\n",
				  lastdevnam, prevdevnam);
		}

		NBD_PROC_LABEL (2);

		len +=
		 sprintf (buf + len, "Device %s:\tOpen " "\n", devnam);

		NBD_PROC_LABEL (3);

		len += sprintf (buf + len,
				"[%s] State:\t%s%s%s%s%s%s%s%s%s%s%slast error %d, lives %d, bp %d\n",
				devnam, atomic_read (&lo->flags)
				& NBD_INITIALISED ? "" : "uninitialized, ",
				atomic_read (&lo->flags)
				& NBD_WRITE_NOCHK ? "noverify, " :
				"verify, ", atomic_read (&lo->flags)
				& NBD_READ_ONLY ? "ro, " : "rw, ",
				merge_requests ? "merge requests, " : "",
#ifndef NO_BUFFERED_WRITES
				atomic_read (&lo->flags)
				& NBD_BUFFERWR ? "buffer writes, " : "",
#else
				"",
#endif		/* NO_BUFFERED_WRITES */
				atomic_read (&lo->flags)
				& NBD_ENABLED ? "enabled, " :
				"disabled, ", atomic_read (&lo->flags)
				& NBD_INVALID ? "invalid, " : "",
				atomic_read (&lo->flags)
				& NBD_SHOW_ERRS ? "show_errs, " : "",
				plug ? "plug, " : "",
				atomic_read (&lo->flags)
				& NBD_SYNC ? "sync, " : "",
				atomic_read (&lo->flags)
				& NBD_MD5SUM ? "md5sum, " : "",
				lo->harderror,
				lo->lives -
				((atomic_read (&lo->flags) & NBD_ENABLED) ?
				 1 : 0), 0	//atomic_read(&buffermem_pages)
		 );

		NBD_PROC_LABEL (4);

		do {		// PTB begin long do once block
			int countq[2] = { 0, 0 };
			int cmd;

			struct list_head *pos;

			read_lock_irqsave (&lo->queue_spinlock, flags);

			list_for_each (pos, &lo->queue) {
				struct request *req =
				 list_entry (pos, struct request, queuelist);
				if (countq[READ] + countq[WRITE] > 1000)
					break;

				cmd = rq_data_dir (req);
				countq[cmd]++;
			}

			read_unlock_irqrestore (&lo->queue_spinlock,
						flags);

			len += sprintf (buf + len,
					"[%s] Queued:\t+%dR/%dW curr (check %dR/%dW) +%dR/%dW max\n",
					devnam,
					atomic_read (&lo->countq[READ]),
					atomic_read (&lo->countq[READ]),
					countq[READ], countq[WRITE],
					atomic_read (&lo->maxq[READ]),
					atomic_read (&lo->maxq[WRITE]));
		} while (0);	// PTB end long do once block

		NBD_PROC_LABEL (5);

		len += sprintf (buf + len,
				"[%s] Buffersize:\t%d\t(sectors=%d, blocks=%d)\n",
				devnam, lo->bufsiz, lo->max_sectors,
				lo->max_sectors / (lo->blksize >> 9));
		len +=
		 sprintf (buf + len, "[%s] Blocksize:\t%d\t(log=%d)\n",
			  devnam, lo->blksize, lo->logblksize);
		len +=
		 sprintf (buf + len, "[%s] Size:\t%luKB\n", devnam,
			  (unsigned long) (lo->bytesize >> 10));
		len +=
		 sprintf (buf + len, "[%s] Blocks:\t%u\n", devnam,
			  lo->size >> (lo->logblksize - 10));

		NBD_PROC_LABEL (6);

		len +=
		 sprintf (buf + len, "[%s] Sockets:\t%d", devnam,
			  lo->nslot);

		NBD_PROC_LABEL (7);

		do {		// PTB begin short do once block
			int j;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				if (j != atomic_read (&lo->islot))
					len +=
					 sprintf (buf + len, "\t(%s)",
						  slotj->file ? "+" : "-");
				else
					len +=
					 sprintf (buf + len, "\t(%s)",
						  slotj->file ? "*" : ".");
			}
		} while (0);	// PTB end short do once block

		len += sprintf (buf + len, "\n");

		NBD_PROC_LABEL (8);

		len += sprintf (buf + len, "[%s] Requested:\t%s", devnam,
				display (atomic_read
					 (&lo->requests_in[READ]) +
					 atomic_read (&lo->requests_in
						      [WRITE]), 7));

		NBD_PROC_LABEL (9);

		do {		// PTB begin short do once block
			int j;
			char buff[2][8];
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%s)",
					  display (slotj->in, 5));
			}
			strncpy (buff[0],
				 display (atomic_read
					  (&lo->requests_in[READ]), 6), 7);
			strncpy (buff[1],
				 display (atomic_read
					  (&lo->requests_in[WRITE]), 6),
				 7);
			len +=
			 sprintf (buf + len, "\t%sR/%sW", buff[0],
				  buff[1]);
			nbd_set_speed (lo);
			len += sprintf (buf + len, "\tmax %d",
					atomic_read (&lo->maxreqblks));
		} while (0);	// PTB end short do once block

		len += sprintf (buf + len, "\n");
		len += sprintf (buf + len, "[%s] Despatched:\t%s", devnam,
				display (atomic_read
					 (&lo->requests_out[READ]) +
					 atomic_read (&lo->requests_out
						      [WRITE]), 7));

		NBD_PROC_LABEL (10);

		do {		// PTB begin short do once block
			int j;
			char buff[2][8];
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%s)",
					  display (slotj->out, 5));
			}
			strncpy (buff[0],
				 display (atomic_read
					  (&lo->requests_out[READ]), 6),
				 7);
			strncpy (buff[1],
				 display (atomic_read
					  (&lo->requests_out[WRITE]), 6),
				 7);
			len +=
			 sprintf (buf + len, "\t%sR/%sW", buff[0],
				  buff[1]);
			len +=
			 sprintf (buf + len, "\tmd5 %sW",
				  display (atomic_read
					   (&lo->wrequests_5to), 5));
			len +=
			 sprintf (buf + len, " (%s eq,",
				  display (atomic_read
					   (&lo->wrequests_5so), 5));
			len +=
			 sprintf (buf + len, " %s ne,",
				  display (atomic_read
					   (&lo->wrequests_5wo), 5));
			len +=
			 sprintf (buf + len, " %s dn)",
				  display (atomic_read
					   (&lo->wrequests_5eo), 5));
		} while (0);	// PTB end short do once block

		len += sprintf (buf + len, "\n");
		len += sprintf (buf + len, "[%s] Errored:\t%s", devnam,
				display (atomic_read (&lo->requests_err),
					 7));

		NBD_PROC_LABEL (11);

		do {		// PTB begin short do once block
			int j;
			char buff[2][8];
			int toterrs = 0;

			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%s)",
					  display (slotj->err, 5));
				toterrs += slotj->err;
			}
			strncpy (buff[0], display (toterrs, 6), 7);
			strncpy (buff[1],
				 display (atomic_read (&lo->requests_err) -
					  toterrs, 6), 7);
			len +=
			 sprintf (buf + len, "\t%s+%s\n", buff[0],
				  buff[1]);
		} while (0);	// PTB end short do once block

		NBD_PROC_LABEL (12);

		do {		// PTB begin long do once block
			int pending_rblks = 0;	/* PTB  reads not reached the slots yet */
			int pending_wblks = 0;	/* PTB  writes not reached the slots yet */
			int blks = 0;

			read_lock_irqsave (&lo->queue_spinlock, flags);

			do {	// PTB begin short do once block
				struct list_head *pos;

				int count = 0;
				struct request *req;

				list_for_each (pos, &lo->queue) {
					req =
					 list_entry (pos, struct request,
						     queuelist);
					if (count++ > 1000)
						break;
					blks = nr_blks (req);
					if (blks > 0) {
						switch (rq_data_dir (req)) {
						  case READ:
							pending_rblks +=
							 blks;
							break;
						  case WRITE:
							pending_wblks +=
							 blks;
							break;
						}
					}
				}
			} while (0);	// PTB end short do once block

			read_unlock_irqrestore (&lo->queue_spinlock,
						flags);
			len +=
			 sprintf (buf + len, "[%s] Pending:\t%d", devnam,
				  atomic_read (&lo->requests_req[READ]) +
				  atomic_read (&lo->requests_req[WRITE]));

			do {	// PTB begin short do once block
				int j;
				for (j = 0; j < lo->nslot; j++) {
					struct nbd_slot *slotj =
					 &lo->slots[j];
					len +=
					 sprintf (buf + len, "\t(%d)",
						  slotj->req);
				}
			} while (0);	// PTB end short do once block

			len += sprintf (buf + len,
					"\t%dR/%dW+%dR/%dW\n",
					atomic_read (&lo->requests_req[READ]),
					atomic_read (&lo->requests_req[WRITE]),
					pending_rblks, pending_wblks);

		} while (0);	// PTB end long do once block

		NBD_PROC_LABEL (13);

		do {		// PTB begin long do once block
			char buff[10][8];
			int shift = lo->logblksize;

			strncpy (buff[0],
				 display (atomic_read (&lo->wspeed.speed)
					  << shift, 5), 7);
			strncpy (buff[1],
				 display (atomic_read (&lo->wspeed.speedav)
					  << shift, 5), 7);
			strncpy (buff[2],
				 display (atomic_read
					  (&lo->wspeed.speedmax) << shift,
					  5), 7);

			strncpy (buff[3],
				 display (atomic_read (&lo->rspeed.speed)
					  << shift, 5), 7);
			strncpy (buff[4],
				 display (atomic_read (&lo->rspeed.speedav)
					  << shift, 5), 7);
			strncpy (buff[5],
				 display (atomic_read
					  (&lo->rspeed.speedmax) << shift,
					  5), 7);

			strncpy (buff[6],
				 display (atomic_read (&lo->tspeed.speed)
					  << shift, 5), 7);
			strncpy (buff[7],
				 display (atomic_read (&lo->tspeed.speedav)
					  << shift, 5), 7);
			strncpy (buff[8],
				 display (atomic_read
					  (&lo->tspeed.speedmax) << shift,
					  5), 7);

			len +=
			 sprintf (buf + len, "[%s] B/s now:", devnam);
			len +=
			 sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[6],
				  buff[3], buff[0]);
			len +=
			 sprintf (buf + len, "[%s] B/s ave:", devnam);
			len +=
			 sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[7],
				  buff[4], buff[1]);
			len +=
			 sprintf (buf + len, "[%s] B/s max:", devnam);
			len +=
			 sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[8],
				  buff[5], buff[2]);
			if (lo->speed_lim > 0) {
				char buff[8];
				strncpy (buff,
					 display (lo->speed_lim << 10, 5),
					 7);
				len +=
				 sprintf (buf + len, "[%s] B/s lim:",
					  devnam);
				len +=
				 sprintf (buf + len, "\t%s\t(%sR+%sW)\n",
					  buff, buff, buff);
			}
		} while (0);	// PTB end long do once block

		do {		// PTB begin short do once block
			int blks;
			int tot_reqs = 0;

			len +=
			 sprintf (buf + len, "[%s] Spectrum:", devnam);
			for (blks = 0;
			     blks <= atomic_read (&lo->maxreqblks); blks++) {
				tot_reqs +=
				 atomic_read (&lo->req_in[READ][blks]) +
				 atomic_read (&lo->req_in[WRITE][blks]);
			}

			for (blks = 0;
			     blks <= atomic_read (&lo->maxreqblks); blks++) {
				int req_blks =
				 atomic_read (&lo->req_in[READ][blks])
				 + atomic_read (&lo->req_in[WRITE][blks]);
				int percent =
				 tot_reqs >
				 0 ? (100 * req_blks) / tot_reqs : 0;
				if (percent <= 0)
					continue;
				len +=
				 sprintf (buf + len, "\t%u%%%d", percent,
					  blks);
			}
			len += sprintf (buf + len, "\n");
		} while (0);	// PTB end short do once block

		NBD_PROC_LABEL (14);

		len += sprintf (buf + len, "[%s] Kthreads:\t%d", devnam,
				atomic_read (&lo->kthreads));
		len +=
		 sprintf (buf + len, "\t(%d waiting/%d running/%d max)\n",
			  atomic_read (&lo->kwaiters),
			  atomic_read (&lo->kthreads) -
			  atomic_read (&lo->kwaiters),
			  atomic_read (&lo->kmax));

		NBD_PROC_LABEL (15);

		len += sprintf (buf + len, "[%s] Cthreads:\t%d", devnam,
				atomic_read (&lo->cthreads));

		NBD_PROC_LABEL (16);

		do {
			int j;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				int state =
				 ((slotj->flags & NBD_SLOT_RUNNING) ? 1 :
				  0) +
				 ((slotj->flags & NBD_SLOT_WAITING) ? 2 :
				  0);
				char *desc = "?";
				switch (state) {
				  case 0:
					desc = "-";
					break;	/* PTB not in */
				  case 1:
					desc = "*";
					break;	/* PTB in and not waiting */
				  case 2:
					desc = "?";
					break;	/* PTB impossible */
				  case 3:
					desc = "+";
					break;	/* PTB in and waiting */
				}
				len += sprintf (buf + len, "\t(%s)", desc);
			}
		} while (0);

		len += sprintf (buf + len, "\n");

		NBD_PROC_LABEL (17);

		last = i;
		len += sprintf (buf + len, "[%s] Cpids:\t%d", devnam,
				atomic_read (&lo->cthreads));

		do {
			int j;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				len +=
				 sprintf (buf + len, "\t(%u)", slotj->pid);
			}
			len += sprintf (buf + len, "\n");
		} while (0);

		do {
			int j, k;
			for (j = 0; j < lo->nslot; j++) {
				struct nbd_slot *slotj = &lo->slots[j];
				if (slotj->spid != 0)
					break;
			}
			if (j < lo->nslot) {
				len +=
				 sprintf (buf + len, "[%s] Kpids:\t%d",
					  devnam,
					  atomic_read (&lo->cthreads));
				for (k = 0; k < lo->nslot; k++) {
					struct nbd_slot *slotk =
					 &lo->slots[k];
					len +=
					 sprintf (buf + len, "\t(%u)",
						  slotk->spid);
				}
				len += sprintf (buf + len, "\n");
			}
		} while (0);

		NBD_PROC_LABEL (18);

		NBD_PROC_LABEL (19);

		// PTB have to tell loop head that we are not reentering 
		next_label = NULL;
		next_label_name = NULL;
	}

	NBD_PROC_LABEL (20);

	if (last == i - 2) {
		char *prevnam = device_letter (i - 1);
		len +=
		 sprintf (buf + len, "Device %s:\tClosed\n", prevnam);
	}

	if (last < i - 2) {
		char lastnam[3];
		char prevnam[3];
		strncpy (lastnam, device_letter (last + 1), 3);
		strncpy (prevnam, device_letter (i - 1), 3);
		len += sprintf (buf + len, "Device %s-%s:\tClosed\n",
				lastnam, prevnam);
	}

	NBD_PROC_LABEL (21);

	// PTB re-init vital statics for next time 
	next_label = NULL;
	next_label_name = NULL;

	*eof = 1;
	*start = buf;
	total += len;

	return len;
}

/*
 * PTB read an int from a string. Return number of ints read (0 or 1).
 *
 * @buf: text to scan; it is only read up to @len bytes, so it need not
 *       be NUL terminated
 * @len: number of bytes of @buf that may be examined
 * @n:   receives the parsed value; untouched when 0 is returned
 *
 * Accepted syntax: optional leading spaces/tabs, at most one '+' or '-'
 * which must be followed directly by the digits, then one or more
 * decimal digits. Scanning stops at the first non-digit after the number.
 */
static int
sscani (char *buf, int len, int *n)
{

	int i, a = 0;
	short has_digits = 0;
	short is_signed = 0;	/* 0: no sign seen, -1: '-' seen, 1: '+' seen */

	// PTB look for first significant character
	for (i = 0; i < len; i++) {
		char c = buf[i];
		if (c == ' ' || c == '\t') {
			// whitespace between the sign and the digits is refused
			if (is_signed)
				return 0;
		}
		else if (c == '-') {
			if (is_signed)
				return 0;
			is_signed = -1;
		}
		else if (c == '+') {
			if (is_signed)
				return 0;
			is_signed = 1;
		}
		else if (c >= '0' && c <= '9') {
			// an unsigned number is positive, but an explicit
			// '-' seen earlier must survive to the end
			if (!is_signed)
				is_signed = 1;
			has_digits = 1;
			break;
		}
		else {
			return 0;
		}
	}
	// PTB i now points at first digit if there is one
	if (!has_digits)
		return 0;
	for (; i < len; i++) {
		char c = buf[i];
		if (c < '0' || c > '9')
			break;
		a *= 10;
		a += c - '0';
	}
	if (is_signed >= 0)
		*n = a;
	else
		*n = -a;
	return 1;
}

/*
 * Scan a lower-case device letter code and convert it to a number.
 * Return number of device letter codes found (0 or 1).
 *
 * @buf: text to scan; only the first @len bytes are examined
 * @len: number of bytes of @buf available
 * @n:   receives the decoded number; untouched when 0 is returned
 *
 * Leading spaces (not tabs) are skipped. The run of letters 'a'..'z'
 * that follows is read as a base-26 number with 'a' as digit 0, so
 * "a" gives 0, "b" gives 1 and "ba" gives 26. Any other character in
 * front of the letters makes the scan fail.
 */
static int
sscana (char *buf, int len, int *n)
{
	int pos = 0;
	int code = 0;

	/* step over leading blanks */
	while (pos < len && buf[pos] == ' ')
		pos++;

	/* there has to be at least one letter right here */
	if (pos >= len || buf[pos] < 'a' || buf[pos] > 'z')
		return 0;

	/* fold the run of letters, most significant first */
	while (pos < len && buf[pos] >= 'a' && buf[pos] <= 'z') {
		code = code * 26 + (buf[pos] - 'a');
		pos++;
	}

	*n = code;
	return 1;
}

/*
 * Advance *bufp over spaces and tabs, never going past *lenp bytes.
 */
static void
getarg_skip_ws (const char **bufp, int *lenp)
{
	while (*lenp > 0 && (**bufp == ' ' || **bufp == '\t')) {
		(*bufp)++;
		(*lenp)--;
	}
}

/*
 * Parse "key = value" or "key[index] = value" from a command buffer.
 * The value and the index may each be written either as a decimal
 * integer (see sscani) or as a lower-case device letter code (see
 * sscana).
 *
 * @buffer: command text; only @buflen bytes are examined and the text
 *          is never modified
 * @buflen: number of bytes available in @buffer
 * @key:    NUL terminated keyword expected at the start of the command
 * @i:      receives the value
 * @j:      receives the index if one was supplied (foo[j] = i), else -1
 *
 * Returns -1 when the command is not of the form key ... '=' (key
 * mismatch, missing ']' or missing '='), 0 when the key matched but the
 * index or value could not be read, and 1 on success.
 */
static int
getarg (const char *buffer, int buflen, const char *key, int *i, int *j)
{

	int keylen;

	getarg_skip_ws (&buffer, &buflen);

	keylen = strlen (key);
	// never compare past the end of the supplied text
	if (buflen < keylen || strncmp (buffer, key, keylen))
		return -1;

	buffer += keylen;
	buflen -= keylen;

	getarg_skip_ws (&buffer, &buflen);

	*j = -1;
	if (buflen > 0 && *buffer == '[') {
		const char *closing;
		int indexlen;

		buffer++;
		buflen--;

		getarg_skip_ws (&buffer, &buflen);

		// bounded search: the text need not be NUL terminated
		closing = memchr (buffer, ']', buflen);
		if (!closing)
			return -1;
		indexlen = closing - buffer;

		// the index is a number OR a letter code, never both
		if (sscani ((char *) buffer, indexlen, j) < 1
		    && sscana ((char *) buffer, indexlen, j) < 1)
			return 0;

		// step over the index text and the closing bracket
		buffer = closing + 1;
		buflen -= indexlen + 1;

		getarg_skip_ws (&buffer, &buflen);
	}

	if (buflen <= 0 || *buffer != '=')
		return -1;

	buffer++;
	buflen--;

	getarg_skip_ws (&buffer, &buflen);

	// likewise the value is a number OR a letter code
	if (sscani ((char *) buffer, buflen, i) < 1
	    && sscana ((char *) buffer, buflen, i) < 1)
		return 0;
	return 1;
}

/*
 * Set or clear the NBD_SYNC flag on one device or on all of them.
 *
 * @sync_intvl: nonzero sets the flag, zero clears it
 * @i:          device index; any value outside 0..MAX_NBD-1 means
 *              "apply to every device"
 */
static void
set_sync_intvl (int sync_intvl, int i)
{
	int first = 0;
	int end = MAX_NBD;
	int n;

	/* a valid index narrows the range to that single device */
	if (i >= 0 && i < MAX_NBD) {
		first = i;
		end = i + 1;
	}

	for (n = first; n < end; n++) {
		struct nbd_device *dev = &nbd_dev[n];

		if (sync_intvl)
			atomic_set_mask (NBD_SYNC, &dev->flags);
		else
			atomic_clear_mask (NBD_SYNC, &dev->flags);
	}
}

static void
set_speed_lim (int speed_lim, int i)
{
	void set_sl (void) {
		struct nbd_device *lo = &nbd_dev[i];
		 lo->speed_lim = speed_lim;
	};

	if (i >= 0 && i < MAX_NBD) {
		set_sl ();
		return;
	}
	for (i = 0; i < MAX_NBD; i++) {
		set_sl ();
	}
}

/*
 * Set or clear the NBD_SHOW_ERRS flag on one device or on all of them.
 *
 * @show_errs: nonzero sets the flag, zero clears it
 * @i:         device index; any value outside 0..MAX_NBD-1 means
 *             "apply to every device"
 */
static void
set_show_errs (int show_errs, int i)
{
	int first = 0;
	int end = MAX_NBD;
	int n;

	/* a valid index narrows the range to that single device */
	if (i >= 0 && i < MAX_NBD) {
		first = i;
		end = i + 1;
	}

	for (n = first; n < end; n++) {
		struct nbd_device *dev = &nbd_dev[n];

		if (show_errs)
			atomic_set_mask (NBD_SHOW_ERRS, &dev->flags);
		else
			atomic_clear_mask (NBD_SHOW_ERRS, &dev->flags);
	}
}

/*
 * Set or clear the NBD_MD5SUM flag on one device or on all of them.
 *
 * @md5sum: nonzero sets the flag, zero clears it
 * @i:      device index; any value outside 0..MAX_NBD-1 means
 *          "apply to every device"
 */
static void
set_md5sum (int md5sum, int i)
{
	int first = 0;
	int end = MAX_NBD;
	int n;

	/* a valid index narrows the range to that single device */
	if (i >= 0 && i < MAX_NBD) {
		first = i;
		end = i + 1;
	}

	for (n = first; n < end; n++) {
		struct nbd_device *dev = &nbd_dev[n];

		if (md5sum)
			atomic_set_mask (NBD_MD5SUM, &dev->flags);
		else
			atomic_clear_mask (NBD_MD5SUM, &dev->flags);
	}
}

/*
 * Enable or disable one device or all of them.
 *
 * @enable: nonzero calls nbd_reenable() on the device, zero clears its
 *          NBD_ENABLED flag
 * @i:      device index; any value outside 0..MAX_NBD-1 means
 *          "apply to every device"
 */
static void
set_enable (int enable, int i)
{
	int first = 0;
	int end = MAX_NBD;
	int n;

	/* a valid index narrows the range to that single device */
	if (i >= 0 && i < MAX_NBD) {
		first = i;
		end = i + 1;
	}

	for (n = first; n < end; n++) {
		struct nbd_device *dev = &nbd_dev[n];

		if (enable != 0)
			nbd_reenable (dev);
		else
			atomic_clear_mask (NBD_ENABLED, &dev->flags);
	}
}

/*
 * PTB - handler for writes to the nbdinfo proc entry.
 *
 * A one character write (optionally followed by a newline) is a reset
 * request:
 *   "1"  calls nbd_hard_reset ()
 *   "0"  calls nbd_soft_reset () on every device and arms
 *        reenable_timer to expire 5 seconds later
 * Other single characters are silently accepted.
 *
 * Anything else is handed to getarg () as "key = value" or
 * "key[index] = value". The recognised keys are merge_requests,
 * sync_intvl (alias sync), speed_lim, show_errs, plug, md5sum,
 * buffer_writes (unless NO_BUFFERED_WRITES) and enable. The value is
 * stored in the global of the same name and, where a set_*() helper
 * exists, pushed to device [index], or to all devices when no index
 * was given.
 *
 * Returns count when the write was consumed, -EINVAL when no key
 * matched.
 *
 * NOTE(review): buffer is dereferenced directly. If the proc layer
 * passes a user-space pointer here it should be copied in first -
 * confirm against the kernel version this is built for.
 */
static int
nbd_write_proc (struct file *file, const char *buffer, unsigned long count,
		void *data)
{
	int i;
	// -1 means "all devices" to the set_*() helpers; getarg only
	// writes the index once its key has matched
	int index = -1;

	switch (count) {

	  case 2:
		if (buffer[1] != '\n')
			break;
		/* else fallthru to case 1 */
	  case 1:
		switch (*buffer) {
		  case '1':
			nbd_hard_reset ();
			break;
		  case '0':
			for (i = 0; i < MAX_NBD; i++) {
				//  PTB this takes the io spinlock and our spinlock.
				struct nbd_device *lo = &nbd_dev[i];
				nbd_soft_reset (lo);
				// NOTE(review): one shared timer is re-armed
				// for every device, so its data ends up
				// naming the last device only - confirm intent
				reenable_timer.data = (unsigned long) lo;
				reenable_timer.expires = jiffies + 5 * HZ;
				add_timer (&reenable_timer);
			}
			break;
		}
		break;
	  default:
		do {
			if (getarg (buffer, count, "merge_requests",
				    &merge_requests, &index) >= 0) {
				// merge_requests
				break;
			}
			if (getarg (buffer, count, "sync_intvl",
				    &sync_intvl, &index) >= 0
			    || getarg (buffer, count, "sync",
				       &sync_intvl, &index) >= 0) {
				// sync_intvl
				set_sync_intvl (sync_intvl, index);
				break;
			}
			if (getarg (buffer, count, "speed_lim",
				    &speed_lim, &index) >= 0) {
				// speed_lim
				set_speed_lim (speed_lim, index);
				break;
			}
			if (getarg (buffer, count, "show_errs",
				    &show_errs, &index) >= 0) {
				// show_errs
				set_show_errs (show_errs, index);
				break;
			}
			if (getarg (buffer, count, "plug",
				    &plug, &index) >= 0) {
				// plug
				break;
			}
			if (getarg (buffer, count, "md5sum",
				    &md5sum, &index) >= 0) {
				// md5sum
				set_md5sum (md5sum, index);
				break;
			}
#ifndef NO_BUFFERED_WRITES
			if (getarg (buffer, count, "buffer_writes",
				    &buffer_writes, &index) >= 0) {
				// buffer_writes
				set_buffer_writes (buffer_writes, index);
				break;
			}
#endif		/* NO_BUFFERED_WRITES */
			// the index must land in the variable that is
			// then passed on to set_enable ()
			if (getarg (buffer, count, "enable",
				    &enable, &index) >= 0) {
				// enable
				set_enable (enable, index);
				break;
			}
			NBD_ERROR ("illegal %lu character command\n",
				   count);
			return -EINVAL;
		} while (0);
		break;
	}
	return count;
}

#ifdef MODULE
MODULE_AUTHOR ("Peter T. Breuer, Andres Marin");
MODULE_DESCRIPTION ("Enhanced Network Block Device " NBD_VERSION);
#endif		/* MODULE */

// PTB we steal these from the queue struct at init: nbd_init () saves
// the default queue's three merge callbacks here before installing the
// nbd_*_fn wrappers below, which call through these pointers once
// their own size checks have passed.
static merge_requests_fn *ll_merge_requests_fn;
static merge_request_fn *ll_front_merge_fn;
static merge_request_fn *ll_back_merge_fn;

/* PTB -
 * These functions are needed when the kernel does request merging in
 * order to stop it making requests that are bigger than our buffer.
 *
 * To turn OFF merging (once these functions are in place), set
 * merge_requests=0.
 */
/*
 * Queue callback deciding whether two requests may be merged.
 *
 * Refuses (returns 0) when merging is switched off, when no saved
 * default callback exists, or when the combined sector count would
 * exceed either the device's max_sectors or merge_requests + 1 blocks.
 * Otherwise the decision is left to the saved default callback.
 */
static int
nbd_merge_requests_fn (request_queue_t * q, struct request *req,
		       struct request *req2)
{
	int minor_nr = minor (req->rq_dev);
	struct nbd_device *lo = &nbd_dev[minor_nr >> NBD_SHIFT];

	/* merging disabled, or nothing to delegate to */
	if (!merge_requests || !ll_merge_requests_fn)
		return 0;

	/* our own size limits come before the kernel's */
	if (req->nr_sectors + req2->nr_sectors > lo->max_sectors
	    || req->nr_sectors + req2->nr_sectors >
	       ((merge_requests + 1) << (lo->logblksize - 9)))
		return 0;

	return ll_merge_requests_fn (q, req, req2);
}
/*
 * Queue callback deciding whether a bio may be merged at the front of
 * a request.
 *
 * Refuses (returns 0) when merging is switched off, when no saved
 * default callback exists, or when the request's own sector count is
 * already above the device's max_sectors or above merge_requests + 1
 * blocks. Otherwise the decision is left to the saved default callback.
 */
static int
nbd_front_merge_fn (request_queue_t * q, struct request *req, struct bio * bio)
{
	int minor_nr = minor (req->rq_dev);
	struct nbd_device *lo = &nbd_dev[minor_nr >> NBD_SHIFT];

	/* merging disabled, or nothing to delegate to */
	if (!merge_requests || !ll_front_merge_fn)
		return 0;

	/* our own size limits come before the kernel's */
	if (req->nr_sectors > lo->max_sectors
	    || req->nr_sectors >
	       ((merge_requests + 1) << (lo->logblksize - 9)))
		return 0;

	return ll_front_merge_fn (q, req, bio);
}
/*
 * Queue callback deciding whether a bio may be merged at the back of
 * a request.
 *
 * Refuses (returns 0) when merging is switched off, when no saved
 * default callback exists, or when the request's own sector count is
 * already above the device's max_sectors or above merge_requests + 1
 * blocks. Otherwise the decision is left to the saved default callback.
 */
static int
nbd_back_merge_fn (request_queue_t * q, struct request *req,
		   struct bio * bio)
{
	int minor_nr = minor (req->rq_dev);
	struct nbd_device *lo = &nbd_dev[minor_nr >> NBD_SHIFT];

	/* merging disabled, or nothing to delegate to */
	if (!merge_requests || !ll_back_merge_fn)
		return 0;

	/* our own size limits come before the kernel's */
	if (req->nr_sectors > lo->max_sectors
	    || req->nr_sectors >
	       ((merge_requests + 1) << (lo->logblksize - 9)))
		return 0;

	return ll_back_merge_fn (q, req, bio);
}

// PTB - and now to play with the sysctl interface ...
// Handle returned by register_sysctl_table () in nbd_init (); needed
// again by nbd_cleanup () to unregister.
static struct ctl_table_header *nbd_table_header;
// the above was set by the register call of the root table

// Leaf entries: each one exposes a module-wide int variable, mode 0644,
// handled by proc_dointvec. The leading numbers skip 6 and 7; 6 is
// used by the "nbd" directory entry below.
// NOTE(review): writing through sysctl only changes the global; it does
// not call the per-device set_*() helpers that the nbdinfo write
// handler uses - confirm that is intended.
static ctl_table nbd_table[] = {
	{1, "rahead",
	 &rahead, sizeof (int), 0644, NULL, &proc_dointvec},
	{2, "plug",
	 &plug, sizeof (int), 0644, NULL, &proc_dointvec},
	{3, "sync_intvl",
	 &sync_intvl, sizeof (int), 0644, NULL, &proc_dointvec},
	{4, "merge_requests",
	 &merge_requests, sizeof (int), 0644, NULL, &proc_dointvec},
	{5, "md5sum",
	 &md5sum, sizeof (int), 0644, NULL, &proc_dointvec},
	{8, "md5_on_threshold",
	 &md5_on_threshold, sizeof (int), 0644, NULL, &proc_dointvec},
	{9, "md5_off_threshold",
	 &md5_off_threshold, sizeof (int), 0644, NULL, &proc_dointvec},
	{10, "speed_lim",
	 &speed_lim, sizeof (int), 0644, NULL, &proc_dointvec},
	{0}
};
// Directory entry "nbd" (mode 0555) holding the leaf table above.
static ctl_table nbd_dir_table[] = {
	{6, "nbd", NULL, 0, 0555, nbd_table},
	{0}
};
// Root entry "dev" (CTL_DEV) holding the "nbd" directory; this is the
// table handed to register_sysctl_table ().
static ctl_table nbd_root_table[] = {
	{CTL_DEV, "dev", NULL, 0, 0555, nbd_dir_table},
	{0}
};

#ifdef CONFIG_DEVFS_FS
// Handle of the top level "nd" devfs directory made in nbd_init ().
static devfs_handle_t devfs_handle;
// One handle per device: the subdirectory named after the device
// letter ("a", "b", ...) inside "nd". An entry stays NULL if its
// directory could not be made.
static devfs_handle_t devfs_handles[MAX_NBD];
#endif

/*
 * Module/driver initialisation.
 *
 * Registers the block major, sets up the default request queue with
 * our request function and size-limiting merge wrappers, initialises
 * every nbd_dev[] entry and the per-minor size tables, creates the
 * nbdinfo proc entry (read and write), optionally populates devfs, and
 * registers the sysctl table.
 *
 * Returns 0 on success, -EIO if the major could not be registered, or
 * -EINVAL if the proc entry could not be created (in which case the
 * queue and major registration are undone again).
 */
int __init
nbd_init (void)
{
	int i, j;
	int err = 0;

	NBD_INFO ("Network Block Device originally by pavel@elf.mj.gts.cz\n");
	NBD_INFO ("Network Block Device port to 2.0 by ptb@it.uc3m.es\n");
	NBD_INFO ("Network Block Device move networking to user space by "
		  "amarin@it.uc3m.es\n");
	NBD_INFO ("Enhanced Network Block Device " NBD_VERSION " by "
		  "ptb@it.uc3m.es\n");
	if (register_blkdev (major, "nbd", &nbd_blkops)) {
		NBD_ERROR ("Unable to register major number %d for NBD\n",
			   major);
		return -EIO;
	}
#ifdef MODULE
	NBD_INFO ("registered device at major %d\n", major);
#endif
	blksize_size[major] = nbd_blksizes;	/* blksize in B */
	blk_size[major] = nbd_sizes;	/* size in KB */

// PTB - set up kernel queue struct with default methods
	blk_init_queue (BLK_DEFAULT_QUEUE (major), do_nbd_request, &nbd_lock);

	(BLK_DEFAULT_QUEUE (major))->max_sectors
		= buf_sectors;	/* max per request */

// PTB - I think that put:
//     - q->plug_device_fn    = generic_plug_device    (static ll_rw_blk)
//     - q->plug_tq.routine   = generic_unplug_device  (static ll_rw_blk)
//     - q->back_merge_fn     = ll_back_merge_fn       (static ll_rw_blk)
//     - q->front_merge_fn    = ll_front_merge_fn      (static ll_rw_blk)
//     - q->merge_requests_fn = ll_merge_requests_fn   (static ll_rw_blk)
//     - q->request_fn        = do_nbd_request         (param)

// PTB - we have to do some more init magic in 2.4.*. This says that we
//     - take all stuff off the kernel queue before processing it, so in
//     - particular it's OK for kernel to do merges with the queue head.
	blk_queue_headactive (BLK_DEFAULT_QUEUE (major), 0);

// LA - moved the next #if higher;
//    - kernel 2.2.* doesn't know about plug_device_fn

	// PTB control merge attempts so we don't overflow our buffer
	ll_merge_requests_fn = (BLK_DEFAULT_QUEUE (major))->merge_requests_fn;
	ll_front_merge_fn = (BLK_DEFAULT_QUEUE (major))->front_merge_fn;
	ll_back_merge_fn = (BLK_DEFAULT_QUEUE (major))->back_merge_fn;

// JSA - Add this line because under >=2.4.1, merge optimizations are in flux
// PTB - however it's not this which does damage, I believe. Data: plugging
//     - simply has to be enabled in these kernels. Without it, requests just
//     - sit on the kernel queue and never come off and into our request_fn.
// PTB - commented the ifdef again after talks with Jens Axboe.
//     - Apparently plug_fn will disappear in 2.4.4 and merge functions are
//       the only way to control merges, so they MUST be included.

// PTB - The functions below just impose our own stricter size limit before
//     - calling the defaults if all seems OK sizewise.

	(BLK_DEFAULT_QUEUE (major))->merge_requests_fn = &nbd_merge_requests_fn;
	(BLK_DEFAULT_QUEUE (major))->front_merge_fn = &nbd_front_merge_fn;
	(BLK_DEFAULT_QUEUE (major))->back_merge_fn = &nbd_back_merge_fn;


	for (i = 0; i < MAX_NBD; i++) {
		struct nbd_device *lo = &nbd_dev[i];
		memset (lo, 0, sizeof (struct nbd_device));
		lo->magic = NBD_DEV_MAGIC;
		strncpy (lo->devnam, device_letter (i), 4);
		for (j = 0; j < NBD_MAXCONN; j++) {	/* PTB */
			struct nbd_slot *slot = &lo->slots[j];
			slot->lo = lo;
			slot->i = j;
			INIT_LIST_HEAD (&slot->queue);
		}
		lo->blksize = 1024;	/* PTB 132 */
		lo->logblksize = 10;	/* PTB */
		lo->bytesize = 0x7fffffff00000;	/* PTB 132 */
		lo->size = 0x7fffffff;	/* PTB (bytesizes >> 10) */
		lo->sectors = 0xfffffffe;	/* PTB sectors */
		lo->nbd = i;
		lo->req_timeo = NBD_REQ_TIMEO;	/* PTB default pulse intvl */
		lo->max_sectors = buf_sectors;
		// the first minor of device i is i * NBD_MAXCONN, as in
		// the size tables below, devfs and nbd_cleanup ()
		register_disk (NULL, mk_kdev (major, i * NBD_MAXCONN), 1,
			       &nbd_blkops, lo->bytesize >> 9);
		// speed struct inits
		lo->wspeed.getdistance = getwdistance;
		lo->rspeed.getdistance = getrdistance;
		lo->tspeed.getdistance = gettdistance;
		lo->wspeed.lo = lo;
		lo->rspeed.lo = lo;
		lo->tspeed.lo = lo;

		INIT_LIST_HEAD (&lo->queue);
		init_waitqueue_head (&lo->wq);
		INIT_LIST_HEAD (&lo->req.queuelist);
		init_waitqueue_head (&lo->req_wq);
		for (j = 0; j < NBD_MAXCONN; j++) {
			nbd_blksizes[i * NBD_MAXCONN + j] = lo->blksize;
			nbd_bytesizes[i * NBD_MAXCONN + j] = lo->bytesize;
			nbd_sizes[i * NBD_MAXCONN + j] = lo->size;
			nbd_max_sectors[i * NBD_MAXCONN + j] = lo->max_sectors;
		}
		// seed the per-device flags from the module-wide settings
		if (md5sum) {
			atomic_set_mask (NBD_MD5SUM, &lo->flags);
		}
		if (sync_intvl) {
			atomic_set_mask (NBD_SYNC, &lo->flags);
		}
		if (show_errs) {
			atomic_set_mask (NBD_SHOW_ERRS, &lo->flags);
		}
		if (buffer_writes) {
			atomic_set_mask (NBD_BUFFERWR, &lo->flags);
		}

		//nbd_partitions[i << NBD_SHIFT].nr_sects = lo->sectors; 

	}

	do {
		struct proc_dir_entry *res =
		 create_proc_read_entry ("nbdinfo", 0, NULL,
					 &nbd_read_proc, NULL);
		if (!res) {
			NBD_ALERT ("creation of proc entry failed\n");
			err = -EINVAL;
			goto fail_unregister;
		}
		// PTB additional write_proc entry in struct
		res->write_proc = &nbd_write_proc;
	} while (0);

#ifdef CONFIG_DEVFS_FS

	devfs_handle = devfs_mk_dir (NULL, "nd", NULL);
	if (devfs_handle) {
		for (i = 0; i < MAX_NBD; i++) {
			struct nbd_device *lo = &nbd_dev[i];
			// PTB make the directory "a" "b" etc.
			devfs_handles[i] =
			 devfs_mk_dir (devfs_handle, lo->devnam, NULL);
			// without the directory there is nowhere to put
			// the specials or the links
			if (!devfs_handles[i])
				continue;
			// PTB add the blk specials, called "0" "1" to NBD_MAXCONN-1
			devfs_register_series
			 (devfs_handles[i], "%u",
			  NBD_MAXCONN, DEVFS_FL_DEFAULT,
			  major, i * NBD_MAXCONN,
			  S_IFBLK | S_IRUSR | S_IWUSR,
			  &nbd_blkops, NULL);
			// PTB do the whole disk symlink ..
			devfs_mk_symlink (devfs_handles[i], "disk",
					  DEVFS_FL_DEFAULT, "0",
					  NULL, NULL);
			// PTB .. and the channel symlinks, one for each
			// of the specials "1" to NBD_MAXCONN-1
			for (j = 1; j < NBD_MAXCONN; j++) {
				char link[4];
				char name[8];
				sprintf (link, "%u", j);
				sprintf (name, "chan%u", j);
				devfs_mk_symlink (devfs_handles[i],
						  name,
						  DEVFS_FL_DEFAULT,
						  link, NULL, NULL);
			}
		}
	}
#endif		/* CONFIG_DEVFS_FS */

	// PTB - sysctl interface
	nbd_table_header = register_sysctl_table (nbd_root_table, 1);

	return err;

fail_unregister:
	// undo the queue and major registration, mirroring the tail of
	// nbd_cleanup (), so nothing keeps pointing at us after a failed
	// init
	blk_cleanup_queue (BLK_DEFAULT_QUEUE (major));
	blk_size[major] = NULL;
	blksize_size[major] = NULL;
	if (unregister_blkdev (major, "nbd") != 0) {
		NBD_ALERT ("unregistering major %d after failed init failed\n",
			   major);
	}
	return err;
}

/*
 * Module/driver teardown: the reverse of nbd_init ().
 *
 * Destroys the buffers of every initialised device, unregisters the
 * sysctl table, the devfs entries and the nbdinfo proc entry, disables
 * each device and frees its blockmap, then releases the request queue,
 * the per-major size tables and the block major itself.
 */
void __exit
nbd_cleanup (void)
{
	int i;

	for (i = 0; i < MAX_NBD; i++) {

		struct nbd_device *lo = &nbd_dev[i];
		int j;

		if (!(atomic_read (&lo->flags) & NBD_INITIALISED))
			continue;

		// NOTE(review): this pass and the next both call
		// destroy_buffers () although this one announces
		// "invalidating" - confirm whether another call was meant
		NBD_INFO ("invalidating buffers on device nd%s%d-%d\n",
			  lo->devnam, 0, NBD_MAXCONN);

		for (j = 0; j < NBD_MAXCONN; j++) {
			int minor = i * NBD_MAXCONN + j;
			destroy_buffers (mk_kdev (major, minor));
		}

		NBD_INFO ("destroying buffers on device nd%s%d-%d\n",
			  lo->devnam, 0, NBD_MAXCONN);

		for (j = 0; j < NBD_MAXCONN; j++) {
			int minor = i * NBD_MAXCONN + j;
			destroy_buffers (mk_kdev (major, minor));
		}
	}

	// nbd_init () does not check the registration result, so the
	// header may be NULL here
	if (nbd_table_header) {
		unregister_sysctl_table (nbd_table_header);
		nbd_table_header = NULL;
	}

#ifdef CONFIG_DEVFS_FS
	if (devfs_handle) {
		for (i = 0; i < MAX_NBD; i++) {
			int j;
			if (!devfs_handles[i])
				continue;
			for (j = 0; j < NBD_MAXCONN; j++) {
				devfs_handle_t x;
				// decimal name of the special, "0" .. "19"
				char s[3];
				s[0] = '0' + j;
				s[1] = 0;
				if (j >= 10) {
					s[0] = '1';
					s[1] = '0' + (j - 10);
					s[2] = 0;
				}
				x = devfs_find_handle (devfs_handles[i],
						       s, major,
						       i * NBD_MAXCONN + j,
						       DEVFS_SPECIAL_BLK,
						       0);
				if (x)
					devfs_unregister (x);
			}
			// PTB should we also search for links? No - they're not inodes
			devfs_unregister (devfs_handles[i]);
		}
		devfs_unregister (devfs_handle);
	}
#endif

	remove_proc_entry ("nbdinfo", &proc_root);

	for (i = 0; i < MAX_NBD; i++) {
		struct nbd_device *lo = &nbd_dev[i];
		atomic_clear_mask (NBD_ENABLED, &lo->flags);
		if (lo->blockmap) {
			kfree (lo->blockmap);
			lo->blockmap = NULL;
		}
		nbd_sync_sync (lo);
		del_timer (&lo->run_queue);
	}

	blk_cleanup_queue (BLK_DEFAULT_QUEUE (major));
	// both tables were pointed at our own arrays by nbd_init (); drop
	// both references so none outlives the module
	blk_size[major] = NULL;
	blksize_size[major] = NULL;

	if (unregister_blkdev (major, "nbd") != 0) {
		NBD_ALERT ("cleanup_module failed\n");
	}
	else {
		NBD_INFO ("module cleaned up.\n");
	}
}

// Hook nbd_init () and nbd_cleanup () up as the driver's entry and
// exit points.
module_init (nbd_init);
module_exit (nbd_cleanup);


/* Compile line:

 *  gcc -O2 -D__KERNEL__ -DMODULE -xc -c nbd.c -o nbd.o
 *
 *  (possibly with -DMODVERSIONS also). PTB
 */
