/*
 * Copyright 1999-2006 University of Chicago
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * pr_shm.c	- Protocol module for shared memory.
 */

/* CVS $Header$ keyword expansion identifying the revision of this file. */
static char *rcsid = "$Header: /home/globdev/CVS/globus-packages/nexus/source/nexus/pr_shm.c,v 1.52 2006/01/19 05:57:06 mlink Exp $";

#include "internal.h"


#if defined (HAVE_SHM_PROTO)

#include <netdb.h> 
#include <limits.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

/*-----------------------------------------------------------------------*/

/*
 * Include protocol and shared memory specific setup for type
 * of shared memory used.
 */
#define NEXUS_SHMEM_INCLUDE "pr_shm_ipc.h"
#include NEXUS_SHMEM_INCLUDE


/*
 * Any build that is not BUILD_LITE uses the thread safe flavour of this
 * protocol module; define the selector unless it has been set already.
 */
#if !defined(BUILD_LITE) && !defined(SHMEM_PROTO_IS_THREAD_SAFE)
#define SHMEM_PROTO_IS_THREAD_SAFE
#endif

/*
 * Callback handle owned by this module, initialised to -1.
 * NOTE(review): presumably -1 means "no callback registered" -- confirm
 * against the code that registers and unregisters the callback.
 */
static globus_callback_handle_t       globus_l_nexus_shm_callback_handle = -1;

/*
 * SHM_USE_VERSIONING 
 * 
 * Temporary compatibility enablement for 0.9.1 testing. 
 *
 * When defined, the mi_proto produced by shm_get_my_mi_proto() starts with
 * two version bytes (mi_proto version, then wire protocol version), and
 * shm_construct_from_mi_proto() rejects an mi_proto whose version bytes
 * differ from the values below.
 */ 
#define  SHM_USE_VERSIONING

#ifdef SHM_USE_VERSIONING
/*
 * GLOBUS_L_NEXUS_SHM_PROTOCOL_VERSION
 *
 * The version of this protocol module's wire protocol.  If you change
 * this module's wire protocol, bump this version number.
 */
#define GLOBUS_L_NEXUS_SHM_PROTOCOL_VERSION 0

/*
 * GLOBUS_L_NEXUS_SHM_MI_PROTO_VERSION
 *
 * The version of this protocol module's mi_proto.  If you change
 * the contents of this module's mi_proto, bump this version number.
 * The value is offset by GLOBUS_I_NEXUS_BUFFER_VERSION, so a change of
 * that constant changes this version as well.
 */
#define GLOBUS_L_NEXUS_SHM_MI_PROTO_VERSION (0 + GLOBUS_I_NEXUS_BUFFER_VERSION)
#endif


/*
 * Hash table size for the proto table (number of entries in proto_table[])
 */
#define PROTO_TABLE_SIZE 1021

/*
 * Thread is handler?
 *
 * Thread specific storage is used to keep track if the current
 * thread is a handler thread or not.
 */
#ifdef BUILD_LITE
/*
 * Single threaded build: there is no separate handler thread, so every
 * caller is reported as the handler and the in-progress flag is not kept.
 */
#define _nx_set_i_am_shm_handler_thread() /* nop */
#define _nx_i_am_shm_handler_thread(Result) *(Result) = NEXUS_TRUE
#define set_handle_in_progress_true()  /* nop */
#define set_handle_in_progress_false() /* nop */
#else
static globus_thread_key_t i_am_shm_handler_thread_key;

/* Tag the calling thread as the handler by storing a non-NULL key value. */
#define _nx_set_i_am_shm_handler_thread() \
    nexus_thread_setspecific(i_am_shm_handler_thread_key, (void *) 1)

/*
 * Store NEXUS_TRUE in *Result if the calling thread was tagged by
 * _nx_set_i_am_shm_handler_thread(), NEXUS_FALSE otherwise.
 *
 * The thread specific value is a pointer; it is tested against NULL
 * rather than cast to globus_bool_t, because casting a pointer to a
 * narrower integer type truncates it on LP64 platforms.
 */
#define _nx_i_am_shm_handler_thread(Result) \
    *(Result) = (((nexus_thread_getspecific(i_am_shm_handler_thread_key)) \
		  != NULL) ? NEXUS_TRUE : NEXUS_FALSE)

#define set_handle_in_progress_true()  handle_in_progress = NEXUS_TRUE
#define set_handle_in_progress_false() handle_in_progress = NEXUS_FALSE
#endif /* BUILD_LITE */


/*
 * Only one thread is allowed to be in the shm code (and thus
 * mucking with data structures) at a time.
 */
static nexus_mutex_t		shm_mutex;	/* taken by shm_enter(), released by shm_exit() */
static globus_bool_t		shm_done;
static globus_bool_t		globus_l_shm_wakeup;	/* set to GLOBUS_TRUE by globus_l_shm_handler_wakeup() */
static globus_bool_t		handle_in_progress;	/* written by set_handle_in_progress_*() in threaded builds */
static globus_bool_t		using_handler_thread;
static globus_bool_t		handler_thread_done;	/* set by shm_handler_thread() just before it returns */
static nexus_mutex_t		handler_thread_done_mutex;	/* guards handler_thread_done */
static nexus_cond_t		handler_thread_done_cond;	/* signalled when handler_thread_done is set */


/*
 * shm_enter() / shm_exit()
 *
 * Acquire and release shm_mutex.  Both are empty in BUILD_LITE builds.
 *
 * NOTE(review): in threaded builds the expansions already end in ';', so
 * "shm_enter();" yields an extra empty statement.  Do not use these as
 * the unbraced body of an if that has an else.
 */
#ifdef BUILD_LITE
#define shm_enter()
#define shm_exit()  
#else
#define shm_enter() nexus_mutex_lock(&shm_mutex);
#define shm_exit()  nexus_mutex_unlock(&shm_mutex);
#endif

/*
 * shm_fatal
 *
 * Releases the module lock, then calls nexus_fatal with the arguments
 * that follow the macro name.
 *
 * NOTE(review): this expands to TWO statements.  Used as the unbraced
 * body of an if/else/loop, only shm_exit() is conditional and
 * nexus_fatal runs unconditionally -- always wrap uses in braces.
 */
#define shm_fatal  shm_exit(); nexus_fatal

/*
 * The mi_proto for this protocol module carries a host name
 * string with it.  This allows one node to completely distinguish
 * itself from another.
 *
 * _nx_hostname_string_length is used as the string length without the
 * terminating NUL: the mi_proto code copies length + 1 bytes.
 */
static char 	_nx_hostname_string[MAXHOSTNAMELEN];
static int	_nx_hostname_string_length;
    
/*
 * Other useful defines
 *
 * CLOSE_HANDLER_FLAG is an expression, so it is parenthesised: without
 * the parentheses "2 * CLOSE_HANDLER_FLAG" would expand to
 * "2 * NEXUS_DC_FORMAT_LAST + 2".
 */
#define BLOCKING			NEXUS_TRUE
#define NON_BLOCKING			NEXUS_FALSE
#define CLOSE_HANDLER_FLAG		(NEXUS_DC_FORMAT_LAST + 2)


/*
 * Some forward typedef declarations...
 */
typedef struct _shm_buffer_t	shm_buffer_t;
typedef struct _shm_proto_t	shm_proto_t;


/*
 * Some useful queue macros
 *
 * The queues are singly linked through the items' "next" member and are
 * described by a head pointer and a tail pointer.
 */

/*
 * Enqueue(Qhead, Qtail, Item): append Item at the tail.
 *
 * Item->next is not modified here.
 * NOTE(review): callers presumably set Item->next to NULL before
 * enqueueing, otherwise the list is not terminated -- confirm.
 */
#define Enqueue(Qhead, Qtail, Item) \
{ \
    if (Qhead) \
    { \
	(Qtail)->next = (Item); \
	(Qtail) = (Item); \
    } \
    else \
    { \
	(Qhead) = (Qtail) = (Item); \
    } \
}

/*
 * Dequeue(Qhead, Qtail, Item): remove the head item and store it in Item.
 *
 * Qhead is dereferenced unconditionally, so the queue must not be empty
 * (check with QueueNotEmpty() first).  Qtail is accepted but not updated;
 * when the last item is removed it keeps pointing at that item.
 */
#define Dequeue(Qhead, Qtail, Item) \
{ \
    (Item) = (Qhead); \
    (Qhead) = (Qhead)->next; \
}

/* Non-zero (the head pointer itself) when the queue holds an item. */
#define QueueNotEmpty(Qhead)	(Qhead)


/******************************************************************
 * shared memory specific setup 		 
 ******************************************************************/

/*
 * SHM_NOTIFY_WHEN_SHM_UNAVAILABLE 
 *
 * Temporary testing diagnostic messaging when system
 * shared memory has been expended and cannot be used.
 * Explicitly left undefined here, which switches the diagnostics off.
 */ 
#undef  SHM_NOTIFY_WHEN_SHM_UNAVAILABLE


/*
 * Large messages will be sent in chunks to allow both
 * sender and receiver to concurrently participate.
 * The following three #defines set the base values to
 * control these large sends.
 *
 * SHM_PAGES_PER_MSG_MTU
 * 
 * Each message chunk is transferred into and out of 
 * shared memory via memcpy.
 * The size of each message chunk copy is set by this
 * value as a multiple of the host platform's pagesize.
 *
 * SHM_MTU_CHUNKS_PER_MSG
 *
 * As there must be some limit to the amount of shared
 * memory we are willing to dedicate to a single message,
 * we limit the number of chunks per message with this value.
 * Large sends reduce to a bounded buffer problem, with this
 * value setting a limit on the size of the buffer.
 * We seek to make this value large enough to provide for a
 * little play in the timing between sender and receiver, 
 * while keeping it small enough to fit within L2 cache.
 *
 * SHM_BLOCKED_CUTOFF
 *
 * The cooperative nature of large sends raises the possibility
 * that either sender or receiver isn't in a position to participate
 * when the other is.  We'll limit the amount of time that a party
 * will stall waiting for the other by considering the transfer
 * blocked.  Should either sender or receiver stall beyond this 
 * blocked cutoff, we'll preempt the action and pick up where we
 * left off at a later time.
 */

/*
 * Both architecture symbols are tested with "defined", so a symbol that
 * is defined without a value selects its branch instead of producing an
 * invalid "#elif" expression.
 */
#if defined(TARGET_ARCH_AIX)
#define SHM_PAGES_PER_MSG_MTU   1
#define SHM_MTU_CHUNKS_PER_MSG  16

#elif defined(TARGET_ARCH_IRIX)
#define SHM_PAGES_PER_MSG_MTU   2
#define SHM_MTU_CHUNKS_PER_MSG  4

#else
#define SHM_PAGES_PER_MSG_MTU    2
#define SHM_MTU_CHUNKS_PER_MSG   8
#endif

#define SHM_BLOCKED_CUTOFF 50000000  


/* 
 * A shared memory segment is preformatted by its creator into a fixed
 * number of receive cells.  The first cell is large enough to handle a
 * message of any size; it is followed by groups of cells of increasing
 * size.  Cells are handed out on a first fit basis.
 *
 * CELL_SIZES_COUNT
 *
 * Number of distinct cell sizes that are preallocated.
 *
 * SHM_CELL_COUNT
 *
 * Total number of preallocated cells: the sum of cell_counts[] plus one
 * for the initial extra large message cell.
 *
 * cell_sizes
 *
 * The cell size of each group, smallest first.  The last three entries
 * are host dependent and start out as 0; they are filled in during
 * shm_init with 1 page, 2 pages, and the size required for large
 * messages.
 *
 * cell_counts
 *
 * How many cells of the matching cell_sizes[] entry are preallocated.
 *
 * size_start
 *
 * Index of the first cell of each group, used as the start position for
 * first fit allocation.  It must stay in step with cell_counts[]: each
 * entry equals the previous entry plus the previous group's count, and
 * the first entry is 1 because cell 0 is the extra large cell.
 */
#define CELL_SIZES_COUNT   8
#define SHM_CELL_COUNT     101

static int cell_sizes[CELL_SIZES_COUNT] =
{
    128, 256, 512, 1024, 2048,  /* fixed sizes                        */
    0, 0, 0                     /* host dependent, set during init    */
};

static int cell_counts[CELL_SIZES_COUNT] =
{
    20, 20, 20, 10, 10,
    10, 5, 5
};

static int size_start[CELL_SIZES_COUNT] =
{
    1, 21, 41, 61, 71,
    81, 91, 96
};

/*
 * SHM_POLL_TRIES
 *
 * Shm polling involves a handful of memory references to determine
 * if a new message has arrived.  Since there can be a large difference
 * in time to poll between communication modules, shm may not be 
 * requested to poll for some time.  To make more productive use of the
 * time spent getting to the shm poll, we'll allow a handful of tries
 * before a non blocking shm poll determines that no new incoming message
 * has arrived.
 */
/*#define SHM_POLL_TRIES 10*/


/* 
 * SHM_NO_LOCK_SAFE_CUTOFF
 *
 * Shared memory communication has each participant create its
 * own segment to receive messages into.  Senders lock
 * this segment with a semaphore to allocate a cell in the 
 * receiver's segment.  When there is only one sender, this 
 * lock is not necessary.  While the count of senders is increased
 * when a new communicator attaches, leaving plenty of time for 
 * any previous lone sender to become aware of this, prior to the
 * second sender initiating an allocation request, we'll require
 * that even lone senders lock the receiver's segment during some
 * number of initial sends.  This should give us a margin of safety
 * while communication patterns settle down.  Beyond this lock cutoff,
 * segments with only one sender attached may be allocated from without
 * a full segment lock, so long as there has never been more than one
 * sender.
 *
 * The cutoff is only defined for BUILD_LITE builds.
 */
#ifdef  BUILD_LITE
#define SHM_NO_LOCK_SAFE_CUTOFF 50
#endif


/*-----------------------------------------------------------------------*/


/*
 * SHMEM_LOCK_PROBE_LIMIT
 *
 * Where process shared mutexes are not available, shared memory is
 * locked with semaphores.  A full semaphore lock blocks every thread of
 * the process, so a flag (the "soft lock") is tried first to keep full
 * process blocking to a minimum.  To avoid starvation the full semaphore
 * lock has to be requested eventually; this limit is the number of times
 * a process checks the soft lock value before it queues up for the full
 * semaphore lock.
 */
#define SHMEM_LOCK_PROBE_LIMIT  (500000)


/*
 * Lock values and operations
 *
 * The *_VALUE constants are the two lock states; the *_OP constants are
 * the amounts applied to move between them (unlocked + lock op == locked).
 */
#define SHMEM_LOCKED_VALUE    (0)
#define SHMEM_UNLOCKED_VALUE  (1)
#define SHMEM_LOCK_OP         (-1)
#define SHMEM_UNLOCK_OP       (1)


/*
 *  Define semaphore control request union for systems without it. 
 *
 *  The array member is spelled "unsigned short *", as in the POSIX
 *  description of semctl(); "ushort" is a non-standard typedef that
 *  strictly conforming compilation modes do not provide.
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
  int val;                 /* value for SETVAL                */
  struct semid_ds *buf;    /* buffer for IPC_STAT and IPC_SET */
  unsigned short *array;   /* array for GETALL and SETALL     */
};
#endif


/*
 * Pointer addresses differ between processes, so pointers cannot be
 * stored within the shared memory segment.  Offsets from the top of the
 * segment are stored in their place.
 */  
typedef long shm_offset_t;


/*
 * Miscellaneous shared memory constants: result codes, the "no offset"
 * marker, and the permission flags for creating and connecting.
 */
#define SHMEM_SUCCESS       (0)
#define SHMEM_FAILURE       (-1)
#define NULL_OFFSET         (-1)
#define SHMEM_CONNECT_PERMS (0600)
#define SHMEM_CREATE_PERMS  (IPC_CREAT | IPC_EXCL | 0600)


/*-----------------------------------------------------------------------*/

/*
 * Forward declare shared memory control record types.
 *
 * The *_t names are the ones used throughout this file; the structures
 * themselves are defined immediately below.
 */
typedef struct shm_segment_struct_t  shm_segment_t;
typedef struct shm_cell_struct_t     shm_cell_t;
typedef struct shm_tcb_struct_t      shm_tcb_t;
typedef struct shm_message_struct_t  shm_message_t;


/*
 * Segment control record.
 * 
 * Controls locking, message ordering, and shm cell management info.
 *
 * status[] and offset[] each hold one entry per preallocated cell
 * (SHM_CELL_COUNT entries).
 * NOTE(review): presumably both are indexed by cell number -- confirm
 * against shm_prealloc()/shm_malloc().
 */   
typedef struct shm_segment_struct_t
{
  nexus_mutex_t  thread_lock;           /* lock threads of same process    */
  volatile unsigned long  msg_count;    /* number of send requests to this */
  volatile int   soft_lock;             /* 1st level lock before semaphore */
  volatile int   senders;               /* total attaches to this segment  */
  int            alloc_count;           /* total cells allocated           */
  volatile int   status[SHM_CELL_COUNT];/* unreferenced, sending, recving..*/
  int            offset[SHM_CELL_COUNT];/* cell distance from top of seg   */ 

} shm_segment_struct_t;



/*
 * Cell header record.
 * 
 * In addition to cell and message id's, contains control info
 * for preemptive receives.
 * The actual data starts at cell_data for small messages.
 * This location is used for message chunk synchronization for
 * large messages.
 * Large message data will be aligned on the next page boundary,
 * indicated by the location stored in data_loc.
 *
 * NOTE(review): recv_buf, recv_tgt and recv_src are raw pointers even
 * though the record lives in a shared segment; per their comments they
 * describe the receiver's side of a copy, so presumably only the
 * receiving process reads and writes them -- confirm.
 */
typedef struct shm_cell_struct_t                
{
  volatile unsigned long msg_number; /* sequential order send requested   */
  shm_offset_t  data_loc;            /* large message data location       */
  volatile int  data_size;           /* amount of data to send/recv       */
  int           cell_number;         /* sequential cell # within this seg */ 

  nexus_byte_t *recv_buf;            /* receive buffer addr in local heap */
  nexus_byte_t *recv_tgt;            /* current copy to location in heap  */
  nexus_byte_t *recv_src;            /* current copy from location in shm */
  int           recv_mtu;            /* current message chunk processing  */
  int           recv_left;           /* number of bytes left to receive   */

  nexus_byte_t  cell_data;           /* small msg data, large msg control */

} shm_cell_struct_t;


/*
 * Transmission control block.
 * 
 * Contains control info for resuming preemptive sends.
 * Senders who stall out beyond the blocked cutoff will save the
 * state of their send in a list of these records for resumption
 * at a later time.
 *
 * Records are chained through "next"; the list itself is kept in
 * blocked_send_q_head / blocked_send_q_tail.
 */
typedef struct shm_tcb_struct_t
{
  struct globus_nexus_buffer_s *buffer;    /* rsr buffer to send      */
  shm_cell_t   *send_cell;                 /* shm cell sending in     */
  nexus_byte_t *send_data;                 /* present send from addr  */
  nexus_byte_t *send_loc;                  /* present send to addr    */ 
  shm_tcb_t    *next;                      /* next tcb record in list */
  long          send_size;                 /* bytes left to send      */
  int           cur_send_mtu;              /* next msg chunk to send  */
} shm_tcb_struct_t;


/*
 * Message control record.
 *
 * Messages that can't start sending due to space unavailability,
 * and messages received out of order that can't yet be dispatched
 * must be kept for later processing.
 *
 * The same record type is used for both lists (send_list_* and
 * received_list_*); records are doubly linked through next/prior.
 */

typedef struct shm_message_struct_t
{
   struct globus_nexus_buffer_s  *buffer;    /* rsr send/dispatch buffer */ 
   shm_message_t                 *next;      /* next message in list     */
   shm_message_t                 *prior;     /* prior message in list    */
   shm_proto_t                   *proto;     /* proto of sender/receiver */
   nexus_byte_t                  *send_data; /* loc of data in local heap*/ 
   long                           send_size; /* amt of data in local heap*/
   unsigned long                  msg_number;/* message order control val*/

} shm_message_struct_t;


/*-----------------------------------------------------------------------*/

/*
 * Shm-Cell reference values 
 *
 * States recorded per cell in the segment's status[] array.
 */
#define SHMEM_SENDING      3
#define SHMEM_RECEIVING    2
#define SHMEM_RECV_WAIT    1
#define SHMEM_UNREFERENCED 0

/*-----------------------------------------------------------------------*/

/*
 * Shared memory address generation from offsets based
 * on top of segment.
 *
 * Each macro stores in Addr the address that lies Offset bytes past
 * Segment, or NULL when Offset is NULL_OFFSET.  The two differ only in
 * the pointer type assigned to Addr.
 *
 * NOTE(review): both expand to a bare if/else statement that already
 * ends in ';'.  Offset and Addr are evaluated more than once, so do not
 * pass expressions with side effects, and do not use the macros as the
 * unbraced body of an if that has its own else.
 */
#define SHMEM_SEGMENT_ADDR(Segment, Offset, Addr) \
  if ((Offset) == NULL_OFFSET) \
    (Addr) = (shm_segment_t *) NULL; \
  else \
    (Addr) = (shm_segment_t *) (((char *) (Segment)) + (Offset));


#define SHMEM_CELL_ADDR(Segment, Offset, Addr) \
  if ((Offset) == NULL_OFFSET) \
    (Addr) = (shm_cell_t *) NULL; \
  else \
    (Addr) = (shm_cell_t *) (((char *) (Segment)) + (Offset));

/*-----------------------------------------------------------------------*/

/*
 *  Thread level shared memory lock operations
 *
 *  These operate on the thread_lock member of the segment passed in.
 *  In BUILD_LITE builds they all expand to nothing.
 *
 *  The Segment argument is parenthesised in every expansion so that an
 *  argument such as "*seg_ptr_ptr" or "cond ? a : b" binds to the whole
 *  expression before "->thread_lock" is applied.
 */

#ifndef BUILD_LITE

#define SHMEM_THREAD_LOCK_INIT(Segment) \
    nexus_mutex_init(&(Segment)->thread_lock, \
                       (nexus_mutexattr_t *) NULL) 

#define SHMEM_THREAD_SET_LOCKED(Segment) \
   nexus_mutex_lock (&(Segment)->thread_lock)

#define SHMEM_THREAD_SET_UNLOCKED(Segment) \
   nexus_mutex_unlock (&(Segment)->thread_lock)

#define SHMEM_THREAD_SCHED_YIELD()  nexus_thread_yield()

#else
#define SHMEM_THREAD_LOCK_INIT(Segment)
#define SHMEM_THREAD_SET_LOCKED(Segment)
#define SHMEM_THREAD_SET_UNLOCKED(Segment)
#define SHMEM_THREAD_SCHED_YIELD()
#endif

/*-----------------------------------------------------------------------*/


/******************************************************************
 * Shared memory protocol	       
 ******************************************************************/

/*
 * shm_proto_t
 *
 * This is an overload of nexus_proto_t.  It adds the
 * shm specific information to that structure.
 *
 * Pointers to this structure are cast to and from nexus_proto_t *
 * (see the reference count functions below).
 * NOTE(review): that requires the leading members, up to and including
 * reserved_header_size, to match nexus_proto_t exactly -- confirm
 * against its definition before reordering anything here.
 */
struct _shm_proto_t
{
    nexus_proto_type_t		type;	/* NEXUS_PROTO_TYPE_SHMEM */
    nexus_proto_funcs_t *	funcs;
    int				version;
    unsigned long		direct_custom_min_size;
    unsigned long		direct_custom_max_size;
    unsigned long		direct_pointer_min_size;
    unsigned long		direct_pointer_max_size;
    globus_bool_t		can_use_iovec;
    unsigned long		reserved_header_size;
    /* shm specific members start here */
    shm_destination_t		destination;
    int				reference_count;
    shm_handle_t                dest_handle;
    shm_segment_t             * dest_segment;
    int                         dest_lock;
};


/******************************************************************
 * Shared memory handling
 ******************************************************************/


/* Proto and segment belonging to this process. */
static shm_proto_t   *my_proto      = NULL;
static shm_segment_t *my_segment    = NULL;     
static int           *my_cell_offsets;
static volatile int  *my_cell_status;

static shm_cell_t    *my_first_cell;
static volatile int  *my_first_status;

static int            shm_segment_size;
static int            nexus_shm_alignment;

static int            nexus_shm_msg_ctl_len;
static int            nexus_shm_pagesize;
static int            nexus_shm_mtu;
static int            nexus_shm_seg_msg_len;
static int            nexus_shm_preemptive_send_cutoff;

static int            smallest_cell_size;
static int            next_largest_cell_size;
static int            largest_cell_start;

/* Semaphore operation records; NOTE(review): presumably for the full lock and unlock requests -- confirm. */
static struct sembuf  my_full_lock_req;  
static struct sembuf  my_full_unlock_req;  



/* the next message number we can receive and dispatch */ 
static unsigned long    next_msg_to_recv; 

/* messages received out of order that can't be dispatched */
static shm_message_t *  received_list_head;
static shm_message_t *  received_list_tail;


/* messages that haven't been sent yet due to shm space */
static shm_message_t *	send_list_head;
static shm_message_t *	send_list_tail;


/* large messages whose send was preempted */
static shm_tcb_t     *  blocked_send_q_head;
static shm_tcb_t     *  blocked_send_q_tail;

/*-----------------------------------------------------------------------*/

/*
 * Protocol table stuff
 *
 * The protocol table is hashed on the destination. The table itself is an
 * array of header structures pointing to a linked list of buckets.
 *
 * This table is used to avoid creating multiple shm_proto_t
 * objects to the same context.  Multiple global pointers to the same
 * context share a shm_proto_t.
 */
typedef struct _proto_table_entry_t
{
    shm_proto_t *proto;                   /* proto stored in this bucket */
    struct _proto_table_entry_t *next;    /* next bucket in the chain    */
} proto_table_entry_t;

/*
 * The table is private to this file, like every other file-scope object
 * here; giving it internal linkage keeps the generic name "proto_table"
 * out of the global namespace.
 */
static struct _proto_table_entry_t	proto_table[PROTO_TABLE_SIZE];

static void			proto_table_init(void);
static void			proto_table_insert(shm_proto_t *proto);
static shm_proto_t *		proto_table_lookup(shm_destination_t *dest);

/*-----------------------------------------------------------------------*/

/*
 * Various forward declarations of procedures
 *
 * Every function is declared with a full prototype; a function that
 * takes no arguments is declared "(void)" so the compiler rejects calls
 * that pass arguments.
 */

/* entries of the nexus_proto_funcs_t table */
static void		  shm_init(globus_bool_t * add_to_my_mi_proto);
static void		  shm_shutdown(void);
static void	          shm_poll(
			      void *                               user_args);

static int		  shm_send_rsr(struct globus_nexus_buffer_s *buffer);
static void               shm_send_pending(void);

static globus_bool_t
shm_send_rsr_outstanding(globus_nexus_proto_t *nproto);

static void               shm_increment_reference_count(nexus_proto_t *nproto);
static globus_bool_t	  shm_decrement_reference_count(nexus_proto_t *nproto);
static int		  shm_get_my_mi_proto(nexus_byte_t **array,
	  			              int *size,
					      void *proto_info,
					      nexus_endpoint_t *endpoint);

static globus_bool_t	  shm_construct_from_mi_proto(nexus_proto_t **proto,
						   nexus_mi_proto_t *mi_proto,
						   nexus_byte_t *array,
						   int size);
static int		  shm_direct_info_size(void);

static shm_proto_t *	  construct_proto(shm_destination_t destination);


/* receive side dispatch */
static void               shm_dispatch_receive(nexus_byte_t * receive_buffer,
                                               unsigned long           receive_buffer_size,
                                               unsigned long           msg_number);

static void               shm_dispatch_pending_receive(void);

/* send side */
static void               shm_send(struct globus_nexus_buffer_s * buffer,
                                   shm_cell_t                   * send_cell,
                                   shm_proto_t                  * proto, 
                                   nexus_byte_t                 * send_data, 
                                   int                            send_size);

static void  shm_send_nonpreemptive(shm_cell_t   *send_cell,
                                    nexus_byte_t *send_data,
                                    int           send_size,
                                    nexus_byte_t *send_loc );
static void  shm_send_preemptive(shm_tcb_t *send_tcb);

static globus_bool_t shm_receive(void);
static globus_bool_t shm_receive_message(shm_cell_t *recv_cell);

/* segment connect / disconnect */
static globus_bool_t shm_local_node_connect(void);
static void          shm_local_node_disconnect(void);
static globus_bool_t shm_remote_node_connect(shm_proto_t * proto);
static void          shm_remote_node_disconnect(shm_proto_t * proto);

/* cell allocation */
static shm_cell_t * 
shm_malloc(shm_proto_t * proto, int req_amount, unsigned long * msg_number); 
static void 
shm_prealloc(int size, int count, int *cell_num, shm_offset_t *free_stg_loc);


static void          shm_local_node_setup(void);
static globus_bool_t shm_remote_node_setup(shm_proto_t * proto);

/* segment locking and alignment */
static void shm_set_unlocked(shm_proto_t * proto);
static void shm_set_locked(shm_proto_t * proto);
static void shm_calc_alignment(void);

static void
globus_l_shm_handler_wakeup(
    void *                              user_args);

#endif /* HAVE_SHM_PROTO */

/*
 * build always
 *
 * The declarations below sit outside the HAVE_SHM_PROTO guard: the
 * functions they name appear in both variants of the shm_proto_funcs
 * table.
 */
#define GLOBUS_L_SHM_PROTO_COUNT    1	/* value returned by shm_proto_count() */
static nexus_proto_type_t shm_proto_type(void);
static globus_bool_t      shm_startpoint_proto_match(
					   globus_nexus_mi_proto_t *  sp0,
					   int                        offset0,
					   globus_byte_t *            subarray0,
				           int                        sub_length0,
					   globus_nexus_mi_proto_t *  sp1,
					   int                        offset1,
					   globus_byte_t *            subarray1,
					   int                        sub_length1);

static int                shm_proto_count(void);
/*-----------------------------------------------------------------------*/

/*  ************************ nexus interface *************************   */

/*-----------------------------------------------------------------------*/

#if defined(HAVE_SHM_PROTO)

/*
 * Function table for this protocol module when shared memory support is
 * compiled in.  _nx_pr_shm_info() returns its address.
 *
 * The initialiser is positional, so the entries have to follow the
 * member order of nexus_proto_funcs_t.  The two BUILD_RESOURCE entries
 * are separated by a comma; without it the initialiser does not compile
 * when BUILD_RESOURCE is defined.
 */
static nexus_proto_funcs_t shm_proto_funcs =
{
    shm_proto_type,
    shm_init,
    shm_shutdown,
    shm_increment_reference_count,
    shm_decrement_reference_count,
    shm_get_my_mi_proto,
    shm_construct_from_mi_proto,
    NULL /* shm_destroy_my_mi_proto */,
    NULL /* shm_test_proto */,
    shm_send_rsr,
    shm_send_rsr_outstanding,
    shm_direct_info_size,
    NULL /* shm_direct_get */,
    shm_startpoint_proto_match,
    shm_proto_count,
#ifdef BUILD_RESOURCE
    NULL /* shm_get_resource_name_sp */,
    NULL /* shm_get_resource_name_ep */
#endif /* BUILD_RESOURCE */
};

#else

/*
 * Function table for this protocol module when shared memory support is
 * NOT compiled in: only the always-built entries are filled in, every
 * other slot is GLOBUS_NULL.
 *
 * The initialiser is positional, so the entries have to follow the
 * member order of nexus_proto_funcs_t.  The two BUILD_RESOURCE entries
 * are separated by a comma; without it the initialiser does not compile
 * when BUILD_RESOURCE is defined.
 */
static nexus_proto_funcs_t shm_proto_funcs =
{
    shm_proto_type,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    GLOBUS_NULL,
    shm_startpoint_proto_match,
    shm_proto_count,
#ifdef BUILD_RESOURCE
    NULL /* shm_get_resource_name_sp */,
    NULL /* shm_get_resource_name_ep */
#endif /* BUILD_RESOURCE */
};

#endif /* HAVE_SHM_PROTO */

/* build always */

/*
 * shm_startpoint_proto_match()
 *
 * Unpack the mi_proto header of both startpoints and compare the
 * hostname strings found there.
 *
 * Returns GLOBUS_TRUE when the two hostnames are equal, GLOBUS_FALSE
 * otherwise.  The offset, subarray and sub_length arguments are not
 * examined, and neither are the other header fields that get unpacked.
 */
static globus_bool_t
shm_startpoint_proto_match(globus_nexus_mi_proto_t *   mi_proto0,
			   int                         offset0,
                           globus_byte_t *             subarray0,
                           int                         sub_length0,
                           globus_nexus_mi_proto_t *   mi_proto1,
			   int                         offset1,
                           globus_byte_t *             subarray1,
                           int                         sub_length1)
{
    char *    first_host;
    char *    second_host;
    int       pos;
    int       ignored_cd;
    int       ignored_version;

    /* header of the first startpoint */
    pos = 0;
    UnpackMIProtoHeader(mi_proto0->array,
                        pos,
                        ignored_cd,
                        first_host,
                        ignored_version);

    /* header of the second startpoint, again read from the start */
    pos = 0;
    UnpackMIProtoHeader(mi_proto1->array,
                        pos,
                        ignored_cd,
                        second_host,
                        ignored_version);

    return (strcmp(first_host, second_host) == 0) ? GLOBUS_TRUE
                                                  : GLOBUS_FALSE;
}
    

/*
 * shm_proto_count()
 *
 * Return GLOBUS_L_SHM_PROTO_COUNT.
 */
static int
shm_proto_count(void)
{
    const int proto_count = GLOBUS_L_SHM_PROTO_COUNT;

    return proto_count;
}

/*-----------------------------------------------------------------------*/

/*
 * _nx_pr_*_info()
 *
 * Return the nexus_proto_funcs_t function table for this protocol module.
 *
 * This procedure is used for bootstrapping the protocol module.
 * The higher level Nexus code needs to call this routine to
 * retrieve the functions it needs to use this protocol module.
 */
void *_nx_pr_shm_info(void)
{
    return((void *) (&shm_proto_funcs));
} /* _nx_pr_shm_info() */


/*-----------------------------------------------------------------------*/

/*
 * shm_proto_type()
 *
 * Report which protocol this module implements: always
 * NEXUS_PROTO_TYPE_SHMEM.
 */
static nexus_proto_type_t shm_proto_type(void)
{
    const nexus_proto_type_t module_type = NEXUS_PROTO_TYPE_SHMEM;

    return module_type;
} /* shm_proto_type() */


#if defined(HAVE_SHM_PROTO)
/*-----------------------------------------------------------------------*/

#ifdef SHMEM_PROTO_IS_THREAD_SAFE    
/*
 * shm_handler_thread()
 *
 * Entry point of the handler thread in the multi-threaded version.
 *
 * The thread tags itself as the handler, runs
 * shm_receive_blocking_by_handler(), and once that returns sets
 * handler_thread_done and signals handler_thread_done_cond while
 * holding handler_thread_done_mutex.
 *
 * user_args is not used.
 */
static void
shm_handler_thread(
    void *                              user_args)
{
    (void) user_args;

    _nx_set_i_am_shm_handler_thread();
    shm_receive_blocking_by_handler();

    /* announce that this thread is finished */
    nexus_mutex_lock(&handler_thread_done_mutex);
    {
	handler_thread_done = NEXUS_TRUE;
	nexus_cond_signal(&handler_thread_done_cond);
    }
    nexus_mutex_unlock(&handler_thread_done_mutex);
} /* shm_handler_thread() */

/*
 * globus_l_shm_handler_wakeup()
 *
 * Raise the module's wakeup flag (globus_l_shm_wakeup).
 * user_args is not used.
 */
static void
globus_l_shm_handler_wakeup(
    void *                              user_args)
{
    (void) user_args;

    globus_l_shm_wakeup = GLOBUS_TRUE;
}
#endif /* SHMEM_PROTO_IS_THREAD_SAFE */

/*-----------------------------------------------------------------------*/
     
/*
 * shm_increment_reference_count()
 *
 * Add one to the reference count of the shm proto behind 'nproto'.
 * The update is made between shm_enter() and shm_exit().
 */
static void shm_increment_reference_count(nexus_proto_t *nproto)
{
    shm_proto_t *shm_proto;

    shm_proto = (shm_proto_t *) nproto;

    shm_enter();
    shm_proto->reference_count += 1;
    shm_exit();

} /* shm_increment_reference_count() */

/*-----------------------------------------------------------------------*/

/*
 * shm_decrement_reference_count()
 *
 * Subtract one from the reference count of the shm proto behind
 * 'nproto' and assert that the count did not drop below zero.  The
 * update is made between shm_enter() and shm_exit().
 *
 * The return value is meant to be NEXUS_TRUE when the proto was freed.
 * This implementation never frees the proto, so it always returns
 * NEXUS_FALSE.
 */
static globus_bool_t shm_decrement_reference_count(nexus_proto_t *nproto)
{
    shm_proto_t *proto;

    proto = (shm_proto_t *) nproto;

    shm_enter();
    proto->reference_count -= 1;
    NexusAssert2((proto->reference_count >= 0),
		 ("shm_decrement_reference_count(): Internal error: Reference count < 0\n"));
    shm_exit();

    return NEXUS_FALSE;
} /* shm_decrement_reference_count() */

/*-----------------------------------------------------------------------*/

/*
 * shm_get_my_mi_proto()
 *
 * Build the machine independent shm protocol information for this
 * process in a freshly allocated byte array.
 *
 * Layout of the array:
 *	- with SHM_USE_VERSIONING only: one byte mi_proto version, then
 *		one byte wire protocol version
 *	- the hostname string including its terminating NUL
 *	- the destination of my_proto, as written by SHMEM_GET_MY_MI_PROTO
 *
 * On return *array points to the allocated array and *size holds its
 * length.  'proto_info' and 'endpoint' are not used.  Always returns 0.
 */
static int shm_get_my_mi_proto(nexus_byte_t **array,
			       int *size,
			       void *proto_info,
			       nexus_endpoint_t *endpoint)
{
    int            dest_len;     /* size reported for the destination part */
    int            prefix_len;   /* everything in front of the destination */
    int            host_len;     /* hostname plus terminating NUL          */
    nexus_byte_t * out;          /* write position within the array        */

    SHMEM_GET_MY_MI_PROTO_SIZE(dest_len);

    host_len = _nx_hostname_string_length + 1;
#ifdef SHM_USE_VERSIONING
    prefix_len = 2 + host_len;
#else
    prefix_len = host_len;
#endif

    *size = prefix_len + dest_len;
    NexusMalloc(shm_get_my_mi_proto(),
		*array,
		nexus_byte_t *,
		*size);
    out = *array;

#ifdef SHM_USE_VERSIONING
    *out++ = GLOBUS_L_NEXUS_SHM_MI_PROTO_VERSION;
    *out++ = GLOBUS_L_NEXUS_SHM_PROTOCOL_VERSION;
#endif

    memcpy(out, _nx_hostname_string, host_len);
    out += host_len;

    SHMEM_GET_MY_MI_PROTO(out, my_proto->destination);

    return(0);
} /* shm_get_my_mi_proto() */

/*-----------------------------------------------------------------------*/

/*
 * shm_construct_from_mi_proto()
 *
 * From the passed machine independent protocol list ('mi_proto'), plus
 * the shm specific entry from that list ('array' and 'size'), see if
 * the information can be used to create a nexus_proto_t object for
 * talking to the node it describes:
 *	- If this protocol cannot be used to attach to the node, return
 *		NEXUS_FALSE.  That is the case when the version bytes
 *		do not match (with SHM_USE_VERSIONING; a fault is
 *		reported as well), when the node is on a different
 *		host, or when setting up / connecting to the remote
 *		segment ends with SHMEM_FAILURE.
 *	- If the entry points to myself, set *proto=NULL and return
 *		NEXUS_TRUE.
 *	- Otherwise construct a shm protocol object for the entry, put
 *		it in *proto, and return NEXUS_TRUE.
 *
 * 'size' is not examined.
 */
static globus_bool_t shm_construct_from_mi_proto(nexus_proto_t **proto,
	 				         nexus_mi_proto_t *mi_proto,
 					         nexus_byte_t *array,
					         int size)
{
    shm_destination_t destination;
    globus_bool_t result;
    nexus_byte_t *host_field;	/* where the hostname string starts */

#ifdef SHM_USE_VERSIONING
    /*
     * The first byte is the mi_proto version, the second the wire
     * protocol version.  All communication for this attach uses that
     * wire version, so checking it before connecting covers every send
     * from this requestor.
     */
    if ((int) array[0] != GLOBUS_L_NEXUS_SHM_MI_PROTO_VERSION
	|| (int) array[1] != GLOBUS_L_NEXUS_SHM_PROTOCOL_VERSION)
    {
	_nx_fault_detected(GLOBUS_NEXUS_ERROR_VERSION_MISMATCH);
	return(NEXUS_FALSE);
    }
    host_field = array + 2;
#else
    host_field = array;
#endif

    /*
     * Nodes on another host cannot be reached through shared memory.
     */
    if (strcmp((char *) host_field, _nx_hostname_string) != 0)
    {
	return(NEXUS_FALSE);
    }

    /*
     * The destination follows the hostname and its NUL.  The hostname
     * equals mine at this point, so my own length gives the distance.
     */
    SHMEM_CONSTRUCT_FROM_MI_PROTO(destination,
		  	          mi_proto,
			          (host_field + _nx_hostname_string_length + 1));

    /*
     * An entry that points to myself needs no proto object.
     */
    SHMEM_COMPARE_DESTINATIONS(destination, my_proto->destination, result);
    if (result)
    {
	*proto = (nexus_proto_t *) NULL;
	return (NEXUS_TRUE);
    }

    /*
     * NOTE(review): the outcome of setup is compared with NEXUS_SUCCESS
     * while the final outcome is compared with SHMEM_FAILURE; confirm
     * that the two helpers use compatible return conventions.
     */
    shm_enter();
    *proto = (nexus_proto_t *) construct_proto(destination);
    result = shm_remote_node_setup( (shm_proto_t *) *proto);
    if ( result == NEXUS_SUCCESS )
    {
	result = shm_remote_node_connect( (shm_proto_t *) *proto);
    }
    shm_exit();

    return ( result == SHMEM_FAILURE ) ? NEXUS_FALSE : NEXUS_TRUE;
} /* shm_construct_from_mi_proto() */


/*-----------------------------------------------------------------------*/

/*
 * shm_direct_info_size()
 *
 * Placeholder: not yet implemented, so it currently always returns 0.
 */
static int shm_direct_info_size(void)
{
    /* TODO: This needs to be filled in */
    int info_size = 0;

    return info_size;
} /* shm_direct_info_size() */

/*-----------------------------------------------------------------------*/

/*
 * construct_proto()
 *
 * Return the shm_proto_t for 'destination'.
 *
 * If the proto table already holds a proto for this destination, its
 * reference count is incremented and it is returned.  Otherwise a new
 * proto is allocated, initialized (reference count 1, no segment
 * attached yet), inserted into the proto table and returned.
 */
static shm_proto_t *construct_proto(shm_destination_t destination)
{
    shm_proto_t *entry;

    entry = proto_table_lookup(&destination);
    nexus_debug_printf(3,
		       ("construct_proto(): Table lookup returns proto=%x\n",
			entry));

    if (entry != (shm_proto_t *) NULL)
    {
	/* Already known: just take another reference */
	entry->reference_count++;
	return (entry);
    }

    /* First use of this destination: build a fresh proto */
    NexusMalloc(construct_proto(), entry, shm_proto_t *,
		sizeof(shm_proto_t));

    entry->type = NEXUS_PROTO_TYPE_SHMEM;
    entry->funcs = &shm_proto_funcs;
#ifdef SHM_USE_VERSIONING
    entry->version = GLOBUS_L_NEXUS_SHM_PROTOCOL_VERSION;
#endif
    entry->direct_custom_min_size = NEXUS_DC_MAX_U_LONG;
    entry->direct_custom_max_size = NEXUS_DC_MAX_U_LONG;
    entry->direct_pointer_min_size = NEXUS_DC_MAX_U_LONG;
    entry->direct_pointer_max_size = NEXUS_DC_MAX_U_LONG;
    entry->can_use_iovec = NEXUS_FALSE;
    entry->reserved_header_size = 0;
    entry->reference_count = 1;

    /* Not connected yet: no handle, no attached segment */
    SHMEM_COPY_DESTINATION(entry->destination, destination);
    entry->dest_handle = SHMEM_FAILURE;
    entry->dest_segment= NULL;

    proto_table_insert(entry);

    return (entry);
} /* construct_proto() */

/*-----------------------------------------------------------------------*/

/*
 * proto_table_init()
 *
 * Reset every bucket of the protocol table so that it holds no proto
 * and has no overflow chain.
 */
static void proto_table_init(void)
{
    int bucket = 0;

    while (bucket < PROTO_TABLE_SIZE)
    {
	proto_table[bucket].next = (proto_table_entry_t *) NULL;
	proto_table[bucket].proto = (shm_proto_t *) NULL;
	bucket++;
    }
} /* proto_table_init() */

/*-----------------------------------------------------------------------*/

/*
 * proto_table_insert()
 *
 * Add 'proto' to the table in the bucket selected by hashing its
 * destination.  The first proto of a bucket lives in the preallocated
 * table slot; later ones are placed in newly allocated entries linked
 * directly behind that slot.
 *
 * The caller guarantees that the proto is not already in the table.
 */
static void proto_table_insert(shm_proto_t *proto)
{
    int slot;
    proto_table_entry_t *overflow;

    SHMEM_HASH_DESTINATION(proto->destination, slot);

    if (proto_table[slot].proto == (shm_proto_t *) NULL)
    {
	/* Bucket head is free: use it directly */
	proto_table[slot].proto = proto;
	return;
    }

    /* Bucket head is taken: chain a new entry right after it */
    NexusMalloc(proto_table_insert(),
		overflow,
		proto_table_entry_t *,
		sizeof(struct _proto_table_entry_t));

    overflow->proto = proto;
    overflow->next = proto_table[slot].next;
    proto_table[slot].next = overflow;

} /* proto_table_insert() */

/*-----------------------------------------------------------------------*/

/*
 * proto_table_lookup()
 *
 * Search the bucket that '*dest' hashes to and return the shm_proto_t
 * whose destination compares equal to it, or NULL if the table has no
 * such proto.
 */
static shm_proto_t *proto_table_lookup(shm_destination_t *dest)
{
    proto_table_entry_t *cursor;
    int slot;
    globus_bool_t same_dest;

    SHMEM_HASH_DESTINATION(*dest, slot);

    cursor = &(proto_table[slot]);
    while (cursor != (proto_table_entry_t *) NULL)
    {
	/* The preallocated bucket head may be empty; skip it then */
	if (cursor->proto != (shm_proto_t *) NULL)
	{
	    SHMEM_COMPARE_DESTINATIONS(*dest,
				       cursor->proto->destination,
				       same_dest);
	    if (same_dest)
	    {
		return (cursor->proto);
	    }
	}
	cursor = cursor->next;
    }

    return ((shm_proto_t *) NULL);
} /* proto_table_lookup() */


/*-----------------------------------------------------------------------*/

/*  ***************** shared memory setup and control ****************   */

/*-----------------------------------------------------------------------*/

/*
 * shm_handler_thread_kickout()
 *
 * Thread entry point passed to globus_thread_create() by shm_init().
 * Adapts the thread-start signature to shm_handler_thread(), forwarding
 * 'user_args' as its third argument with the first two set to GLOBUS_NULL.
 *
 * Always returns GLOBUS_NULL once shm_handler_thread() returns.
 */
static
void *
shm_handler_thread_kickout(
    void *                              user_args)
{
    shm_handler_thread(
        GLOBUS_NULL,
        GLOBUS_NULL,
        user_args);

    /* A void * function must return a value; there is no result to report */
    return GLOBUS_NULL;
}
    
/*
 * shm_init()
 *
 * Initialize the SHMEM protocol.
 *
 * Computes the message/cell/segment sizes, records the local hostname,
 * and tries to obtain and attach a local shared memory segment with
 * shm_local_node_connect().  On success the segment is formatted, the
 * module state is initialized, and either a handler thread or a
 * periodic poll callback is started.
 *
 * Parameters:
 *	add_to_my_mi_proto - out: set to NEXUS_TRUE when the local segment
 *		was obtained and shm is usable, NEXUS_FALSE otherwise.
 */
static void shm_init(globus_bool_t * add_to_my_mi_proto)
{
    int                     sizes_lcv;
    globus_bool_t           connect_status;
    globus_reltime_t        delay_time;

    /* Must run first: nexus_shm_alignment is used in the sizing below */
    shm_calc_alignment();

    /*
     * setup shared memory control values 
     */
    nexus_shm_msg_ctl_len = sizeof(shm_cell_t) - 1;
    nexus_shm_pagesize = getpagesize();
    nexus_shm_mtu = nexus_shm_pagesize * SHM_PAGES_PER_MSG_MTU;
    nexus_shm_preemptive_send_cutoff = (SHM_MTU_CHUNKS_PER_MSG-1) 
                                       * nexus_shm_mtu; 

    /*
     * segmented message shared memory requirements 
     *    normal message control length of size of cell struct + 
     *    buffer segment control flags                         + 
     *    maximum real time page alignment factor of one page  + 
     *    number of segments we'll use per msg * each ones len + 
     *    host dependent padding
     *
     * Note: the padding step below adds a full nexus_shm_alignment
     * when the length is already a multiple of the alignment.
     */
    nexus_shm_seg_msg_len  = nexus_shm_msg_ctl_len                   + 
                             SHM_MTU_CHUNKS_PER_MSG * sizeof(int)    +
                             nexus_shm_pagesize                      +
                             SHM_MTU_CHUNKS_PER_MSG * nexus_shm_mtu;  
    nexus_shm_seg_msg_len += nexus_shm_alignment - 
                              (nexus_shm_seg_msg_len % nexus_shm_alignment);


    /*
     *  add in host dependent cell sizes
     *  (the last three size classes depend on the page size and on
     *  the segmented message length computed above)
     */
    cell_sizes[CELL_SIZES_COUNT-1] = nexus_shm_seg_msg_len;
    cell_sizes[CELL_SIZES_COUNT-2] = nexus_shm_pagesize * 2;
    cell_sizes[CELL_SIZES_COUNT-3] = nexus_shm_pagesize;
    smallest_cell_size      = cell_sizes[0] - sizeof(shm_cell_t);
    next_largest_cell_size  = cell_sizes[CELL_SIZES_COUNT-2] 
                               - sizeof(shm_cell_t);
    largest_cell_start      = size_start[CELL_SIZES_COUNT-1];


    /*
     * determine size of segment to use.
     * actual size of segment includes segment control header,
     * padding to align first cell on next page boundary,     
     * followed by a set of receive cells                     
     * round out the segment size to account for any host     
     * alignment requirements                                 
     */
    shm_segment_size = sizeof(shm_segment_t)
                       + nexus_shm_pagesize 
                       + nexus_shm_seg_msg_len;
    for (sizes_lcv=0; sizes_lcv<CELL_SIZES_COUNT; sizes_lcv++)
    {
        shm_segment_size += cell_sizes[sizes_lcv] * cell_counts[sizes_lcv]; 

        /*
         * Extra room (two pages plus one int per MTU chunk, per cell)
         * for size classes larger than the MTU, applied only when the
         * MTU is smaller than a page.
         */
        if ( cell_sizes[sizes_lcv] > nexus_shm_mtu 
             && nexus_shm_mtu < nexus_shm_pagesize )
        {
           shm_segment_size += cell_counts[sizes_lcv] * 2 * nexus_shm_pagesize;
           shm_segment_size += cell_counts[sizes_lcv] *
                                (sizeof(int) * SHM_MTU_CHUNKS_PER_MSG);
        }
    }



    /* Round the segment size up past the next alignment multiple */
    if (nexus_shm_alignment != 0)
    {
      shm_segment_size += nexus_shm_alignment;
      shm_segment_size -= (shm_segment_size % nexus_shm_alignment);
    } 


    /* 
     * the hostname tells us who we can communicate with via shm
     */
    globus_libc_gethostname(_nx_hostname_string, MAXHOSTNAMELEN);
    _nx_hostname_string_length = strlen(_nx_hostname_string);



    /*
     * see if we can get a shared memory segment.
     * if we can, get it setup for communication.
     * otherwise, indicate that shm should be removed as a 
     * usable communications protocol for this task.
     */
    connect_status = shm_local_node_connect();

    if ( connect_status == SHMEM_FAILURE )
    { 
#ifdef SHM_NOTIFY_WHEN_SHM_UNAVAILABLE
       fprintf(stderr, "\a Shared Memory Unavailable, pr_shm not used \n");
       fflush(stderr);
#endif
      *add_to_my_mi_proto = NEXUS_FALSE;
    }
    else
    {
      /* Format the freshly attached local segment into cells */
      shm_local_node_setup(); 

#ifndef BUILD_LITE
      nexus_thread_key_create(&i_am_shm_handler_thread_key, NULL);
#endif
      /*
       * NOTE(review): shm_local_node_connect() has already called
       * construct_proto(), which inserts my_proto into the proto table;
       * this call clears every bucket afterwards -- confirm that
       * dropping my_proto from the table is intended.
       */
      proto_table_init();
      nexus_mutex_init(&shm_mutex, (nexus_mutexattr_t *) NULL);
      shm_done = NEXUS_FALSE;
      set_handle_in_progress_false();

      /* All message and blocked-sender queues start out empty */
      send_list_head = (shm_message_t *) NULL;
      send_list_tail = (shm_message_t *) NULL;
      received_list_head = (shm_message_t *) NULL;
      received_list_tail = (shm_message_t *) NULL;
      blocked_send_q_head = (shm_tcb_t *) NULL;
      blocked_send_q_tail = (shm_tcb_t *) NULL;

      /*
       * Prebuilt semaphore operations (semaphore 0 of the set) used
       * with semop() to take and release a segment's full lock.
       */
      my_full_lock_req.sem_num = 0;  
      my_full_lock_req.sem_op  = SHMEM_LOCK_OP;  
      my_full_lock_req.sem_flg = 0;

      my_full_unlock_req.sem_num = 0;  
      my_full_unlock_req.sem_op  = SHMEM_UNLOCK_OP;  
      my_full_unlock_req.sem_flg = 0;  
  
      /* Zero delay/period for the callbacks registered below */
      GlobusTimeReltimeSet(delay_time, 0, 0); 
#ifdef SHMEM_PROTO_IS_THREAD_SAFE
      globus_l_shm_wakeup = GLOBUS_FALSE;
      if (globus_thread_preemptive_threads())
      {
	  nexus_thread_t thread;

  	  using_handler_thread = NEXUS_TRUE;

	  /* Create the handler thread */
	  handler_thread_done = NEXUS_FALSE;
	  nexus_mutex_init(&handler_thread_done_mutex,
		  	   (nexus_mutexattr_t *) NULL);
	  nexus_cond_init(&handler_thread_done_cond,
		  	  (nexus_condattr_t *) NULL);

	  /*
	   * BUILD_LITE: run the handler as a oneshot callback;
	   * otherwise start it as a real thread via the kickout wrapper.
	   */
#ifdef BUILD_LITE	  
	  globus_callback_register_oneshot(&globus_l_nexus_shm_callback_handle,
					   &delay_time,
					   shm_handler_thread,
					   GLOBUS_NULL);
#else
          globus_thread_create(
            GLOBUS_NULL,
            GLOBUS_NULL,
            shm_handler_thread_kickout,
            GLOBUS_NULL);
#endif
      }
      else
#endif /* SHMEM_PROTO_IS_THREAD_SAFE */
      {
	  /* No handler thread: receive through a periodic shm_poll() */
	  using_handler_thread = NEXUS_FALSE;

	  globus_callback_register_periodic(&globus_l_nexus_shm_callback_handle,
					    &delay_time,
					    &delay_time,
					    shm_poll,
					    GLOBUS_NULL);
      }

      *add_to_my_mi_proto = NEXUS_TRUE;
    } /* successfully connected */

} /* shm_init() */

/*-----------------------------------------------------------------------*/

/*
 * shm_shutdown()
 *
 * This routine is called during normal shutdown of a process.
 *
 * Sets shm_done, stops whichever receive mechanism shm_init() started
 * (the handler thread, or the registered callback), and then releases
 * the local shared memory resources with shm_local_node_disconnect().
 */
static void shm_shutdown(void)
{
    globus_bool_t i_am_shm_handler_thread;

    shm_enter();
    shm_done = NEXUS_TRUE;

    if (using_handler_thread)
    {
	_nx_i_am_shm_handler_thread(&i_am_shm_handler_thread);
	if (!i_am_shm_handler_thread)
	{
            /*
             * If this is not the shm handler thread, then we need
             * to get the handler thread to shutdown.
             *
             * Since the other thread may be sitting in a blocking
             * receive, we need to send a message to myself
             * to wake up the handler thread.  Otherwise
             * the handler will not notice the shm_done flag is set.
             */
             /*
              * msg_number must start at 0: shm_malloc() reads it and
              * only assigns a fresh message number when it is 0.
              */
             unsigned long msg_number = 0;
  	     shm_cell_t * send_cell = NULL;
	     nexus_byte_t *cell_data;

             /* Retry until a cell in my own segment becomes available */
             while (send_cell == NULL)
	     {
                send_cell = shm_malloc(my_proto, 2, &msg_number);
	     }
	     cell_data = &(send_cell->cell_data);
#ifdef SHM_USE_VERSIONING
             cell_data[1] = CLOSE_HANDLER_FLAG;
#else
             cell_data[0] = CLOSE_HANDLER_FLAG;
#endif
             /* Mark the cell as ready to be received */
             my_segment->status[send_cell->cell_number] = SHMEM_RECV_WAIT;


	    /*
	     * Wait for the handler thread to shutdown.  shm_exit() is
	     * called first so the handler is not kept out while we wait.
	     */
	    shm_exit();
	    nexus_mutex_lock(&handler_thread_done_mutex);
	    while (!handler_thread_done)
	    {
		nexus_cond_wait(&handler_thread_done_cond,
				&handler_thread_done_mutex);
	    }
	    nexus_mutex_unlock(&handler_thread_done_mutex);
	    shm_enter();
	}
	nexus_mutex_destroy(&handler_thread_done_mutex);
	nexus_cond_destroy(&handler_thread_done_cond);
	using_handler_thread = NEXUS_FALSE;
    }
    else if(globus_l_nexus_shm_callback_handle != -1)
    {
	/* Polling mode: stop the periodic callback registered by shm_init() */
	globus_callback_unregister(
	    globus_l_nexus_shm_callback_handle,
	    GLOBUS_NULL,
	    GLOBUS_NULL,
	    GLOBUS_NULL); 
    }
    shm_local_node_disconnect();
    shm_exit();

} /* shm_shutdown() */


/*-----------------------------------------------------------------------*/

/*
 * shm_local_node_connect()
 *
 * Prepare the process for communication via shared memory.
 * The unique destination identifier of this node will be
 * established here as the common ipc key used for shared
 * memory and the inter-process semaphore lock for it.
 * By allocating these system resources in this fashion,
 * we'll enable any process to access them via this destination
 * id, which we'll load into our proto connection array.
 *
 * Candidate keys are tried in order: first the process id, then
 * ftok("/usr/include/sys/shm.h", n) for n = 1 .. UCHAR_MAX, until both
 * the segment and its semaphore can be created under the same key.
 *
 * Returns SHMEM_SUCCESS with my_proto's handle, lock and attached
 * segment set, or SHMEM_FAILURE.  On failure nothing is left allocated
 * and my_proto->dest_segment is NULL.
 */
static globus_bool_t shm_local_node_connect(void)
{  
    shm_destination_t  new_node = 0;
    shm_destination_t  my_dest;
    union semun        sem_info;   
    globus_bool_t      connect_successful = SHMEM_SUCCESS;

    /*
     * find a unique, unused set of shared memory and semaphore resources
     * keyed on the same id.
     */
    my_proto = construct_proto(new_node);
    my_proto->dest_handle = SHMEM_FAILURE;  


    for(new_node=0;  
        my_proto->dest_handle == SHMEM_FAILURE && new_node <= UCHAR_MAX;  
        new_node++)  
    {  
       my_dest = (new_node == 0) ? globus_libc_getpid()
                 : ftok("/usr/include/sys/shm.h", new_node);

       SHMEM_CREATE(my_proto->dest_handle, my_dest, 
                    shm_segment_size, SHMEM_CREATE_PERMS);

       if (my_proto->dest_handle != SHMEM_FAILURE)  
       {  
           /* The semaphore must be obtainable under the same key */
           my_proto->dest_lock = semget(my_dest, 1, SHMEM_CREATE_PERMS);  
           if (my_proto->dest_lock == SHMEM_FAILURE)  
	   {  
               /* Give the segment back and move on to the next key */
               SHMEM_DESTROY(my_proto->dest_handle);
               my_proto->dest_handle = SHMEM_FAILURE;  
	   }  
       }  
    }  

    /*
     * with resources allocated, try to attach to the shared 
     * memory segment acquired.
     * The destination records the last key tried.
     */
    SHMEM_COPY_DESTINATION(my_proto->destination, my_dest);
  
    if (my_proto->dest_handle == SHMEM_FAILURE)  
    {  
        connect_successful = SHMEM_FAILURE;
    }  
    else  
    {  
        SHMEM_ATTACH(my_proto->dest_segment, my_proto->dest_handle);
        if (my_proto->dest_segment == (shm_segment_t *) SHMEM_FAILURE)  
	{  
            SHMEM_DESTROY(my_proto->dest_handle);
            sem_info.val = 0;
            semctl(my_proto->dest_lock, 0, IPC_RMID, sem_info);  

            /*
             * Leave the proto in the "not connected" state.  Otherwise
             * shm_local_node_disconnect(), which tests dest_segment
             * against NULL, would remove these resources a second time.
             */
            my_proto->dest_handle  = SHMEM_FAILURE;
            my_proto->dest_segment = NULL;
            connect_successful = SHMEM_FAILURE;
	}  
    }  

    return(connect_successful);

} /* shm_local_node_connect() */

/*-----------------------------------------------------------------------*/

/*
 * shm_remote_node_connect() 
 *
 * "Connect" to another process by attaching
 *  to the other's shared memory segment.
 *
 * Does nothing if the proto already has a segment attached.
 *
 * Returns SHMEM_SUCCESS when proto->dest_segment is attached (now or
 * previously), SHMEM_FAILURE when the segment could not be opened or
 * attached; in that case proto->dest_segment is NULL.
 */
static globus_bool_t shm_remote_node_connect(shm_proto_t * proto)
{  
   globus_bool_t connect_status = SHMEM_SUCCESS;

   if ( proto->dest_segment == NULL) {

      /* Size 0: open the segment the remote process created */
      SHMEM_CREATE(proto->dest_handle, proto->destination, 
                  0, SHMEM_CONNECT_PERMS);

      if (proto->dest_handle == SHMEM_FAILURE)  
      { 
        /* No valid handle: do not attempt to attach */
        connect_status = SHMEM_FAILURE;
      }  
      else
      {
        SHMEM_ATTACH(proto->dest_segment, proto->dest_handle);
        if (proto->dest_segment == (shm_segment_t *) SHMEM_FAILURE)  
        { 
          proto->dest_segment = NULL; 
          connect_status = SHMEM_FAILURE;
        }  
#ifdef SHM_NO_LOCK_SAFE_CUTOFF
        else
        {
          /* Register as one more sender, under the segment's full lock */
          semop(proto->dest_lock, &my_full_lock_req, 1); 
          proto->dest_segment->senders++;
          semop(proto->dest_lock, &my_full_unlock_req, 1);  
        }
#endif
      }

   }

   return(connect_status);
} /* shm_remote_node_connect() */

/*-----------------------------------------------------------------------*/

/*
 * shm_local_node_disconnect()
 *
 * "Disconnect" this process by releasing it's shared memory 
 * resources.  This process will no longer communicate via shm. 
 */
static void shm_local_node_disconnect(void)
{  
    union semun arg; 
    arg.val = 0;
    if (my_proto->dest_segment != NULL)  
    { 
       semctl(my_proto->dest_lock, 0, IPC_RMID, arg);
       SHMEM_DESTROY(my_proto->dest_handle);
    }


} /* shm_local_node_disconnect() */


/*-----------------------------------------------------------------------*/

/*
 * shm_remote_node_disconnect()
 *
 * "Disconnect" from the specified process by detaching its shared
 * memory segment and marking the proto as not connected.
 * The local proto (my_proto) and protos without an attached segment
 * are left untouched.
 */
static void shm_remote_node_disconnect(shm_proto_t * proto)
{
    if (proto->dest_segment == NULL || proto == my_proto)
    {
        return;
    }

    shmdt((char*) proto->dest_segment);
    proto->dest_handle  = SHMEM_FAILURE;
    proto->dest_segment = NULL;

} /* shm_remote_node_disconnect() */

/*-----------------------------------------------------------------------*/

/* 
 * shm_local_node_setup()
 *
 * One time setup of this local node's shared memory communication resources.
 *
 * Publishes my_proto's attached segment as my_segment, resets its lock
 * and counter fields, sets the semaphore to SHMEM_UNLOCKED_VALUE, and
 * carves the segment into cells with shm_prealloc(): first a single
 * cell of nexus_shm_seg_msg_len bytes, then cell_counts[i] cells of
 * cell_sizes[i] bytes for every size class.
 *
 * Side effect: each cell_sizes[] entry is reduced by sizeof(shm_cell_t)
 * once its cells have been laid out.
 */
static void shm_local_node_setup(void)
{
    int          cell_num, size_lcv;
    shm_offset_t free_stg_loc;
    union semun  sem_info; 

    my_segment = my_proto->dest_segment;
    my_segment->soft_lock = 0;
  
    /* Start with the inter-process semaphore in the unlocked state */
    sem_info.val = SHMEM_UNLOCKED_VALUE;  
    semctl(my_proto->dest_lock, 0, SETVAL, sem_info);
    SHMEM_THREAD_LOCK_INIT(my_segment);  

    my_segment->msg_count    = 0;  
    my_segment->senders      = 0;
    my_segment->alloc_count  = SHM_CELL_COUNT;

    /* Shortcuts to the segment's per-cell offset and status arrays */
    my_cell_offsets = my_segment->offset;
    my_cell_status  = my_segment->status;

    next_msg_to_recv = 1;

    /* 
     * preformat shm segment by preallocating all cells 
     * start the segment with a cell large enough to satisfy 
     * any request.  then load in sets of increasingly larger
     * message cells.  we'll adjust the sizes array at this point
     * to reflect the amount of data that cells of each size can
     * hold.  
     */
    cell_num = 0;
    /*
     * First cell starts on the page boundary following the segment
     * header (a full page is skipped if the header ends on a boundary).
     */
    free_stg_loc = sizeof(shm_segment_t);  
    free_stg_loc += nexus_shm_pagesize - free_stg_loc % nexus_shm_pagesize;

    /* Remember the first cell and its status slot for fast access */
    SHMEM_CELL_ADDR(my_segment, free_stg_loc, my_first_cell);
    my_first_status = my_cell_status;
    shm_prealloc(nexus_shm_seg_msg_len, 1, &cell_num, &free_stg_loc);

    for (size_lcv=0; size_lcv < CELL_SIZES_COUNT; size_lcv++)
    {
       shm_prealloc(cell_sizes[size_lcv], cell_counts[size_lcv], 
                    &cell_num,            &free_stg_loc   );
       /* From here on the entry excludes the cell header */
       cell_sizes[size_lcv] -= sizeof(shm_cell_t);
    }

} /* shm_local_node_setup() */ 

/*-----------------------------------------------------------------------*/

/*
 * shm_remote_node_setup()
 *
 * One time setup of remote node's communication linkage: look up the
 * existing semaphore keyed by the proto's destination and store its id
 * in proto->dest_lock.
 *
 * Returns SHMEM_SUCCESS if the semaphore was found, else SHMEM_FAILURE.
 */
static globus_bool_t shm_remote_node_setup(shm_proto_t * proto)
{
  proto->dest_lock = semget(proto->destination, 0, 0);

  if (proto->dest_lock != SHMEM_FAILURE)
  {
    return(SHMEM_SUCCESS);
  }

  return(SHMEM_FAILURE);
} /* shm_remote_node_setup() */


/*-----------------------------------------------------------------------*/

/*
 * shm_prealloc()
 *
 * Preallocate a block of cells as part of this segment's setup
 *
 * Parameters:
 *	size         - total bytes of one cell of this class
 *	count        - number of cells to lay out
 *	cell_num     - in/out: index of the next free slot in my_segment's
 *	               status/offset arrays; advanced by 'count'
 *	free_stg_loc - in/out: offset in the segment of the next free
 *	               byte; advanced past the cells laid out
 */
static void 
shm_prealloc(int size, int count, int *cell_num, shm_offset_t *free_stg_loc)
{
   int cell_lcv;
   shm_cell_t *next_cell;

   for (cell_lcv=0;  cell_lcv < count;  cell_lcv++ )
   {
      /* Register the cell as free and record where it lives */
      my_segment->status[*cell_num] = SHMEM_UNREFERENCED;
      my_segment->offset[*cell_num] = *free_stg_loc;

      SHMEM_CELL_ADDR(my_segment, *free_stg_loc, next_cell);
      next_cell->data_size   = 0;
      next_cell->msg_number  = 0; 
      next_cell->cell_number = *cell_num;


      /*
       * Cells whose data fits within one MTU get data_loc 0 and are
       * simply packed one after another.
       * NOTE(review): 'size' is an int and sizeof yields an unsigned
       * type, so this subtraction is presumably done unsigned --
       * confirm 'size' is never smaller than sizeof(shm_cell_t).
       */
      if ( size - sizeof(shm_cell_t) <= nexus_shm_mtu )
      {
         next_cell->data_loc  = 0;
         *free_stg_loc       += size;
      }

      else
      {
         /*
          * Larger cells: the data area begins after the cell header
          * and one int per MTU chunk, moved up to the following page
          * boundary.
          */
         next_cell->data_loc =  *free_stg_loc + sizeof(shm_cell_t)
                                + sizeof(int) * SHM_MTU_CHUNKS_PER_MSG;

         next_cell->data_loc =  next_cell->data_loc + nexus_shm_pagesize
                              - next_cell->data_loc % nexus_shm_pagesize;
 
         /*
          * The segmented-message cell length already includes its
          * page padding; other large cells reserve the data size plus
          * one more page beyond the aligned data location.
          */
         if ( size == nexus_shm_seg_msg_len )
         {
           *free_stg_loc += nexus_shm_seg_msg_len;
         }
         else
         { 
           *free_stg_loc = next_cell->data_loc + size + nexus_shm_pagesize;
         }
      }

      (*cell_num)++;
   }

} /* shm_prealloc */

/*-----------------------------------------------------------------------*/

/* 
 * shm_malloc()
 *
 * Fill shared memory storage request by locating an appropriate
 * unused cell in the destination's segment.  The segment is locked
 * with shm_set_locked() for the duration of the search.
 *
 * Parameters:
 *	proto      - destination whose attached segment is searched
 *	req_amount - number of data bytes requested; stored in the
 *	             cell's data_size
 *	msg_number - in/out: if 0 on entry, the segment's next message
 *	             number is assigned and stored here; a non-zero value
 *	             (from an earlier, failed attempt) is reused as is
 *
 * Returns the claimed cell, marked SHMEM_SENDING, or NULL if no
 * suitable cell is currently unreferenced.
 */
static shm_cell_t * 
shm_malloc(shm_proto_t * proto, int req_amount, unsigned long * msg_number) 
{  
    shm_segment_t * segment;    
    shm_cell_t    * new_cell = NULL;
    volatile int  * cell_status;
    int cell_num, lcv;

    segment = proto->dest_segment; 
    shm_set_locked(proto);   


    /*
     * message ordering is based on the order of sends, which
     * we set here while we have the segment locked.
     * this malloc request may not succeed on this try, but 
     * the message number of the first send/request is the one
     * we must use.
     */  
    if ( *msg_number == 0 )
    {
       segment->msg_count++;
       *msg_number = segment->msg_count;
    }


    /* 
     * the first cell is large enough to satisfy any request.
     * use it if its available.
     */ 
    if ( *segment->status == SHMEM_UNREFERENCED )
    {
       SHMEM_CELL_ADDR(segment, *(segment->offset), new_cell);
       new_cell->data_size   = req_amount;
       new_cell->msg_number  = *msg_number;
       *segment->status      = SHMEM_SENDING;
    }
    else
    {
      /*
       * locate the position within increasingly larger 
       * cells where we can fill this request with a first
       * fit approach.
       * requests less than our smallest cell size can be
       * filled with any cell in the segment, while very
       * large requests, can only be filled by the last
       * few cells we preallocated.
       * otherwise, use the sizes start array as a 2nd 
       * level index to set the search start location.
       *
       * The search start defaults to the largest cells, which can
       * hold any request.  This covers very large requests and also
       * the case where no intermediate size class matches below,
       * which previously left cell_num uninitialized.
       */
      cell_num = largest_cell_start;

      if ( req_amount < smallest_cell_size )
      {
        cell_num = 0;
      }
      else if ( req_amount <= next_largest_cell_size )
      {
        for (lcv=1; lcv < CELL_SIZES_COUNT - 1; lcv++)
        {
           if ( req_amount < cell_sizes[lcv] )
           {
             cell_num = size_start[lcv];
             break;
           }
        }
      }
  

      /*
       * now that we've located the position within the preallocated
       * cells where any cell can fill this request, we can fill this
       * request with the first unused cell we find.
       */
      cell_status = segment->status;
      for ( ; cell_num < segment->alloc_count; cell_num++)
      {
         if ( cell_status[cell_num] == SHMEM_UNREFERENCED )
         {
            SHMEM_CELL_ADDR(segment, segment->offset[cell_num], new_cell);
            new_cell->data_size   = req_amount;
            new_cell->msg_number  = *msg_number;
            cell_status[cell_num] = SHMEM_SENDING;
            break;
         }
      } 
    }

    shm_set_unlocked(proto);  
    return(new_cell);
}

/*-----------------------------------------------------------------------*/

/* 
 * shm_set_locked()
 *
 * Lock shared memory segment heap control header.
 * In a threaded build, we'll use a two level locking
 * strategy to avoid blocking the entire process on
 * semaphore.  As only senders to a segment will lock
 * it during shm_malloc, segments with only one sender
 * should be able to safely continue without a full lock.
 *
 * Parameters:
 *	proto - destination whose segment is to be locked; its dest_lock
 *	        semaphore is used for the full (inter-process) lock.
 */
static void shm_set_locked(shm_proto_t * proto)  
{  
   int soft_lock_probes;  
   shm_segment_t * segment;

   segment = proto->dest_segment;
   /*
    * Thread-level lock.  Note that it is always taken on my_proto's
    * segment, whichever segment 'proto' refers to.
    */
   SHMEM_THREAD_SET_LOCKED(my_proto->dest_segment);   


#ifndef BUILD_LITE
   /*
    * While the segment's soft_lock count is non-zero, yield up to
    * SHMEM_LOCK_PROBE_LIMIT times before going on to the semaphore.
    */
   for (soft_lock_probes=0;  
        segment->soft_lock > 0 
          && soft_lock_probes < SHMEM_LOCK_PROBE_LIMIT;  
        soft_lock_probes++)  
   {  
       SHMEM_THREAD_SCHED_YIELD();  
   }  
#endif 


   /*
    * With SHM_NO_LOCK_SAFE_CUTOFF defined, the full lock is taken only
    * when the segment has more than one sender or fewer than
    * SHM_NO_LOCK_SAFE_CUTOFF messages have been numbered so far;
    * otherwise it is always taken.
    */
#ifdef SHM_NO_LOCK_SAFE_CUTOFF
   if (segment->senders > 1 || segment->msg_count < SHM_NO_LOCK_SAFE_CUTOFF)
   {
#endif
      /* soft_lock is raised before the semop on the semaphore */
      segment->soft_lock++;
      semop(proto->dest_lock, &my_full_lock_req, 1); 
#ifdef SHM_NO_LOCK_SAFE_CUTOFF
   }
#endif

} /* shm_set_locked() */

/*-----------------------------------------------------------------------*/

/* 
 * shm_set_unlocked()
 *
 * Release shared memory segment heap control header.
 *
 * Undoes shm_set_locked(): releases the full (semaphore) lock and
 * lowers the segment's soft_lock count, then releases the thread-level
 * lock.  With SHM_NO_LOCK_SAFE_CUTOFF defined, the full lock is only
 * released when soft_lock is non-zero or the segment has more than one
 * sender.
 *
 * The thread-level lock is taken unconditionally by shm_set_locked(),
 * so it is released unconditionally here.
 */
static void shm_set_unlocked(shm_proto_t * proto)  
{ 
#ifdef SHM_NO_LOCK_SAFE_CUTOFF
  shm_segment_t * segment;
  segment = proto->dest_segment;

  if ( segment->soft_lock != 0 || segment->senders > 1 )
  { 
     semop(proto->dest_lock, &my_full_unlock_req, 1);  
     segment->soft_lock--;
  }  
#else
  semop(proto->dest_lock, &my_full_unlock_req, 1);  
  proto->dest_segment->soft_lock--;
#endif

  /* Always pair with the SHMEM_THREAD_SET_LOCKED in shm_set_locked() */
  SHMEM_THREAD_SET_UNLOCKED(my_proto->dest_segment);   

} /* shm_set_unlocked() */

/*-----------------------------------------------------------------------*/

/*
 * shm_calc_alignment()
 *
 * Determine run time memory alignment to use for shared memory management.
 *
 * The value stored in nexus_shm_alignment is the number of bytes by
 * which a struct made of a single char followed by a union of a long
 * and a long pointer exceeds the size of that union alone.  If that
 * difference is 0, an alignment of 4 is used instead.
 */
static void shm_calc_alignment(void)
{
  struct probe_layout
  {
     char lead_byte;

     union widest_member
     {
       long   as_long;
       long  *as_pointer;
     } widest;
  };

  nexus_shm_alignment = sizeof(struct probe_layout)
                        - sizeof(union widest_member);

  if ( nexus_shm_alignment != 0 )
  {
    return;
  }

  /* No padding detected: fall back to a fixed alignment */
  nexus_shm_alignment = 4;

} /* shm_calc_alignment */


/*-----------------------------------------------------------------------*/

/*  ********************* top level communications *******************   */

/*-----------------------------------------------------------------------*/

/*
 * shm_poll()
 *
 * In a version of the shm protocol module that does not
 * use a handler thread (preemptive thread module & thread safe blocking
 * receives), this routine should check to see if there are
 * any messages to receive, and if so then receive them and invoke them.
 * Incomplete previous operations will be reattempted at this time.
 *
 * Registered by shm_init() as a periodic callback.  'user_args' is
 * not used.
 */
static void 
shm_poll(
    void *                              user_args)
{
    globus_bool_t message_handled = NEXUS_FALSE;
    shm_tcb_t * send_tcb;

    /*
     * This should not be called if a separate handler thread is in use.
     */
    NexusAssert2((!using_handler_thread),
		 ("shm_poll(): Internal error: Should never be called when using a handler thread\n") );

    nexus_debug_printf(5, ("shm_poll(): entering\n"));


    /*
     * resume any blocked large preempted sends 
     * (at most one per poll; shm_send_preemptive() is called outside
     * the shm_enter()/shm_exit() section)
     */
    shm_enter();
    if ( QueueNotEmpty(blocked_send_q_head))
    { 
       Dequeue(blocked_send_q_head, blocked_send_q_tail, send_tcb);
       shm_exit();
       shm_send_preemptive(send_tcb);
       shm_enter();
    }
    shm_exit();


    /* Threaded builds skip receiving while handle_in_progress is set */
#ifndef BUILD_LITE
    if (!handle_in_progress)
    { 
#endif
        /*
	 * receive any current incoming messages 
	 * Keep trying until a message was handled, shm_done is set,
	 * or the callback's time has expired.
         */
   
        do
	{
            message_handled = shm_receive();
#           if !defined(BUILD_LITE)
	    {
                if (message_handled == NEXUS_FALSE)
                {
                    globus_thread_yield();
                }
	    }
#           endif
	}
        while(!shm_done && 
	      !message_handled &&
	       !globus_callback_has_time_expired());


        /* 
         * dispatch messages received out of order 
         */
        if ( received_list_head )
        {
           shm_dispatch_pending_receive();
        }

#ifndef BUILD_LITE
    } 
#endif


    /*
     * try to send prior rsrs that couldn't be sent earlier 
     */
    if ( send_list_head )
    {
       shm_send_pending();       
    }



#ifndef BUILD_LITE
    /*
     * Only yield the processor if there was a message handled.
     * That handler may have enabled another thread for execution.
     */
    if (message_handled)
    {
	nexus_thread_yield();
    }
#endif
    /* Signal the callback poll whenever a message was handled */
    if(message_handled)
    {
        globus_callback_signal_poll();
    }
} /* shm_poll() */

/*-----------------------------------------------------------------------*/

/*
 * shm_receive_non_blocking()
 *
 * Receive all pending messages to this node.
 *
 * Calls shm_receive() repeatedly until it reports no message.  When
 * SHM_POLL_TRIES is defined, that many consecutive empty attempts are
 * required before giving up; any received message restarts the count.
 *
 * Return: NEXUS_TRUE if a message is handled, otherwise NEXUS_FALSE
 */
static globus_bool_t shm_receive_non_blocking()
{
#ifdef SHM_POLL_TRIES
   int tries_left = SHM_POLL_TRIES;
#endif
   nexus_bool_t got_message = NEXUS_FALSE;

   nexus_debug_printf(5, ("shm_receive_non_blocking(): entering\n"));

#ifdef SHM_POLL_TRIES
   for ( ; tries_left > 0; tries_left--)
   {
     /* Drain everything that is ready, restarting the retry budget */
     while ( shm_receive() )
     {
       got_message = NEXUS_TRUE;
       tries_left = SHM_POLL_TRIES;
     }
   }
#else
   for (;;)
   {
     if ( !shm_receive() )
     {
       break;
     }
     got_message = NEXUS_TRUE;
   }
#endif

   nexus_debug_printf(5, ("shm_receive_non_blocking(): exiting\n"));

   return(got_message);

} /* shm_receive_non_blocking() */


/*-----------------------------------------------------------------------*/

/*
 * shm_receive_blocking()
 *
 * Keep calling shm_receive() until it handles a message or shm_done
 * is set.  In threaded builds the processor is yielded after every
 * attempt that found nothing.
 *
 * Return: NEXUS_TRUE if a message is handled, otherwise NEXUS_FALSE.
 */

static globus_bool_t shm_receive_blocking()
{
   nexus_bool_t got_message = NEXUS_FALSE;

   nexus_debug_printf(5, ("shm_receive_blocking(): entering\n"));

   while (shm_done == NEXUS_FALSE)
   {
      got_message = shm_receive();
      if (got_message != NEXUS_FALSE)
      {
         break;
      }

#ifndef BUILD_LITE
      /* Nothing yet: let other threads run before retrying */
      nexus_thread_yield();
#endif
   }

   nexus_debug_printf(5, ("shm_receive_blocking(): exiting\n"));

   return(got_message);

} /* shm_receive_blocking() */


/*-----------------------------------------------------------------------*/

/*
 * shm_receive_blocking_by_handler()
 *
 * Receive loop that runs until shm_done is set or globus_l_shm_wakeup
 * becomes true.  Each pass tries to receive a message, retries queued
 * sends, dispatches messages that were received out of order, and
 * yields the processor if nothing was received.
 *
 * Return: the result of the last shm_receive() call (NEXUS_FALSE if
 * the loop never ran).
 */

static globus_bool_t shm_receive_blocking_by_handler()
{
   nexus_bool_t got_message = NEXUS_FALSE;

   nexus_debug_printf(5, ("shm_receive_blocking_by_handler(): entering\n"));

   for (;;)
   {
      if (shm_done != NEXUS_FALSE || globus_l_shm_wakeup != GLOBUS_FALSE)
      {
         break;
      }

      got_message = shm_receive();

      /* try to send prior rsrs that couldn't be sent earlier */
      if ( send_list_head )
      {
         shm_send_pending();
      }

      /* dispatch messages received out of order */
      if ( received_list_head )
      {
         shm_dispatch_pending_receive();
      }

      if (got_message == NEXUS_FALSE)
      {
         nexus_thread_yield();
      }
   }

   nexus_debug_printf(5, ("shm_receive_blocking_by_handler(): exiting\n"));

   return(got_message);

} /* shm_receive_blocking_by_handler() */


/*-----------------------------------------------------------------------*/

/*  *********************** receive processing ***********************   */

/*-----------------------------------------------------------------------*/

/*
 * shm_receive()
 *
 * Receive new messages sent to this segment by locating
 * any cell in a receive wait status.
 */
static globus_bool_t shm_receive(void)
{
   /*
    * Scan for one cell in SHMEM_RECV_WAIT state and hand it to
    * shm_receive_message().  The first cell's status is re-checked on
    * every step of the scan and takes precedence over the indexed
    * cell.  At most one cell is processed per call; the result of
    * shm_receive_message() is returned, or NEXUS_FALSE when no cell
    * was waiting.
    */
   globus_bool_t  got_message = NEXUS_FALSE;
   shm_cell_t    *waiting_cell;
   int            idx;

   for (idx = 0; idx < SHM_CELL_COUNT; idx++)
   {
      if ( *my_first_status == SHMEM_RECV_WAIT )
      {
         got_message = shm_receive_message(my_first_cell);
         break;
      }

      if ( my_cell_status[idx] == SHMEM_RECV_WAIT )
      {
         /* translate the cell's offset into an address in our segment */
         SHMEM_CELL_ADDR(my_segment, my_cell_offsets[idx], waiting_cell);
         got_message = shm_receive_message(waiting_cell);
         break;
      }
   }

   return(got_message);

} /* shm_receive() */


/*-----------------------------------------------------------------------*/

/*
 * shm_receive_message()
 *
 * Perform the actual receive of a message sent to this segment.
 */
static nexus_bool_t shm_receive_message(shm_cell_t *recv_cell)
{   
    /*
     * Copy the message described by 'recv_cell' out of shared memory
     * into a newly allocated buffer and pass it to
     * shm_dispatch_receive().
     *
     * Messages of at most nexus_shm_mtu bytes are copied in one step
     * from the cell itself.  Larger messages are copied chunk by chunk
     * from the data area at my_segment + recv_cell->data_loc; the ints
     * at the start of cell_data hold one byte count per chunk, where a
     * non-zero count marks a chunk as ready and is reset to zero once
     * the chunk has been copied.
     *
     * Returns NEXUS_TRUE when the whole message was received and
     * dispatched, NEXUS_FALSE when the receive was suspended because a
     * chunk did not become ready within SHM_BLOCKED_CUTOFF polls (the
     * progress is then saved in the cell for a later call).
     *
     * NOTE(review): shm_exit() is called below without a matching
     * shm_enter() in this function -- confirm the intended locking.
     */
    unsigned long  msg_number;
    volatile int  *segment_vector;   /* current chunk's byte-count slot */
    int            mtu_num, left_to_recv;
    nexus_byte_t  *msg_seg, *data_loc;
    nexus_byte_t  *recv_data;

    nexus_byte_t  *receive_buffer;
    unsigned long  receive_buffer_size;

    nexus_bool_t   receive_blocked; 
    nexus_bool_t   receive_complete = NEXUS_TRUE;
    long           stall_count;
    int            cell_num, copy_amt;

    /*
     * threaded builds must ensure that other threads
     * don't begin receiving this same cell 
     *
     * NOTE(review): cell_num is only assigned (and only used) in
     * builds without BUILD_LITE.
     */
#ifndef BUILD_LITE
       cell_num = recv_cell->cell_number;
       my_cell_status[cell_num] = SHMEM_RECEIVING;
#endif

      /*
       * receive small messages directly 
       */
      receive_buffer_size = recv_cell->data_size;  
      if ( receive_buffer_size <= nexus_shm_mtu ) 
      {
         /* first argument presumably only labels the caller for diagnostics -- TODO confirm */
         NexusMalloc(shm_receive(), 
                     receive_buffer, 
                     nexus_byte_t *, 
                     receive_buffer_size);  

         memcpy(receive_buffer,
                &recv_cell->cell_data,
                receive_buffer_size);

         /* read the message number before the cell is marked unreferenced */
         msg_number = recv_cell->msg_number;
         my_cell_status[recv_cell->cell_number] = SHMEM_UNREFERENCED;
         shm_exit();
         shm_dispatch_receive(receive_buffer, receive_buffer_size, msg_number);
      }
      else
      {
        /* 
         *  large message receives will copy the message in chunks,
         *  processing concurrently with the sender.
         */
        data_loc = (nexus_byte_t *)((char*)my_segment + recv_cell->data_loc);

	/* setup first try at receive */
        if ( recv_cell->recv_buf == NULL )
	{
           NexusMalloc(shm_receive(), 
                       receive_buffer, 
                       nexus_byte_t *, 
                       receive_buffer_size);  

           recv_data = receive_buffer;
           msg_seg = data_loc;
           mtu_num = 0;
           left_to_recv = receive_buffer_size;
           segment_vector = (int*) &recv_cell->cell_data;
	}

        else  /* resuming prior preempted receive */
	{
           /* restore the position saved when the earlier attempt blocked */
           receive_buffer = recv_cell->recv_buf;
           recv_data      = recv_cell->recv_tgt;
           msg_seg        = recv_cell->recv_src;  
           mtu_num        = recv_cell->recv_mtu;
           left_to_recv   = recv_cell->recv_left;           
           segment_vector = (int*) &recv_cell->cell_data + recv_cell->recv_mtu;
         
	}


	/* perform actual receive */
        receive_blocked = NEXUS_FALSE;
        while ( left_to_recv > 0 && receive_blocked == NEXUS_FALSE ) 
	{
           /* poll until the current chunk's count is non-zero, up to the cutoff */
           stall_count = 0;
           while ( *segment_vector == 0 && stall_count < SHM_BLOCKED_CUTOFF ) 
           {  
               SHMEM_THREAD_SCHED_YIELD();
               stall_count++;
           }

           if ( stall_count >= SHM_BLOCKED_CUTOFF )
           {
             receive_blocked = NEXUS_TRUE;
           }
           else 
           {
              /* the slot holds the number of valid bytes in this chunk */
              copy_amt = *segment_vector;
              memcpy( recv_data, msg_seg, copy_amt);
              left_to_recv -= copy_amt;
              recv_data += copy_amt;
              /* the source always advances by a full mtu, whatever was copied */
              msg_seg += nexus_shm_mtu; 

              /* zero the slot to mark the chunk as consumed */
              *segment_vector = 0;
              segment_vector++;
              mtu_num++;

              /* wrap around to the first chunk after the last one */
              if (mtu_num == SHM_MTU_CHUNKS_PER_MSG)
	      {
                segment_vector = (int*) &recv_cell->cell_data;
                msg_seg = data_loc;
                mtu_num = 0;
	      }
	   }
   	}


	/* receive attempt complete */
       if ( receive_blocked == NEXUS_FALSE )
       {
         msg_number = recv_cell->msg_number;
         my_cell_status[recv_cell->cell_number] = SHMEM_UNREFERENCED;
         shm_exit();
         shm_dispatch_receive(receive_buffer, receive_buffer_size, msg_number);
       }
       else
       {
         /*
          * save the progress in the cell and put it back into
          * SHMEM_RECV_WAIT so a later call resumes the receive
          */
         receive_complete     = NEXUS_FALSE;
         recv_cell->recv_buf  = receive_buffer;
         recv_cell->recv_src  = msg_seg;
         recv_cell->recv_tgt  = recv_data;
         recv_cell->recv_mtu  = mtu_num;
         recv_cell->recv_left = left_to_recv;
         my_cell_status[recv_cell->cell_number] = SHMEM_RECV_WAIT;
       }

    }

    return(receive_complete);

} /* shm_receive_message() */

/*-----------------------------------------------------------------------*/


/*
 * shm_dispatch_receive()
 *
 * Dispatch message received into 'receive buffer'.
 *
 */
static void shm_dispatch_receive(nexus_byte_t * receive_buffer,
                                 unsigned long  receive_buffer_size,
                                 unsigned long msg_number)
{
   /*
    * Classify the raw message by its control byte (second byte when
    * SHM_USE_VERSIONING is defined, first byte otherwise):
    *   - below NEXUS_DC_FORMAT_LAST: wrap it in a nexus buffer and
    *     dispatch it if msg_number is the next expected one, else
    *     append it to the received list for later dispatch;
    *   - CLOSE_HANDLER_FLAG: set shm_done;
    *   - anything else: fatal error.
    */
   struct globus_nexus_buffer_s *buffer;
   shm_message_t * held_msg;
   int             control_byte;

   nexus_debug_printf(5, ("shm_dispatch_receive(): entering\n"));

#ifdef SHM_USE_VERSIONING
   control_byte = (int) *(receive_buffer+1);
#else
   control_byte = (int) *receive_buffer;
#endif

   /* control messages are handled first and never reach the dispatcher */
   if ( control_byte >= NEXUS_DC_FORMAT_LAST )
   {
      if ( control_byte == CLOSE_HANDLER_FLAG )
      {
         /* end the handler thread */
         shm_done = NEXUS_TRUE;
      }
      else
      {
         nexus_fatal("receive_messages(): Got unknown control message\n");
      }
      return;
   }

   shm_enter();
   set_handle_in_progress_true();
   shm_exit();

   _nx_buffer_create_from_raw(receive_buffer,
                              receive_buffer_size,
                              0,
                              receive_buffer_size,
                              NULL,
                              &buffer);

   /*
    * Messages may arrive in any order but have to be dispatched in
    * send order, so only the next expected message is dispatched now;
    * every other one is parked on the received list.
    */
   if ( msg_number != next_msg_to_recv )
   {
      NexusMalloc(shm_dispatch(),
                  held_msg,
                  shm_message_t *,
                  sizeof(shm_message_t));

      held_msg->buffer     = buffer;
      held_msg->msg_number = msg_number;
      held_msg->next       = NULL;

      /* append to the tail of the received list */
      shm_enter();
      held_msg->prior = received_list_tail;
      if ( received_list_tail != NULL )
      {
         received_list_tail->next = held_msg;
      }
      if ( received_list_head == NULL )
      {
         received_list_head = held_msg;
      }
      received_list_tail = held_msg;
      shm_exit();
   }
   else
   {
      shm_enter();
      next_msg_to_recv++;
      shm_exit();

      nexus_debug_printf(2,("shm_dispatch_receive(): dispatching message\n"));
      _nx_buffer_dispatch(buffer);
      nexus_debug_printf(2,("shm_dispatch_receive(): message dispatch complete\n"));
   }

   shm_enter();
   set_handle_in_progress_false();
   shm_exit();

} /* shm_dispatch_receive() */


/*-----------------------------------------------------------------------*/

/*
 * shm_dispatch_pending_receive()
 *
 * Check received messages that couldn't be dispatched earlier.
 */
static void shm_dispatch_pending_receive(void)
{
   /*
    * Repeatedly look through the received list for the message whose
    * number equals next_msg_to_recv; unlink, dispatch and free each
    * one found, and stop as soon as a full pass finds none.
    *
    * NOTE(review): unlike shm_dispatch_receive(), the dispatch here
    * runs between shm_enter() and shm_exit() -- confirm a handler can
    * never re-enter this module's lock.
    */
   shm_message_t * match;

   shm_enter();
   for (;;)
   {
      /* locate the next message to receive among the held dispatches */
      for (match = received_list_head; match != NULL; match = match->next)
      {
         if ( match->msg_number == next_msg_to_recv )
         {
            break;
         }
      }

      /* whole list traversed without a hit: nothing more to do now */
      if ( match == NULL )
      {
         break;
      }

      /* unlink the entry from its predecessor (or the list head) */
      if ( match->prior != NULL )
      {
         match->prior->next = match->next;
      }
      else
      {
         received_list_head = match->next;
      }

      /* unlink the entry from its successor (or the list tail) */
      if ( match->next != NULL )
      {
         match->next->prior = match->prior;
      }
      else
      {
         received_list_tail = match->prior;
      }

      next_msg_to_recv++;
      set_handle_in_progress_true();

      nexus_debug_printf(2,("shm_dispatch_pending_receive(): dispatching message\n"));
      _nx_buffer_dispatch(match->buffer);
      nexus_debug_printf(2,("shm_dispatch_pending_receive(): message dispatch complete\n"));      
      NexusFree(match);
      set_handle_in_progress_false();
   }
   shm_exit();

} /* shm_dispatch_pending_receive() */



/*-----------------------------------------------------------------------*/

/*  ************************ send processing *************************   */

/*-----------------------------------------------------------------------*/

/*
 * shm_send_rsr_outstanding()
 *
 * Return true if there are any sends outstanding for this proto,
 * otherwise false.
 */
static globus_bool_t
shm_send_rsr_outstanding(globus_nexus_proto_t *nproto)
{
    /*
     * Reports whether the list of deferred sends is non-empty.  The
     * list is module wide; 'nproto' is not consulted.
     */
    globus_bool_t outstanding;

    shm_enter();
    outstanding = send_list_head ? GLOBUS_TRUE : GLOBUS_FALSE;
    shm_exit();

    return(outstanding);
} /* shm_send_rsr_outstanding() */

/*-----------------------------------------------------------------------*/

/*
 * shm_send_rsr()
 * 
 * Send specified buffer via shared memory
 */
static int shm_send_rsr(struct globus_nexus_buffer_s *buffer)
{
    /*
     * Send the first base segment of 'buffer' to the segment of the
     * buffer's proto.  If a shared memory cell can be allocated the
     * send is started right away through shm_send(); otherwise the
     * request is appended to the list of pending sends, keeping the
     * message number handed back by shm_malloc().
     *
     * Always returns 0.
     */
    long            send_size;
    unsigned long   msg_number;
    nexus_byte_t  * send_data;
    shm_cell_t    * send_cell;
    shm_proto_t   * dest_proto;
    shm_message_t * send_message; 
    globus_nexus_base_segment_t * base_segments; 

    /* %p with a void pointer: printing a pointer through %x is undefined */
    nexus_debug_printf(2,("shm_send_rsr(): invoked with buffer: %p\n",
                          (void *) buffer));
    base_segments = buffer->base_segments;
    buffer->current_base_segment = (nexus_base_segment_t *) NULL;

    send_data  = base_segments->current;
    send_size  = base_segments->size_used; 
    dest_proto = (shm_proto_t *) buffer->proto;
    msg_number = 0;
  

    /*
     * try to allocate shm space for this send.
     * message ordering will be established for this send
     * whether the malloc succeeds or not. 
     * successful malloc requests can continue with the send.
     */
    send_cell = shm_malloc(dest_proto, send_size, &msg_number);
    if (send_cell)
    {
       /*
        * shm_send() handles small messages (single copy into the cell)
        * as well as chunked ones, so no separate small-message path is
        * needed here.
        */
       shm_send(buffer, send_cell, dest_proto, send_data, send_size); 
    }

    /*
     *  couldn't malloc now, save this send for later
     */
    else
    {
       NexusMalloc(shm_send_rsr(), 
                   send_message,
                   shm_message_t *,
                   sizeof(shm_message_t));

       send_message->buffer     = buffer;
       send_message->proto      = dest_proto;
       send_message->send_data  = send_data;
       send_message->send_size  = send_size;
       send_message->msg_number = msg_number;
       send_message->next       = NULL;

       /* append to the tail of the pending send list */
       shm_enter();
       send_message->prior      = send_list_tail;
       if (send_list_head == NULL)
       {
          send_list_head = send_message;
       }
       if (send_list_tail != NULL)
       {
          send_list_tail->next = send_message;
       }
       send_list_tail = send_message;
       shm_exit();
    }

    /*
     * NOTE(review): this shm_exit() has no matching shm_enter() in this
     * function; kept as is -- confirm the intended locking.
     */
    shm_exit();
    return(0);

} /* shm_send_rsr() */

/*-------------------------------------------------------------------------*/

/*
 * shm_send_pending()
 *
 * Retry earlier sends that couldn't be started due to unavailable
 * shared memory storage.
 */
static void shm_send_pending(void)
{
   /*
    * Walk the pending send list from its head.  For every entry whose
    * shared memory request can now be filled, start the send, unlink
    * the entry and free it.  The walk ends at the first entry whose
    * request still cannot be filled.
    */
   shm_message_t *pending, *following;
   shm_cell_t    *cell;

   shm_enter(); 
   pending = send_list_head;
   while ( pending != NULL )
   {
      cell = shm_malloc(pending->proto, 
                        pending->send_size, 
                        &pending->msg_number);
      if (!cell)
      {
         /* still no room: leave this and all later entries queued */
         break;
      }

      shm_send(pending->buffer, cell, pending->proto, 
               pending->send_data, pending->send_size); 

      /* remove the entry from the list of pending sends */
      following = pending->next;
      if (pending->prior != NULL)
      {
         pending->prior->next = following;
      }
      else
      {
         send_list_head = following;
      }
      if (following != NULL)
      {
         following->prior = pending->prior;
      }
      else
      {
         send_list_tail = pending->prior;
      }

      /* entry is off the list, reclaim its storage and move on */
      NexusFree(pending);
      pending = following;
   }
   shm_exit();

} /* shm_send_pending() */


/*-------------------------------------------------------------------------*/

/* 
 * shm_send()
 *
 * Send message by allocating shared memory in receiver's segment
 * and copying the data into it.  Setup larger sends for copying
 * in chunks.
 */
static void shm_send(struct globus_nexus_buffer_s * buffer,
                     shm_cell_t                   * send_cell,
                     shm_proto_t                  * proto,
                     nexus_byte_t                 * send_data, 
                     int                            send_size)  
{  
    /*
     * Three cases, chosen by send_size:
     *   - up to nexus_shm_mtu: copy the data into the cell, flag the
     *     cell SHMEM_RECV_WAIT and destroy the buffer;
     *   - up to nexus_shm_preemptive_send_cutoff: chunked send done in
     *     one go by shm_send_nonpreemptive(), then destroy the buffer;
     *   - larger: build a send control block and hand it to
     *     shm_send_preemptive(), which takes over the buffer.
     */
    volatile int *chunk_counts;
    nexus_byte_t *dest;
    shm_tcb_t    *tcb;
    int           slot;

    /* small message: copied in total into the receiver's segment */
    if ( send_size <= nexus_shm_mtu )
    {
       memcpy(&send_cell->cell_data, send_data, send_size );
       proto->dest_segment->status[send_cell->cell_number] = SHMEM_RECV_WAIT; 
       nexus_buffer_destroy(&buffer);
       return;
    }

    /*
     * medium and large sends: the first chunk is flagged as unsent
     * before the receiver is told to go get the message; the remaining
     * chunk counts are cleared afterwards.
     */
    chunk_counts = (int*) &send_cell->cell_data;
    chunk_counts[0] = 0;

    send_cell->recv_buf = NULL; 
    proto->dest_segment->status[send_cell->cell_number] = SHMEM_RECV_WAIT; 

    for ( slot = 1; slot < SHM_MTU_CHUNKS_PER_MSG; slot++ )
    {
       chunk_counts[slot] = 0;
    }

    /* start of the cell's data area inside the destination segment */
    dest = (nexus_byte_t *)((char*)proto->dest_segment 
           + send_cell->data_loc);

    if ( send_size <= nexus_shm_preemptive_send_cutoff )
    {
       shm_send_nonpreemptive(send_cell, send_data, send_size, dest);
       nexus_buffer_destroy(&buffer);
       return;
    }

    NexusMalloc(shm_send(),
                tcb,
                shm_tcb_t *,
                sizeof(shm_tcb_t));

    tcb->buffer       = buffer;
    tcb->send_cell    = send_cell;
    tcb->send_data    = send_data;
    tcb->send_loc     = dest;
    tcb->next         = NULL;
    tcb->send_size    = send_size;
    tcb->cur_send_mtu = 0;

    shm_send_preemptive( tcb );

} /* shm_send() */



/*-----------------------------------------------------------------------*/


/*
 * shm_send_nonpreemptive()
 *
 *  Send segmented message to the receiver without interruption.
 *  Message to send fits in segmented buffer.
 */
static void  shm_send_nonpreemptive(shm_cell_t   *send_cell,
                                    nexus_byte_t *send_data,
                                    int           send_size,
                                    nexus_byte_t *send_loc)
{
   /*
    * Copy 'send_size' bytes from 'send_data' into the chunk area at
    * 'send_loc', one chunk slot (nexus_shm_mtu bytes apart) per pass,
    * and publish each chunk's byte count in the matching int slot at
    * the start of send_cell->cell_data.  No waiting on the receiver
    * takes place here.
    */
   int max_send_amt;
   int copy_amt;
   unsigned long misalign;
   volatile int *segment_vector;

   segment_vector = (int*) &send_cell->cell_data;

   /*
    * setup first segment to page align remaining segments: the first
    * chunk is shortened so that the source address of every following
    * chunk is a multiple of nexus_shm_mtu.
    *
    * The address is reduced in unsigned arithmetic.  Casting the
    * pointer to int can truncate it to a negative value where pointers
    * are wider than int, which would make the first chunk larger than
    * nexus_shm_mtu.
    */
   misalign = (unsigned long) send_data % (unsigned long) nexus_shm_mtu;
   if ( misalign == 0 )
   {
     max_send_amt = nexus_shm_mtu;
   }
   else
   {
     max_send_amt = nexus_shm_mtu - (int) misalign;
   }


   /*
    *  copy into shared memory in chunks
    */
   while ( send_size > 0 )
   {
      copy_amt = ( send_size >= max_send_amt ) ? max_send_amt : send_size;

      /* copy the data first, then publish its size in the slot */
      memcpy( send_loc, send_data, copy_amt );
      *segment_vector = copy_amt;
      send_size -= copy_amt;
      send_data += copy_amt;

      /* every chunk after the first may carry a full mtu */
      max_send_amt = nexus_shm_mtu;
      segment_vector++;
      send_loc += nexus_shm_mtu;
   }

} /* shm_send_nonpreemptive() */


/*-----------------------------------------------------------------------*/

/*
 * shm_send_preemptive()
 *
 * Send more than one buffer of segments to receiver.
 * We may resume this concurrent send later if sender blocks.
 */
static void  shm_send_preemptive(shm_tcb_t *send_tcb)
{
   /*
    * Send (or continue sending) the message described by 'send_tcb' in
    * chunks, reusing the cell's SHM_MTU_CHUNKS_PER_MSG chunk slots in
    * a ring.  A slot may only be refilled once its byte count is zero
    * again; if it stays non-zero for SHM_BLOCKED_CUTOFF polls, the
    * progress is saved in the tcb and the tcb is put on the blocked
    * send queue.  When everything has been sent, the buffer is
    * destroyed and the tcb freed.
    */
   nexus_bool_t send_blocked = NEXUS_FALSE;
   int send_size, max_send_amt, mtu_num, copy_amt;
   unsigned long misalign;
   volatile int *segment_vector;
   nexus_byte_t *send_data, *send_loc;
   unsigned long stall_count;


   send_data = send_tcb->send_data;
   mtu_num   = send_tcb->cur_send_mtu;
   send_size = send_tcb->send_size;

   /*
    * Resume at the chunk where an earlier attempt stopped.  The count
    * slot has to be offset by mtu_num exactly like the data location;
    * otherwise a resumed send polls slot 0 while writing chunk mtu_num
    * and can overwrite a chunk that has not been consumed yet.
    */
   segment_vector = (int*) &send_tcb->send_cell->cell_data + mtu_num;
   send_loc  = send_tcb->send_loc + mtu_num * nexus_shm_mtu;

   /*
    * setup first segment to page align those remaining.
    *
    * The address is reduced in unsigned arithmetic.  Casting the
    * pointer to int can truncate it to a negative value where pointers
    * are wider than int, which would make the chunk larger than
    * nexus_shm_mtu.
    */
   misalign = (unsigned long) send_data % (unsigned long) nexus_shm_mtu;
   if ( misalign == 0 )
   {
     max_send_amt = nexus_shm_mtu;
   }
   else
   {
     max_send_amt = nexus_shm_mtu - (int) misalign;
   }


  /*
   * send the data in chunks, syncing as needed with receiver
   */
   while ( send_size > 0 && send_blocked == NEXUS_FALSE )
   {
      /* wait, up to the cutoff, for the slot's count to return to zero */
      stall_count = 0;
      while ( *segment_vector != 0 && stall_count < SHM_BLOCKED_CUTOFF ) 
      {
         SHMEM_THREAD_SCHED_YIELD();
         stall_count++; 
      }

      if ( stall_count >= SHM_BLOCKED_CUTOFF )
      {
        send_blocked = NEXUS_TRUE;
      }
      else 
      {
        copy_amt = ( send_size >= max_send_amt ) ? max_send_amt : send_size;

        /* copy the data first, then publish its size in the slot */
        memcpy( send_loc, send_data, copy_amt );
        *segment_vector = copy_amt;
        send_size -= copy_amt;
        send_data += copy_amt;

        max_send_amt = nexus_shm_mtu;
        segment_vector++;
        send_loc += nexus_shm_mtu;
        mtu_num++;

        /* wrap around to the first chunk slot after the last one */
        if (mtu_num == SHM_MTU_CHUNKS_PER_MSG)
        {
          segment_vector = (int*) &send_tcb->send_cell->cell_data;
          send_loc = send_tcb->send_loc;
          mtu_num = 0;
        }
     }
   }


   if ( send_blocked == NEXUS_FALSE )
   {
     /* everything was sent: release the buffer and the control block */
     nexus_buffer_destroy(&send_tcb->buffer);
     NexusFree(send_tcb);
   }
   else
   {
     /* remember how far we got so a later call can resume */
     send_tcb->send_data    = send_data;
     send_tcb->cur_send_mtu = mtu_num;
     send_tcb->send_size    = send_size;

     /*
      * A tcb whose cell number is lower than that of the current queue
      * head goes to the front of the blocked send queue; every other
      * tcb (including the first one on an empty queue) is appended.
      */
     if ( blocked_send_q_head 
          && send_tcb->send_cell->cell_number 
             < blocked_send_q_head->send_cell->cell_number )
     {
        send_tcb->next = blocked_send_q_head; 
        blocked_send_q_head = send_tcb;  
        if ( blocked_send_q_tail == NULL ) 
        {
           blocked_send_q_tail = send_tcb; 
        } 
     }
     else
     {
        send_tcb->next = NULL;  
        Enqueue(blocked_send_q_head, blocked_send_q_tail, send_tcb); 
     }
   }

} /* shm_send_preemptive() */


/*-----------------------------------------------------------------------*/



#endif /* HAVE_SHM_PROTO */


