/*
 * Copyright (c) 2001-2003 The Trustees of Indiana University.  
 *                         All rights reserved.
 * Copyright (c) 1998-2001 University of Notre Dame. 
 *                         All rights reserved.
 * Copyright (c) 1994-1998 The Ohio State University.  
 *                         All rights reserved.
 * 
 * This file is part of the LAM/MPI software package.  For license
 * information, see the LICENSE file in the top level directory of the
 * LAM/MPI source distribution.
 * 
 * $HEADER$
 *
 * $Id: ssi_rpi_gm_actions.c,v 1.19.2.8 2004/03/12 01:19:11 vsahay Exp $
 *
 *	Function:	- calls to gm
 */

#include <lam_config.h>
#include <lam-ssi-rpi-gm-config.h>

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include <mpi.h>
#include <mpisys.h>
#include <rpisys.h>
#include <lamdebug.h>

#include <rpi_gm.h>
#include <rpi_gm_unexpected.h>
#include <rpi_gm_dreg.h>
#include <rpi_gm_interval.h>
#include <rpi_gm_recv_events.h>
#include <rpi_gm_recv_queue.h>
#include <rpi_gm_send_queue.h>
#include <rpi_gm_actions.h>


/*
 *       Function: performs initial setup for Myrinet/gm: queries the
 *                 token counts, resolves the tiny/short message
 *                 thresholds (with environment overrides), enables
 *                 remote memory access, initializes the interval and
 *                 DMA helpers, and posts the initial receive buffers
 *       Accepts:  _proc for process
 *       Returns:  0 or LAMERROR
 */
int
lam_ssi_rpi_gm_gm_setup(struct _proc *p)
{
  int pair;
  char *value;
  char *short_buf;
  struct lam_ssi_rpi_gm_envl *env_buf;

  /* Ask gm how many receive / send tokens we have to work with */

  lam_ssi_rpi_gm_rtokens = gm_num_receive_tokens(p->p_rpi->cp_gm_port);
  lam_ssi_rpi_gm_stokens = gm_num_send_tokens(p->p_rpi->cp_gm_port);

  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "setup_gm: starting with %d recv tokens, %d send tokens",
                  lam_ssi_rpi_gm_rtokens, lam_ssi_rpi_gm_stokens));

  /* Tiny message length: compiled-in default unless the environment
     supplies a non-negative replacement */

  lam_ssi_rpi_gm_tinymsglen = LAM_SSI_RPI_GM_TINYMSGLEN;
  value = getenv("LAM_MPI_SSI_rpi_gm_tinymsglen");
  if (value != NULL) {
    lam_ssi_rpi_gm_tinymsglen = atoi(value);
    if (lam_ssi_rpi_gm_tinymsglen < 0) {
      lam_ssi_rpi_gm_tinymsglen = LAM_SSI_RPI_GM_TINYMSGLEN;
    }
  }

  /* Short message length: same override rules as above */

  lam_ssi_rpi_gm_shortmsglen = LAM_SSI_RPI_GM_SHORTMSGLEN;
  value = getenv("LAM_MPI_SSI_rpi_gm_shortmsglen");
  if (value != NULL) {
    lam_ssi_rpi_gm_shortmsglen = atoi(value);
    if (lam_ssi_rpi_gm_shortmsglen < 0) {
      lam_ssi_rpi_gm_shortmsglen = LAM_SSI_RPI_GM_SHORTMSGLEN;
    }
  }

  /* Short must be strictly larger than tiny */

  if (!(lam_ssi_rpi_gm_shortmsglen > lam_ssi_rpi_gm_tinymsglen)) {
    lam_ssi_rpi_gm_shortmsglen = lam_ssi_rpi_gm_tinymsglen + 1;
  }

  /* An envelope DMA buffer holds the envelope plus a tiny payload; a
     short DMA buffer holds one short message */

  lam_ssi_rpi_gm_dma_env_len =
    lam_ssi_rpi_gm_tinymsglen + sizeof(struct lam_ssi_rpi_gm_envl);
  lam_ssi_rpi_gm_dma_short_len = lam_ssi_rpi_gm_shortmsglen;

  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "setup_gm: Tiny msg len: %d, Short msg len %d",
                  lam_ssi_rpi_gm_tinymsglen, lam_ssi_rpi_gm_shortmsglen));
  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "setup_gm: Data dma tiny msg len (plus env): %d, "
                  "Data dma short msg len %d",
                  lam_ssi_rpi_gm_dma_env_len, lam_ssi_rpi_gm_dma_short_len));

  /* Direct DMA requires remote memory access on the port */

  if (gm_allow_remote_memory_access(p->p_rpi->cp_gm_port) != GM_SUCCESS) {
    printf("WARNING: gm_allow_remote_memory_access failed!\n");
    return LAMERROR;
  }

  /* The user may force pinning off (dma_malloc/memcpy is used
     instead) by setting the nopin variable to 1 */

  value = getenv("LAM_MPI_SSI_rpi_gm_nopin");
  if (value != NULL && atoi(value) == 1) {
    lam_ssi_rpi_gm_can_register_mem = 0;
  }

  /* Pinned interval registration */

  if (lam_ssi_rpi_gm_interval_init(p->p_rpi) == LAMERROR) {
    return LAMERROR;
  }

  /* Lookaside table for quick messages */

  if (lam_ssi_rpi_gm_dma_init(p->p_rpi) == LAMERROR) {
    return LAMERROR;
  }

  /* gm sizes used in gm_send_with_callback.  The envelope size and the
     data size must differ, so bump the data size if necessary. */

  lam_ssi_rpi_gm_env_min_size_for_length =
    gm_min_size_for_length(lam_ssi_rpi_gm_dma_env_len);
  lam_ssi_rpi_gm_data_min_size_for_length =
    gm_min_size_for_length(lam_ssi_rpi_gm_shortmsglen);
  if (!(lam_ssi_rpi_gm_data_min_size_for_length >
        lam_ssi_rpi_gm_env_min_size_for_length)) {
    lam_ssi_rpi_gm_data_min_size_for_length =
      lam_ssi_rpi_gm_env_min_size_for_length + 1;
  }

  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "setup_gm: env min size of len: %d, "
                  "Data min size for len: %d",
                  lam_ssi_rpi_gm_env_min_size_for_length,
                  lam_ssi_rpi_gm_data_min_size_for_length));

  /* Hand gm the receive buffers.  Each pass posts one envelope buffer
     and one short-message buffer, so half the receive token count is
     the number of passes.  Long messages do not need buffers here --
     they are sent using RMA PUTs. */

  pair = 0;
  while (pair < (lam_ssi_rpi_gm_rtokens / 2)) {
    env_buf = lam_ssi_rpi_gm_dma_env_malloc();
    gm_provide_receive_buffer(lam_myproc->p_rpi->cp_gm_port,
                              env_buf,
                              lam_ssi_rpi_gm_env_min_size_for_length,
                              LAM_SSI_RPI_GM_PRIORITY);

    short_buf = lam_ssi_rpi_gm_dma_short_malloc();
    gm_provide_receive_buffer(lam_myproc->p_rpi->cp_gm_port,
                              short_buf,
                              lam_ssi_rpi_gm_data_min_size_for_length,
                              LAM_SSI_RPI_GM_PRIORITY);
    ++pair;
  }

  /* All done */

  return 0;
}


/*
 *      get_gm_port
 *          
 *      Function:       - gets a port for gmnet communications
 *                      - LAM_MPI_SSI_rpi_gm_maxport overrides the upper
 *                        bound of the scan (values < 1 fall back to
 *                        LAM_SSI_RPI_GM_MAX_PORT_NUM)
 *                      - LAM_MPI_SSI_rpi_gm_port overrides the starting
 *                        port; when the starting port is >= 0, only
 *                        that one port is tried, otherwise the scan
 *                        starts at port 1
 *                      - on success fills in the port handle, port id,
 *                        and local/global node ids in the result struct
 *                      - if a port was opened but a later step fails,
 *                        the port is closed again before returning
 *      Accepts:        - _proc for process, struct to hold results
 *      Returns:        - 0 or LAMERROR
 */
int 
lam_ssi_rpi_gm_get_port(struct _proc *p, lam_ssi_rpi_gm_port_t *rgp)
{
  char name[64];
  char *value;
  int result;
  int portNum = 0, cardNum = 0;
  int foundPort = 0;
  int minPort = lam_ssi_rpi_gm_port_num;
  int rank = p->p_gps.gps_grank;
  const int maxCards = 8;       /* card numbers 0..7 are probed */

  LAM_ZERO_ME(name);
  snprintf(name, sizeof(name), "LAM/MPI rank-%d", rank);

  /* Check for environment variable overrides */

  value = getenv("LAM_MPI_SSI_rpi_gm_maxport");
  if (value != NULL) {
    lam_ssi_rpi_gm_max_port_num = atoi(value);
    if (lam_ssi_rpi_gm_max_port_num < 1) {
      lam_ssi_rpi_gm_max_port_num = LAM_SSI_RPI_GM_MAX_PORT_NUM;
    }
  }
  value = getenv("LAM_MPI_SSI_rpi_gm_port");
  if (value != NULL) {
    minPort = atoi(value);
  }

  /* A negative starting port means "no specific port requested": scan
     upward from port 1.  Otherwise restrict the scan to exactly the
     requested port by making the (exclusive) upper bound minPort + 1. */

  if (minPort < 0) {
    minPort = 1;
  } else {
    lam_ssi_rpi_gm_max_port_num = minPort + 1;
  }

  /* Try to find an available port by scanning from minPort - maxPort */

  for (foundPort = 0, portNum = minPort; 
       portNum < lam_ssi_rpi_gm_max_port_num; ++portNum) {
    for (cardNum = 0; cardNum < maxCards; ++cardNum) {
      if (gm_open(&(rgp->rgp_gm_port),
                  cardNum,
                  portNum,
                  name, 
                  LAM_SSI_RPI_GM_API_VERSION) == GM_SUCCESS) {
        foundPort = 1;
        break;
      }
    }

    if (foundPort == 1) {
      break;
    }
  }

  /* if we didn't find one, bail */

  if (!foundPort) {
    lam_debug_cond((lam_ssi_rpi_gm_did,
                    "get_gm_port: error finding port (1-%d tried)", 
                    portNum));
    return(LAMERROR);
  }
  rgp->rgp_port_id = portNum;
    
  /* get the GMid */

  result = gm_get_node_id(rgp->rgp_gm_port, &(rgp->rgp_local_node_id));
  if (result != GM_SUCCESS) {
    lam_debug_cond((lam_ssi_rpi_gm_did,
                    "get_gm_port: Couldn't get my gmID (%d)",
                    result));

    /* The port is open at this point; release it so that a failed
       call does not leak it.  Reset the handle the same way
       lam_ssi_rpi_gm_gm_finalize() does so a later finalize will not
       close it a second time. */

    gm_close(rgp->rgp_gm_port);
    rgp->rgp_gm_port = NULL;
    rgp->rgp_port_id = -1;
    return(LAMERROR);
  }

#if LAM_SSI_RPI_GM_2
  /* If we're using gm 2.x, we need to convert my local node ID to a
     global node ID */

  if (gm_node_id_to_global_id(rgp->rgp_gm_port,
                              rgp->rgp_local_node_id,
                              &(rgp->rgp_global_node_id)) != GM_SUCCESS) {
    lam_debug_cond((lam_ssi_rpi_gm_did, "get_gm_port: "
                    "Unable to convert local ID to global ID"));

    /* Same cleanup as above: do not leave the opened port behind */

    gm_close(rgp->rgp_gm_port);
    rgp->rgp_gm_port = NULL;
    rgp->rgp_port_id = -1;
    return(LAMERROR);
  }
#else
  rgp->rgp_global_node_id = rgp->rgp_local_node_id;
#endif

  lam_debug_cond((lam_ssi_rpi_gm_did, "get_gm_port: gmID=%d port=%d", 
                  rgp->rgp_global_node_id, rgp->rgp_port_id));

  return 0;
}


/*
 * Shut down GM.  If the given port structure holds an open port, close
 * it and reset the structure's fields; gm_finalize() is invoked in
 * every case.  Always returns 0.
 */
int
lam_ssi_rpi_gm_gm_finalize(lam_ssi_rpi_gm_port_t *myport)
{
  int port_is_open = 0;

  /* Only look inside the structure when we were actually given one */

  if (myport != NULL) {
    port_is_open = (myport->rgp_gm_port != NULL);
  }

  if (port_is_open) {
    gm_close(myport->rgp_gm_port);

    /* Wipe the bookkeeping so the structure no longer looks open */

    myport->rgp_port_id = -1;
    myport->rgp_local_node_id = 0;
    myport->rgp_global_node_id = 0;
    myport->rgp_gm_port = NULL;
  }

  gm_finalize();

  return 0;
}

/************************************************************************/

/*
 * Check for and act on gm events.  Do so in a blocking or
 * non-blocking manner, as determined by lam_ssi_rpi_gm_flblock.
 *
 * Each pass through the loop fetches one event from this process's gm
 * port, dispatches on its type, then tries to advance the pending
 * send / ack queues if either is non-empty.
 *
 * Returns 0 when the loop exits normally, or LAMERROR if
 * lam_ssi_rpi_gm_recv_event() or lam_ssi_rpi_gm_send_queue_advance()
 * returns non-zero.
 */
int 
lam_ssi_rpi_gm_gm_advance(void)
{
  /* Set to 1 only for the GM_FAST_* receive event types; handed to
     lam_ssi_rpi_gm_recv_event() */
  int fast;
  /* Becomes non-zero once we are eligible to return; it is never
     reset inside the loop */
  int done = 0;
  gm_recv_event_t *event;
  struct lam_ssi_rpi_proc *proc;
#if LAM_WANT_DEBUG
  /* NOTE(review): assigned in the loop below but not read anywhere in
     this function -- presumably kept for inspection in a debugger */
  int event_type;
#endif

  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "%d: lam_ssi_rpi_gm_gm_advance(%d) started",
                  lam_myproc->p_gps.gps_grank,
                  lam_ssi_rpi_gm_flblock));

  proc = lam_myproc->p_rpi;

  /* If blocking, get one event and then keep going as long as there
     are more pending. If non-blocking, go until a null-event */

  while (1) {
    fast = 0;
    lam_debug_cond((lam_ssi_rpi_gm_did, 
                    "%d: lam_ssi_rpi_gm_gm_advance calling gm_*receive",
                    lam_myproc->p_gps.gps_grank));
    if (lam_ssi_rpi_gm_flblock == 1)
      event = gm_blocking_receive(proc->cp_gm_port);
    else
      event = gm_receive(proc->cp_gm_port);

#if LAM_WANT_DEBUG
    event_type = GM_RECV_EVENT_TYPE(event);
#endif
    switch (GM_RECV_EVENT_TYPE(event)) {
    case GM_FAST_RECV_EVENT:
    case GM_FAST_HIGH_RECV_EVENT:
    case GM_FAST_PEER_RECV_EVENT:
    case GM_FAST_HIGH_PEER_RECV_EVENT:
      fast = 1;
      /* FALLTHROUGH -- fast receives share the handling below; only
         the "fast" flag differs */
    case GM_RAW_RECV_EVENT:
    case GM_RECV_EVENT:
    case GM_HIGH_RECV_EVENT:
    case GM_PEER_RECV_EVENT:
    case GM_HIGH_PEER_RECV_EVENT:

      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "%d: lam_ssi_rpi_gm_gm_advance handling receive event",
                      lam_myproc->p_gps.gps_grank));
      if (lam_ssi_rpi_gm_recv_event(event, fast) != 0) {
	lam_debug_cond((lam_ssi_rpi_gm_did, 
                        "gm_advance: recv_event returns LAMERROR"));
	return LAMERROR;
      }

      /* We just handled a receive, so if we're in blocking mode, we
         are [potentially] done */

      if (lam_ssi_rpi_gm_flblock == 1)
	done = 1;
      break;

    case GM_NO_RECV_EVENT:

      /* There's no gm events pending.  So if we're not blocking, then
         we're done */

      lam_debug_cond((lam_ssi_rpi_gm_did,
                      "%d: no recv event:lam_ssi_rpi_gm_flblock=%d",
                      lam_myproc->p_gps.gps_grank,
                      lam_ssi_rpi_gm_flblock));
      if (lam_ssi_rpi_gm_flblock == 0)
	done = 1;
      break;

    case GM_ALARM_EVENT:
    case _GM_SLEEP_EVENT:

      /* If we're about to sleep, check to see if we have advanced.
	 If we're in blocking mode:
	 - if we have already advanced, return
	 - if we have not yet advanced, allow it to block
	 If we're in non-blocking mode:
	 - return, regardless of whether we have advanced or not */
      
      lam_debug_cond((lam_ssi_rpi_gm_did, "%d: alarm / sleep event",
                      lam_myproc->p_gps.gps_grank));
      if (lam_ssi_rpi_gm_flblock == 1) {
	if (lam_ssi_rpi_gm_haveadv == 1) {
	  lam_debug_cond((lam_ssi_rpi_gm_did, 
                          "%d: not sleeping in "
                          "lam_ssi_rpi_gm_gm_advance() -- "
                          "found someone ready",
                          lam_myproc->p_gps.gps_grank));
	  done = 1;
	  break;
	}
      } else {
	lam_debug_cond((lam_ssi_rpi_gm_did, 
                        "%d: not sleeping in non-blocking",
                        lam_myproc->p_gps.gps_grank));
	done = 1;
	break;
      }

      /* Only reached in blocking mode when lam_ssi_rpi_gm_haveadv is
         not 1; both other paths above break out of the switch */

      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "%d: sleep/alarm fall through to gm_unknown", 
                      lam_myproc->p_gps.gps_grank));

      /* This fall-through to the gm_unknown() is by design -- if no
         interesting events have happened yet, and we get a sleep
         event, fall through and let the gm device sleep/block. */

      /* FALLTHROUGH */
    default:
      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "%d: gm_unknown event: %d -- "
                      "calling gm_unknown, haveadv %d", 
                      lam_myproc->p_gps.gps_grank,
                      GM_RECV_EVENT_TYPE(event), lam_ssi_rpi_gm_haveadv));
      gm_unknown(proc->cp_gm_port, event);
      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "%d: back from gm_unknown event: %d", 
                      lam_myproc->p_gps.gps_grank,
                      GM_RECV_EVENT_TYPE(event)));
      
      /* Check the lam_ssi_rpi_gm_haveadv value to see if we just finished
         a send in a callback */
      
      /* Only in blocking mode, and only if "done" is not already set:
         adopt the current haveadv value as the done flag */
      if (lam_ssi_rpi_gm_flblock == 1)
	if (done == 0)
	  done = lam_ssi_rpi_gm_haveadv;
      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "%d: back from gm_unknown event: %d -- "
                      "haveadv %d, done %d", 
                      lam_myproc->p_gps.gps_grank,
                      GM_RECV_EVENT_TYPE(event), 
                      lam_ssi_rpi_gm_haveadv, done));
    }

    /* If we reclaimed a send token (i.e., some send completed), try
       to advance the pending send queue (if there are any).
       Reference a global variable here to avoid invoking a extra
       function call on every iteration through this loop.  */

    if (!LAM_SSI_RPI_GM_SEND_QUEUE_EMPTY() ||
        !LAM_SSI_RPI_GM_ACK_QUEUE_EMPTY())
      if (lam_ssi_rpi_gm_send_queue_advance() != 0) {
	lam_debug_cond((lam_ssi_rpi_gm_did, 
                        "gm_advance: send_queue_advance returns LAMERROR"));
	return LAMERROR;
      }
    
    /* Are we done?  This logic just seems a bit easier than trying to
       put it in a single test statement.  :-) */
    
    /* The logic here: "done" is set when the desired action has
       completed.  Hence, when "done" is set, we are eligible to
       return.  However, for efficiency's sake, if there is more stuff
       to read, we might as well read it now.  Command decision: only
       continue reading if we're in blocking mode.  If we're in
       non-blocking mode, return. */

    /* Note the comparison is against exactly 1 */
    if (done == 1) {
      if (lam_ssi_rpi_gm_flblock == 1) {
	if (gm_receive_pending(proc->cp_gm_port) == 0)
	  break;
      } else
	break;
    }
  }

  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "%d: dropping out of lam_ssi_rpi_gm_advance",
                  lam_myproc->p_gps.gps_grank));

  return 0;
}


/************************************************************************/

/*
 * Send the envelope for a request.  All setup has been done already;
 * this just piggybacks a tiny message body directly behind the
 * envelope (when there is one that fits) and posts the gm send with
 * the given completion callback.  Always returns 0.
 */
int 
lam_ssi_rpi_gm_push_envelope(MPI_Request req, 
                             gm_send_completion_callback_t callback)
{
  struct lam_ssi_rpi_proc *peer = req->rq_proc->p_rpi;
  struct lam_ssi_rpi_gm_envl *outgoing;
  char *payload = req->rq_packbuf;
  int payload_len = req->rq_rpi->cq_envbuf->ge_env.ce_len;
  int send_len = sizeof(struct lam_ssi_rpi_gm_envl);

  /* Prefer the secondary envelope buffer when the request has one */

  if (req->rq_rpi->cq_envbuf2 != NULL) {
    outgoing = req->rq_rpi->cq_envbuf2;
  } else {
    outgoing = req->rq_rpi->cq_envbuf;
  }

#if LAM_WANT_DEBUG
  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "gm_push_envelope: rank %d, tag %d, cid %d, flags 0x%x, len %d, seq %d",
                  outgoing->ge_env.ce_rank,
                  outgoing->ge_env.ce_tag,
                  outgoing->ge_env.ce_cid,
                  outgoing->ge_env.ce_flags,
                  outgoing->ge_env.ce_len,
                  outgoing->ge_env.ce_seq));
#endif

  /* A non-empty body no longer than the tiny threshold rides along in
     the same buffer, immediately after the envelope */

  if (payload != NULL) {
    if (payload_len > 0 && payload_len <= lam_ssi_rpi_gm_tinymsglen) {
      lam_memcpy(&outgoing[1], payload, payload_len);
      send_len += payload_len;
    }
  }

  /* Post the send.  Obtaining a send token beforehand is the caller's
     job, so nothing needs to be checked here. */

  gm_send_with_callback(lam_myproc->p_rpi->cp_gm_port,
                        outgoing,
                        lam_ssi_rpi_gm_env_min_size_for_length,
                        send_len,
                        LAM_SSI_RPI_GM_PRIORITY,
                        peer->cp_local_node_id,
                        peer->cp_port_id,
                        callback,
                        req);
  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "did gm_send / env to proc %d, body_len %d, copy_len %d, sendbuf %p",
                  peer->cp_proc->p_gps.gps_grank, payload_len, send_len,
                  outgoing));

  return 0;
}


/*
 * Send the body of a short message.  All setup has been done already;
 * this picks the buffer to send from and the matching completion
 * callback, then posts the gm send.  Always returns 0.
 */
int
lam_ssi_rpi_gm_push_short_body(MPI_Request req,
                               gm_send_completion_callback_t callback_unpin,
                               gm_send_completion_callback_t callback_no_unpin)
{
  struct lam_ssi_rpi_proc *peer = req->rq_proc->p_rpi;
  char *user_buf = req->rq_packbuf;
  int nbytes = req->rq_packsize;
  char *outgoing;
  gm_send_completion_callback_t on_complete;

  /* A NULL dma_data_buf means the packed buffer can be sent in place.
     Otherwise pinning was not possible and a DMA buffer was allocated
     (way back in req_start): stage the data into it and use the
     callback that releases it later. */

  if (req->rq_rpi->dma_data_buf == NULL) {
    outgoing = user_buf;
    on_complete = callback_no_unpin;
  } else {
    outgoing = req->rq_rpi->dma_data_buf;
    on_complete = callback_unpin;
    lam_memcpy(outgoing, user_buf, nbytes);
  }

  /* Post the send; gm_send_with_callback() has no return value to
     check */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "send_short_body: to %d len %d size %d buf %p", 
                  peer->cp_local_node_id,
                  nbytes,
                  lam_ssi_rpi_gm_data_min_size_for_length,
                  outgoing));
  gm_send_with_callback(lam_myproc->p_rpi->cp_gm_port,
                        outgoing,
                        lam_ssi_rpi_gm_data_min_size_for_length,
                        nbytes,
                        LAM_SSI_RPI_GM_PRIORITY,
                        peer->cp_local_node_id,
                        peer->cp_port_id,
                        on_complete,
                        req);
  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "did gm_send_with_callback / short body to proc %d",
                  peer->cp_proc->p_gps.gps_grank));
  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "Rank %d: push_short_body: short body to proc %d, "
                  "req %p, dma_buf %p\n",
                  lam_myproc->p_gps.gps_grank, 
                  peer->cp_proc->p_gps.gps_grank,
                  req, req->rq_rpi->dma_data_buf));

  return 0;
}


/*
 * Simple wrapper around gm_provide_receive_buffer_with_tag(), just so
 * that we can try to keep all gm_* calls here in this file.
 *
 *      Accepts:        - buffer to hand to gm
 *                      - gm size to post the buffer under
 *                      - tag to associate with the buffer
 *      Returns:        - 0, always (the underlying gm call returns
 *                        void, so there is no error to report)
 */
int 
lam_ssi_rpi_gm_provide_buffer(char *buffer, unsigned int size, int tag)
{
  /* Pass the caller's size as the size argument.  Previously the tag
     was passed in both the size and the tag positions and the size
     parameter was ignored. */

  gm_provide_receive_buffer_with_tag(lam_myproc->p_rpi->cp_gm_port,
                                     buffer, 
                                     size,
                                     LAM_SSI_RPI_GM_PRIORITY,
                                     (unsigned int) tag);
  return 0;
}


/*
 * Send the body of a long message with a gm directed send to the
 * target address carried in the request's envelope.  All setup has
 * been done already; this picks the local source buffer and matching
 * completion callback, then posts the send.  Always returns 0.
 */
int
lam_ssi_rpi_gm_push_long_body(MPI_Request req,
                              gm_send_completion_callback_t callback_unpin,
                              gm_send_completion_callback_t callback_no_unpin)
{
  struct lam_ssi_rpi_proc *peer = req->rq_proc->p_rpi;
  int nbytes = req->rq_rpi->cq_envbuf->ge_env.ce_len;
  gm_remote_ptr_t remote_addr = 
    (gm_remote_ptr_t)(gm_up_t) req->rq_rpi->cq_envbuf->ge_target;
  char *outgoing;
  gm_send_completion_callback_t on_complete;

  /* A NULL dma_data_buf means the packed buffer is sent in place.
     Otherwise pinning was not possible and a DMA buffer was allocated
     (way back in req_start).  No copy happens here in that case: the
     data was already placed in the DMA buffer earlier (latency-hiding
     technique), so it only needs the callback that releases it. */

  if (req->rq_rpi->dma_data_buf == NULL) {
    outgoing = req->rq_packbuf;
    on_complete = callback_no_unpin;
  } else {
    outgoing = req->rq_rpi->dma_data_buf;
    on_complete = callback_unpin;
  }

  /* Post the send; gm_directed_send_with_callback() has no return
     value to check */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "send_long_body: to %d len %d buf 0x%p, target buf 0x%p", 
                  peer->cp_local_node_id, nbytes, outgoing, 
                  req->rq_rpi->cq_envbuf->ge_target));
  gm_directed_send_with_callback(lam_myproc->p_rpi->cp_gm_port,
                                 outgoing,
                                 (gm_remote_ptr_t)(gm_up_t) remote_addr,
                                 nbytes,
                                 LAM_SSI_RPI_GM_PRIORITY,
                                 peer->cp_local_node_id,
                                 peer->cp_port_id,
                                 on_complete,
                                 req);
  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "did gm_send_with_callback / long body to proc %d",
                  peer->cp_proc->p_gps.gps_grank));

  return 0;
}
