/*
*         OpenPBS (Portable Batch System) v2.3 Software License
*
* Copyright (c) 1999-2000 Veridian Information Solutions, Inc.
* All rights reserved.
*
* ---------------------------------------------------------------------------
* For a license to use or redistribute the OpenPBS software under conditions
* other than those described below, or to purchase support for this software,
* please contact Veridian Systems, PBS Products Department ("Licensor") at:
*
*    www.OpenPBS.org  +1 650 967-4675                  sales@OpenPBS.org
*                        877 902-4PBS (US toll-free)
* ---------------------------------------------------------------------------
*
* This license covers use of the OpenPBS v2.3 software (the "Software") at
* your site or location, and, for certain users, redistribution of the
* Software to other sites and locations.  Use and redistribution of
* OpenPBS v2.3 in source and binary forms, with or without modification,
* are permitted provided that all of the following conditions are met.
* After December 31, 2001, only conditions 3-6 must be met:
*
* 1. Commercial and/or non-commercial use of the Software is permitted
*    provided a current software registration is on file at www.OpenPBS.org.
*    If use of this software contributes to a publication, product, or
*    service, proper attribution must be given; see www.OpenPBS.org/credit.html
*
* 2. Redistribution in any form is only permitted for non-commercial,
*    non-profit purposes.  There can be no charge for the Software or any
*    software incorporating the Software.  Further, there can be no
*    expectation of revenue generated as a consequence of redistributing
*    the Software.
*
* 3. Any Redistribution of source code must retain the above copyright notice
*    and the acknowledgment contained in paragraph 6, this list of conditions
*    and the disclaimer contained in paragraph 7.
*
* 4. Any Redistribution in binary form must reproduce the above copyright
*    notice and the acknowledgment contained in paragraph 6, this list of
*    conditions and the disclaimer contained in paragraph 7 in the
*    documentation and/or other materials provided with the distribution.
*
* 5. Redistributions in any form must be accompanied by information on how to
*    obtain complete source code for the OpenPBS software and any
*    modifications and/or additions to the OpenPBS software.  The source code
*    must either be included in the distribution or be available for no more
*    than the cost of distribution plus a nominal fee, and all modifications
*    and additions to the Software must be freely redistributable by any party
*    (including Licensor) without restriction.
*
* 6. All advertising materials mentioning features or use of the Software must
*    display the following acknowledgment:
*
*     "This product includes software developed by NASA Ames Research Center,
*     Lawrence Livermore National Laboratory, and Veridian Information
*     Solutions, Inc.
*     Visit www.OpenPBS.org for OpenPBS software support,
*     products, and information."
*
* 7. DISCLAIMER OF WARRANTY
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT
* ARE EXPRESSLY DISCLAIMED.
*
* IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE
* U.S. GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/
/*
 * Functions which provide basic operation on the job structure
 *
 * Included public functions are:
 *
 *   job_abt   abort (remove from server) a job
 *   job_alloc    allocate job struct and initialize defaults
 *   job_free   free space allocated to the job structure and its
 *    childern structures.
 *   job_purge   purge job from server
 *
 *   job_clone    clones a job (for use with job_arrays)
 *   job_clone_wt work task for cloning a job
 *   job_unlink_file() unlinks a given file using job credentials
 *
 * Include private function:
 *   job_init_wattr() initialize job working attribute array to "unspecified"
 */

#include <pbs_config.h>   /* the master config generated by configure */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <ctype.h>
#include <errno.h>
#include <assert.h>

#ifndef SIGKILL
#include <signal.h>
#endif
#if __STDC__ != 1
#include <memory.h>
#endif

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <dirent.h>
#include <stdlib.h>
#include <string.h>
#include <netdb.h>
#include <pthread.h>

#include "pbs_ifl.h"
#include "list_link.h"
#include "work_task.h"
#include "attribute.h"
#include "resource.h"
#include "server_limits.h"
#include "server.h"
#include "queue.h"
#include "pbs_job.h"
#include "log.h"
#include "pbs_error.h"
#include "svrfunc.h"
#include "acct.h"
#include "net_connect.h"
#include "portability.h"


#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif

int conn_qsub(char *, long, char *);
void job_purge(job *);

/* External functions */
extern void mom_checkpoint_delete_files(job *pjob);

#if IBM_SP2==2  /* IBM SP PSSP 3.1 */
void unload_sp_switch A_((job *pjob));
#endif   /* IBM SP */

#ifdef PENABLE_LINUX26_CPUSETS
extern int use_cpusets(job *);
#endif /* PENABLE_LINUX26_CPUSETS */
/* Local Private Functions */

static void job_init_wattr A_((job *));

/* Global Data items */

extern gid_t pbsgroup;
extern char *msg_abt_err;
extern char *path_jobs;
extern char *path_spool;
extern char *path_aux;
extern char  server_name[];
extern time_t time_now;
extern int   LOGLEVEL;

extern tlist_head svr_newjobs;
extern tlist_head svr_alljobs;


void nodes_free A_((job *));
int TTmpDirName A_((job *, char *));


void tasks_free(

  job *pj)

  {
  task *tp = (task *)GET_NEXT(pj->ji_tasks);
  obitent *op;
  infoent *ip;

  while (tp != NULL)
    {
    op = (obitent *)GET_NEXT(tp->ti_obits);

    while (op != NULL)
      {
      delete_link(&op->oe_next);

      free(op);

      op = (obitent *)GET_NEXT(tp->ti_obits);
      }  /* END while (op != NULL) */

    ip = (infoent *)GET_NEXT(tp->ti_info);

    while (ip != NULL)
      {
      delete_link(&ip->ie_next);

      free(ip->ie_name);
      free(ip->ie_info);
      free(ip);

      ip = (infoent *)GET_NEXT(tp->ti_info);
      }

    close_conn(tp->ti_fd);

    delete_link(&tp->ti_jobtask);

    free(tp);

    tp = (task *)GET_NEXT(pj->ji_tasks);
    }  /* END while (tp != NULL) */

  return;
  }  /* END tasks_free() */





/*
 * remtree - remove a tree (or single file)
 *
 * returns  0 on success
 *  -1 on failure
 */

int remtree(

  char *dirname)

  {
  static char id[] = "remtree";
  DIR  *dir;

  struct dirent *pdir;
  char  namebuf[MAXPATHLEN], *filnam;
  int  i;
  int  rtnv = 0;
#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64)

  struct stat64 sb;
#else

  struct stat sb;
#endif

#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64)

  if (lstat64(dirname, &sb) == -1)
#else
  if (lstat(dirname, &sb) == -1)
#endif
    {

    if (errno != ENOENT)
      log_err(errno, id, "stat");

    return(-1);
    }

  if (S_ISDIR(sb.st_mode))
    {
    if ((dir = opendir(dirname)) == NULL)
      {
      if (errno != ENOENT)
        log_err(errno, id, "opendir");

      return(-1);
      }

    strcpy(namebuf, dirname);

    strcat(namebuf, "/");

    i = strlen(namebuf);

    filnam = &namebuf[i];

    while ((pdir = readdir(dir)) != NULL)
      {
      if ((pdir->d_name[0] == '.') &&
          ((pdir->d_name[1] == '\0') || (pdir->d_name[1] == '.')))
        continue;

      strcpy(filnam, pdir->d_name);

#if defined(HAVE_STRUCT_STAT64) && defined(HAVE_STAT64)
      if (lstat64(namebuf, &sb) == -1)
#else
      if (lstat(namebuf, &sb) == -1)
#endif
        {
        log_err(errno, id, "stat");

        rtnv = -1;

        continue;
        }

      if (S_ISDIR(sb.st_mode))
        {
        rtnv = remtree(namebuf);
        }
      else if (unlink(namebuf) < 0)
        {
        if (errno != ENOENT)
          {
          sprintf(log_buffer, "unlink failed on %s",
                  namebuf);

          log_err(errno, id, log_buffer);

          rtnv = -1;
          }
        }
      else if (LOGLEVEL >= 7)
        {
        sprintf(log_buffer, "unlink(1) succeeded on %s", namebuf);

        log_ext(-1, id, log_buffer, LOG_DEBUG);
        }
      }    /* END while ((pdir = readdir(dir)) != NULL) */

    closedir(dir);

    if (rmdir(dirname) < 0)
      {
      if ((errno != ENOENT) && (errno != EINVAL))
        {
        sprintf(log_buffer, "rmdir failed on %s",
                dirname);

        log_err(errno, id, log_buffer);

        rtnv = -1;
        }
      }
    else if (LOGLEVEL >= 7)
      {
      sprintf(log_buffer, "rmdir succeeded on %s", dirname);

      log_ext(-1, id, log_buffer, LOG_DEBUG);
      }
    }
  else if (unlink(dirname) < 0)
    {
    sprintf(log_buffer, "unlink failed on %s",
            dirname);

    log_err(errno, id, log_buffer);

    rtnv = -1;
    }
  else if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer, "unlink(2) succeeded on %s", dirname);

    log_ext(-1, id, log_buffer, LOG_DEBUG);
    }

  return(rtnv);
  }  /* END remtree() */



/*
 * conn_qsub - connect to the qsub that submitted this interactive job
 * return >= 0 on SUCCESS, < 0 on FAILURE
 * (this was moved from resmom/mom_inter.c)
 */



int conn_qsub(

  char *hostname,  /* I */
  long  port,      /* I */
  char *EMsg)      /* O (optional,minsize=1024) */

  {
  pbs_net_t hostaddr;
  int s;

  int flags;

  if (EMsg != NULL)
    EMsg[0] = '\0';

  if ((hostaddr = get_hostaddr(hostname)) == (pbs_net_t)0)
    {
#if !defined(H_ERRNO_DECLARED) && !defined(_AIX)
    extern int h_errno;
#endif

    /* FAILURE */

    if (EMsg != NULL)
      {
      snprintf(EMsg, 1024, "cannot get address for host '%s', h_errno=%d",
               hostname,
               h_errno);
      }

    return(-1);
    }

  s = client_to_svr(hostaddr, (unsigned int)port, 0, EMsg);

  /* NOTE:  client_to_svr() can return 0 for SUCCESS */

  /* assume SUCCESS requires s > 0 (USC) was 'if (s >= 0)' */
  /* above comment not enabled */

  if (s < 0)
    {
    /* FAILURE */

    return(-1);
    }

  /* SUCCESS */

  /* this socket should be blocking */

  flags = fcntl(s, F_GETFL);

  flags &= ~O_NONBLOCK;

  fcntl(s, F_SETFL, flags);

  return(s);
  }  /* END conn_qsub() */




/*
 * job_alloc - allocate space for a job structure and initialize working
 * attribute to "unset"
 *
 * Returns: pointer to structure or null is space not available.
 */

job *
job_alloc(void)

  {
  job *pj;

  pj = (job *)calloc(1, sizeof(job));

  if (pj == NULL)
    {
    log_err(errno, "job_alloc", "no memory");

    return(NULL);
    }

  pj->ji_qs.qs_version = PBS_QS_VERSION;

  CLEAR_LINK(pj->ji_alljobs);
  CLEAR_LINK(pj->ji_jobque);

  CLEAR_HEAD(pj->ji_tasks);
  pj->ji_taskid = TM_NULL_TASK + 1;
  pj->ji_numnodes = 0;
  pj->ji_numvnod  = 0;
  pj->ji_hosts = NULL;
  pj->ji_vnods = NULL;
  pj->ji_resources = NULL;
  pj->ji_obit = TM_NULL_EVENT;
  pj->ji_preq = NULL;
  pj->ji_nodekill = TM_ERROR_NODE;
  pj->ji_flags = 0;
  pj->ji_globid = NULL;
  pj->ji_stdout = 0;
  pj->ji_stderr = 0;
  pj->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

  pj->ji_momhandle = -1;  /* mark mom connection invalid */

  /* set the working attributes to "unspecified" */

  job_init_wattr(pj);

  return(pj);
  }  /* END job_alloc() */





/*
 * job_free - free job structure and its various sub-structures
 */

void job_free(

  job *pj)  /* I (modified) */

  {
  int    i;

  if (LOGLEVEL >= 8)
    {
    sprintf(log_buffer, "freeing job");

    log_record(PBSEVENT_DEBUG,
               PBS_EVENTCLASS_JOB,
               pj->ji_qs.ji_jobid,
               log_buffer);
    }

  /* remove any malloc working attribute space */

  for (i = 0;i < (int)JOB_ATR_LAST;i++)
    {
    job_attr_def[i].at_free(&pj->ji_wattr[i]);
    }

  if (pj->ji_grpcache)
    free(pj->ji_grpcache);

  assert(pj->ji_preq == NULL);

  nodes_free(pj);

  tasks_free(pj);

  if (pj->ji_resources)
    free(pj->ji_resources);

  if (pj->ji_globid)
    free(pj->ji_globid);

  /* now free the main structure */

  free((char *)pj);

  return;
  }  /* END job_free() */

/*
 * job_unlink_file - unlink file, but drop root credentials before
 * doing this to avoid removing objects that aren't belong to the user.
 */
int job_unlink_file(
  job *pjob,		/* I */
  const char *name)	/* I */
  {
  int saved_errno = 0, result = 0;
  uid_t uid = geteuid();
  gid_t gid = getegid();

  if (uid != 0)
    return unlink(name);

  if ((setegid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) == -1))
    return -1;
  if ((seteuid(pjob->ji_qs.ji_un.ji_momt.ji_exuid) == -1))
    {
    saved_errno = errno;
    setegid(gid);
    errno = saved_errno;
    return -1;
    }
  result = unlink(name);
  saved_errno = errno;

  seteuid(uid);
  setegid(gid);

  errno = saved_errno;
  return result;
  }  /* END job_unlink_file() */


/*
 * job_init_wattr - initialize job working attribute array
 * set the types and the "unspecified value" flag
 */
static void job_init_wattr(

  job *pj)

  {
  int i;

  for (i = 0;i < (int)JOB_ATR_LAST;i++)
    {
    clear_attr(&pj->ji_wattr[i], &job_attr_def[i]);
    }

  return;
  }   /* END job_init_wattr() */





/*
 * job_purge_thread - purge job from system
 *
 * The job is dequeued; the job control file, script file and any spooled
 * output files are unlinked, and the job structure is freed.
 * If we are MOM, the task files and checkpoint files are also
 * removed.
 */

void *job_purge_thread(void *jobtopurge)
  {
  static char   id[] = "job_purge_thread";
  job *pjob;
  char          namebuf[MAXPATHLEN + 1];
  extern char  *msg_err_purgejob;
  int           rc;
  extern void MOMCheckRestart A_((void));

  pjob = (job *)jobtopurge;

  if (pjob->ji_flags & MOM_HAS_TMPDIR)
    {
    if (TTmpDirName(pjob, namebuf))
      {
      sprintf(log_buffer, "removing transient job directory %s",
        namebuf);

      log_record(
        PBSEVENT_DEBUG,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        log_buffer);

      if ((setegid(pjob->ji_qs.ji_un.ji_momt.ji_exgid) == -1) ||
          (seteuid(pjob->ji_qs.ji_un.ji_momt.ji_exuid) == -1))
        {
        /* FAILURE */

        pthread_exit(NULL);
        }

      rc = remtree(namebuf);

      seteuid(0);
      setegid(pbsgroup);

      if ((rc != 0) && (LOGLEVEL >= 5))
        {
        sprintf(log_buffer,
          "recursive remove of job transient tmpdir %s failed",
          namebuf);

        log_err(errno, "recursive (r)rmdir",
          log_buffer);
        }

      pjob->ji_flags &= ~MOM_HAS_TMPDIR;
      }
    }

#ifdef PENABLE_LINUX26_CPUSETS

  if (use_cpusets(pjob) == TRUE)
    {
    extern void cpuset_delete(char *);

    /* Delete the cpuset for the job. */

    cpuset_delete(pjob->ji_qs.ji_jobid);
    }

#endif /* PENABLE_LINUX26_CPUSETS */

  /* delete the nodefile if still hanging around */

  if (pjob->ji_flags & MOM_HAS_NODEFILE)
    {
    char file[MAXPATHLEN + 1];

    sprintf(file,"%s/%s",
      path_aux,
      pjob->ji_qs.ji_jobid);

    unlink(file);

    pjob->ji_flags &= ~MOM_HAS_NODEFILE;
    }

  delete_link(&pjob->ji_jobque);

  delete_link(&pjob->ji_alljobs);

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer,"removing job");

    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);
    }

  strcpy(namebuf,path_jobs); /* delete script file */

  strcat(namebuf,pjob->ji_qs.ji_fileprefix);
  strcat(namebuf,JOB_SCRIPT_SUFFIX);

  if (unlink(namebuf) < 0)
    {
    if (errno != ENOENT)
      log_err(errno, id, msg_err_purgejob);
    }
  else if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer, "removed job script");

    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);
    }


#if IBM_SP2==2        /* IBM SP PSSP 3.1 */
  unload_sp_switch(pjob);

#endif   /* IBM SP */
  strcpy(namebuf, path_jobs);     /* job directory path */

  strcat(namebuf, pjob->ji_qs.ji_fileprefix);

  strcat(namebuf, JOB_TASKDIR_SUFFIX);

  remtree(namebuf);

  mom_checkpoint_delete_files(pjob);

  strcpy(namebuf, path_jobs); /* delete job file */

  strcat(namebuf, pjob->ji_qs.ji_fileprefix);

  strcat(namebuf, JOB_FILE_SUFFIX);

  if (unlink(namebuf) < 0)
    {
    if (errno != ENOENT)
      log_err(errno, id, msg_err_purgejob);
    }
  else if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer, "removed job file");

    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);
    }

  job_free(pjob);

  /* if no jobs are left, check if MOM should be restarted */

  if (((job *)GET_NEXT(svr_alljobs)) == NULL)
    MOMCheckRestart();

  pthread_exit(NULL);
  }

void job_purge(

  job *pjob)  /* I (modified) */

  {
  static char   id[] = "job_purge";
  pthread_attr_t attr;
  pthread_t *thread;
  int rc;

  rc = pthread_attr_init( &attr );
  if(rc)
    {
    sprintf(log_buffer, "pthread_attr_init failded. job number %s", pjob->ji_qs.ji_jobid);
    log_err(rc, id, log_buffer);
    return;
    }

  rc = pthread_attr_setdetachstate(
     &attr, PTHREAD_CREATE_DETACHED );
  if(rc)
    {
    sprintf(log_buffer, "pthread_attr_setdetachstate failded. job number %s", pjob->ji_qs.ji_jobid);
    log_err(rc, id, log_buffer);
    return;
    }
  thread = (pthread_t *)malloc(sizeof(pthread_t));
  if(thread == NULL) 
    {
    sprintf(log_buffer, "Could not allocate memory for job_purge thread");
    log_err(errno, id, log_buffer);
    return;
    }

  rc = pthread_create(thread, &attr, &job_purge_thread, pjob);
  if(rc)
    {
    sprintf(log_buffer, "pthread_create failed: %d", rc);
    log_err(rc, id, log_buffer);
    }

  return;
  }  /* END job_purge() */





/*
 * find_job() - find job by jobid
 *
 * Search list of all server jobs for one with same job id
 * Return NULL if not found or pointer to job struct if found
 */

job *find_job(

  char *jobid)

  {
  char *at;
  job  *pj;

  if ((at = strchr(jobid, (int)'@')) != NULL)
    * at = '\0'; /* strip off @server_name */

  pj = (job *)GET_NEXT(svr_alljobs);

  while (pj != NULL)
    {
    if (!strcmp(jobid, pj->ji_qs.ji_jobid))
      break;

    pj = (job *)GET_NEXT(pj->ji_alljobs);
    }

  if (at)
    *at = '@'; /* restore @server_name */

  return(pj);  /* may be NULL */
  }   /* END find_job() */

/* END job_func.c */

