/* -*- mode: c; c-file-style: "gnu" -*-
 * cgi.c -- CGI support functions
 * Copyright (C) 2002, 2003, 2004 Gergely Nagy <algernon@bonehunter.rulez.org>
 *
 * This file is part of Thy.
 *
 * Thy is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 dated June, 1991.
 *
 * Thy is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 * License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/** @file cgi.c
 * CGI support functions.
 *
 * This module implements the bulk of CGI (and handler) support. Well,
 * not really. Since the bulk of the thing is I/O, which is all in
 * daemon_handle_io(), this module mostly deals with parsing and
 * setup: creating the necessary environment for CGI, and parsing the
 * returned header, optionally adding our own.
 */

#include "system.h"

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#if defined(HAVE_LIBGEN_H) && !defined(_AIX)
#include <libgen.h>
#endif
#ifdef HAVE_PATHS_H
#include <paths.h>
#endif
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include "compat/compat.h"
#include "bh-libs/list.h"

#include "cgi.h"
#include "config.h"
#include "fabs.h"
#include "misc.h"
#include "network.h"
#include "session.h"
#include "thy.h"
#include "tls.h"
#include "types.h"

#ifndef __DOXYGEN__
#ifndef _PATH_DEVNULL
#define _PATH_DEVNULL "/dev/null"
#endif
#endif

/** @internal Converts a hexadecimal char-pair to a decimal number.
 *
 * CHAR1 is the high nibble, CHAR2 the low one. Letters are accepted
 * in either case: bit 0x20 is masked off before subtracting 'A'.
 *
 * @note No validation is performed; the caller must make sure that
 * both characters are hexadecimal digits. Each argument is evaluated
 * more than once, so it must be free of side effects.
 */
#define HEX_TO_DECIMAL(char1, char2)	\
    (((((char1) >= 'A') ? ((((char1) & 0xdf) - 'A') + 10) : \
       ((char1) - '0')) * 16) + \
     (((char2) >= 'A') ? ((((char2) & 0xdf) - 'A') + 10) : ((char2) - '0')))
/** @internal Maximum number of arguments to pass to a CGI script.
 *
 * _cgi_argv_create() only fills argv[] indices below this value, and
 * resizes the argv array to this many slots plus one, so that there
 * is always room for the terminating NULL.
 */
#define CGI_ARGC_MAX 16

/** @internal Value of a single hexadecimal digit.
 *
 * @param ch is the character to convert.
 *
 * @returns 0-15 for a valid hexadecimal digit (either case), -1 for
 * anything else, including the terminating NUL.
 */
static int
hex_digit_value (char ch)
{
  if (ch >= '0' && ch <= '9')
    return ch - '0';
  if (ch >= 'a' && ch <= 'f')
    return ch - 'a' + 10;
  if (ch >= 'A' && ch <= 'F')
    return ch - 'A' + 10;
  return -1;
}

/** @internal Unescape an URI.
 * Decodes the %XX escapes of URI in place, and cuts the result at the
 * first unescaped "?" or "#". The decoded text is never longer than
 * the input, so no extra storage is needed.
 *
 * @param uri is the URI to unescape (modified in place).
 * @param query_string is where a pointer to the stuff following the
 * first "?" should be put (if any). May be NULL. It is left untouched
 * when the URI has no query part.
 *
 * @returns One on success, zero on error. An escape is an error when
 * it is truncated, when it contains a character that is not a
 * hexadecimal digit, or when it decodes to NUL (which would silently
 * truncate the result). On error, URI is left partially decoded.
 */
static int
unescape_uri (char *uri, char **query_string)
{
  char c;
  char *uri_old;

  uri_old = uri;

  while ((c = *uri_old))
    {
      if (c == '%')
        {
          int hi, lo;

          /* The second digit is only looked at when the first one is
             valid, so we never read past the terminating NUL. */
          hi = hex_digit_value (uri_old[1]);
          if (hi < 0)
            return 0;
          lo = hex_digit_value (uri_old[2]);
          if (lo < 0)
            return 0;
          if (hi == 0 && lo == 0)
            return 0;       /* %00 would cut the string short */

          *uri++ = (char) ((hi << 4) | lo);
          uri_old += 3;
        }
      else if (c == '?')
        {
          /* query string */
          if (query_string)
            *query_string = ++uri_old;
          /* stop here */
          *uri = '\0';
          return 1;
        }
      else if (c == '#')
        {
          /* fragment */
          /* legal part of URL, but we do *not* care.
           * However, we still have to look for the query string */
          if (query_string)
            {
              ++uri_old;
              while ((c = *uri_old))
                {
                  if (c == '?')
                    {
                      *query_string = ++uri_old;
                      break;
                    }
                  ++uri_old;
                }
            }
          break;
        }
      else
        {
          *uri++ = c;
          uri_old++;
        }
    }

  *uri = '\0';
  return 1;
}

/** @internal Build the argv array of a CGI.
 * Makes sure SESSION->cgi.argv has room for CGI_ARGC_MAX + 1
 * pointers, then - when the query string contains no "=" - splits a
 * copy of it at every "+", unescapes the pieces, and stores them as
 * arguments. Pieces that fail to unescape are skipped. The array is
 * always NULL terminated.
 *
 * Without a handler, FN becomes argv[0] and the arguments start at
 * index 1. With a handler, indices 0 and 1 are left as they are, and
 * the arguments start at index 2.
 *
 * @note Modifies SESSION in-place.
 */
static void
_cgi_argv_create (session_t *session, const char *fn)
{
  const char *query = session->request->query_string;
  char *cursor;
  int slot, first_slot;

  if (session->cgi.handler)
    {
      first_slot = 2;
      XSREALLOC (session->cgi.argv, char *, CGI_ARGC_MAX + 1);
    }
  else
    {
      first_slot = 1;
      if (session->cgi.argv)
        {
          XSREALLOC (session->cgi.argv, char *, CGI_ARGC_MAX + 1);
        }
      else
        {
          session->cgi.argv =
            (char **)bhc_calloc (CGI_ARGC_MAX + 1, sizeof (char *));
        }
      session->cgi.argv[0] = bhc_strdup (fn);
    }

  slot = first_slot;

  if (query != NULL && strchr (query, '=') == NULL)
    {
      /* Work on a private copy: the pieces are cut up and unescaped
         in place, and argv[] ends up pointing into it. */
      cursor = bhc_strdup (query);

      while (cursor != NULL && slot < CGI_ARGC_MAX)
        {
          char *word = cursor;
          /* for an index-style CGI, + is used to separate arguments
           * an escaped '+' is of no concern to us
           */
          char *plus = strchr (cursor, '+');

          if (plus != NULL)
            {
              *plus = '\0';
              cursor = plus + 1;
            }
          else
            cursor = NULL;

          if (unescape_uri (word, NULL))
            session->cgi.argv[slot++] = word;
        }
    }

  session->cgi.argv[slot] = NULL;
}

/** @internal Get the default path.
 * Asks confstr() for the size of the _CS_PATH value, allocates a
 * buffer of exactly that size (the size reported by confstr()
 * already includes the terminating NUL), and fetches the value into
 * it.
 *
 * @returns A newly allocated buffer on success, or NULL if an error
 * occurred, or if _CS_PATH has no value. The caller must free() the
 * buffer.
 */
static char *
_cgi_getdefpath (void)
{
  size_t len = confstr (_CS_PATH, NULL, 0);
  char *buffer;

  /* Zero means "no value" or "invalid name": nothing to fetch. */
  if (len == 0)
    return NULL;

  buffer = (char *) bhc_malloc (len);
  if (!buffer)
    return NULL;

  /* Pass the real size of the buffer, not one more than that. */
  if (confstr (_CS_PATH, buffer, len) == 0)
    {
      free (buffer);
      return NULL;
    }

  return buffer;
}

/** Add a new environment variable.
 * Adds the environment variable NAME with value VALUE to the list of
 * environment variables in REQUEST, as a newly allocated
 * "NAME=VALUE" string.
 *
 * The array is grown so that there is always one unused slot left
 * after the new entry: _cgi_env_setup() stores the terminating NULL
 * at cgienv[cgienvlen] without growing the array itself.
 *
 * @param request is the request whose environment is extended.
 * @param name is the name of the variable. If NULL, nothing is done.
 * @param value is its value. If NULL, nothing is done.
 *
 * @note If the string cannot be allocated, the variable is silently
 * left out.
 */
void
cgi_addenv (request_t *request, const char *name, const char *value)
{
  char *tmp = NULL;

  if (!name || !value)
    return;

  if (request->cgienvlen + 1 >= request->cgienvalloc)
    {
      /* Doubling zero would get us nowhere, hence the special case.
         Loop, so that room for the terminator is guaranteed even if
         the array was already filled to the brim. */
      while (request->cgienvlen + 1 >= request->cgienvalloc)
        request->cgienvalloc =
          (request->cgienvalloc > 0) ? request->cgienvalloc * 2 : 2;
      XSREALLOC (request->cgienv, char *, request->cgienvalloc);
    }

  /* On failure the contents of tmp are not something we may use. */
  if (asprintf (&tmp, "%s=%s", name, value) < 0)
    return;

  request->cgienv[request->cgienvlen++] = tmp;
}

/** @internal Set up the CGI environment.
 * Fills SESSION->request->cgienv, through cgi_addenv(), with the
 * variables describing the server, the request and the client, then
 * with the variables listed in the mapped configuration, and finally
 * NULL terminates the array.
 *
 * Variables whose value is NULL are left out by cgi_addenv().
 *
 * @param session is the session for which the environment should be
 * set up.
 */
static void
_cgi_env_setup (session_t *session)
{
  char protocol[] = "HTTP/1.X";
  char *buf;
  size_t url_len, idx, query_len = 0, pinfo_len = 0;
  thy_mappable_config_t *config =
    config_get_mapped (session->absuri, session->request->resolved);
  request_t *req = session->request;
  pair_t *entry;

  /* Replace the trailing X with the minor version of the request. */
  protocol[7] = '0' + req->http_minor;

  if (session->cgi.handler)
    cgi_addenv (req, "SCRIPT_FILENAME", session->cgi.argv[1]);
  cgi_addenv (req, "SERVER_PROTOCOL", protocol);
  cgi_addenv (req, "SERVER_SOFTWARE", thy_servername (NULL));
  cgi_addenv (req, "GATEWAY_INTERFACE", "CGI/1.1");
  cgi_addenv (req, "REMOTE_ADDR", session->origin);
  cgi_addenv (req, "SCRIPT_NAME", req->url);
  cgi_addenv (req, "SERVER_NAME", req->host);
  cgi_addenv (req, "QUERY_STRING",
              req->query_string ? req->query_string : "");
  cgi_addenv (req, "CONTENT_TYPE", req->content_type);
  cgi_addenv (req, "PATH_INFO", req->path_info);

  /* REQUEST_URI is the URL, followed by the path info and by
     "?query", whichever of them are present. */
  if (req->url)
    {
      size_t off;

      url_len = strlen (req->url);
      if (req->path_info)
        pinfo_len = strlen (req->path_info);
      if (req->query_string)
        query_len = strlen (req->query_string);

      /* The buffer is zero filled, so the result is terminated. */
      buf = (char *)bhc_calloc (1, url_len + pinfo_len + query_len + 3);
      memcpy (buf, req->url, url_len);
      off = url_len;
      if (req->path_info)
        {
          memcpy (buf + off, req->path_info, pinfo_len);
          off += pinfo_len;
        }
      if (req->query_string)
        {
          buf[off] = '?';
          memcpy (buf + off + 1, req->query_string, query_len);
        }
      cgi_addenv (req, "REQUEST_URI", buf);
      free (buf);
    }

  buf = fabs_urlmap (req->path_info, req->host, session->absuri);
  cgi_addenv (req, "PATH_TRANSLATED", buf);
  free (buf);

  buf = _cgi_getdefpath ();
  cgi_addenv (req, "PATH", buf);
  free (buf);

  asprintf (&buf, "%d", session->port);
  cgi_addenv (req, "SERVER_PORT", buf);
  free (buf);

  if (req->content_length > 0)
    {
      asprintf (&buf, SIZET_FORMAT, req->content_length);
      cgi_addenv (req, "CONTENT_LENGTH", buf);
      free (buf);
    }

  cgi_addenv (req, "REQUEST_METHOD", req->method_str);

  if (session->header.code == HTTP_STATUS_UNKNOWN)
    cgi_addenv (req, "REDIRECT_STATUS", "200");
  else
    {
      /* A status code is already set for the session: tell the
         script about the request and the status. */
      cgi_addenv (req, "REDIRECT_URL", req->url);
      asprintf (&buf, "%s %s HTTP/%d.%d", req->method_str, req->url,
                req->http_major, req->http_minor);
      cgi_addenv (req, "REDIRECT_REQUEST", buf);
      free (buf);
      asprintf (&buf, "%d", session_code_map[session->header.code].code);
      cgi_addenv (req, "REDIRECT_STATUS", buf);
      free (buf);
    }

  cgi_addenv (req, "DOCUMENT_ROOT", session->root);

  if (req->auth_realm)
    cgi_addenv (req, "AUTH_TYPE", "Basic");

#ifdef THY_OPTION_TLS
  thy_tls_cgi_setup_env (session);
#endif

  /* Variables coming from the configuration go last. */
  for (idx = 0; idx < bhl_list_size (config->env); idx++)
    {
      bhl_list_get (config->env, idx, (void **)&entry);
      cgi_addenv (req, entry->field, entry->value);
      free (entry);
    }

  req->cgienv[req->cgienvlen] = NULL;

  free (config);
}

/** Launch a CGI script.
 * Creates the two pipes used to talk to the script, forks, and
 * executes FN in the child with its standard output bound to
 * SESSION->cgi.pipes.in. Standard input is bound to
 * SESSION->cgi.pipes.out for POST (and unknown) requests and to
 * /dev/null otherwise; standard error always goes to /dev/null. The
 * child changes into the directory of the resolved file before the
 * exec.
 *
 * In the parent, the child's ends of the pipes are closed, the body
 * buffer is dropped, and the session is moved to the state matching
 * the request method.
 *
 * If the second pipe or the fork fails, every pipe descriptor opened
 * so far is closed again and set to -1, so nothing is leaked.
 *
 * @param fn is the script to launch.
 * @param session is the corresponding session.
 *
 * @returns zero on success, -1 otherwise (including the case when a
 * CGI is already running for the session).
 */
int
cgi_launch (const char *fn, session_t *session)
{
  char *tmp, *tmp2;
  int fd;

  if (session->cgi.running != 0)
    return -1;

  if (pipe (session->cgi.pipes.in) < 0)
    {
      bhc_error ("pipe(): %s", strerror (errno));
      return -1;
    }
  if (pipe (session->cgi.pipes.out) < 0)
    {
      /* Log first: close() may overwrite errno. */
      bhc_error ("pipe(): %s", strerror (errno));
      close (session->cgi.pipes.in[0]);
      close (session->cgi.pipes.in[1]);
      session->cgi.pipes.in[0] = -1;
      session->cgi.pipes.in[1] = -1;
      return -1;
    }

  session_finalise (session);

  session->cgi.child = fork();
  switch (session->cgi.child)
    {
    case 0:
      /* Child */
      if ((fd = open (_PATH_DEVNULL, O_RDWR, 0)) == -1)
        {
          bhc_log ("%s", "Error opening /dev/null. Exiting.");
          bhc_exit (1);
        }

      if (session->request->method == HTTP_METHOD_POST ||
          session->request->method == HTTP_METHOD_UNKNOWN)
        {
          if (dup2 (session->cgi.pipes.out[0], STDIN_FILENO) == -1)
            {
              bhc_log ("%s",
                       "Error bindig CGI's STDIN to pipe. Exiting.");
              bhc_exit (1);
            }
        }
      else
        dup2 (fd, STDIN_FILENO);

      if (dup2 (session->cgi.pipes.in[1], STDOUT_FILENO) == -1)
        {
          bhc_log ("%s", "Error binding CGI's STDOUT to pipe. Exiting.");
          bhc_exit (1);
        }

      dup2 (fd, STDERR_FILENO);
      /* Do not close one of the standard descriptors by accident. */
      if (fd > 2)
        close (fd);
      close (session->cgi.pipes.in[1]);
      close (session->cgi.pipes.in[0]);
      close (session->cgi.pipes.out[1]);
      close (session->cgi.pipes.out[0]);
      closelog ();
      /* dirname() may modify its argument, so work on a copy. */
      tmp2 = bhc_strdup (session->request->resolved);
      tmp = dirname (tmp2);
      chdir (tmp);
      free (tmp2);
      /* Set up environment variables */
      _cgi_env_setup (session);
      /* ...and arguments */
      _cgi_argv_create (session, fn);
      execve (fn, session->cgi.argv, session->request->cgienv);
      /* PANIC! execve() failed. */
      openlog ("thy", LOG_PID, LOG_DAEMON);
      bhc_error ("execve(%s): %s", fn, strerror (errno));
      closelog ();
      bhc_exit (1);
      break;
    case -1:
      /* Error */
      bhc_error ("fork(): %s", strerror (errno));
      /* There is no child to talk to: release both pipes. */
      close (session->cgi.pipes.in[0]);
      close (session->cgi.pipes.in[1]);
      close (session->cgi.pipes.out[0]);
      close (session->cgi.pipes.out[1]);
      session->cgi.pipes.in[0] = -1;
      session->cgi.pipes.in[1] = -1;
      session->cgi.pipes.out[0] = -1;
      session->cgi.pipes.out[1] = -1;
      return -1;
    default:
      /* Parent */
      close (session->cgi.pipes.in[1]);
      close (session->cgi.pipes.out[0]);
      session->cgi.pipes.in[1] = -1;
      session->cgi.pipes.out[0] = -1;

      free (session->body.buffer);
      session->body.buffer = NULL;
      session->body.offset = 0;

      session->cgi.running = 1;
      if (session->request->method == HTTP_METHOD_POST ||
          session->request->method == HTTP_METHOD_UNKNOWN)
        session_state_change (session, SESSION_STATE_POST_INPUT);
      else
        session_state_change (session, SESSION_STATE_CGI_HEADER_INPUT);
      thy_active_cgis++;
      break;
    }
  return 0;
}

/** Set up the basic CGI environment.
 * Set up SESSION->request->query_string and ->path_info.
 *
 * The query string is whatever follows the first "?" of the URL; the
 * URL is cut there. After that, the URL is mapped to a file name,
 * and if the result is neither a CGI nor handled by a handler, and
 * does not exist at all, the last path component is dropped and the
 * check is repeated. When a script (or a handled file) is found,
 * the dropped part of the URL becomes ->path_info, ->url is cut down
 * to the script, and ->resolved is set to the mapped file name. When
 * a handler matched, SESSION->cgi.handler and ->cgi.argv[0] are set
 * to it too.
 *
 * @param session is the session to set up.
 */
void
cgi_setup (session_t *session)
{
  char *tmp, *rest;
  int i, end = 0;
  struct stat st;
  char *handler = NULL;
  thy_mappable_config_t *config =
    config_get_mapped (session->absuri, NULL);
  thy_bool_t lazycgi = THY_BOOL_FALSE;

  /* Neither ->request->query_string, nor ->path_info is released in
     this function, because they are guaranteed to be NULL when
     cgi_setup() takes control. */

  if ((tmp = strchr (session->request->url, '?')) != NULL)
    {
      session->request->query_string = bhc_strdup (&tmp[1]);
      tmp[0] = '\0';
    }

  lazycgi = config->options.lazycgi;
  free (config);

  rest = bhc_strdup (session->request->url);
  tmp = NULL;
  i = strlen (rest);
  while (end < 2)
    {
      char *idx = NULL;

      tmp = fabs_urlmap (rest, session->request->host,
                         session->absuri);
      if (!tmp)
        {
          free (rest);
          return;
        }
      /* The result of fabs_stat() is not checked, so make sure that
         a failed call leaves us with a mode that is neither a
         directory nor a regular file, instead of garbage. */
      memset (&st, 0, sizeof (st));
      fabs_stat (tmp, &st);

      if ((lazycgi == THY_BOOL_TRUE) && S_ISDIR (st.st_mode) &&
          ((idx = session_isindex (session, tmp)) != NULL))
        {
          ssize_t l = strlen (session->request->url);

          /* NOTE(review): tmp[strlen (tmp)] is always the terminating
             NUL, so the first half of this test can never be true;
             presumably the last character was meant. Left as is,
             since changing it would alter which requests get mapped
             to the index - confirm the intent first. */
          if ((l == i && tmp[strlen (tmp)] == '/') || l != i)
            {
              free (tmp);
              tmp = idx;
              memset (&st, 0, sizeof (st));
              fabs_stat (tmp, &st);
            }
          else
            free (idx);     /* not used, do not leak it */
        }

      if (((!fabs_access (tmp, X_OK) && cgi_iscgi (session, tmp)) ||
           (!fabs_access (tmp, F_OK) &&
            ((handler = session_handler_check
              (tmp, session->absuri)) != NULL))) &&
          S_ISREG (st.st_mode))
        {
          size_t offs = strlen (rest);

          if (handler)
            {
              session->cgi.argv = (char **)bhc_calloc (3, sizeof (char *));
              session->cgi.argv[0] = bhc_strdup (handler);
              session->cgi.argv[1] = NULL;
              session->cgi.argv[2] = NULL;
              session->cgi.handler = bhc_strdup (handler);
            }
          free (handler);
          if (i <= 0)
            offs--;
          session->request->path_info =
            bhc_strdup (&session->request->url[offs]);
          free (session->request->url);
          session->request->url = bhc_strdup (rest);
          free (session->request->resolved);
          session->request->resolved = bhc_strdup (tmp);
          free (tmp);
          break;
        }
      if (!fabs_access (tmp, F_OK))
        {
          /* It exists, but it is not for us. A handler may have been
             found above for something that is not a regular file:
             release that along with the mapped name. */
          free (handler);
          free (tmp);
          break;
        }
      free (tmp);
      /* Drop the last path component, and try again. */
      tmp = strrchr (rest, '/');
      if (tmp)
        {
          i -= strlen (tmp);
          if (i > 0)
            rest[i] = '\0';
          else
            {
              rest[1] = '\0';
              end++;
            }
        }
      else
        break;
    }
  free (rest);
}

/** Check if a given session should be handled as CGI.
 * Determine if SESSION->request->resolved (or FN) is a CGI script or
 * not.
 *
 * A session that already has a handler is always a CGI. Otherwise
 * CGI support must be enabled in the mapped configuration, the file
 * name must either end in one of the configured CGI extensions or
 * start with the real path of one of the configured CGI directories,
 * and the file must be executable.
 *
 * @param session is the session to check.
 * @param fn is the file to check. If NULL,
 * SESSION->request->resolved is checked instead.
 *
 * @returns Zero if it is not a CGI, one if it is.
 */
int
cgi_iscgi (const session_t *session, const char *fn)
{
  thy_mappable_config_t *config;
  int iscgi = 0;
  const char *file = (fn) ? fn : session->request->resolved;
  size_t i, flen;

  if (!file)
    return 0;

  if (session->cgi.handler != NULL)
    return 1;

  flen = strlen (file);

  config = config_get_mapped (session->absuri, fn);
  if (config->options.cgi != THY_BOOL_TRUE)
    {
      free (config);
      return 0;
    }

  for (i = 0; i < bhl_list_size (config->cgiexts); i++)
    {
      char *t;
      size_t tlen;

      bhl_list_get (config->cgiexts, i, (void **)&t);
      tlen = strlen (t);

      /* An extension longer than the file name cannot match, and
         comparing anyway would read before the start of FILE. */
      if (tlen <= flen && !strcmp (&file[flen - tlen], t))
        {
          iscgi = 1;
          free (t);
          break;
        }
      free (t);
    }

  for (i = 0; i < bhl_list_size (config->cgidirs); i++)
    {
      char *t, *tc;

      bhl_list_get (config->cgidirs, i, (void **)&t);
      tc = fabs_realpath (t);
      free (t);

      if (tc == NULL)
        continue;

      /* Plain prefix match, same as strstr (file, tc) == file. */
      if (strncmp (file, tc, strlen (tc)) == 0)
        {
          iscgi = 1;
          free (tc);
          break;
        }
      free (tc);
    }

  if (fabs_access (file, X_OK))
    iscgi = 0;

  /* No need to look at ->cgi.handler again: we have already returned
     above if it was set. */

  if (!iscgi)
    {
      free (config);
      return 0;
    }

  /* NOTE(review): this turns CGI off for "/~user" URLs when usercgi
     is *enabled*, which reads as if it were inverted - confirm
     against the place where user CGIs are handled. */
  if (!fn && session->request->url[0] != '\0' &&
      session->request->url[1] == '~' &&
      config->options.usercgi == THY_BOOL_TRUE)
    iscgi = 0;

  free (config);
  return iscgi;
}

/** @internal Dummy version of session_header().
 * Sets up the HTTP status line for the CGI: whatever was in
 * SESSION->header.fields is released, and replaced with a newly
 * allocated "HTTP/1.x CODE MSG" line, where x is the minor version
 * of the request.
 *
 * @param session is the session we work on.
 * @param code is the HTTP return code, in numeric format (unlike
 * session_header(), this function does not take a #http_status_t).
 * @param msg is the HTTP status message.
 *
 * @note If the line cannot be allocated, SESSION->header.fields is
 * left NULL, not pointing at the released buffer.
 */
static void
_cgi_session_header (session_t *session, int code, const char *msg)
{
  free (session->header.fields);
  session->header.fields = NULL;

  if (asprintf (&session->header.fields, "HTTP/1.%d %d %s\r\n",
                session->request->http_minor, code, msg) < 0)
    session->header.fields = NULL;
}

/** @internal Find a header in a buffer.
 * Tries to find a given header in a lower-cased buffer. A header is
 * considered to be found if it is on the beginning of the buffer, or
 * if it is preceded by a newline.
 *
 * Every occurrence is examined, not only the first one, so a header
 * is found even if its name also appears earlier in the middle of a
 * line (for example "server: " after an "x-server: " line).
 *
 * @param lbuf is the buffer to search in.
 * @param header is the header to look for.
 *
 * @returns A pointer to the start of the header, or NULL if it wasn't
 * found.
 *
 * @note The return value is a subset of lbuf, it must not be freed!
 */
static char *
_cgi_header_find (const char *lbuf, const char *header)
{
  const char *from = lbuf;
  char *hit;

  while ((hit = strstr (from, header)) != NULL)
    {
      if (hit == lbuf || *(hit - 1) == '\n')
        return hit;
      /* Somewhere inside a line: look for the next occurrence. */
      from = hit + 1;
    }
  return NULL;
}

/** Set up (parse) the returned CGI headers.
 * This function deals with the gory details of parsing the CGI's
 * returned headers, transforming into something usable, and
 * optionally adding missing elements (like the HTTP status line :).
 *
 * Scripts whose name starts with "nph-" are left alone. For
 * everything else, a lower-cased copy of the output is searched for
 * the Content-Length, Server, Date, Location and Status headers. The
 * status line is then chosen in this order: the session's own code
 * if neither Location nor Status was seen; 301 if Location was seen;
 * 401 if authentication is still needed; the code and message of the
 * Status header otherwise. Missing Server and Date headers are
 * added, and without a Content-Length, the connection is either
 * closed or switched to chunked encoding. Finally, the headers built
 * here are put in front of the script's output in
 * SESSION->body.buffer.
 *
 * @param session is the session we are working with.
 */
void
cgi_headers_setup (session_t *session)
{
  char *fn;
  int seen_server = 0, seen_date = 0, seen_loc = 0, seen_cl = 0;
  char *seen_status = NULL;
  char *lbuf;
  size_t i;

  free (session->header.fields);
  session->header.fields = NULL;
  session->header.offset = 0;

  if (session->header.code == HTTP_STATUS_UNKNOWN ||
      session->header.code == HTTP_STATUS_101)
    session->header.code = HTTP_STATUS_200;

  fn = basename (session->request->resolved);
  if (strncmp (fn, "nph-", 4))
    {
      /* Convert session->body.buffer to lowercase, and place it into
         lbuf. The cast is needed because tolower() must not be given
         a negative value (other than EOF). */
      lbuf = (char *)bhc_calloc (1, session->body.len + 1);
      for (i = 0; i < session->body.len; i++)
        lbuf[i] = tolower ((unsigned char) session->body.buffer[i]);

      if (_cgi_header_find (lbuf, "content-length: "))
        seen_cl = 1;
      if (_cgi_header_find (lbuf, "server: "))
        seen_server = 1;
      if (_cgi_header_find (lbuf, "date: "))
        seen_date = 1;
      if ((session->header.code != HTTP_STATUS_302) &&
          _cgi_header_find (lbuf, "location: "))
        seen_loc = 1;
      if (!seen_loc &&
          ((seen_status = _cgi_header_find (lbuf, "status: ")) != NULL))
        {
          char *tmp, *tmp2;

          /* The header was found in the lower-cased copy, but the
             value is taken from the original, at the same offset,
             right after the eight characters of "status: ". */
          tmp = bhc_strdup (&session->body.buffer[seen_status - lbuf + 8]);
          tmp2 = strstr (tmp, "\n");
          if (tmp2)
            tmp2[0] = '\0';
          tmp2 = strstr (tmp, "\r");
          if (tmp2)
            tmp2[0] = '\0';

          session->cgi.status = strtoul (tmp, &tmp2, 10);
          /* Skip the separator between the code and the message, but
             do not step over the terminator if there is no message
             at all. From here on, seen_status is our own copy, and
             does not point into lbuf anymore. */
          seen_status = bhc_strdup ((*tmp2 != '\0') ? &tmp2[1] : tmp2);
          free (tmp);
        }
      /* Nothing refers to the lower-cased copy past this point. */
      free (lbuf);

      if (!seen_loc && !seen_status)
        _cgi_session_header
          (session, session_code_map[session->header.code].code,
           session_code_map[session->header.code].message);
      else if (seen_loc)
        {
          _cgi_session_header
            (session, session_code_map[HTTP_STATUS_301].code,
             session_code_map[HTTP_STATUS_301].message);
          session->header.code = HTTP_STATUS_301;
        }
      else if (!session->request->auth_token &&
               session->request->auth_realm)
        {
          _cgi_session_header
            (session, session_code_map[HTTP_STATUS_401].code,
             session_code_map[HTTP_STATUS_401].message);
          session->header.code = HTTP_STATUS_401;
        }
      else
        _cgi_session_header (session, session->cgi.status, seen_status);
      free (seen_status);

      if (!seen_server)
        session_header_add (session, "Server", thy_servername (NULL));
      if (!seen_date)
        session_header_add (session, "Date", rfc822_date (time (NULL)));
      if (!seen_cl)
        {
          /* Without a length, the end of the body has to be marked
             some other way. */
          if (session->request->http_minor == 0 ||
              !session->request->keepalive)
            {
              session_header_add (session, "Connection", "close");
              session->request->keepalive = 0;
            }
          else
            {
              session_header_add (session, "Transfer-Encoding", "chunked");
              session->chunked.enabled = 1;
            }
        }
      /* ->auth_realm is already filled in by auth_need(), so we don't
         need to call that again here. */
      if (session->request->auth_realm && !session->request->auth_token)
        {
          char *tmp = NULL;
          asprintf (&tmp, "Basic realm=\"%s\"",
                    session->request->auth_realm);
          session_header_add (session, "WWW-Authenticate", tmp);
          free (tmp);
        }

      if (session->header.fields)
        {
          /* Put our headers in front of the output of the script,
             and make the result the new body buffer. */
          session->body.size += strlen (session->header.fields) + 2;
          session->header.fields = (char *)bhc_realloc
            (session->header.fields, session->body.size);
          strcat (session->header.fields, session->body.buffer);
          free (session->body.buffer);
          session->body.buffer = session->header.fields;
          session->header.fields = NULL;
          session->body.len = strlen (session->body.buffer);
          session->body.content_size = session->body.size;
          session->body.content_length = session->body.size;
        }
    }
}
