/*
   Name: $RCSfile: mod_tidy.c,v $
   Author: Alan Moran
   $Date: 2005/11/26 15:04:17 $
   $Revision: 1.6 $
   $Id: mod_tidy.c,v 1.6 2005/11/26 15:04:17 a_j_moran Exp $

   Legal Notice:

   This program is free software; you can redistribute it and/or
   modify it under the terms of the license contained in the
   COPYING file that comes with this distribution.

 */

/**
   @file

   @brief Functions to support the tidy module.

   The tidy module performs all necessary actions required to implement
   the module interface.
 */

#include <tidy.h>
#include <buffio.h>
#include "globals.h"
#include "mod_tidy.h"

/* Path of the external tidy configuration file; assigned in
   rpl_mod_tidy_init() from rpl_cfg_get_trf_tidy_config() (may be NULL). */
static rpl_str_t tidyCfgFile;
/* Master tidy document holding the module-wide options: created in
   rpl_mod_tidy_init(), copied into each per-file document in
   rpl_mod_tidy_process(), released in rpl_mod_tidy_cleanup(). */
static TidyDoc config;
/* Base directory handed to the module interface; assigned in
   rpl_mod_tidy_configure() from rpl_wk_get_srcdir() and deliberately not
   freed by rpl_mod_tidy_cleanup(). */
static rpl_str_t rpl_tidy_base_dir;
/* Directories receiving tidy error reports (terr_dir) and tidy output
   (tout_dir); built in rpl_mod_tidy_init() and freed in
   rpl_mod_tidy_cleanup(). Not static, so they have external linkage. */
rpl_str_t terr_dir, tout_dir;

/**
   Populate a module interface with the tidy module's entry points.

   The source directory reported by rpl_wk_get_srcdir() is remembered as the
   module's base directory and is also published through the interface.

   @param fns module interface whose init, process, cleanup and basedir
              members are filled in.
 */
void
rpl_mod_tidy_configure(rpl_mod_fns *fns)
{
	/* remember the base directory first; process() resolves paths against it */
	rpl_tidy_base_dir = rpl_wk_get_srcdir();

	fns->basedir = rpl_tidy_base_dir;
	fns->cleanup = rpl_mod_tidy_cleanup;
	fns->process = rpl_mod_tidy_process;
	fns->init = rpl_mod_tidy_init;
}

/**
   Build the master tidy configuration shared by every processed file.

   The error and output directories are derived from the data store base
   directory, then the master tidy document is created. If an external tidy
   configuration file is configured and exists it is loaded; otherwise a
   built-in set of options is applied. Doing this once per workflow is
   chiefly of benefit when the configuration is read from a file.

   @return RPL_WK_OK on success, or RPL_WK_WARN if the external
           configuration file exists but tidy reported a problem loading it
           (a warning is logged in that case).
 */
rpl_wk_status 
rpl_mod_tidy_init()
{
	rpl_str_t warning;

	/* directories that receive tidy error reports and tidy output */
	terr_dir = rpl_str_concat(rpl_cfg_get_ds_basedir(), "/", RPL_DS_TERR_DIR, RPL_STR_EOC);
	tout_dir = rpl_str_concat(rpl_cfg_get_ds_basedir(), "/", RPL_DS_TOUT_DIR, RPL_STR_EOC);

	/* master document: only its options are used, it never parses anything */
	config = tidyCreate();

	tidyCfgFile = rpl_cfg_get_trf_tidy_config();

	if ((tidyCfgFile == NULL) || !tidyFileExists(tidyCfgFile))
	{
		/* no usable external file: fall back to the built-in options */
		tidyOptSetBool(config, TidyXhtmlOut, yes);
		tidyOptSetBool(config, TidyMark, no);
		tidyOptSetBool(config, TidyNumEntities, yes);

		/* specific MS Word measures */
		tidyOptSetBool(config, TidyWord2000, yes);
		tidyOptSetBool(config, TidyMakeBare, yes);
		tidyOptSetBool(config, TidyMakeClean, yes);
		tidyOptSetBool(config, TidyDropPropAttrs, yes);

		/* options that simplify XSLT processing */
		tidyOptSetBool(config, TidyEncloseBodyText, yes);
		tidyOptSetValue(config, TidyDoctype, "omit");

		return RPL_WK_OK;
	}

	/* an external tidy config file is available: use it */
	if (tidyLoadConfig(config, rpl_cfg_get_trf_tidy_config()) == 0)
		return RPL_WK_OK;

	/* any non-zero result from tidyLoadConfig is reported as a warning */
	warning = rpl_message_get("TIDY_CONFIG_FAILED",rpl_cfg_get_trf_tidy_config(), RPL_EOM);
	rpl_log_warn(warning);
	rpl_me_free(warning);

	return RPL_WK_WARN;
}

/**
   Performs tidy operations on web asset.

   @param filename name of file to be transformed relative to website base directory.
   @param st_buf stat of file.
 */
rpl_wk_status 
rpl_mod_tidy_process(rpl_c_str_t filename, struct stat statbuf)
{
	TidyDoc tdoc;
	rpl_reg_item item;
    int rc = -1;
	rpl_str_t msg, rdp, fp, key, errfp, outfp;
	rpl_wk_status status = RPL_WK_OK;

    assert(filename != NULL);

	msg = rpl_str_concat(rpl_message_get("WK_PROCESSING", RPL_EOM), "tidy ", filename, RPL_STR_EOC);
	rpl_log_info(msg);
	rpl_me_free(msg);

	/* extract key information */
	if(rpl_fs_resolve_paths(filename, rpl_tidy_base_dir, &rdp, &fp))
		return RPL_WK_ERR;
	key = rpl_reg_create_key(rdp, fp);

	/* use relative path to resolve directories for errors and output */
	errfp = (rpl_str_t)rpl_me_malloc(strlen(terr_dir) + strlen(rdp) + strlen(fp) + 2);
	sprintf(errfp, "%s/%s/%s", terr_dir, rdp, fp);
	outfp = (rpl_str_t)rpl_me_malloc(strlen(tout_dir) + strlen(rdp) + strlen(fp) + 2);
	sprintf(outfp, "%s/%s/%s", tout_dir, rdp, fp);

	if(S_ISREG(statbuf.st_mode))
	{
		/* retrieve asset from registry */
		item = rpl_reg_search(key);
		if(item == &RPL_REG_WA_NULL)
		{
			rpl_log_error(rpl_message_get("REG_ASSET_NOT_FOUND", key, RPL_EOM));
			return RPL_WK_ERR;
		}

		/* either transform the asset or simply copy it */	
    	if(rpl_wa_is_transformable(*item))
		{
			/* create and configure doc */
			tdoc = tidyCreate();
			tidyOptCopyConfig(tdoc, config);

			/* perform tidy operations */
			rc = (int)tidySetErrorFile(tdoc, errfp);
			if (rc >= 0)
				rc = tidyParseFile(tdoc, filename); 
			if (rc >= 0)
				rc = tidyCleanAndRepair(tdoc);
			if (rc >= 0)
				rc = tidyRunDiagnostics(tdoc);
			if (rc > 1)
				rc = (tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1);
			if (rc >= 0)
				rc = tidySaveFile(tdoc, outfp);

			/* report outcome */
			if (rc >= 0)
			{
				rc = (rc > 0) ? RPL_WA_TRF_ST_WARNING : RPL_WA_TRF_ST_SUCCESS;
			} else {
				rc = RPL_WA_TRF_ST_FAILURE;
			}
			rpl_wa_set_tidy_stat_desc(rpl_wa_get_trf_status_desc(rc), item);
			rpl_reg_insert(item);

			/* free resources */
			tidyRelease(tdoc);

		} else {
			rpl_fs_cp(filename, (rpl_c_str_t)outfp);
		}
	}

	/* free resources */
	if(strlen(rdp) > 0)
		rpl_me_free(rdp); 
	if(strlen(fp) > 0)
		rpl_me_free(fp); 
	if(strlen(key) > 0)
		rpl_me_free(key);
	rpl_me_free(errfp);
	rpl_me_free(outfp);

	return status; 
}

/**
   Release the resources set up by rpl_mod_tidy_init().

   The master tidy document and the error/output directory strings are
   released. The base directory string is intentionally left alone.

   @return always RPL_WK_OK.
 */
rpl_wk_status 
rpl_mod_tidy_cleanup()
{
	/* master configuration document */
	tidyRelease(config);

	/* directory strings built during init */
	rpl_me_free(tout_dir);
	rpl_me_free(terr_dir);

	/* note: rpl_tidy_base_dir must not be freed here */

	return RPL_WK_OK;
}

