/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#ifdef USE_XML

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <expat.h>

#include "udm_common.h"
#include "udm_log.h"
#include "udm_uniconv.h"
#include "udm_vars.h"
#include "udm_textlist.h"
#include "udm_utils.h"
#include "udm_parsexml.h"
#include "udm_hrefs.h"
#include "udm_sgml.h"

/* State shared between the expat callbacks during one UdmXMLParse() call. */
typedef struct {
  /* Agent handed to UdmXMLParse(). */
  UDM_AGENT *Indexer;
  /* Document being parsed; callbacks add hrefs, text items and Meta-Charset to it. */
  UDM_DOCUMENT *Doc;
  /* Section number of the "body" section, or 0 when the document has none. */
  int body_sec;
  /* Heap copy of the current element name (NULL if none); Text() uses it to look up the section. */
  char *sec;
  /* Heap buffer holding the '|'-separated path of currently open elements. */
  char *secpath;
  /* pathlen: allocated size of secpath; curlen: tracked length of the path stored in it. */
  size_t pathlen, curlen;
} XML_PARSER_DATA;


/*
 * Expat start-tag callback.
 *
 * Makes the element name the current section (D->sec), appends it to the
 * '|'-separated path of open elements kept in D->secpath, and registers the
 * value of every "href" attribute as an outgoing link of the document.
 *
 *   userData  XML_PARSER_DATA installed with XML_SetUserData().
 *   name      NUL-terminated element name.
 *   attr      NULL-terminated array of alternating attribute names and values.
 */
static void startElement(void *userData, const char *name, const char **attr) {
       XML_PARSER_DATA *D = userData;
       UDM_DOCUMENT *Doc = D->Doc;
       size_t namelen = strlen(name);
       size_t used, need;
       size_t i;

       UDM_FREE(D->sec);
       D->sec = (char*)strdup(name);

       /* Measure the real path length; a freshly zeroed state has no buffer yet. */
       used = (D->secpath != NULL) ? strlen(D->secpath) : 0;
       /* Worst case: existing path + '|' + name + terminating NUL. */
       need = used + 1 + namelen + 1;

       if (need > D->pathlen) {
         size_t newlen = need + 256;
         char *grown = (char*)realloc(D->secpath, newlen);

         /* On failure the old buffer stays valid; the path is simply not extended. */
         if (grown != NULL) {
           D->secpath = grown;
           D->pathlen = newlen;
         }
       }

       if (need <= D->pathlen) {
         /* The separator goes between components only, never before the root. */
         if (used > 0) {
           D->secpath[used++] = '|';
         }
         memcpy(D->secpath + used, name, namelen + 1);
         D->curlen = used + namelen;
       }

       for (i = 0; attr[i]; i += 2) {
         if (!strcasecmp(attr[i], "href")) {
           UDM_HREF Href;

           /* NOTE(review): the attribute value belongs to expat and is const;
              presumably UdmSGMLUnescape rewrites it in place - confirm this is safe. */
           UdmSGMLUnescape(attr[i+1]);
           UdmHrefInit(&Href);
           Href.referrer = UdmVarListFindInt(&Doc->Sections, "Referrer-ID", 0);
           Href.hops = 1 + UdmVarListFindInt(&Doc->Sections, "Hops", 0);
           Href.site_id = UdmVarListFindInt(&Doc->Sections, "Site_id", 0);
           Href.url = attr[i+1];
           Href.method = UDM_METHOD_GET;
           UdmHrefListAdd(&Doc->Hrefs, &Href);
         }
       }
}

/*
 * Expat end-tag callback.
 *
 * Removes the last component from the '|'-separated element path in
 * D->secpath and makes the enclosing element (the new last component) the
 * current section, so that text following a closed child is attributed to
 * its parent. When the closed element was the only one on the path, the
 * path becomes empty and D->sec is left NULL.
 *
 *   userData  XML_PARSER_DATA installed with XML_SetUserData().
 *   name      name of the element being closed (not used).
 */
static void endElement(void *userData, const char *name) {
       XML_PARSER_DATA *D = userData;
       char *sep;
       char *parent;

       (void)name;

       UDM_FREE(D->sec);

       /* Nothing to pop if the path buffer was never allocated. */
       if (D->secpath == NULL) {
         return;
       }

       sep = strrchr(D->secpath, '|');
       if (sep == NULL) {
         D->secpath[0] = '\0';
         D->curlen = 0;
         return;
       }

       /* Cut the closed element off and take the length from the pointer
          difference, so the counter can never underflow. */
       *sep = '\0';
       D->curlen = (size_t)(sep - D->secpath);

       /* The current section is now the last component of what remains. */
       parent = strrchr(D->secpath, '|');
       parent = (parent != NULL) ? parent + 1 : D->secpath;
       if (parent[0] != '\0') {
         D->sec = (char*)strdup(parent);
       }
}


/*
 * Expat XML-declaration callback.
 *
 * When the declaration names an encoding, stores its canonical charset name
 * (as returned by UdmCharsetCanonicalName) in the document's "Meta-Charset"
 * section variable. The version and standalone arguments are ignored.
 */
static void Decl(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone) {
       XML_PARSER_DATA *ctx = userData;

       /* No encoding in the declaration: leave Meta-Charset untouched. */
       if (encoding == NULL) {
         return;
       }

       UdmVarListReplaceStr(&ctx->Doc->Sections,
                            "Meta-Charset",
                            UdmCharsetCanonicalName(encoding));
}

/*
 * Expat character-data callback.
 *
 * Copies the (not NUL-terminated) chunk s[0..len) and adds it to the
 * document's text list. The chunk is filed under the section named after the
 * current element when the document has such a section, otherwise under the
 * "body" section number captured in D->body_sec.
 *
 *   userData  XML_PARSER_DATA installed with XML_SetUserData().
 *   s, len    character data and its length in bytes.
 */
static void Text(void *userData, const XML_Char *s, int len) {
        XML_PARSER_DATA *D = userData;
        UDM_DOCUMENT *Doc = D->Doc;
        UDM_TEXTITEM Item;
        UDM_VAR *Sec = NULL;

        memset(&Item, 0, sizeof(Item));
        Item.str = strndup(s, (size_t)len);
        if (Item.str == NULL) {
          /* Out of memory: drop this chunk rather than pass a NULL string on. */
          return;
        }

        if (D->sec != NULL) {
          Sec = UdmVarListFind(&Doc->Sections, D->sec);
        }
        if (Sec != NULL) {
          Item.section = Sec->section;
          Item.section_name = D->sec;
        } else {
          Item.section = D->body_sec;
          Item.section_name = "body";
        }

        UdmTextListAdd(&Doc->TextList, &Item);
        /* The temporary copy is released here; presumably UdmTextListAdd keeps its own. */
        free(Item.str);
}

/*
 * Expat unknown-encoding callback.
 *
 * Looks the encoding name up with UdmGetCharSet and, when the charset has a
 * tab_to_uni table, copies its first 256 entries into info->map so expat can
 * decode the document. No convert/release hooks or private data are set.
 *
 * Returns 1 when the map was filled in, 0 when the charset is unknown or has
 * no tab_to_uni table.
 */
static int EncHandler(void *encodingHandlerData, const XML_Char *name, XML_Encoding *info) {
  UDM_CHARSET *charset = UdmGetCharSet(name);
  size_t code = 0;

  (void)encodingHandlerData;

  /* FIXME: rewrite this for multibytes encodings */
  if (charset == NULL || charset->tab_to_uni == NULL) {
    return 0;
  }

  while (code < 256) {
    info->map[code] = charset->tab_to_uni[code];
    code++;
  }

  info->data = NULL;
  info->convert = NULL;
  info->release = NULL;

  return 1;
}

/*
 * Parse the document's content buffer as XML.
 *
 * Runs expat over Doc->Buf.content (treated as one NUL-terminated, final
 * chunk). The callbacks add href attributes to Doc->Hrefs, character data to
 * Doc->TextList and the declared encoding to the "Meta-Charset" section.
 *
 *   Indexer  agent used for logging and passed to the encoding handler.
 *   Doc      document whose buffer is parsed and whose lists are filled.
 *
 * Returns UDM_OK on success, UDM_ERROR when the parser cannot be created or
 * expat reports a parsing error (the error is logged).
 */
int UdmXMLParse(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc){
        int res = UDM_OK;
        XML_PARSER_DATA Data;
        XML_Parser parser;
        UDM_VAR *BSec = UdmVarListFind(&Doc->Sections, "body");

        parser = XML_ParserCreate(NULL);
        if (parser == NULL) {
          UdmLog(Indexer, UDM_LOG_ERROR, "XML parsing error: can't create parser\n");
          return UDM_ERROR;
        }

        memset(&Data, 0, sizeof(Data));
        Data.Indexer = Indexer;
        Data.Doc = Doc;
        Data.body_sec = BSec ? BSec->section : 0;

        XML_SetUserData(parser, &Data);
        XML_SetElementHandler(parser, startElement, endElement);
        XML_SetXmlDeclHandler(parser, Decl);
        XML_SetCharacterDataHandler(parser, Text);
        XML_SetUnknownEncodingHandler(parser, EncHandler, Indexer);

        if (XML_Parse(parser, Doc->Buf.content, (int)strlen(Doc->Buf.content), 1) == XML_STATUS_ERROR) {
          /* The line number is cast so that it matches %d whatever width
             expat uses for its return type. */
          UdmLog(Indexer, UDM_LOG_ERROR,
                 "XML parsing error: %s at line %d\n",
                 XML_ErrorString(XML_GetErrorCode(parser)),
                 (int)XML_GetCurrentLineNumber(parser));
          res = UDM_ERROR;
        }
        XML_ParserFree(parser);

        /* The current-section copy may still be set if parsing stopped inside
           an element; release it together with the path buffer. */
        UDM_FREE(Data.sec);
        UDM_FREE(Data.secpath);

        return res;
}

#else
/* Only definition left when USE_XML is not defined; presumably a placeholder
   so that this translation unit is not empty. */
char a[]="";
#endif
