/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h> 
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
              
#include "udmsearch.h"
#include "udm_mkind.h"

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

/* Sub-directory of VarDir in which the limit index files (.dat/.ind) are created */
#define TREEDIR "tree"

/* Numeric identifiers, one per kind of limit index.
   NOTE(review): none of these are referenced in the visible part of this
   file -- confirm they are still needed. */
#define UDM_INDEX_CATEGORY     0
#define UDM_INDEX_TAG          1
#define UDM_INDEX_TIME_HOUR    2
#define UDM_INDEX_TIME_MIN     3
#define UDM_INDEX_HOSTNAME     4
#define UDM_INDEX_LANGUAGE     5
#define UDM_INDEX_SITEID       6
#define UDM_INDEX_CONTENT_TYPE 7


/*
 * Sort comparator for UDM_UINT8URLID records.
 * Orders by hi, then by lo, then by url_id; every field is compared after
 * conversion to uint4.  Returns -1, 0 or 1.
 */
static int cmp_ind8(const UDM_UINT8URLID *c1,const UDM_UINT8URLID *c2){
     uint4 lhs=c1->hi;
     uint4 rhs=c2->hi;
     
     if(lhs!=rhs)
          return((lhs<rhs) ? -1 : 1);
     
     /* hi is equal: fall back to lo */
     lhs=c1->lo;
     rhs=c2->lo;
     if(lhs!=rhs)
          return((lhs<rhs) ? -1 : 1);
     
     /* hi and lo are equal: url_id decides */
     lhs=c1->url_id;
     rhs=c2->url_id;
     if(lhs!=rhs)
          return((lhs<rhs) ? -1 : 1);
     
     return(0);
}

/*
 * Sort comparator for UDM_UINT4URLID records.
 * Orders by val, then by url_id; both fields are compared after conversion
 * to uint4.  Returns -1, 0 or 1.
 */
static int cmp_ind4(const UDM_UINT4URLID *c1,const UDM_UINT4URLID *c2){
     uint4 lhs=c1->val;
     uint4 rhs=c2->val;
     
     if(lhs!=rhs)
          return((lhs<rhs) ? -1 : 1);
     
     /* val is equal: url_id decides */
     lhs=c1->url_id;
     rhs=c2->url_id;
     if(lhs!=rhs)
          return((lhs<rhs) ? -1 : 1);
     
     return(0);
}

/*
 * Build the "<lim_name>.dat" and "<lim_name>.ind" files, located in the
 * TREEDIR sub-directory of VarDir, for a limit whose key is a (hi, lo) pair.
 *
 * The records obtained from UdmLimit8() are sorted with cmp_ind8, i.e. by
 * (hi, lo, url_id).  The .dat file receives the url_id of every record in
 * that order.  The .ind file receives one UDM_UINT8_POS_LEN entry per
 * distinct (hi, lo) key, holding the byte offset (pos) and the byte length
 * (len) of that key's run of url_ids inside the .dat file.  Every entry is
 * also printed to stdout.
 *
 * Returns 0 on success and 1 on failure; failure details go to stderr.
 */
static int MakeNestedIndex(UDM_ENV *Conf, const char *field, const char *lim_name, int type, void *db){
     UDM_UINT8URLIDLIST  L;
     size_t    k,prev;
     urlid_t   *data=NULL;
     UDM_UINT8_POS_LEN *ind=NULL;
     size_t    mind=1000,nind=0;
     char fname[1024];
     int  dat_fd=-1, ind_fd=-1;     /* -1 means "not open"; 0 is a valid descriptor */
     const char *vardir=UdmVarListFindStr(&Conf->Vars,"VarDir",UDM_VAR_DIR);
     
     if(UDM_OK!=UdmLimit8(Conf, &L, field, type, db)){
          fprintf(stderr,"Error: UdmLimit8 failed for '%s'\n",field);
          return 1;
     }
     
     UdmSort(L.Item, L.nitems, sizeof(UDM_UINT8URLID), (qsort_cmp)cmp_ind8);
     
     data = (urlid_t*)malloc(L.nitems * sizeof(*data));
     if(!data){
          fprintf(stderr,"Error1: %s\n",strerror(errno));
          goto err1;
     }
     ind=(UDM_UINT8_POS_LEN*)malloc(mind*sizeof(UDM_UINT8_POS_LEN));
     if(!ind){
          fprintf(stderr,"Error2: %s\n",strerror(errno));
          goto err1;
     }
     
     /* prev is the first record of the run currently being collected */
     prev=0;
     for(k=0;k<L.nitems;k++){
          data[k]=L.Item[k].url_id;
          
          /* The run continues while the NEXT record carries the same key.
             Closing the run on its own last record (instead of on the first
             record of the following run) keeps a trailing single-record key
             from being merged into the preceding key. */
          if((k+1<L.nitems) && (L.Item[k+1].hi==L.Item[prev].hi) && (L.Item[k+1].lo==L.Item[prev].lo))
               continue;
          
          if(nind==mind){
               /* Grow through a temporary: on failure the old block is still
                  reachable and gets released at err1 */
               UDM_UINT8_POS_LEN *grown=(UDM_UINT8_POS_LEN*)realloc(ind,(mind+1000)*sizeof(UDM_UINT8_POS_LEN));
               if(!grown){
                    fprintf(stderr,"Error3: %s\n",strerror(errno));
                    goto err1;
               }
               ind=grown;
               mind+=1000;
          }
          /* Fill index: records prev..k inclusive share one key */
          ind[nind].hi=L.Item[prev].hi;
          ind[nind].lo=L.Item[prev].lo;
          ind[nind].pos=prev*sizeof(*data);
          ind[nind].len=(k-prev+1)*sizeof(*data);
          printf("%08X%08X - %d %d\n",ind[nind].hi,ind[nind].lo,(int)ind[nind].pos,ind[nind].len);
          nind++;
          
          prev=k+1;
     }
     UDM_FREE(L.Item);
     
     udm_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.dat", vardir,UDMSLASH,TREEDIR,UDMSLASH, lim_name);
     if((dat_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
          fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmWriteLock(dat_fd);
     if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) {
          fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmUnLock(dat_fd);
     close(dat_fd);
     dat_fd=-1;     /* already closed: err1 must not close it a second time */
     UDM_FREE(data);

     udm_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.ind", vardir, UDMSLASH,TREEDIR, UDMSLASH, lim_name);
     if((ind_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
          fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmWriteLock(ind_fd);
     if((nind*sizeof(UDM_UINT8_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(UDM_UINT8_POS_LEN))){
          fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmUnLock(ind_fd);
     close(ind_fd);
     ind_fd=-1;
     UDM_FREE(ind);
     
     return(0);
     
err1:
     UDM_FREE(L.Item);
     UDM_FREE(data);
     UDM_FREE(ind);
     if(dat_fd>=0)close(dat_fd);
     if(ind_fd>=0)close(ind_fd);
     return(1);
}

/*
 * Build the "<lim_name>.dat" and "<lim_name>.ind" files, located in the
 * TREEDIR sub-directory of VarDir, for a limit whose key is a single value.
 *
 * The records obtained from UdmLimit4() are sorted with cmp_ind4, i.e. by
 * (val, url_id).  The .dat file receives the url_id of every record in that
 * order.  The .ind file receives one UDM_UINT4_POS_LEN entry per distinct
 * val, holding the byte offset (pos) and the byte length (len) of that
 * value's run of url_ids inside the .dat file.
 *
 * Returns 0 on success.  Returns 1 on failure (details go to stderr) and
 * also when UdmLimit4() succeeds but leaves L.Item NULL.
 */
static int MakeLinearIndex(UDM_ENV *Conf, const char *field, const char *lim_name, int type, void *db){
     UDM_UINT4URLIDLIST  L;
     size_t    k,prev;
     urlid_t   *data = NULL;
     UDM_UINT4_POS_LEN *ind=NULL;
     size_t    mind=1000,nind=0;
     char fname[1024];
     int  dat_fd=-1, ind_fd=-1;     /* -1 means "not open"; 0 is a valid descriptor */
     const char *vardir=UdmVarListFindStr(&Conf->Vars,"VarDir",UDM_VAR_DIR);
     
     if(UDM_OK!=UdmLimit4(Conf, &L, field, type, db)){
          fprintf (stderr,"Error: %s\n", UdmEnvErrMsg(Conf));
          return 1;
     }
     
     if(!L.Item)return(1);
     
     UdmSort(L.Item,L.nitems,sizeof(UDM_UINT4URLID),(qsort_cmp)cmp_ind4);
     
     data = (urlid_t*)malloc(L.nitems * sizeof(*data));
     if(!data){
          fprintf(stderr,"Error1: %s\n",strerror(errno));
          goto err1;
     }
     ind=(UDM_UINT4_POS_LEN*)malloc(mind*sizeof(UDM_UINT4_POS_LEN));
     if(!ind){
          fprintf(stderr,"Error2: %s\n",strerror(errno));
          goto err1;
     }
     
     /* prev is the first record of the run currently being collected */
     prev=0;
     for(k=0;k<L.nitems;k++){
          data[k]=L.Item[k].url_id;
          
          /* The run continues while the NEXT record carries the same value.
             Closing the run on its own last record (instead of on the first
             record of the following run) keeps a trailing single-record
             value from being merged into the preceding value. */
          if((k+1<L.nitems) && (L.Item[k+1].val==L.Item[prev].val))
               continue;
          
          if(nind==mind){
               /* Grow through a temporary: on failure the old block is still
                  reachable and gets released at err1 */
               UDM_UINT4_POS_LEN *grown=(UDM_UINT4_POS_LEN*)realloc(ind,(mind+1000)*sizeof(UDM_UINT4_POS_LEN));
               if(!grown){
                    fprintf(stderr,"Error3: %s\n",strerror(errno));
                    goto err1;
               }
               ind=grown;
               mind+=1000;
          }
          /* Fill index: records prev..k inclusive share one value */
          ind[nind].val=L.Item[prev].val;
          ind[nind].pos=prev*sizeof(*data);
          ind[nind].len=(k-prev+1)*sizeof(*data);
          nind++;
          
          prev=k+1;
     }
     UDM_FREE(L.Item);
     
     udm_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,UDMSLASH,TREEDIR, UDMSLASH, lim_name);
     if((dat_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
          fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmWriteLock(dat_fd);
     if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) {
          fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmUnLock(dat_fd);
     close(dat_fd);
     dat_fd=-1;     /* already closed: err1 must not close it a second time */
     UDM_FREE(data);

     udm_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,UDMSLASH,TREEDIR, UDMSLASH, lim_name);
     if((ind_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
          fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmWriteLock(ind_fd);
     if((nind*sizeof(UDM_UINT4_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(UDM_UINT4_POS_LEN))){
          fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
          goto err1;
     }
     UdmUnLock(ind_fd);
     close(ind_fd);
     ind_fd=-1;
     UDM_FREE(ind);
     
     return(0);
     
err1:
     UDM_FREE(L.Item);
     UDM_FREE(data);
     UDM_FREE(ind);
     if(dat_fd>=0)close(dat_fd);
     if(ind_fd>=0)close(ind_fd);
     return(1);
}


/*
 * Scan the configuration variables of Indexer and, for every variable whose
 * name starts with "Limit-" (compared case-insensitively), build the limit
 * index selected by the variable's value: "category" goes through
 * MakeNestedIndex(), while "tag", "time", "hostname", "language", "content"
 * and "siteid" go through MakeLinearIndex().  Any other value builds nothing.
 * "Done" is logged after each "Limit-" variable has been handled.
 *
 * The results of the index builders are not inspected; the function always
 * returns UDM_OK.
 */
__C_LINK int __UDMCALL UdmCacheMakeIndexes(UDM_AGENT *Indexer, UDM_DB *db) {
  UDM_VARLIST *vars = &Indexer->Conf->Vars;
  size_t n;

  for (n = 0; n < vars->nvars; n++) {
    char *kind;

    /* Only "Limit-*" variables describe an index to build */
    if (strncasecmp("Limit-", vars->Var[n].name, 6) != 0)
      continue;

    kind = vars->Var[n].val;

    if (strcasecmp(kind, "category") == 0) {
      UdmLog(Indexer, UDM_LOG_EXTRA, "Creating category index");
      MakeNestedIndex(Indexer->Conf, "category", UDM_LIMFNAME_CAT, UDM_IFIELD_TYPE_HEX8STR, db);
    } else if (strcasecmp(kind, "tag") == 0) {
      UdmLog(Indexer, UDM_LOG_EXTRA, "Creating tag index");
      MakeLinearIndex(Indexer->Conf, "tag", UDM_LIMFNAME_TAG, UDM_IFIELD_TYPE_STRCRC32, db);
    } else if (strcasecmp(kind, "time") == 0) {
      UdmLog(Indexer, UDM_LOG_EXTRA, "Creating time index");
      MakeLinearIndex(Indexer->Conf, "last_mod_time", UDM_LIMFNAME_TIME, UDM_IFIELD_TYPE_HOUR, db);
    } else if (strcasecmp(kind, "hostname") == 0) {
      UdmLog(Indexer, UDM_LOG_EXTRA, "Creating hostname index");
      MakeLinearIndex(Indexer->Conf, "url", UDM_LIMFNAME_HOST, UDM_IFIELD_TYPE_HOSTNAME, db);
    } else if (strcasecmp(kind, "language") == 0) {
      UdmLog(Indexer, UDM_LOG_EXTRA, "Creating language index");
      MakeLinearIndex(Indexer->Conf, "Content-Language", UDM_LIMFNAME_LANG, UDM_IFIELD_TYPE_STRCRC32, db);
    } else if (strcasecmp(kind, "content") == 0) {
      UdmLog(Indexer, UDM_LOG_EXTRA, "Creating Content-Type index");
      MakeLinearIndex(Indexer->Conf, "Content-Type", UDM_LIMFNAME_CTYPE, UDM_IFIELD_TYPE_STRCRC32, db);
    } else if (strcasecmp(kind, "siteid") == 0) {
      UdmLog(Indexer, UDM_LOG_EXTRA, "Creating Site_id index");
      MakeLinearIndex(Indexer->Conf, "site_id", UDM_LIMFNAME_SITE, UDM_IFIELD_TYPE_INT, db);
    }

    UdmLog(Indexer, UDM_LOG_EXTRA, "Done");
  }

  return UDM_OK;
}

