#include "apachetop.h"

#include "inlines.cc"

/* Lookup maps shared with the rest of the program; declared here and
 * defined elsewhere. The parser below inserts hosts into hm, URLs into um
 * and referrers into rm. */
extern map *um, /* urlmap */
// unused  *im, /* ipmap */
           *hm, /* hostmap */
           *rm, /* referrermap */
           *fm; /* filemap */

/* timestamp copied into b->time for every line that is parsed */
extern time_t now;
/* runtime configuration; the parser reads preserve_ref_protocol,
 * keep_querystring, keep_segments and lowercase_urls from it */
extern config cf;


/* CommonLogParser handles common and combined, despite its name.
 *
 * Parses one log line, modifying logline in place (field separators are
 * overwritten with NULs), and fills in b:
 *   host_pos / host_hash   from the first space-delimited field
 *   time                   set to the global `now`, not the logged time
 *   url_pos / url_hash     from the request, via processURL()
 *   retcode, bytes         from the two numeric fields after the request
 *   ref_pos / ref_hash     from the referrer, when one is present
 *
 * Returns 0 on success, -1 if the line does not have the expected shape.
 * Map entries inserted before the failure was detected are left in place.
 */
int CommonLogParser::parse(char *logline, struct logbits *b)
{
	/* distance from the day-of-month (the character after '[') to the
	 * first character of the request method: 26 characters of timestamp
	 * ("dd/Mon/yyyy:hh:mm:ss +zzzz"), then ']', ' ' and '"' */
	const size_t date_to_method = 29;

	char *bufsp, *bufcp, *ptr;

	/* host first; terminate it at the first space */
	bufcp = strchr(logline, ' ');
	if (!bufcp)
		return -1;

	*bufcp = '\0';
	++bufcp;

	/* no attempt is made to tell IPs from hostnames; whatever is in the
	 * first field goes into the host map */
	b->host_pos = hm->insert(logline);
	b->host_hash = TTHash(logline);

	/* now skip to date */
	if (!(bufcp = strchr(bufcp, '[')))
		return -1;

	bufcp++;

	b->time = now; /* be lazy */

	/* a truncated line may end inside the timestamp; make sure the fixed
	 * skip below stays inside the buffer */
	if (strlen(bufcp) < date_to_method)
		return -1;

	bufcp += date_to_method;

	/* URL. processURL() will update bufcp to point at the end so we can
	 * continue processing from there */
	if ((ptr = this->processURL(&bufcp)) == NULL)
		return -1;

	/* get url_pos for this url; for circle_struct (c) later */
	b->url_pos = um->insert(ptr);
	b->url_hash = TTHash(ptr);

	/* return code */
	b->retcode = atoi(bufcp);

	/* step over the return code by finding the separator rather than
	 * assuming it is exactly three digits wide */
	if (!(bufcp = strchr(bufcp, ' ')))
		return -1;
	++bufcp;

	/* bytecount */
	b->bytes = atoi(bufcp);

	/* this may be the end of the line if it's a common log; if
	 * it's combined then we have referrer and user agent left.
	 * A space with nothing after it also leaves nothing to parse. */
	bufsp = strchr(bufcp, ' ');
	if (!bufsp || bufsp[1] == '\0')
	{
		/* nothing left, its common */

		/* fill in a dummy value for referrer map */
		/* NOTE(review): ref_hash is not assigned on this path, unlike
		 * the combined path below - confirm callers do not rely on it */
		b->ref_pos = rm->insert("Unknown");
		return 0;
	}

	bufsp += 2; /* skip the space and opening quote of the referrer */

	/* find the end of referrer and null it */
	if (!(bufcp = strchr(bufsp, '"')))
		return -1;
	*bufcp = '\0';

	/* unless they want to keep it, skip over the protocol, ie http:// */
	if ((cf.preserve_ref_protocol == 0) && (bufcp = strstr(bufsp, "://")))
		bufsp = bufcp + 3;

	/* we could munge the referrer now; cut down the path elements,
	 * remove querystring, but we'll leave that for a later date */

	/* get ref_pos for this url; for circle_struct (c) later */
	b->ref_pos = rm->insert(bufsp);
	b->ref_hash = TTHash(bufsp);

	/* user-agent is as yet unused */

	return 0;
}


/* Stub: accepts any line, examines neither argument and reports success. */
int AtopLogParser::parse(char *logline, struct logbits *b)
{
	/* neither parameter is used yet */
	(void) logline;
	(void) b;

	return 0;
}


/* generic parser helper functions */
/* Extract the URL from a request of the form `METHOD URL HTTP/x.y" `.
 *
 * On entry *buf points into the request, at or before the space that
 * follows the method. The space in front of "HTTP/" is overwritten with a
 * NUL so the URL becomes a string of its own, which is then handed to
 * mungeURL(). On success *buf is moved to the character after the closing
 * quote-and-space, and the start of the URL is returned. Returns NULL, with
 * *buf untouched, when any of the expected delimiters is missing.
 */
char *LogParser::processURL(char **buf)
{
	/* the URL starts one past the first space, i.e. after the method */
	char *method_end = strchr(*buf, ' ');
	if (method_end == NULL)
		return NULL;

	char *url = method_end + 1;

	/* the URL stops where " HTTP/" begins; cut the line at that space */
	char *url_end = strstr(url, " HTTP/");
	if (url_end == NULL)
		return NULL;
	*url_end = '\0';

	/* start and length are all mungeURL wants to know */
	int url_len = url_end - url;

	/* the request is closed by a quote followed by a space; the caller
	 * resumes parsing just beyond that pair */
	char *closing = strstr(url_end + 1, "\" ");
	if (closing == NULL)
		return NULL;

	mungeURL(&url, &url_len);

	*buf = closing + 2;

	return url;
}

/* munge the url passed in *url inplace, according to the configuration:
 *   - unless cf.keep_querystring is set, cut the URL at the first '?'
 *     (or, if there is no '?', at the first '&')
 *   - if cf.keep_segments > 0, cut the URL just after that many slashes,
 *     not counting the leading one; a URL with fewer slashes is kept whole
 *   - if cf.lowercase_urls is set, lowercase what remains
 *
 * *length is the original length on entry; on return it holds the length
 * of the munged URL. The byte at (*url)[original length] is overwritten
 * with a NUL, so it must be writable. *url itself is not moved.
 * Always returns 0.
 */
int LogParser::mungeURL(char **url, int *length)
{
	char *start = *url;
	char *end = start + *length; /* tracks the current end of the URL */

	*end = '\0';

	/* do we want to keep the query string? */
	if (!cf.keep_querystring)
	{
		/* null the first ? or & - anything after
		 * it is unrequired; it's the querystring */
		char *query = strchr(start, '?');
		if (!query)
			query = strchr(start, '&');

		if (query)
		{
			*query = '\0';
			end = query;
		}
	}

	/* how many path segments of the url are we keeping? nothing to do
	 * for an empty URL */
	if (cf.keep_segments > 0 && end > start)
	{
		/* given a path of /foo/bar/moo/ and a keep_segments of 2,
		 * we want the / after the second element */
		char *cut = end; /* keep it all unless enough slashes turn up */
		int seen = 0;

		/* scanning starts at the third character: the leading / and
		 * the character after it are never counted */
		for (char *p = start + 2; p < end; ++p)
		{
			if (*p == '/' && ++seen == cf.keep_segments)
			{
				/* cut at the char after this / */
				cut = p + 1;
				break;
			}
		}

		*cut = '\0';
		end = cut;
	}

	/* do we want to lowercase it all? */
	if (cf.lowercase_urls)
	{
		/* go through unsigned char: handing tolower() a negative
		 * value other than EOF is undefined */
		for (char *p = start; p < end; ++p)
			*p = static_cast<char>(
			    tolower(static_cast<unsigned char>(*p)));
	}

	/* report the new length back, as promised */
	*length = static_cast<int>(end - start);

	return 0;
}
