/* Distributed Checksum Clearinghouse
 *
 * Copyright (c) 2005 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC and permission to use
 * U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/
 * or by email to nospam@commtouch.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.42-1.51 $Revision$
 */

#include "dcc_ck.h"


/* See whether the string at p starts with a keyword followed by separators.
 *	The keyword is compared without regard to case over word_len bytes
 *	and must be followed by at least one character from DCC_WHITESPACE.
 *	Returns a pointer just past that run of separator characters,
 *	or 0 if the keyword does not match or nothing separates it from
 *	what follows. */
static char *
ck_word(char *p,
	const char *word,
	int word_len)
{
	char *rest;
	int gap;

	if (0 != strncasecmp(p, word, word_len))
		return 0;

	/* the keyword matched; measure the separators after it */
	rest = p + word_len;
	gap = strspn(rest, DCC_WHITESPACE);

	return gap != 0 ? rest + gap : 0;
}



/* Get the name of one of the files contributing to a whitelist.
 *	A file number of 0 selects wf->ascii_nm.  Any other value selects
 *	the name stored in entry fno-1 of wf->info->hdr.white_incs. */
const char *
wf_fnm(const DCC_WF *wf, int fno)
{
	if (fno == 0)
		return wf->ascii_nm;

	return wf->info->hdr.white_incs[fno-1].nm;
}



/* Build a "file name and line number" string for error messages.
 *	Returns "" when wf is null.
 *	When wf->fno is 0, the work is handed to fnm_lno() with
 *	wf->ascii_nm and wf->lno.
 *	Otherwise wf->lno and the name of included file number wf->fno
 *	(passed through path2fnm()) are formatted into buf with
 *	DCC_FNM_LNO_PAT, followed by " included from " and wf->ascii_nm,
 *	and buf is returned. */
const char *
wf_fnm_lno(DCC_FNM_LNO_BUF buf, const DCC_WF *wf)
{
	if (wf == 0)
		return "";

	if (wf->fno != 0) {
		/* a line in an included file */
		snprintf(buf, sizeof(DCC_FNM_LNO_BUF),
			 DCC_FNM_LNO_PAT" included from %s",
			 wf->lno,
			 path2fnm(wf->info->hdr.white_incs[wf->fno-1].nm),
			 wf->ascii_nm);
		return buf;
	}

	/* a line in the main file */
	return fnm_lno(buf, wf->ascii_nm, wf->lno);
}



/* Parse the text of a whitelist file and at most one level of "include"
 * files, handing each checksum line to dcc_parse_hex_ck() or dcc_parse_ck().
 *	emsg	    passed to dcc_pemsg() to receive error messages
 *	wf	    whitelist being parsed;  wf->fno and wf->lno track the
 *		    current file and line, and wf->info_flags and
 *		    wf->info->hdr are updated by "option", "log", and
 *		    "include" lines
 *	main_fd	    open descriptor for the main file;  it is read but not
 *		    closed here
 *	add_fnc	    passed through to dcc_parse_hex_ck() and dcc_parse_ck()
 *	cidr_fnc    passed through to dcc_parse_ck()
 * Returns
 *	1   the end of the main file was reached without any complaint
 *	0   the end of the main file was reached (or a line in it was too
 *	    long) after at least one problem was reported
 *	-1  dcc_parse_hex_ck() or dcc_parse_ck() returned a negative value
 */
int					/* -1=fatal 0=problems 1=ok */
dcc_parse_whitefile(DCC_EMSG emsg,
		    DCC_WF *wf,
		    int main_fd,	/* main file */
		    DCC_PARSED_CK_FNC add_fnc,
		    DCC_PARSED_CK_CIDR_FNC cidr_fnc)
{
	struct f {
	    int	    fd;
	    char    *start;		/* next unparsed byte */
	    char    *eob;		/* end of the valid bytes in c[] */
	    char    c[1024];
	} main_buf, inc_buf, *cur_buf;
	char tgts_buf[16];
	char *bol, *eol;
	DCC_FNM_LNO_BUF fnm_buf;
	char *type_nm, *ck, *p;
	DCC_TGTS new_tgts;
	DCC_CK_TYPES type;
	struct stat sb;
	int white_fno;
	int main_lno;
	u_char result, hex;
	int i, j;

	result = 1;
	main_buf.fd = main_fd;
	main_buf.eob = main_buf.c;
	main_buf.start = main_buf.c;
	cur_buf = &main_buf;
	wf->fno = white_fno = 0;
	wf->lno = main_lno = 0;
	new_tgts = DCC_TGTS_INVALID;
	for (;;) {
		/* Each substantive line has one of the forms:
		 *	tgts	[hex] type	string
		 *		[hex] type	string
		 *	include	pathname
		 *	option ...
		 * A missing number of targets means the line has the
		 * same number of targets as the previous line */

		++wf->lno;
		/* refill the current buffer until it holds a complete line */
		while (cur_buf->start >= cur_buf->eob
		       || !(eol = memchr(cur_buf->start, '\n',
					 cur_buf->eob - cur_buf->start))) {
			/* slide the partial line to the front of the buffer
			 * to make room for more */
			if (cur_buf->start != cur_buf->c) {
				i = cur_buf->eob - cur_buf->start;
				if (i > 0)
					memmove(cur_buf->c, cur_buf->start, i);
				cur_buf->start = cur_buf->c;
				cur_buf->eob = &cur_buf->c[i];
			}
			j = &cur_buf->c[sizeof(cur_buf->c)] - cur_buf->eob;
			if (j <= 0) {
				/* the buffer is full without a '\n';
				 * give up on this file */
				dcc_pemsg(EX_DATAERR, emsg,
					  "line too long%s",
					  wf_fnm_lno(fnm_buf, wf));
				result = 0;
			} else {
				i = read(cur_buf->fd, cur_buf->eob, j);
				if (i > 0) {
					cur_buf->eob += i;
					continue;
				}

				if (i < 0) {
					dcc_pemsg(EX_IOERR, emsg,
						  "read(%s, %d): %s",
						  wf_fnm(wf, wf->fno), j,
						  ERROR_STR());
					result = 0;
				}
				/* act as if the last line in the file ends
				 * with '\n' even if it does not */
				if (cur_buf->start < cur_buf->eob) {
					eol = cur_buf->eob++;
					break;
				}
			}
			/* finished with the current file */
			if (cur_buf == &main_buf)
				return result;
			if (0 > close(cur_buf->fd)) {
				dcc_pemsg(EX_IOERR, emsg, "close(%s): %s",
					  wf_fnm(wf, wf->fno), ERROR_STR());
				result = 0;
			}
			/* Return to the main file at end of included file.
			 * main_lno is the number of the "include" line and
			 * this "continue" does not pass the ++wf->lno above,
			 * so count the next main line here. */
			cur_buf = &main_buf;
			wf->fno = 0;
			wf->lno = main_lno+1;
			continue;
		}
		bol = cur_buf->start;
		cur_buf->start = eol+1;

		/* Replace the end of the line and any trailing blanks with
		 * nulls.  The first character of the line is left alone,
		 * and eol is never moved below bol. */
		*eol = '\0';
		while (eol - bol > 1
		       && (eol[-1] == ' ' || eol[-1] == '\t'
			   || eol[-1] == '\r'))
			*--eol = '\0';

		/* Ignore blank lines and lines starting with '#' */
		type_nm = bol+strspn(bol, DCC_WHITESPACE);
		if (*type_nm == '\0' || *type_nm == '#')
			continue;

		/* parse
		 *	include	pathname
		 * NOTE(review): unlike the "option" and "log" branches below,
		 * this branch uses wf->info without checking it for null;
		 * confirm callers never allow "include" when wf->info is 0 */
		p = ck_word(type_nm, "include", STRZ("include"));
		if (p) {
			if (cur_buf != &main_buf) {
				dcc_pemsg(EX_DATAERR, emsg,
					  "nested \"include\" not allowed%s",
					  wf_fnm_lno(fnm_buf, wf));
				result = 0;
				continue;
			}

			/* trim quotes if present from the file name */
			i = strlen(p);
			if (i > 1
			    && ((p[0] == '"' && p[i-1] == '"')
				|| (p[0] == '<' && p[i-1] == '>'))) {
				p[i-1] = '\0';
				++p;
				i -= 2;
			}

			if (i == 0 || i >= ISZ(DCC_PATH)) {
				dcc_pemsg(EX_DATAERR, emsg,
					  "unrecognized \"include %s\"%s",
					  p, wf_fnm_lno(fnm_buf, wf));
				result = 0;
				continue;
			}
			if (white_fno >= DIM(wf->info->hdr.white_incs)) {
				dcc_pemsg(EX_DATAERR, emsg,
					  "too many \"include\" files%s",
					  wf_fnm_lno(fnm_buf, wf));
				result = 0;
				continue;
			}

			/* record the name of the included file */
			if (!fnm2path(wf->info->hdr.white_incs[white_fno].nm,
				      p, 0)) {
				dcc_pemsg(EX_DATAERR, emsg,
					  "name \"%s\" too long%s",
					  p, wf_fnm_lno(fnm_buf, wf));
				result = 0;
				continue;
			}

			inc_buf.fd = open(p, O_RDONLY, 0);
			if (inc_buf.fd < 0) {
				dcc_pemsg(EX_DATAERR, emsg,
					  "\"include %s\": %s%s",
					  p, ERROR_STR(),
					  wf_fnm_lno(fnm_buf, wf));
				result = 0;
				continue;
			}
			/* start reading from the included file */
			inc_buf.eob = inc_buf.c;
			inc_buf.start = inc_buf.c;
			cur_buf = &inc_buf;

			/* record the modification time of the included file,
			 * or 0 if it cannot be obtained */
			if (0 > fstat(inc_buf.fd, &sb)) {
				wf->info->hdr.white_incs[white_fno].mtime = 0;
			} else {
				wf->info->hdr.white_incs[white_fno
							].mtime = sb.st_mtime;
			}

			/* remember where we were in the main file and
			 * restart the line count for the included file */
			wf->fno = ++white_fno;
			main_lno = wf->lno;
			wf->lno = 0;
			continue;
		}

		/* honor greylist controls in client whitelists of the forms:
		 *	option log-{all,normal}
		 *	option greylist-{on,off,log-on,log-off}
		 *	option DCC-{on,off}
		 *	option DCC-reps-{on,off}
		 *	option dnsbl-{on,off}
		 *	option xfltr-{on,off}
		 *	option MTA-{first,last}
		 *	option forced-discard-{ok,nok}
		 * change sample whiteclnt file when this changes
		 */
		p = ck_word(type_nm, "option", STRZ("option"));
		if (p) {
			/* Each entry has the option name, its length, the
			 * flag bits to set, and a mask that clears the
			 * bits of the opposite setting. */
			static const struct {
			    const char *str; int len; u_int on; u_int off;
			} *tp, tbl[] = {
#			define DE(s,on,off) {s,STRZ(s),on,(u_int)~(off)},
				DE("log-all",
				   DCC_WHITE_FG_LOG_ALL,
				   0)
				DE("log-normal",
				   DCC_WHITE_FG_LOG_NORMAL,
				   0)

				DE("greylist-on",
				   DCC_WHITE_FG_GREY_ON,
				   DCC_WHITE_FG_GREY_OFF)
				DE("greylist-off",
				   DCC_WHITE_FG_GREY_OFF,
				   DCC_WHITE_FG_GREY_ON)

				DE("greylist-log-on",
				   DCC_WHITE_FG_GREY_LOG_ON,
				   DCC_WHITE_FG_GREY_LOG_OFF)
				DE("greylist-log-off",
				   DCC_WHITE_FG_GREY_LOG_OFF,
				   DCC_WHITE_FG_GREY_LOG_ON)

				DE("DCC-on",
				   DCC_WHITE_FG_DCC_ON,
				   DCC_WHITE_FG_DCC_OFF)
				DE("DCC-off",
				   DCC_WHITE_FG_DCC_OFF,
				   DCC_WHITE_FG_DCC_ON)

				DE("forced-discard-ok",
				   DCC_WHITE_FG_DISCARD_OK,
				   DCC_WHITE_FG_DISCARD_NOK)
				DE("forced-discard-nok",
				   DCC_WHITE_FG_DISCARD_NOK,
				   DCC_WHITE_FG_DISCARD_OK)

				DE("MTA-first",
				   DCC_WHITE_FG_MTA_FIRST,
				   DCC_WHITE_FG_MTA_LAST)
				DE("MTA-last",
				   DCC_WHITE_FG_MTA_LAST,
				   DCC_WHITE_FG_MTA_FIRST)

				DE("DCC-reps-on",
				   DCC_WHITE_FG_REP_ON,
				   DCC_WHITE_FG_REP_OFF)
				DE("DCC-reps-off",
				   DCC_WHITE_FG_REP_OFF,
				   DCC_WHITE_FG_REP_ON)

				DE("DNSBL-on",
				   DCC_WHITE_FG_DNSBL_ON,
				   DCC_WHITE_FG_DNSBL_OFF)
				DE("DNSBL-off",
				   DCC_WHITE_FG_DNSBL_OFF,
				   DCC_WHITE_FG_DNSBL_ON)

				DE("xfltr-on",
				   DCC_WHITE_FG_XFLTR_ON,
				   DCC_WHITE_FG_XFLTR_OFF)
				DE("xfltr-off",
				   DCC_WHITE_FG_XFLTR_OFF,
				   DCC_WHITE_FG_XFLTR_ON)
#			undef DE
			};

			if (!wf->info) {
				dcc_pemsg(EX_DATAERR, emsg, "\"option\""
					  " not legal in server whitelist%s",
					  wf_fnm_lno(fnm_buf, wf));
				result = 0;
				continue;
			}
			/* look for the whole rest of the line in the table */
			i = strlen(p);
			for (tp = tbl; ; ++tp) {
				if (tp > LAST(tbl)) {
					dcc_pemsg(EX_DATAERR, emsg,
						  "unrecognized option%s",
						  wf_fnm_lno(fnm_buf, wf));
					result = 0;
					break;
				}
				if (i == tp->len && !strcasecmp(tp->str, p)) {
					wf->info_flags |= tp->on;
					wf->info->hdr.flags |= tp->on;
					wf->info_flags &= tp->off;
					wf->info->hdr.flags &= tp->off;
					break;
				}
			}
			continue;
		}

		/* honor logging controls in the obsolete form
		 *	log {all-grey,no-grey}
		 * NOTE(review): unlike the "option greylist-log-*" entries
		 * above, these do not clear the opposite flag; confirm that
		 * is intended
		 */
		p = ck_word(type_nm, "log", STRZ("log"));
		if (p) {
			if (!wf->info) {
				dcc_pemsg(EX_DATAERR, emsg, "\"log\""
					  " not legal in server whitelist%s",
					  wf_fnm_lno(fnm_buf, wf));
				result = 0;
			} else if (!strcasecmp(p, "all-grey")) {
				wf->info_flags |= DCC_WHITE_FG_GREY_LOG_ON;
				wf->info->hdr.flags |= DCC_WHITE_FG_GREY_LOG_ON;
			} else if (!strcasecmp(p, "no-grey")) {
				wf->info_flags |= DCC_WHITE_FG_GREY_LOG_OFF;
				wf->info->hdr.flags |=DCC_WHITE_FG_GREY_LOG_OFF;
			} else {
				dcc_pemsg(EX_DATAERR, emsg, "syntax error%s",
					  wf_fnm_lno(fnm_buf, wf));
				result = 0;
			}
			continue;
		}


		/* Look for the number of targets in a simple line */
		if (type_nm != bol) {
			/* If the line started with white space, the number
			 * of targets is the same as the previous line. */
			*bol = '\0';
		} else {
			type_nm += strcspn(type_nm, DCC_WHITESPACE);
			if (*type_nm == '\0') {
				dcc_pemsg(EX_DATAERR, emsg,
					  "missing type in \"%s\"%s",
					  bol, wf_fnm_lno(fnm_buf, wf));
				result = 0;
				continue;
			}
			*type_nm++ = '\0';
			/* bol now starts with the null-terminated
			 * number of targets */
			type_nm += strspn(type_nm, DCC_WHITESPACE);
		}

		ck = type_nm+strcspn(type_nm, DCC_WHITESPACE);

		if (*ck != '\0') {
			/* null terminate the type */
			*ck++ = '\0';
			ck += strspn(ck, DCC_WHITESPACE);
		}

		if (strcasecmp(type_nm, "hex")) {
			hex = 0;
		} else {
			/* "hex" is a prefix;  the real type is the next word
			 * and the value follows it */
			hex = 1;
			type_nm = ck;
			ck = type_nm+strcspn(type_nm, DCC_WHITESPACE);
			if (*ck != '\0') {
				*ck++ = '\0';
				ck += strspn(ck, DCC_WHITESPACE);
			}
		}

		/* parse the target count if it is present instead of blank */
		if (*bol != '\0')
			new_tgts = dcc_str2cnt(bol);
		if (new_tgts == 0 || new_tgts == DCC_TGTS_INVALID) {
			dcc_pemsg(EX_DATAERR, emsg,
				  "missing or invalid # of targets \"%s\"%s",
				  bol, wf_fnm_lno(fnm_buf, wf));
			new_tgts = DCC_TGTS_INVALID;
			result = 0;
			continue;
		}

		if (*ck == '\0') {
			dcc_pemsg(EX_DATAERR, emsg, "missing value%s",
				  wf_fnm_lno(fnm_buf, wf));
			new_tgts = DCC_TGTS_INVALID;
			result = 0;
			continue;
		}

		type = dcc_str2type(type_nm);

		if (new_tgts == DCC_TGTS_OK_MX
		    || new_tgts == DCC_TGTS_OK_MXDCC) {
			if (type != DCC_CK_IP) {
				dcc_pemsg(EX_DATAERR, emsg,
					  "MX servers must be IP addresses%s",
					  wf_fnm_lno(fnm_buf, wf));
				new_tgts = DCC_TGTS_INVALID;
				result = 0;
				continue;
			}
			if (wf->wf_flags & DCC_WF_PER_USER) {
				dcc_pemsg(EX_DATAERR, emsg,
					  "%s illegal in per-user whitelist%s",
					  dcc_tgts2str(tgts_buf,
						       sizeof(tgts_buf),
						       new_tgts, 0),
					  wf_fnm_lno(fnm_buf, wf));
				new_tgts = DCC_TGTS_INVALID;
				result = 0;
				continue;
			}
		}

		/* Look for the type of the checksum, compute the checksum,
		 * and write the checksum.  If it is a host name, write
		 * all of its aliases to the hash table. */
		if (hex) {
			i = dcc_parse_hex_ck(emsg, wf,
					     type_nm, type, ck, new_tgts,
					     add_fnc);
		} else {
			i = dcc_parse_ck(emsg, wf,
					 type_nm, type, ck, new_tgts,
					 add_fnc, cidr_fnc);
		}
		/* give up now on a fatal problem adding a
		 * checksum to the file */
		if (i < 0)
			break;
		if (i == 0)
			result = 0;
	}

	/* failed;  do not leak the descriptor of an included file */
	if (cur_buf != &main_buf)
		close(cur_buf->fd);
	return -1;
}
