/* Distributed Checksum Clearinghouse
 *
 * compute simple checksums
 *
 * Copyright (c) 2005 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC and permission to use
 * U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/
 * or by email to nospam@commtouch.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.42-1.73 $Revision$
 */

#include "dcc_ck.h"
#include "dcc_heap_debug.h"
#include "dcc_xhdr.h"
#ifndef DCC_WIN32
#include <arpa/inet.h>
#endif


/* "substitute" or locally configured checksums */
typedef struct {
    u_int	nm_len;			/* length of nm, not counting '\0' */
    const char	*nm;			/* name of the checksum */
} DCC_SUB_CK;
/* table of configured header names; dcc_add_sub_hdr() appends to it
 * and dcc_ck_get_sub() searches it */
static DCC_SUB_CK sub_cks[DCC_MAX_SUB_CKS];
/* number of entries of sub_cks[] in use */
static u_int num_sub_cks;


/* Compute the checksum of an IPv6 address.
 *	The checksum is the MD5 digest of the bytes of the in6_addr
 *	structure, stored into sum. */
void
dcc_ck_ipv6(DCC_SUM sum, const struct in6_addr *addr)
{
	MD5_CTX md5;

	MD5Init(&md5);
	MD5Update(&md5, (void *)addr, sizeof(struct in6_addr));
	MD5Final(sum, &md5);
}



/* Put the checksum of an IP address into the DCC_CK_IP slot of the set
 * of checksums and remember the address itself in cks->ip_addr.
 *	addrp may point at cks->ip_addr, in which case nothing is copied. */
void
dcc_get_ipv6_ck(DCC_GOT_CKS *cks, const struct in6_addr *addrp)
{
	DCC_GOT_SUM *ip_sum = &cks->sums[DCC_CK_IP];

	ip_sum->type = DCC_CK_IP;
	ip_sum->rpt2srvr = 1;
	ip_sum->tgts = DCC_TGTS_INVALID;
	dcc_ck_ipv6(ip_sum->sum, addrp);

	/* save the address unless the caller already stored it there */
	if (addrp != &cks->ip_addr)
		cks->ip_addr = *addrp;
}



/* Forget the IP address and its checksum
 *	by zeroing cks->ip_addr and the whole DCC_CK_IP slot. */
void
dcc_unget_ipv6_ck(DCC_GOT_CKS *cks)
{
	DCC_GOT_SUM *ip_sum = &cks->sums[DCC_CK_IP];

	memset(ip_sum, 0, sizeof(*ip_sum));
	memset(&cks->ip_addr, 0, sizeof(cks->ip_addr));
}



/* Make the DCC_CK_IP checksum from the text form of an IPv4 or IPv6
 * address.
 *	The string is converted by dcc_str2ip(); 0 is returned and cks is
 *	not touched if that fails.  Otherwise the address is stored in
 *	cks->ip_addr, its checksum is computed, and 1 is returned. */
u_char
dcc_get_str_ip_ck(DCC_GOT_CKS *cks,	/* put checksum here */
		  const char *str)	/* from this IP address string */
{
	DCC_SOCKU addr;

	if (!dcc_str2ip(&addr, str))
		return 0;

	if (addr.sa.sa_family != AF_INET) {
		cks->ip_addr = addr.ipv6.sin6_addr;
	} else {
		/* store IPv4 addresses in IPv6 form so that everyone
		 * computes the same checksum for the same host */
		dcc_ipv4toipv6(&cks->ip_addr, addr.ipv4.sin_addr);
	}

	dcc_get_ipv6_ck(cks, &cks->ip_addr);
	return 1;
}



/* Compute a checksum from a string.
 *	White space is removed and upper case is folded to lower case.
 *	Then one matching pair of enclosing angle brackets ("<...>") or
 *	double quotes is removed, followed by any trailing periods.
 *	At most DCC_HDR_CK_MAX characters remaining after white space
 *	removal are used.
 *	If hdr is not null, the hdr_len bytes at hdr are hashed before the
 *	string so that different substitute headers with equal values
 *	get different checksums.
 *	The MD5 digest is stored in sum. */
void
dcc_str2ck(DCC_SUM sum,
	   const char *hdr,		/* substitute header type */
	   u_int hdr_len,
	   const char *str)		/* string to checksum */
{
	MD5_CTX ctx;
	u_int len;
	char *p;
	char c, cbuf[DCC_HDR_CK_MAX];

	/* ignore whitespace and case */
	p = cbuf;
	while ((c = *str++) != '\0' && p <= LAST(cbuf)) {
		if (DCC_IS_WHITE(c))
			continue;
		*p++ = DCC_TO_LOWER(c);
	}
	/* from here on str is the start and p is the end of what is hashed,
	 * so that p == str+len */
	str = cbuf;
	len = p - str;
	/* "<>" is legal at least as a sender
	 * Remove a matching pair of leading and trailing <> or " characters */
	if (len >= 2
	    && ((*str == '<' && *(p-1) == '>')
		|| (*str == '"' && *(p-1) == '"'))) {
		++str;
		len -= 2;
		/* only one character is dropped from the end;
		 * the other one was dropped from the start */
		--p;
	}
	/* strip trailing periods, mostly for mail_host */
	while (len >= 1
	       && *(p-1) == '.') {
		--len;
		--p;
	}
	MD5Init(&ctx);
	if (hdr)
		MD5Update(&ctx, hdr, hdr_len);
	MD5Update(&ctx, str, len);
	MD5Final(sum, &ctx);
}



/* Make a checksum from a string for a header or envelope value.
 *	Only the kinds of checksums that are made from text are accepted.
 *	Any other listed kind is logged as a software error and 0 is
 *	returned.  The result goes in the slot of cks->sums[] indexed by
 *	type, marked with rpt2srvr and with no target count yet. */
u_char					/* 1=ok 0=bad string */
dcc_get_cks(DCC_GOT_CKS *cks,
	    DCC_CK_TYPES type, const char *str, u_char rpt2srvr)
{
	DCC_GOT_SUM *slot = &cks->sums[type];

	switch (type) {
	case DCC_CK_ENV_FROM:
	case DCC_CK_FROM:
	case DCC_CK_ENV_TO:
	case DCC_CK_RECEIVED:
	case DCC_CK_MESSAGE_ID:
		/* plain text checksums with no header name mixed in */
		dcc_str2ck(slot->sum, 0, 0, str);
		break;

	case DCC_CK_INVALID:
	case DCC_CK_IP:
	case DCC_CK_SUB:
	case DCC_CK_SRVR_ID:
	case DCC_CK_BODY:
	case DCC_CK_FUZ1:
	case DCC_CK_FUZ2:
	case DCC_CK_G_MSG_R_TOTAL:
	case DCC_CK_G_TRIPLE_R_BULK:
		/* these are not made here */
		dcc_logbad(EX_SOFTWARE, "invalid checksum %s",
			   dcc_type2str_err(type, 0, 0, 0));
		return 0;
	}

	slot->tgts = DCC_TGTS_INVALID;
	slot->rpt2srvr = rpt2srvr;
	slot->type = type;
	return 1;
}



/* Make the checksum for a locally configured ("substitute") header.
 *	hdr must start with a name previously given to dcc_add_sub_hdr()
 *	that is followed by ':' or '\0'.  The match ignores case.
 *	When str is null, the value is taken from hdr just after the name
 *	and one more character, so the caller must then have left the
 *	colon and the value in hdr.
 *	The checksum goes in the DCC_CK_SUB slot, in a slot past
 *	DCC_CK_TYPE_LAST, or in the slot already used for this header. */
u_char					/* 1=done 0=failed */
dcc_ck_get_sub(DCC_GOT_CKS *cks,
		 const char *hdr,	/* header name, not '\0' terminated */
		 const char *str)	/* header value if not after hdr */
{
	const DCC_SUB_CK *sck, *sck_lim;
	DCC_GOT_SUM *g;
	DCC_CK_TYPES type;
	char c;

	/* look for the header name in the list of locally configured headers */
	sck_lim = &sub_cks[num_sub_cks];
	for (sck = &sub_cks[0]; ; ++sck) {
		if (sck >= sck_lim)
			return 0;	/* this header is not in the list */
		if (strncasecmp(hdr, sck->nm, sck->nm_len))
			continue;
		/* the name must be all of the field name */
		c = hdr[sck->nm_len];
		if (c == ':' || c == '\0')
			break;
	}

	/* Get the header value if the caller did not separate it.
	 * The colon is present if the header field was not separated */
	if (!str)
		str = hdr+sck->nm_len+1;

	/* find a free checksum slot
	 * or a slot already assigned to the header */
	for (type = DCC_CK_SUB, g = &cks->sums[DCC_CK_SUB];
	     ;
	     ++type, ++g) {
		if (type >= DIM(cks->sums))
			return 0;	/* none free */

		if (g->type == DCC_CK_SUB) {
			if (g->hdr == sck->nm)
				break;	/* found previously assigned slot */
		} else if (g->type == DCC_CK_INVALID) {
			if (type == DCC_CK_SUB
			    || type > DCC_CK_TYPE_LAST)
				break;	/* found a free slot */
		}
	}

	/* the header name is mixed into the checksum */
	dcc_str2ck(g->sum, sck->nm, sck->nm_len, str);
	g->hdr = sck->nm;
	g->tgts = DCC_TGTS_INVALID;
	g->rpt2srvr = 1;
	g->type = DCC_CK_SUB;
	return 1;
}



/* Add a name to the list of locally configured or substitute headers.
 *	hdr is an SMTP field name optionally followed by a single ':'.
 *	The colon is not part of the stored name.  The name is saved
 *	in lower case in memory from dcc_malloc() that is not freed here.
 *	A name that is already in the list is quietly accepted.
 *	Returns 1 if the name is in the list afterwards.  Returns 0 after
 *	putting a message in emsg if the list is full, the name is empty,
 *	or the name contains white space, control or non-ASCII characters,
 *	or a colon other than a final one. */
u_char
dcc_add_sub_hdr(DCC_EMSG emsg, const char *hdr)
{
	const char *p;
	char c, *q;
	u_int n, len;

	if (num_sub_cks >= DIM(sub_cks)) {
		dcc_pemsg(EX_USAGE, emsg,
			  "too many substitute headers with \"%s\"", hdr);
		return 0;
	}

	/* find the end of the name while checking its characters */
	p = hdr;
	for (;;) {
		if (*p == '\0')
			break;
		/* Stop at a final colon without backing up, so that
		 * p-hdr is the length of the whole name
		 * and is 0 instead of negative for ":" */
		if (*p == ':' && p[1] == '\0')
			break;
		if (*p <= ' ' || *p >= 0x7f || *p == ':') {
			dcc_pemsg(EX_USAGE, emsg,
				  "illegal SMTP field name character in \"%s\"",
				  hdr);
			return 0;
		}
		++p;
	}

	len = p - hdr;
	if (len == 0) {
		dcc_pemsg(EX_USAGE, emsg, "illegal empty field name");
		return 0;
	}

	/* ignore duplicates */
	for (n = 0; n < num_sub_cks; ++n) {
		if (len == sub_cks[n].nm_len
		    && !strncasecmp(hdr, sub_cks[n].nm, len))
			return 1;
	}

	/* save a lower case, '\0' terminated copy of the name;
	 * len is at least 1 here, so the do-while is safe */
	sub_cks[num_sub_cks].nm_len = len;
	q = dcc_malloc(len+1);
	sub_cks[num_sub_cks].nm = q;
	do {
		c = *hdr++;
		*q++ = DCC_TO_LOWER(c);
	} while (--len > 0);
	*q = '\0';
	++num_sub_cks;

	return 1;
}



/* Capture a bracketed address such as "[192.168.1.10]" or
 * "[::ffff:10.0.0.1]" from a Received: header.
 *	hdr must point at the '['.  On success the '[' and the address,
 *	without the ']', are copied to addr_buf as a '\0' terminated string
 *	and the length of that string, including the '[', is returned.
 *	0 is returned if the text between the brackets is shorter than
 *	7 characters, too long to be an address, contains characters other
 *	than hex digits, '.' and ':', or is not ended by ']'. */
static int
get_received_addr(char addr_buf[INET6_ADDRSTRLEN+2],
      const char *hdr)			/* *hdr == '[' before the address */
{
	static const char addr_chars[] = ".:abcdefABCDEF0123456789";
	int len;

	/* count the '[' and the following run of address characters */
	len = 1+strspn(hdr+1, addr_chars);

	/* the bounds are checked first so that hdr[len] is examined
	 * only for plausible lengths */
	if (len < 1+7 || len > INET6_ADDRSTRLEN || hdr[len] != ']')
		return 0;

	/* capture the address
	 *	include leading '[' in case we later need a host name */
	memcpy(addr_buf, hdr, len);
	addr_buf[len] = '\0';
	return len;
}



/* find IP address, client host name, and HELO string in a Received:
 *	    header of forms:
 *  #1	Received: from helo (hostname [addr] ...
 *	Received: from helo ([addr] ...
 *  #2	Received: from hostname [addr] ...
 *	Received: from  [addr] ...
 *  #3	Received: from qmailheloandhostname (addr) ...
 *  #4	Received: from qmailhostname (HELO qmailhelo) (addr) ...
 *   or	Received: from qmailhostname (HELO qmailhelo) ([addr]) ...
 *
 * recognize but otherwise ignore these forms:
 *  #5	Received: from localhost by hostname with LMTP
 *  #6	Received  (qmail 4824 invoked by uid 1000); 8 Nov 2005 12:13:33 -0000
 *  #7	Received: (qmail 21530 invoked from network); 29 Aug 2005 16:05:04 -0000
 *  #8	Received: (from user@localhost) by lochost (8.12.10/8.12.10/Submit) ...
 *
 * This should be called only with Received: headers that are known to
 *	have been added by trustworthy code such as the local system
 *	or an MX secondary.
 *
 * The "Received:" field name at the start of hdr is optional.
 * When an address is found, its checksum is put in cks, its text form is
 *	put in clnt_str, and, if the buffers are present, the client name
 *	is put in clnt_name and the HELO value in helo.  The bracketed
 *	address is used as the client name if the header has none.
 *
 * Return 0 for an unknown header, "" if an IP address was found,
 *	or a short string naming the type of a recognized header that
 *	has no address ("qmail", "sendmail Submit", or "LMTP").
 */
const char *
parse_received(const char *hdr,		/* the null terminated header */
	       DCC_GOT_CKS *cks,	/* put address checksum here */
	       char *helo,		/* optionally put HELO value here */
	       int helo_len,
	       char *clnt_str, int clnt_str_len,
	       char *clnt_name, int clnt_name_len)
{
	char addr_buf[INET6_ADDRSTRLEN+2];
	const char *h, *n;
	int h_len, n_len, a_len;
	int i;

	/* make the field name optional
	 *	skip the colon with the name so that "from" can follow */
	if (!CSTRCMP(hdr, "Received:"))
		hdr += STRZ("Received:");
	hdr += strspn(hdr, " \t\r\n");

/* #define DCC_DEBUG_PARSE_RECEIVED */
#ifdef DCC_DEBUG_PARSE_RECEIVED
	printf("\n\nReceived: %s\n", hdr);
#endif
	/* set l to the length of a plausible dotted quad at p */
#define SPAN_ADDR(l,p) (*(p) >= '0' && *(p) <= '9'			\
			&& ((l) = strspn((p), "0123456789.")) >= 7	\
			&& (l) < INET_ADDRSTRLEN)

	if (CSTRCMP(hdr, "from")) {
		/* It does not match "Received: from" in #1, #2, #3, and #5
		 * Recognize #6 and #7 */
		if (!CSTRCMP(hdr, "(qmail ")) {
			hdr += STRZ("(qmail ");
			i = strspn(hdr, "0123456789");
			if (i == 0)
				return 0;
			hdr += i;
			if (!CSTRCMP(hdr, " invoked from network)")
			    || !CSTRCMP(hdr, " invoked by uid "))
				return "qmail";
			return 0;
		}
		/* recognize #8 */
		if (!CSTRCMP(hdr, "(from ")) {
			hdr += STRZ("(from ");
			hdr = strpbrk(hdr, DCC_WHITESPACE"@");
			if (!hdr || *hdr != '@')
				return 0;
			hdr = strpbrk(hdr, DCC_WHITESPACE")");
			if (!hdr || *hdr != ')')
				return 0;
			hdr += 1+strspn(hdr+1, DCC_WHITESPACE);
			if (CSTRCMP(hdr, "by "))
				return 0;
			hdr += STRZ("by ");
			if (strstr(hdr, "/Submit"))
				return "sendmail Submit";
			return 0;
		}
		/* unrecognized
		 *	do not fall into the "from" parsing below, which
		 *	would skip 4 characters that are not "from" and
		 *	might not even exist */
		return 0;
	}

	hdr += STRZ("from");
	i = strspn(hdr, DCC_WHITESPACE);
	if (i == 0)
		return 0;
	hdr += i;

	/* We have "Received: from "
	 * get the host name or HELO value before '(' or '[' in
	 * #1, #2, #3, and #5 */
	h = hdr;
	hdr = strpbrk(hdr, DCC_WHITESPACE"([");
	if (!hdr)
		return 0;		/* unrecognized */
	h_len = hdr - h;
	hdr += strspn(hdr, DCC_WHITESPACE);

	/* In every branch that finds an address, addr_buf is left
	 * containing '[' and the address, and a_len is the length of
	 * that string including the '[' */
	if (*hdr == '(') {
		/* look for client host name of #1
		 * or IPv4 address of #3
		 * or HELO value and IPv4 address of #4 */
		++hdr;

		if (SPAN_ADDR(a_len, hdr)
		    && hdr[a_len] == ')') {
			/* we seem to have the IPv4 address of #3 */
			n = h;
			n_len = h_len;
			addr_buf[0] = '[';
			memcpy(addr_buf+1, hdr, a_len);
			/* count the '[' as get_received_addr() does */
			addr_buf[++a_len] = '\0';

		} else if (!strncmp(hdr, "HELO ", STRZ("HELO "))
			   && hdr[STRZ("HELO ")] != '[') {
			/* we have the #4 qmail HELO form when reverse DNS name
			 * and helo value differ or unrecognizable */
			n = h;
			n_len = h_len;
			h = hdr + STRZ("HELO ");
			hdr = strpbrk(h, " \t'\"()[]");
			if (!hdr)
				return 0;
			h_len = hdr - h;
			if (!h_len
			    || strncmp(hdr, ") (", STRZ(") (")))
			    return 0;
			hdr += STRZ(") (");
			if (SPAN_ADDR(a_len, hdr)
			    && hdr[a_len] == ')') {
				addr_buf[0] = '[';
				memcpy(addr_buf+1, hdr, a_len);
				/* count the '[' */
				addr_buf[++a_len] = '\0';
			} else if (hdr[0] == '['
				   && SPAN_ADDR(a_len, hdr+1)
				   && hdr[1+a_len] == ']'
				   && hdr[2+a_len] == ')') {
				memcpy(addr_buf, hdr, a_len+1);
				/* count the '[' */
				addr_buf[++a_len] = '\0';
			} else {
				return 0;
			}

		} else {
			/* it is #1 or unrecognizable */
			n = hdr;
			hdr = strpbrk(hdr, DCC_WHITESPACE"[");
			if (!hdr)
				return 0;
			n_len = hdr - n;
			hdr += strspn(hdr, DCC_WHITESPACE);
			if (*hdr != '[')
				return 0;
			a_len = get_received_addr(addr_buf, hdr);
			if (!a_len)
				return 0;
		}

	} else if (*hdr == '[') {
		/* format #2; we have possibly null client name and no HELO */
		n = h;
		n_len = h_len;
		h_len = 0;
		a_len = get_received_addr(addr_buf, hdr);
		if (!a_len)
			return 0;

	} else if (!CSTRCMP(hdr, "by ")) {
		/* recognize #5 */
		hdr += STRZ("by ");
		n = strchr(hdr, ' ');
		if (!n || n > hdr+MAXHOSTNAMELEN)
			return 0;
		if (!CSTRCMP(n, " with LMTP"))
			return "LMTP";	/* stupid type string */
		return 0;

	} else {
		return 0;
	}

	/* it looks ok so send out all the answers
	 * if the IP address makes sense */
	if (!dcc_get_str_ip_ck(cks, addr_buf+1))
		return 0;

	dcc_ipv6tostr(clnt_str, clnt_str_len, &cks->ip_addr);

	if (clnt_name && clnt_name_len) {
		if (n_len == 0) {
			/* use address as the client host name
			 *	a_len is at most INET6_ADDRSTRLEN, so both
			 *	stores fit in addr_buf */
			addr_buf[a_len] = ']';
			addr_buf[a_len+1] = '\0';
			n_len = a_len+1;
			n = addr_buf;
		}
		/* copy no more than the name, which is usually followed
		 * by the rest of the header */
		if (clnt_name_len > n_len+1)
			clnt_name_len = n_len+1;
		STRLCPY(clnt_name, n, clnt_name_len);
	}

	if (helo && helo_len) {
		if (helo_len > h_len+1)
			helo_len = h_len+1;
		STRLCPY(helo, h, helo_len);
	}

#ifdef DCC_DEBUG_PARSE_RECEIVED
	printf("helo=%s  clnt_str=%s  clnt_name=%s\n",
	       helo, clnt_str, clnt_name);
#endif

	return "";

#undef SPAN_ADDR
}



/* Get the envelope sender from the value of a Return-Path: header.
 *	Nothing is done and 0 is returned if cks already has an env_From
 *	checksum or if the value is empty or only "<>".
 *	Otherwise the value, without leading blanks, trailing "\r\n", and
 *	enclosing "<>", is copied to env_from_buf if that is not null,
 *	truncated to DCC_HDR_CK_MAX characters, and the DCC_CK_ENV_FROM
 *	checksum is computed and marked to be reported to the server. */
u_char					/* 1=found env_From value */
parse_return_path(const char *hdr,
		  DCC_GOT_CKS *cks,
		  char env_from_buf[DCC_HDR_CK_MAX+1])
{
	const char *val;
	int i;

	/* don't bother if we already have an env_From value */
	if (cks->sums[DCC_CK_ENV_FROM].type != DCC_CK_INVALID)
		return 0;

	hdr += strspn(hdr, " \t");
	val = hdr;
	i = strlen(hdr);
	while (i > 0 && (hdr[i-1] == '\r' || hdr[i-1] == '\n'))
		--i;
	if (i >= 2 && *hdr == '<' && hdr[i-1] == '>') {
		++hdr;
		i -= 2;
	}
	if (i == 0)
		return 0;

	if (env_from_buf) {
		if (i > DCC_HDR_CK_MAX)
			i = DCC_HDR_CK_MAX;
		memcpy(env_from_buf, hdr, i);
		env_from_buf[i] = '\0';
	}

	/* Checksum the value with both of its angle brackets.
	 * dcc_str2ck() removes white space and a matching "<>" pair, but
	 * it would keep and hash a '>' whose '<' had been skipped */
	dcc_get_cks(cks, DCC_CK_ENV_FROM, val, 1);
	return 1;
}



/* Print a table of the checksums of a message, one line per checksum,
 * preceded by a line of column headings.
 *	Each line is built in a local buffer, ended with '\n', and given
 *	with its length to out() along with arg.
 *	The heading contains the local target count, followed by " spam"
 *	if is_spam.  The "server" column is present if any checksum has a
 *	target count.  The "wlist" column is present if have_wlist or if
 *	any checksum has a non-zero entry in wtgts.
 *	Nothing is printed if there are no valid checksums. */
void
dcc_print_cks(LOG_WRITE_FNC out, void *arg,
	      u_char is_spam, DCC_TGTS local_tgts,
	      const DCC_GOT_CKS *cks, DCC_CKS_WTGTS wtgts, u_char have_wlist)
{
#define CK_PAT_CK_H	"                            reported: %-15s checksum"
	char local_tgts_buf[12], white_tgts_buf[12], dcc_tgts_buf[12];
	char type_buf[26], cbuf[DCC_CK2STR_LEN];
	char line[80];
	const DCC_GOT_SUM *g;
	u_char have_server, need_heading, reported;
	int inx, len;

	/* decide which column headings are needed */
	have_server = 0;
	for (g = cks->sums; g <= LAST(cks->sums); ++g) {
		if (g->type == DCC_CK_INVALID)
			continue;
		inx = g - cks->sums;
		if (g->tgts != DCC_TGTS_INVALID)
			have_server = 1;
		if (wtgts[inx] != 0)
			have_wlist = 1;
	}

	need_heading = 1;
	for (g = cks->sums; g <= LAST(cks->sums); ++g) {
		if (g->type == DCC_CK_INVALID)
			continue;
		inx = g - cks->sums;

		/* the headings go out just before the first checksum */
		if (need_heading) {
			need_heading = 0;
			dcc_tgts2str(local_tgts_buf, sizeof(local_tgts_buf),
				     local_tgts, 0);
			if (is_spam)
				STRLCAT(local_tgts_buf, " spam",
					sizeof(local_tgts_buf));
			len = snprintf(line, sizeof(line), CK_PAT_CK_H,
				       local_tgts_buf);
			/* an empty server heading keeps the wlist
			 * heading in its column */
			if (len < ISZ(line)
			    && (have_server || have_wlist))
				len += snprintf(line+len, sizeof(line)-len,
						PRINT_CK_PAT_SRVR,
						have_server ? "server" : "");
			if (len < ISZ(line)
			    && have_wlist)
				len += snprintf(line+len, sizeof(line)-len,
						PRINT_CK_PAT_WLIST, "wlist");
			/* leave room for '\n' and '\0' if truncated */
			if (len >= ISZ(line)-1)
				len = sizeof(line)-2;
			line[len++] = '\n';
			line[len] = '\0';
			out(arg, line, len);
		}

		/* the server count is shown only for checksums that were
		 * reported and answered */
		reported = (g->rpt2srvr && g->tgts != DCC_TGTS_INVALID);

		len = snprintf(line, sizeof(line), PRINT_CK_PAT_CK,
			       dcc_type2str(type_buf, sizeof(type_buf),
					    g->type, g->hdr, 0, 0),
			       dcc_ck2str(cbuf, sizeof(cbuf),
					  g->type, g->sum));
		if (len < ISZ(line)
		    && (reported || wtgts[inx] != 0))
			len += snprintf(line+len, sizeof(line)-len,
					PRINT_CK_PAT_SRVR,
					reported
					? dcc_tgts2str(dcc_tgts_buf,
						       sizeof(dcc_tgts_buf),
						       g->tgts, 0)
					: "");
		if (len < ISZ(line)
		    && wtgts[inx] != 0)
			len += snprintf(line+len, sizeof(line)-len,
					PRINT_CK_PAT_WLIST,
					dcc_tgts2str(white_tgts_buf,
						     sizeof(white_tgts_buf),
						     wtgts[inx], 0));
		/* leave room for '\n' and '\0' if truncated */
		if (len >= ISZ(line)-1)
			len = sizeof(line)-2;
		line[len++] = '\n';
		line[len] = '\0';
		out(arg, line, len);
	}
#undef CK_PAT_CK_H
}
