/* Distributed Checksum Clearinghouse
 *
 * database cleaner
 *
 * Copyright (c) 2005 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC and permission to use
 * U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/
 * or by email to nospam@commtouch.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.42-1.182 $Revision$
 */

#include "srvr_defs.h"
#include "dcc_ck.h"
#include <syslog.h>
#include <signal.h>

/* buffer for error messages produced by the DCC library calls below */
static DCC_EMSG dcc_emsg;

static DCC_WF dbclean_wf;
static DCC_WHITE_TBL dbclean_white_tbl;
/* client context used to send administrative requests to dccd */
static DCC_CLNT_CTXT *ctxt;
/* most recent answer from dccd to an administrative request */
static AOP_RESP aop_resp;
/* incremented each time dccd is asked to shut down or halt flooding */
static int flods_off;
static int dccd_unlocked;		/* dccd has been told to unlock	*/

/* name, port, ID, and password of the server to talk to */
static DCC_SRVR_NM srvr = DCC_SRVR_NM_DEF;
static DCC_CLNT_ID srvr_clnt_id = DCC_ID_INVALID;   /* from -i */
static ID_TBL *srvr_clnt_tbl;
static u_char info_flags = 0;		/* DCC_INFO_FG_IPV6 with -6 */
/* -f clears and -F sets DB_OPEN_NO_MMAP in the database open mode */
#ifdef USE_DBCLEAN_F
static u_char dbclean_db_mode = DB_OPEN_NO_MMAP;
#else
static u_char dbclean_db_mode = 0;
#endif

static const DB_HDR def_db_hdr = DB_HDR_DEF;
static DB_HDR old_db_hdr;

static u_char dccd_started_us;		/* set by -D */
static u_char cleardb;			/* 1=clear the database */
static u_char repair;			/* 1=only repair the database */
static u_char standalone;		/* 1=don't even try talk to dccd */
static u_char quiet;			/* 1=don't announce results to stdout */
static u_char keep_white;		/* 1=do not rebuild whitelist */

static const char *homedir;		/* from -h */
static u_char cur_db_created;		/* 1=we created the current file */
/* DB_DCC_NAME by default or DB_GREY_NAME with -G */
static const char *cur_db_nm_str = DB_DCC_NAME;
static DCC_PATH cur_db_nm;
static DCC_PATH cur_hash_nm;
/* handle on the current database; in standalone mode it holds the lock
 * that shows the daemon is not running */
static int old_db_fd = -1;
static DB_HADDR old_db_hash_used;
static DB_NOKEEP_CKS old_db_nokeep_cks;
static DB_FLOD_THOLDS old_db_flod_tholds;
static u_int old_db_flags;
static DB_PTR old_db_pos,  new_db_csize;
static off_t new_db_fsize;
static u_int new_db_page_size, tgt_db_page_size;
/* copy of the flood positions that is adjusted and later installed */
static FLOD_MMAPS new_flod_mmaps;
static u_char adj_delay_pos;		/* 1=delay_pos still needs adjusting */
/* lock file that excludes other instances of this program */
static int lock_db_fd = -1;
static DCC_PATH lock_db_nm;
static u_char new_db_created;
static DCC_PATH new_db_nm;		/* current name with "-new" appended */
static int new_db_fd = -1;
static u_char new_hash_created;
static DCC_PATH new_hash_nm;
static DCC_PATH old_db_nm;		/* current name with "-old" appended */

static int expire_secs = -1;		/* from -e; <0 means not set */
static int def_expire_secs = DB_EXPIRE_SECS_DEF;
static int expire_spamsecs = -1;	/* from -E; <0 means not set */
static int def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF;
/* >0 after -e, -E, or -t; <0 after -P or -G; the two groups conflict */
static int have_expire_parms = 0;
static DB_EX_TS new_ex_ts;
static DB_EX_SECS new_ex_secs;		/* per-type durations set by -t */
static DCC_TS spamts[DCC_DIM_CKS];

static DB_HADDR new_hash_len;		/* from -s */

/* counters */
static int expired_rcds, comp_rcds, obs_rcds, expired_cks;
static int white_cks, kept_cks;

static struct timeval start;		/* when this run began */
/* timestamp made from the start time plus 24*60*60 */
static DCC_TS future_ts;

#define RESTART_DELAY	(60*5)

/* state for rate-limiting the progress reports */
static time_t progress_rpt;
#define REPORT_INTERVAL		(5*60)
#define REPORT_INTERVAL_FAST	10
static u_char progress_rpt_started;
static int progress_rpt_percent;

/* forward declarations */
static u_char write_new_flush(u_char);
static u_char write_new_rcd(const void *, int);
static void write_new_hdr(u_char);
static void unlink_whine(const char *);
static void rename_bail(const char *, const char *);
static u_char expire(DB_PTR);
static u_char copy_db(void);
static u_char catchup(void);
static void parse_white(void);
static void build_hash(void);
static void server_listening(DCC_AOPS, u_int32_t);
static void dccd_new_db(const char *);
static void dbclean_msg(const char *p, ...) PATTRIB(1,2);
static void finish(void);
static void deadman(int);
static void sigterm(int);


/* Complain about the command line.
 *	die!=0	log the synopsis (or "giving up" if the synopsis has already
 *		been logged) with dcc_logbad(EX_USAGE), which is expected
 *		not to return
 *	die==0	log the synopsis once and let the caller continue */
static void
usage(u_char die)
{
	static const char str[] =
		"usage: [-64dDfFNRPSVq] [-i id]"
		" [-a [server-addr][,server-port]] [-h homedir]\n"
		"   [-G on] [-s hash-size] [-e seconds] [-E spamsecs]\n"
		"   [-t type,allsecs,threshold,spamsecs]"
		" [-L ltype,facility.level]";
	static u_char complained;

	/* it's important to try to run, so don't give up unless necessary */
	if (die) {
		/* never hand a variable string to a printf-style function
		 * as the pattern */
		dcc_logbad(EX_USAGE, "%s", complained ? "giving up" : str);
	} else if (!complained) {
		dcc_error_msg("%s\ncontinuing", str);
		complained = 1;
	}
}


/* Clean, repair, or create a DCC database.
 *	Parse the command line, exclude other instances of this program with
 *	a lock file, build "-new" database and hash files, and rename them
 *	into place while keeping the previous database as "-old".
 *	Unless -S or -N is used, dccd is told to stop flooding and to unlock
 *	the database while we work, and then to switch to the new files.
 *	This function always leaves through exit() or dcc_logbad(). */
int NRATTRIB
main(int argc, char **argv)
{
	char hostname[MAXHOSTNAMELEN];
	const char *rest;
	char *duparg, *cntstr, *allstr, *spamstr, *p;
	int allsecs, spamsecs;
	DCC_TGTS tgts;
	u_char print_version = 0;
	u_long l;
	DCC_CK_TYPES type;
	struct stat sb;
	int i;

	gettimeofday(&start, 0);
	progress_rpt = start.tv_sec;
	dcc_timeval2ts(future_ts, &start, 24*60*60);

	dcc_syslog_init(1, argv[0], 0);

	/* this must match DBCLEAN_GETOPTS in cron-dccd.in */
	while ((i = getopt(argc, argv,
			   "64dDfFNRPSVqi:a:h:G:s:e:E:t:L:")) != EOF) {
		switch (i) {
		case '6':
#ifndef NO_IPV6
			info_flags = DCC_INFO_FG_IPV6;
#endif
			break;
		case '4':
			info_flags = 0;
			break;

		case 'd':
			/* the second and later -d also turn on
			 * client debugging */
			if (db_debug++)
				++dcc_clnt_debug;
			break;

		case 'D':
			dccd_started_us = 1;
			break;

		case 'f':
			dbclean_db_mode &= ~DB_OPEN_NO_MMAP;
			break;


		case 'F':
			dbclean_db_mode |= DB_OPEN_NO_MMAP;
			break;

		case 'N':		/* make a new, clear database */
			cleardb = 1;
			standalone = 1;
			break;

		case 'R':
			repair = 1;
			break;

		case 'P':
			if (have_expire_parms > 0)
				dcc_logbad(EX_USAGE,
					   "do not use -P with -e, -E, or -t");
			have_expire_parms = -1;
			break;

		case 'S':
			standalone = 1;
			break;

		case 'V':
			fprintf(stderr, DCC_VERSION"\n");
			print_version = 1;
			break;

		case 'q':
			quiet = 1;
			break;

		case 'i':
			l = strtoul(optarg, &p, 0);
			if (*p != '\0'
			    || l < DCC_SRVR_ID_MIN
			    || l > DCC_SRVR_ID_MAX)
				dcc_logbad(EX_USAGE, "invalid DCC ID \"-i %s\"",
					   optarg);
			srvr_clnt_id = l;
			break;

		case 'a':
			rest = dcc_parse_nm_port(dcc_emsg, optarg, srvr.port,
						 hostname, sizeof(hostname),
						 &srvr.port, 0, 0,
						 0, 0);
			if (!rest) {
				dcc_error_msg("%s", dcc_emsg);
				break;
			}
			rest += strspn(rest, DCC_WHITESPACE);
			if (*rest != '\0') {
				dcc_error_msg("unrecognized port number in"
					      " \"-a %s\"", optarg);
				break;
			}
			if (hostname[0] == '\0')
				strcpy(srvr.hostname, DCC_SRVR_NM_DEF_HOST);
			else
				BUFCPY(srvr.hostname, hostname);
			break;

		case 'h':
			homedir = optarg;
			break;

		case 'G':
			/* anything but "on" earns a complaint,
			 * but greylist mode is still assumed */
			if (strcasecmp(optarg, "on"))
				usage(0);
			/* label our log messages before complaining more */
			dcc_syslog_init(1, argv[0], " grey");
			if (have_expire_parms > 0)
				dcc_logbad(EX_USAGE,
					   "do not use -G with -e, -E, or -t");
			grey_on = 1;
			have_expire_parms = -1;
			cur_db_nm_str = DB_GREY_NAME;
			break;

		case 's':		/* hash table size in entries */
			new_hash_len = strtoul(optarg, &p, 0);
			if (*p != '\0'
			    || new_hash_len < MIN_HASH_ENTRIES
			    || new_hash_len > MAX_HASH_ENTRIES)
				dcc_logbad(EX_USAGE,
					   "invalid database size \"%s\"",
					   optarg);
			break;

		case 'e':		/* expiration for non-bulk checksums */
			if (grey_on)
				dcc_logbad(EX_USAGE,
					   "-e cannot be used with -G");
			if (have_expire_parms < 0)
				dcc_logbad(EX_USAGE,
					   "-e cannot be used with -P");
			have_expire_parms = 1;
			expire_secs = dcc_get_secs(optarg, 0,
						   DB_EXPIRE_SECS_MIN,
						   DB_EXPIRE_SECS_MAX, -1);
			if (expire_secs < 0)
				dcc_logbad(EX_USAGE,
					   "invalid expiration seconds"
					   " \"-e %s\"",
					   optarg);
			break;

		case 'E':		/* expiration for bulk checksums */
			if (grey_on)
				dcc_logbad(EX_USAGE,
					   "-E cannot be used with -G");
			if (have_expire_parms < 0)
				dcc_logbad(EX_USAGE,
					   "-E cannot be used with -P");
			have_expire_parms = 1;
			expire_spamsecs = dcc_get_secs(optarg, 0,
						       DB_EXPIRE_SECS_MIN,
						       DB_EXPIRE_SECS_MAX, -1);
			if (expire_spamsecs < 0)
				dcc_logbad(EX_USAGE,
					   "invalid long term spam"
					   " expiration seconds"
					   " \"-E %s\"",
					   optarg);
			break;

		case 't':
			/* -t type,allsecs[,threshold,spamsecs] */
			if (grey_on)
				dcc_logbad(EX_USAGE,
					   "-t cannot be used with -G");
			if (have_expire_parms < 0)
				dcc_logbad(EX_USAGE,
					   "-t cannot be used with -P");
			have_expire_parms = 1;
			/* split a private copy of the argument at its commas
			 * so that optarg stays whole for error messages */
			duparg = dcc_strdup(optarg);
			allstr = strchr(duparg, ',');
			if (!allstr)
				dcc_logbad(EX_USAGE,
					   " missing comma in \"-t %s\"",
					   optarg);
			*allstr++ = '\0';
			cntstr = strchr(allstr, ',');
			if (!cntstr) {
				spamstr = 0;
			} else {
				*cntstr++ = '\0';
				spamstr = strchr(cntstr, ',');
				if (!spamstr)
					dcc_logbad(EX_USAGE,
						   "missing comma after"
						   " \"%s\" in \"-t %s\"",
						   cntstr, optarg);
				*spamstr++ = '\0';
			}
			type = dcc_str2type(duparg);
			if (type <= DCC_CK_INVALID
			    || type > DCC_CK_G_TRIPLE_R_BULK)
				dcc_logbad(EX_USAGE,
					   "unrecognized checksum type in"
					   " \"-t %s\"", optarg);
			allsecs = dcc_get_secs(allstr, 0,
					       DB_EXPIRE_SECS_MIN,
					       DB_EXPIRE_SECS_MAX, -1);
			if (allsecs < 0)
				dcc_logbad(EX_USAGE,
					   "invalid seconds \"%s\" in \"%s\"",
					   allstr, optarg);
			if (!cntstr) {
				/* Only a single duration was given.
				 * Long term checksum types get at least the
				 * default spam duration, except that a zero
				 * duration is passed along unchanged.
				 * Other types use the same duration
				 * for both. */
				tgts = DCC_TGTS_TOO_MANY;
				if (allsecs != 0
				    && DCC_CK_LONG_TERM(type)) {
					spamsecs = max(DB_EXPIRE_SPAMSECS_DEF,
						       allsecs);
				} else {
					spamsecs = allsecs;
				}
			} else {
				tgts = dcc_str2cnt(cntstr);
				if (tgts > DCC_TGTS_TOO_MANY || tgts <= 1)
					dcc_logbad(EX_USAGE,
						   "unrecognized count \"%s\""
						   " in \"-t %s\"",
						   cntstr, optarg);
				spamsecs = dcc_get_secs(spamstr, 0,
							DB_EXPIRE_SECS_MIN,
							DB_EXPIRE_SECS_MAX, -1);
				if (spamsecs < 0)
					dcc_logbad(EX_USAGE,
						   "invalid seconds"
						   " \"%s\" in \"%s\"",
						   spamstr, optarg);
				if ((spamsecs < allsecs && spamsecs != 0)
				    || (allsecs == 0 && spamsecs != 0))
					dcc_logbad(EX_USAGE,
						   "\"%s\""
						   " must not be smaller than"
						   " \"%s\" in \"%s\"",
						   spamstr, allstr, optarg);
			}
			dcc_free(duparg);
			new_ex_secs[type].all = allsecs;
			new_ex_secs[type].spam = spamsecs;
			new_ex_secs[type].clean_thold = tgts;
			break;

		case 'L':
			dcc_parse_log_opt(optarg);
			break;

		default:
			usage(0);
			break;
		}
	}
	argc -= optind;
	argv += optind;
	if (argc != 0)
		usage(1);

	/* a server-ID is required to talk to dccd,
	 * but "-V" by itself is not an error */
	if (srvr_clnt_id == DCC_ID_INVALID && !standalone) {
		if (print_version)
			exit(EX_OK);
		usage(1);
	}

	if (srvr.port == 0)
		srvr.port = DCC_GREY2PORT(grey_on);

	fnm2path_good(cur_db_nm, cur_db_nm_str, 0);
	syslog(dcc_trace_priority, DCC_VERSION" %s %s",
	       repair ? "repairing" : "cleaning",
	       cur_db_nm);

	dcc_clnt_unthread_init();

	atexit(finish);
	signal(SIGALRM, deadman);
	signal(SIGHUP, sigterm);
	signal(SIGTERM, sigterm);
	signal(SIGINT, sigterm);

	/* move to the target directory */
	srvr.clnt_id = srvr_clnt_id;
	if (!dcc_cdhome(dcc_emsg, homedir))
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
	if (!standalone) {
		i = load_ids(dcc_emsg, &srvr_clnt_tbl, srvr_clnt_id);
		if (i < 0)
			dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
		else if (!i)
			dcc_error_msg("%s", dcc_emsg);
		memcpy(srvr.passwd, srvr_clnt_tbl->cur_passwd,
		       sizeof(srvr.passwd));
	}

	fnm2path_good(lock_db_nm, cur_db_nm, DB_LOCK_SUFFIX);
	fnm2path_good(cur_hash_nm, cur_db_nm, DB_HASH_SUFFIX);
	fnm2path_good(old_db_nm, cur_db_nm, "-old");
	fnm2path_good(new_db_nm, cur_db_nm, "-new");
	fnm2path_good(new_hash_nm, new_db_nm, DB_HASH_SUFFIX);

	/* exclude other instances of this program */
	lock_db_fd = dcc_lock_open(dcc_emsg, lock_db_nm, O_RDWR|O_CREAT,
				   DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, 0);
	if (lock_db_fd < 0) {
		dcc_logbad(dcc_ex_code, "%s: dbclean already running?",
			   dcc_emsg);
	} else {
		char pid[32];

		/* record our PID, including the trailing '\0' */
		i = 1+snprintf(pid, sizeof(pid), "%ld\n", (long)getpid());
		if (i != write(lock_db_fd, pid, i))
			dcc_logbad(EX_IOERR, "write(%s, pid): %s",
				   lock_db_nm, ERROR_STR());

		/* Let anyone write in it in case we are running as root
		 * and get interrupted by a crash or gdb.  A stray, stale
		 * private lock file cannot be locked */
		chmod(lock_db_nm, 0666);
	}

	/* create & lock the new database file */
	new_db_fd = dcc_lock_open(dcc_emsg, new_db_nm, O_RDWR|O_CREAT,
				  DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, 0);
	if (new_db_fd < 0)
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
	if (0 > ftruncate(new_db_fd, 0))
		dcc_logbad(EX_IOERR, "truncate(%s,0): %s",
			   new_db_nm, ERROR_STR());
	new_db_fsize = 0;
	new_db_created = 1;
	new_db_csize = DB_PTR_BASE;

	if (0 > stat(cur_db_nm, &sb)) {
		if (errno != ENOENT)
			dcc_logbad(EX_IOERR, "stat(%s): %s",
				   cur_db_nm, ERROR_STR());
		/* empty a missing database */
		cleardb = 1;
		tgt_db_page_size = grey_on ? 1 : 0;
	} else {
		tgt_db_page_size = grey_on ? sb.st_size/4 : 0;
	}
	/* a non-zero target may not be smaller than the smallest hash table */
	if (tgt_db_page_size < MIN_HASH_ENTRIES*sizeof(HASH_ENTRY)
	    && tgt_db_page_size != 0)
		tgt_db_page_size = MIN_HASH_ENTRIES*sizeof(HASH_ENTRY);
	new_db_page_size = db_get_page_size(0, tgt_db_page_size);
	write_new_hdr(1);


	if (standalone) {
		u_char busy;

		/* open and lock the current database to ensure
		 * the daemon is not running */
		old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm, O_RDWR,
					  DCC_LOCK_OPEN_NOWAIT,
					  DCC_LOCK_ALL_FILE, &busy);
		if (busy)
			dcc_logbad(EX_USAGE, "database %s in use: %s",
				   cur_db_nm, dcc_emsg);
		if (cleardb
		    && stat(cur_db_nm, &sb) >= 0)
			dcc_logbad(EX_USAGE, "%s already exists",
				   cur_db_nm);

		/* create and lock the current database if it did not exist
		 * to ensure the daemon is not running */
		if (old_db_fd < 0) {
			old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm,
						  O_RDWR|O_CREAT,
						  DCC_LOCK_OPEN_NOWAIT,
						  DCC_LOCK_ALL_FILE, 0);
			if (old_db_fd < 0)
				dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
			cur_db_created = 1;
		}

	} else {
		/* Tell the daemon to start turning off the flooding
		 * so we can adjust its positions in the flood map file
		 * Try very hard to reach it. */
		dcc_min_delay = DCC_MAX_RTT;
		ctxt = dcc_tmp_clnt_init(dcc_emsg, 0, &srvr, 0,
					 grey_on ? DCC_CLNT_FG_GREY: 0,
					 info_flags);
		if (!ctxt)
			dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
		++flods_off;
		if (DCC_OP_OK != dcc_aop(dcc_emsg, ctxt, grey_on, DCC_NO_SRVR,
					 DCC_AOP_FLOD, DCC_AOP_FLOD_SHUTDOWN,
					 0, 0, 0, 0, 0, &aop_resp, 0))
			dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
	}

	/* resolve white-listed host names before locking the database */
	parse_white();

	/* Tell the daemon to unlock the database between operations
	 * and insist it stop flooding. */
	if (!standalone) {
		/* give the daemon a chance to stop pumping the floods */
		for (;;) {
			DCC_OPS aop;

			aop = dcc_aop(dcc_emsg, ctxt, grey_on, DCC_NO_SRVR,
				      DCC_AOP_FLOD, DCC_AOP_FLOD_CHECK,
				      0, 0, 0, 0, 0, &aop_resp, 0);
			if (aop == DCC_OP_ERROR
			    || aop == DCC_OP_INVALID) {
				dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
			} else if (aop != DCC_OP_ADMN) {
				dcc_logbad(EX_UNAVAILABLE, "%s",
					   dcc_aop2str(0, 0,
						       DCC_AOP_FLOD,
						       DCC_AOP_FLOD_CHECK));
			}
			i = flod_running(aop_resp.resp.val.string);
			if (i < 0)
				dcc_logbad(EX_PROTOCOL,
					   "%s: unrecognized \"%s\"",
					   dcc_aop2str(0, 0,
						       DCC_AOP_FLOD,
						       DCC_AOP_FLOD_CHECK),
					   aop_resp.resp.val.string);
			if (i == 0)
				break;
			/* after 30 seconds demand a halt once,
			 * and give up after 45 seconds */
			if (time(0) > start.tv_sec+30) {
				if (flods_off < 2) {
					++flods_off;
					if (DCC_OP_OK != dcc_aop(dcc_emsg,
							ctxt, grey_on,
							DCC_NO_SRVR,
							DCC_AOP_FLOD,
							DCC_AOP_FLOD_HALT, 0,
							0, 0, 0, 0,
							&aop_resp, 0))
					    dcc_logbad(dcc_ex_code, "%s",
						       dcc_emsg);
					continue;
				}
				if (time(0) > start.tv_sec+45)
					dcc_logbad(EX_UNAVAILABLE,
						   "failed to stop floods: %s",
						   aop_resp.resp.val.string);
			}
			usleep(100*1000);
		}
		dccd_unlocked = 1;
		if (DCC_OP_OK != dcc_aop(dcc_emsg, ctxt, grey_on, DCC_NO_SRVR,
					 DCC_AOP_DB_UNLOCK,
					 0, 0, 0, 0, 0, 0,
					 &aop_resp, 0))
			dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
	}

	if (cleardb) {
		dcc_trace_msg(DCC_VERSION" %s database %s",
			      cur_db_created ? "creating" : "clearing",
			      cur_db_nm);

	} else if (repair) {
		dcc_error_msg("explicit repair of %s", cur_db_nm);

	} else {
		if (old_db_fd >= 0) {
			close(old_db_fd);
			old_db_fd = -1;
		}
		if (!db_open(0, cur_db_nm, 0,
			     DB_OPEN_RDONLY | dbclean_db_mode
			     | (standalone
				? DB_OPEN_LOCK_NOWAIT : DB_OPEN_LOCK_WAIT))) {
			/* If the hash table is sick, check timestamps only
			 * as much as no hash table allows.
			 * Then rebuild the hash table. */
			repair = 1;

		} else {
			old_db_flags = db_parms.flags;
			/* save a handle on the old database to get
			 * reports that arrive while we expire it */
			old_db_fd = dup(db_fd);
			if (old_db_fd < 0)
				dcc_logbad(EX_OSERR, "dup(%s): %s",
					   cur_db_nm, ERROR_STR());

			old_db_hash_used = db_hash_used;
			old_db_nokeep_cks = db_parms.nokeep_cks;
			memcpy(old_db_flod_tholds, db_parms.flod_tholds,
			       sizeof(old_db_flod_tholds));

			/* read old and create new database file */
			if (!expire(db_csize)) {
				close(old_db_fd);
				old_db_fd = -1;
				old_db_hash_used = 0;
				repair = 1;
			}
		}

		if (repair)
			dcc_error_msg("repairing %s", cur_db_nm);
	}

	/* if we are repairing the hash table (including now repairing
	 * after encountering problems while expiring),
	 * copy the current file with minimal expiring */
	if (repair && !cleardb) {
		if (!copy_db())
			exit(EX_UNAVAILABLE);
	}
	build_hash();

	/* we have the new database locked
	 *
	 * Preserve the current files as "*-old" and install the new files
	 */
	rename_bail(cur_db_nm, old_db_nm);
	rename_bail(new_hash_nm, cur_hash_nm);
	new_hash_created = 0;
	rename_bail(new_db_nm, cur_db_nm);
	new_db_created = 0;
	cur_db_created = 0;
	if (cleardb) {
		if (0 > unlink(flod_mmap_path)
		    && errno != ENOENT)
			dcc_error_msg("unlink(%s): %s",
				      flod_mmap_path, ERROR_STR());
		if (!db_close(1))
			exit(EX_UNAVAILABLE);
		exit(EX_OK);
	}
	/* the new files are now known by the current names */
	strcpy(new_db_nm, cur_db_nm);
	strcpy(new_hash_nm, cur_hash_nm);

	/* if the daemon was not running, we're finished */
	if (standalone) {
		/* install the flood positions if things are ok */
		if (flod_mmaps) {
			memcpy(flod_mmaps, &new_flod_mmaps,
			       sizeof(new_flod_mmaps));
			flod_unmap(0, 0);
		}
		if (!db_close(1))
			exit(EX_UNAVAILABLE);
		exit(EX_OK);
	}

	/* try to finish as much disk I/O as we can to minimize stalling
	 * by dccd when we close the file and hand it over */
	db_unload(0, 0);
	if (0 > fsync(db_fd))
		dcc_logbad(EX_IOERR, "fsync(%s): %s",
			   db_nm, ERROR_STR());
	if (0 > fsync(db_hash_fd))
		dcc_logbad(EX_IOERR, "fsync(%s): %s",
			   db_hash_nm, ERROR_STR());

	/* Copy any records from the old file to the new file that were
	 * added to the old file while we were creating the new file. */
	if (!catchup()) {
		write_new_hdr(1);
		exit(EX_UNAVAILABLE);
	}

	/* tell the daemon to switch to the new database and stop unlocking
	 * the database on every operation.  This will leave the daemon
	 * stuck waiting for us to unlock the new database. */
	dccd_new_db("copy late arrivals");

	/* install the flood positions if things are ok */
	if (flod_mmaps) {
		memcpy(flod_mmaps, &new_flod_mmaps,
		       sizeof(new_flod_mmaps));
		flod_unmap(0, 0);
	}

	/* Copy any records from the old file to the new file in the
	 * race to tell the daemon to switch to the new file.
	 * The new file is still locked from build_hash().
	 * The daemon should be stuck waiting to open it in the
	 * DCC_AOP_DB_NEW request via the preceding dccd_new_db().
	 *
	 * Since the daemon has switched and probably cannot go back,
	 * ignore any errors */
	catchup();
	if (!db_close(1))
		exit(EX_UNAVAILABLE);

	/* finish() will be called via exit() to tell the daemon to resume
	 * flooding if necessary.  However, in the normal case, we removed
	 * all counts against flooding before calling dccd_new_db() */
	exit(EX_OK);
}



/* Adjust output flood positions.
 *	Every not yet adjusted mark at or before old_db_pos is moved by the
 *	difference between the size of the new file and old_db_pos and is
 *	then flagged as adjusted.
 *	The result is the smallest mark that is still ahead of old_db_pos and
 *	so still needs adjusting, or 0 if there is none. */
static DB_PTR
adj_mmap(void)
{
	const DB_PTR shift = new_db_csize - old_db_pos;
	DB_PTR next_pos = 0;
	FLOD_MMAP *map;

	for (map = new_flod_mmaps.mmaps;
	     map <= LAST(new_flod_mmaps.mmaps);
	     ++map) {
		/* a non-negative index says this mark is finished */
		if (map->oflod_index >= 0)
			continue;

		if (map->confirm_pos <= old_db_pos) {
			/* we have reached this mark, so move it */
			map->confirm_pos += shift;
			map->oflod_index = 0;
		} else if (next_pos == 0
			   || map->confirm_pos < next_pos) {
			/* only remember the nearest mark still ahead */
			next_pos = map->confirm_pos;
		}
	}

	/* the delay position is handled like the other marks */
	if (adj_delay_pos) {
		if (new_flod_mmaps.delay_pos <= old_db_pos) {
			new_flod_mmaps.delay_pos += shift;
			adj_delay_pos = 0;
		} else if (next_pos == 0
			   || new_flod_mmaps.delay_pos < next_pos) {
			next_pos = new_flod_mmaps.delay_pos;
		}
	}

	return next_pos;
}



/* SIGALRM handler armed by alarm() before waiting for the database lock.
 *	Quit instead of waiting forever.
 *	NOTE(review): this logs from a signal handler; confirm that
 *	dcc_logbad() is safe to use in that context. */
static void NRATTRIB
deadman(int sig UATTRIB)
{
	dcc_logbad(EX_IOERR,
		   "db_lock() timed out; dccd restarted?");
}



/* find a checksum
 *	Leave db_sts.rcd2 pointing at the leading record.
 *	On success, *ckp points to the checksum that was found or is 0 if
 *	the checksum is not in the hash table.
 *	On failure, including failing to lock the database, the caller must
 *	not use *ckp. */
static u_char				/* 1=lookup worked 0=failed */
get_ck(DB_RCD_CK **ckp, DCC_CK_TYPES type, const DCC_SUM sum)
{
	DB_FOUND db_result;

	/* we must lock the file to keep the daemon from changing the
	 * internal hash table links */
	if (!DB_IS_LOCKED()) {
		alarm(60*60);		/* don't stall for more than an hour */
		if (0 > db_lock()) {
			/* Without the lock there was no lookup, so do not
			 * claim success and do not leave garbage for
			 * the caller. */
			*ckp = 0;
			return 0;
		}
		/* cheat and don't turn off the alarm, since we ought
		 * to be back here long before an hour has passed */
	}

	dcc_emsg[0] = '\0';
	db_result = db_lookup(dcc_emsg, type, sum, 0, MAX_HASH_ENTRIES,
			      &db_sts.hash, &db_sts.rcd2, ckp);
	switch (db_result) {
	case DB_FOUND_LATER:
	case DB_FOUND_SYSERR:
		/* complain and fall out of the switch to fail */
		dcc_error_msg("hash lookup for %s from "L_HPAT" = %d: %s",
			      DB_TYPE2STR(type), old_db_pos, db_result,
			      dcc_emsg);
		break;

	case DB_FOUND_IT:
		return 1;

	case DB_FOUND_EMPTY:
	case DB_FOUND_CHAIN:
	case DB_FOUND_INTRUDER:
		/* the lookup worked but the checksum is absent */
		*ckp = 0;
		return 1;
	}

	return 0;
}



/* Look up the leading report for a checksum and decide whether a report
 * of the checksum with rcd_tgts targets is worth keeping.
 *	Leave db_sts.rcd2 pointing at the leading record.
 *	A failed lookup or a missing leader is treated as a broken database. */
static int				/* -1=broken database 0=expire 1=keep */
get_lead(DCC_CK_TYPES type, const DCC_SUM sum, DCC_TGTS rcd_tgts)
{
	DB_RCD_CK *lead_ck;
	DCC_TGTS sum_tgts;

	if (!get_ck(&lead_ck, type, sum))
		return -1;
	if (lead_ck == 0) {
		dcc_error_msg("no leader for %s %s at "L_HPAT,
			      DB_TYPE2STR(type), dcc_ck2str_err(type, sum),
			      old_db_pos);
		return -1;
	}

	sum_tgts = DB_TGTS_CK(lead_ck);

	/* do not keep reputations on systems without reputation code */
	if (DCC_CK_IS_REP(grey_on, type))
		return 0;

	/* a report that is not itself "too many" is dropped once the total
	 * for its checksum has become "too many" */
	if (rcd_tgts != DCC_TGTS_TOO_MANY
	    && sum_tgts == DCC_TGTS_TOO_MANY)
		return 0;

	/* otherwise keep it only if the total has reached the threshold */
	if (sum_tgts >= new_ex_secs[type].clean_thold)
		return 1;
	return 0;
}



/* Announce something in printf() style.
 *	With -q the message goes only to the system log at the trace
 *	priority.  Otherwise it is handed to dcc_vtrace_msg(). */
static void  PATTRIB(1,2)
dbclean_msg(const char *p, ...)
{
	va_list ap;

	va_start(ap, p);
	if (!quiet)
		dcc_vtrace_msg(p, ap);
	else
		vsyslog(dcc_trace_priority, p, ap);
	va_end(ap);
}



/* Occasionally announce how far along we are.
 *	force_time	report now even if the interval has not passed,
 *			unless the last report already said 100%
 *	s1, s2		words put before and after the counts
 *	done, total	work finished and work to do; a total of 0
 *			counts as 100%
 * Reports are at least REPORT_INTERVAL seconds apart, or
 * REPORT_INTERVAL_FAST seconds with more than one level of debugging. */
static void
report_progress(u_char force_time,
		const char *s1, DB_PTR done, const char *s2, DB_PTR total)
{
	struct timeval now;
	float pct;
	int period, elapsed;

	pct = total ? (done*100.0)/total : 100.0;

	/* say nothing if reporting has not started
	 * and we are already past 30% */
	if (pct > 30.0 && !progress_rpt_started)
		return;

	gettimeofday(&now, 0);
	period = (db_debug > 1) ? REPORT_INTERVAL_FAST : REPORT_INTERVAL;
	elapsed = now.tv_sec - progress_rpt;
	if (elapsed < period
	    && (!force_time || progress_rpt_percent == 100))
		return;

	progress_rpt_started = 1;
	progress_rpt_percent = pct;

	/* pretend this report was made on a multiple of the interval
	 * since the start so that later reports stay on schedule */
	elapsed = now.tv_sec - start.tv_sec;
	progress_rpt = start.tv_sec + (elapsed/period)*period;

	if (db_debug <= 1) {
		dbclean_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%",
			    s1, done, total, s2, progress_rpt_percent);
		return;
	}
	dbclean_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%"
		    "\tdb_mmaps=%d hash=%d",
		    s1, done, total, s2, progress_rpt_percent,
		    db_stats.db_mmaps, db_stats.hash_mmaps);
}



/* Mark obsolete checksums in the new record.
 *	Each checksum from new->cks up to but not including end_ck is
 *	flagged with DB_CK_FG_OBS and counted in obs_rcds if
 *	DCC_TS_OLDER_TS() says the timestamp of the record is older than
 *	the "all" expiration timestamp for the type of the checksum. */
static void
fuzzy_obs(DB_RCD *new, const DB_RCD_CK *end_ck)
{
	DB_RCD_CK *ck = new->cks;

	while (ck < end_ck) {
		DCC_CK_TYPES ck_type = DB_CK_TYPE(ck);

		if (DCC_TS_OLDER_TS(new->ts, new_ex_ts[ck_type].all)) {
			ck->type_fgs |= DB_CK_FG_OBS;
			++obs_rcds;
		}
		++ck;
	}
}



/* copy the existing flag file, discard junk and old entries */
static u_char				/* 1=done 0=database broken */
expire(DB_PTR old_db_csize)
{
#define EXPIRE_BAIL() {alarm(0); flod_unmap(0, 0); db_close(0); return 0;}

	DCC_TS ts;
	u_char emptied, reduced_defaults;
	u_char old_ok[DCC_CK_TYPE_LAST+1-DCC_CK_TYPE_FIRST];
	DB_RCD rcd, new;
	const DB_RCD_CK *rcd_ck, *rcd_ck2;
	DB_RCD_CK *new_ck;
	DCC_TGTS tgts, tgts_raw, ck_tgts;
	u_char needed, obs_lvl, timely;
	int old_num_cks, new_num_cks, nokeep_cks;
	DB_PTR min_confirm_pos, next_adj_pos;
	FLOD_MMAP *mp;
	struct timeval now;
	DCC_CK_TYPES prev_type, type, type2;
	int rcd_len;
	struct stat sb;
	int progress_rpt_cnt;
	int i;

	/* Compute default durations
	 *  Assume the hash table will be 50% of the size of the database */
	reduced_defaults = 0;
	i = (((db_csize*3)/2)*60) / db_max_rss;
	if (i > 60
	    && db_parms.ex_secs[DCC_CK_FUZ2].all != 0
	    && db_parms.ex_secs[DCC_CK_FUZ2].spam != 0) {
		def_expire_secs = (db_parms.ex_secs[DCC_CK_FUZ2].all*60)/i;
		def_expire_secs -= def_expire_secs % (60*60);
		if (def_expire_secs < DB_EXPIRE_SECS_DEF_MIN)
			def_expire_secs = DB_EXPIRE_SECS_DEF_MIN;

		def_expire_spamsecs = (db_parms.ex_secs[DCC_CK_FUZ2].spam*60)/i;
		def_expire_spamsecs -= def_expire_spamsecs % (24*60*60);
		if (def_expire_spamsecs < DB_EXPIRE_SPAMSECS_DEF_MIN)
			def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF_MIN;
#if DCC_MIN_DB_MBYTE == 0 && !defined(GOT_PHYSMEM)
		if (def_expire_secs == DB_EXPIRE_SECS_DEF_MIN
		    || def_expire_spamsecs == DB_EXPIRE_SPAMSECS_DEF_MIN)
			dbclean_msg("no way to determine physical RAM;"
				    " rebuild with ./configure with-db-memory");
#endif
		}
	if (expire_secs < 0 || expire_spamsecs < 0) {
		if (expire_secs < 0) {
			expire_secs = def_expire_secs;
			if (expire_secs > expire_spamsecs
			    && expire_spamsecs > 0)
				expire_secs = expire_spamsecs;
		}
		if (expire_spamsecs < 0)
			expire_spamsecs = max(def_expire_spamsecs, expire_secs);
		if (expire_secs != DB_EXPIRE_SECS_DEF
		    || expire_spamsecs != DB_EXPIRE_SPAMSECS_DEF)
			reduced_defaults = 1;
	}

#ifdef DCC_DBCLEAN_ADJ_EPOCH     /* for testing */
	/* generate DCC_DBCLEAN_ADJ_EPOCH from something like
	 * date -j -f '%m/%d/%y %T' '02/27/04 15:30:24' '+%s'
	 */
	{
	time_t adj = time(0) - DCC_DBCLEAN_ADJ_EPOCH;
	if (expire_secs > adj)
		expire_secs -= adj;
	if (expire_spamsecs > adj)
		expire_spamsecs -= adj;
	}
#endif

	if (expire_spamsecs > 0 && expire_spamsecs < expire_secs)
		dcc_logbad(EX_USAGE,
			   "long term spam expiration -E"
			   " must be longer than -e");

	expired_rcds = 0;
	expired_cks = 0;
	kept_cks = white_cks;
	progress_rpt_cnt = 0;
	progress_rpt_started = 0;

	/* Compute thresholds for records we keep.
	 * Use the values from the previous use of dbclean as defaults
	 * unless they are bogus */
	memset(old_ok, 0, sizeof(old_ok));
	dcc_secs2ts(ts, start.tv_sec);
	for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
		char buf[30];
		DB_EX_SEC *th = &db_parms.ex_secs[type];

		if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type))
			continue;

		if ((th->clean_thold <= 0
		     || th->clean_thold > DCC_TGTS_TOO_MANY)
		    && !DCC_CK_IS_REP(grey_on, type)) {
			if (db_debug)
				dbclean_msg("bad old clean thold %s for %s",
					    dcc_tgts2str(buf, sizeof(buf),
							th->clean_thold,
							grey_on),
					    DB_TYPE2STR(type));
			continue;
		}
		if (th->spam <= 0
		    || th->spam > DB_EXPIRE_SECS_MAX) {
			if (db_debug)
				dbclean_msg("bad old spam threshold %s for %s",
					    dcc_tgts2str(buf, sizeof(buf),
							th->spam, grey_on),
					    DB_TYPE2STR(type));
			continue;
		}
		if (th->all <= 0
		    || th->all > DB_EXPIRE_SECS_MAX) {
			if (db_debug)
				dbclean_msg("bad old all threshold %s for %s",
					    dcc_tgts2str(buf, sizeof(buf),
							th->all, grey_on),
					    DB_TYPE2STR(type));
			continue;
		}
		if (DCC_TS_NEWER_TS(db_parms.ex_ts[type].all, ts)) {
			if (db_debug)
				dbclean_msg("bad old timestamp %s for %s",
					    dcc_ts2str_err(db_parms.ex_ts[type
							].all),
					    DB_TYPE2STR(type));
			continue;
		}

		old_ok[type] = 1;	/* the old values are ok */
	}

	for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
		DB_EX_SEC *new_th = &new_ex_secs[type];

		/* always keep server-ID declarations one week */
		if (type == DCC_CK_SRVR_ID) {
			new_th->all = 7*24*60*60;
			new_th->spam = 7*24*60*60;
			new_th->clean_thold = 1;
			dcc_secs2ts(spamts[type], start.tv_sec - 7*24*60*60);
			memcpy(new_ex_ts[type].all, spamts[type],
			       sizeof(new_ex_ts[type].all));
			continue;
		}

		if (new_th->clean_thold == 0) {
			/* we have no explicit settings for this checksum type.
			 * Use the dccd bulk thresholds by default */
			if (grey_on) {
				new_th->clean_thold = DCC_TGTS_TOO_MANY;
			} else if ((tgts = db_parms.flod_tholds[type
							]) <= DCC_TGTS_TOO_MANY
				   && tgts > 0) {
				new_th->clean_thold = tgts;
			} else {
				new_th->clean_thold = DEF_FLOD_THOLDS(0, type);
			}
			if (have_expire_parms < 0 && old_ok[type]) {
				/* if we have no global expiriation parameters
				 * and the old durations are valid, use them */
				new_th->all = db_parms.ex_secs[type].all;
				new_th->spam = db_parms.ex_secs[type].spam;
			} else if (grey_on) {
				if (DCC_CK_IS_GREY_TRIPLE(1, type)) {
					new_th->all = DEF_GREY_WINDOW;
					new_th->spam = DEF_GREY_WHITE;
				} else if (DCC_CK_IS_GREY_MSG(1, type)
					   || type == DCC_CK_BODY) {
					new_th->all = DEF_GREY_WINDOW;
					new_th->spam = DEF_GREY_WINDOW;
				} else {
					new_th->all = 1;
					new_th->spam = 1;
				}
			} else {
				new_th->all = expire_secs;
				new_th->spam = (DCC_CK_LONG_TERM(type)
						? expire_spamsecs
						: expire_secs);
				if (reduced_defaults) {
					syslog(dcc_trace_priority,
					       "reducing defaults to -e"
					       " %dhours  -E %ddays",
					       expire_secs / (60*60),
					       expire_spamsecs / (24*60*60));
					reduced_defaults = 0;
				}
			}
		}
		dcc_secs2ts(spamts[type], start.tv_sec - new_th->spam);

		/* use previous time limits if they are ok & newer */
		dcc_secs2ts(ts, start.tv_sec - new_th->all);
		if (!old_ok[type]
		    || DCC_TS_NEWER_TS(ts, db_parms.ex_ts[type].all)) {
			memcpy(new_ex_ts[type].all, ts,
			       sizeof(new_ex_ts[type].all));
		} else {
			new_ex_ts[type] = db_parms.ex_ts[type];
		}
	}

	/* put the thresholds into the new file */
	write_new_hdr(1);

	/* if we are running as root,
	 * don't change the owner of the database */
	if (getuid() == 0) {
		if (0 > fstat(old_db_fd, &sb))
			dcc_logbad(EX_IOERR, "fstat(%s): %s",
				   old_db_nm, ERROR_STR());
		if (0 > fchown(new_db_fd, sb.st_uid, sb.st_gid))
			dcc_logbad(EX_IOERR, "fchown(%s,%d,%d): %s",
				   new_db_nm,
				   (int)sb.st_uid, (int)sb.st_gid,
				   ERROR_STR());
	}

	if (DB_PTR_BASE != lseek(old_db_fd, DB_PTR_BASE, SEEK_SET))
		dcc_logbad(EX_IOERR, "lseek(%s,%d): %s",
			   cur_db_nm, DB_PTR_BASE, ERROR_STR());
	read_rcd_invalidate();

	flod_mmap(0, db_parms.sn, 0, 1, 1);
	if (flod_mmaps)
		memcpy(&new_flod_mmaps, flod_mmaps, sizeof(new_flod_mmaps));
	min_confirm_pos = new_flod_mmaps.delay_pos;
	next_adj_pos = DB_PTR_BASE;
	for (mp = new_flod_mmaps.mmaps;
	     mp <= LAST(new_flod_mmaps.mmaps);
	     ++mp) {
		if (mp->rem_hostname[0] == '\0') {
			mp->oflod_index = 0;
		} else {
			mp->oflod_index = -1;
			if (min_confirm_pos > mp->confirm_pos)
				min_confirm_pos = mp->confirm_pos;
		}
	}
	adj_delay_pos = (new_flod_mmaps.delay_pos != 0) ? 1 : 0;

	emptied = cleardb;
	dcc_timeval2ts(new_flod_mmaps.sn, &start, 0);

	/* copy the old file to the new, discarding old data as we go */
	for (old_db_pos = DB_PTR_BASE;
	     old_db_pos < old_db_csize;
	     old_db_pos += rcd_len) {
		if (--progress_rpt_cnt <= 0) {
			report_progress(0, "  processed",
					old_db_pos/1000000,
					"Mbytes", old_db_csize/1000000);
			progress_rpt_cnt = 1000;
		}

		if (old_db_pos == next_adj_pos)
			next_adj_pos = adj_mmap();

		rcd_len = read_rcd(0, &rcd,
				   old_db_fd, old_db_pos, cur_db_nm);
		if (rcd_len <= 0) {
			if (rcd_len == 0)
				dcc_error_msg("unexpected EOF in %s at "L_HPAT
					      " instead of "L_HPAT,
					      cur_db_nm,
					      old_db_pos,
					      old_db_csize);
			/* ask our neighbors to fix our database */
			emptied = 1;
			old_db_pos = old_db_csize;
			break;
		}

		/* skip end-of-page padding */
		if (rcd_len == sizeof(rcd)-sizeof(rcd.cks))
			continue;

		if (DB_RCD_ID(&rcd) == DCC_ID_WHITE) {
			/* skip whitelist entries if whitelist source is ok */
			if (!keep_white)
				continue;
			/* refresh whitelist entries if source is bad */
			dcc_timeval2ts(rcd.ts, &start, 0);
		}

		old_num_cks = DB_NUM_CKS(&rcd);

		/* expire or throw away deleted reports */
		tgts_raw = DB_TGTS_RCD_RAW(&rcd);
		if (tgts_raw == 0) {
			++expired_rcds;
			expired_cks += old_num_cks;
			continue;
		}
		if (tgts_raw > DCC_TGTS_MAX_DB) {
			dcc_error_msg("discarding report at "L_HPAT
				      " with bogus target count %#x",
				      old_db_pos, tgts_raw);
			++expired_rcds;
			expired_cks += old_num_cks;
			continue;
		}

		if (DCC_TS_NEWER_TS(rcd.ts, future_ts)) {
			dcc_error_msg("discarding report at "L_HPAT
				      " from the future %s",
				      old_db_pos,
				      dcc_ts2str_err(rcd.ts));
			++expired_rcds;
			expired_cks += old_num_cks;
			continue;
		}


		needed = 0;
		obs_lvl = 0;
		timely = 1;
		nokeep_cks = 0;
		memcpy(&new, &rcd, sizeof(new)-sizeof(new.cks));
		new.fgs_num_cks &= (DB_RCD_FG_TRIM | DB_RCD_FG_SUMRY
				    | DB_RCD_FG_DELAY);
		new_ck = new.cks;
		for (prev_type = DCC_CK_INVALID, rcd_ck = rcd.cks;
		     rcd_ck < &rcd.cks[old_num_cks];
		     prev_type = type, ++rcd_ck) {
			type = DB_CK_TYPE(rcd_ck);
			if (!DCC_CK_OK_DB(grey_on, type)) {
				static int whines = 0;
				if (whines < 20)
					dcc_error_msg("discarding %s"
						      " checksum at "L_HPAT"%s",
						      DB_TYPE2STR(type),
						      old_db_pos,
						      ++whines >= 20
						      ? "; stop complaining"
						      : "");
				++expired_cks;
				new.fgs_num_cks |= DB_RCD_FG_TRIM;
				new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
				continue;
			}

			if (type <= prev_type
			    && prev_type != DCC_CK_FLOD_PATH) {
				dcc_error_msg("discarding out of order %s"
					      " checksum at "L_HPAT,
					      DB_TYPE2STR(type),
					      old_db_pos);
				++expired_cks;
				new.fgs_num_cks |= DB_RCD_FG_TRIM;
				new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
				continue;
			}

			/* Silently discard pure junk from other servers,
			 * provided it is junk by default */
			if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
			    && DB_GLOBAL_NOKEEP(grey_on, type)
			    && type != DCC_CK_FLOD_PATH
			    && type != DCC_CK_SRVR_ID
			    && DB_RCD_ID(&rcd) != DCC_ID_WHITE) {
				++expired_cks;
				continue;
			}

			/* Keep paths except on old records or records that
			 * have been trimmed or compressed.
			 * Never remove paths from server-ID declarations. */
			if (type == DCC_CK_FLOD_PATH) {
				if (DB_RCD_TRIMMED(&new)
				    || DB_RCD_ID(&new) == DCC_ID_COMP)
					continue;
				/* forget line number on old whitelist entry */
				if (DB_RCD_ID(&rcd) == DCC_ID_WHITE)
					continue;
				rcd_ck2 = rcd_ck+1;
				for (;;) {
					type2 = DB_CK_TYPE(rcd_ck2);
					if (type2 == DCC_CK_SRVR_ID
					    || !DCC_TS_OLDER_TS(rcd.ts,
							new_ex_ts[type2].all)) {
					    /* keep this path since this report
					     * is a server-ID declaration
					     * or not old */
					    *new_ck = *rcd_ck;
					    ++new_ck;
					    ++new.fgs_num_cks;
					    ++nokeep_cks;
					    break;
					}
					if (++rcd_ck2>=&rcd.cks[old_num_cks]) {
					    /* we are discarding this path */
					    new.fgs_num_cks |= DB_RCD_FG_TRIM;
					    new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
					    break;
					}
				}
				continue;
			}

			/* throw this checksum away if it is extremely old */
			if (DCC_TS_OLDER_TS(rcd.ts, spamts[type])) {
				++expired_cks;
				new.fgs_num_cks |= DB_RCD_FG_TRIM;
				new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
				continue;
			}

			if (!DCC_TS_OLDER_TS(rcd.ts, new_ex_ts[type].all)) {
				/* This report is recent.
				 * However, obsolete or junk checksums
				 * don't make the report needed */
				if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
				    && DB_RCD_ID(&rcd) != DCC_ID_WHITE) {
					++nokeep_cks;
				} else if (DB_CK_OBS(rcd_ck)) {
					/* This checksum is obsolete.
					 * If it has the highest level of
					 * fuzziness, then it controls whether
					 * the whole report is needed,. */
					if (obs_lvl < db_ck_fuzziness[type]) {
					    obs_lvl = db_ck_fuzziness[type];
					    needed = 0;
					}
				} else {
					/* This checksum is not obsolete.
					 * If it is at least as fuzzy as any
					 * other checksum, then it can say
					 * the report is needed */
					if (obs_lvl <= db_ck_fuzziness[type]) {
					    obs_lvl = db_ck_fuzziness[type];
					    needed = 1;
					}
				}

			} else {
				/* This checksum is old.
				 * Throw away delete requests
				 * and other servers' useless checksums */
				if (tgts_raw == DCC_TGTS_DEL
				    || DB_TEST_NOKEEP(db_parms.nokeep_cks,
						      type)) {
					++expired_cks;
					new.fgs_num_cks |= DB_RCD_FG_TRIM;
					new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
					continue;
				}
				/* Throw away old obsolete checksums
				 * and entire reports if the fuzziest
				 * checksum is obsolete */
				if (DB_CK_OBS(rcd_ck)) {
					if (obs_lvl < db_ck_fuzziness[type]) {
					    obs_lvl = db_ck_fuzziness[type];
					    needed = 0;
					}
					++expired_cks;
					new.fgs_num_cks |= DB_RCD_FG_TRIM;
					new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
					continue;
				}

				/* old summaries are unneeded, because
				 * they have already been flooded.
				 * They do not contribute to local counts */
				if (DB_RCD_SUMRY(&rcd))
					continue;

				/* The checksum is old enough to compress,
				 * but not old enough to delete, so mark
				 * the record as eligible for splitting. */
				timely = 0;

				/* Discard this checksum if its ultimate total
				 * is low or if it reaches spam after this
				 * report.
				 * To determine the ultimate total, we must
				 * have a hash table to find the newest record,
				 * which contains the final total*/
				ck_tgts = DB_TGTS_CK(rcd_ck);
				if (!repair
				    &&  ck_tgts < new_ex_secs[type
							].clean_thold) {
					i = get_lead(type, rcd_ck->sum,
						     ck_tgts);
					if (i < 0)
					    EXPIRE_BAIL();
					if (!i) {
					    ++expired_cks;
					    new.fgs_num_cks |= DB_RCD_FG_TRIM;
					    new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
					    continue;
					}
				}

				if (obs_lvl <= db_ck_fuzziness[type]) {
					/* Since we did not delete this
					 * checksum, we need the record if this
					 * checksum is fuzzy enough to control
					 * our need. */
					needed = 1;
					/* If this is the fuzziest checksum we
					 * have seen, then preceding and so
					 * less fuzzy checksums are obsolete,
					 * if they are old.
					 * Assume that checksums are ordered
					 * in the record by fuzziness. */
					if (obs_lvl < db_ck_fuzziness[type]) {
					    obs_lvl = db_ck_fuzziness[type];
					    if (obs_lvl != DCC_CK_FUZ_LVL_REP
						&& !grey_on)
						fuzzy_obs(&new, new_ck);
					}
				}
			}

			/* Keep this checksum if we decide the whole report
			 * is needed. */
			*new_ck = *rcd_ck;
			/* we will want to sum this checksum */
			new_ck->type_fgs &= ~DB_CK_FG_DUP;

			++new_ck;
			++new.fgs_num_cks;
		}

		if (!repair && !standalone && DB_IS_LOCKED()) {
			gettimeofday(&now, 0);
			if (db_locked.tv_sec != now.tv_sec
			    || db_locked.tv_usec + 500*1000 < now.tv_usec) {
				if (!db_unlock())
					EXPIRE_BAIL();
			}
		}

		/* if none of its checksums are needed,
		 * then discard the entire record */
		if (!needed) {
			expired_cks += DB_NUM_CKS(&new);
			++expired_rcds;
			continue;
		}

		new_num_cks = DB_NUM_CKS(&new);
		kept_cks += new_num_cks - nokeep_cks;

		/* Put the new record into the new file.
		 *
		 * If all of the record is recent, if it contains 1 checksum,
		 * or if all of its totals are the same, then simply add it.
		 *
		 * Otherwise, divide it into records of identical counts
		 * to allow compression or combining with other records. */
		if (new_num_cks > 1
		    && (!timely
			|| DB_RCD_ID(&new) == DCC_ID_COMP
			|| DB_RCD_TRIMMED(&new))) {
			for (;;) {
				/* skip the checksums that have the same total
				 * as the first checksum to leave them with the
				 * original new report */
				new_ck = new.cks;
				ck_tgts = DB_TGTS_CK(new_ck);
				for (i = 1; i < new_num_cks; ++i) {
					++new_ck;
					if (DB_TGTS_CK(new_ck) != ck_tgts)
					    break;
				}
				if (new_num_cks <= i)
					break;
				new_num_cks -= i;

				/* write the checksums with the common total */
				new.srvr_id_auth = DCC_ID_COMP;
				new.fgs_num_cks = i;
				if (!write_new_rcd(&new,
						   sizeof(new) - sizeof(new.cks)
						   + i*sizeof(new.cks[0])))
					EXPIRE_BAIL();

				/* handle the remaining checksums */
				new.fgs_num_cks = new_num_cks;
				memmove(&new.cks[0], &new.cks[i],
					new_num_cks*sizeof(new.cks[0]));
			}
		}

		/* write the rest (or all) of the new record */
		if (!write_new_rcd(&new,
				   sizeof(new) - sizeof(new.cks)
				   + new_num_cks*sizeof(new.cks[0])))
			EXPIRE_BAIL();
	}
	write_new_flush(1);
	alarm(0);

	/* do final adjustment of the flooding positions */
	adj_mmap();
	/* force them to be right if the system crashed with the
	 * flod.map file on the disk more up to date and so after the
	 * database file on the disk */
	for (mp = new_flod_mmaps.mmaps;
	     mp <= LAST(new_flod_mmaps.mmaps);
	     ++mp) {
		if (mp->rem_hostname[0] != '\0'
		    && mp->confirm_pos > new_db_csize)
			mp->confirm_pos = new_db_csize;
	}

	i = db_close(1);
	write_new_hdr(emptied);
	report_progress(1, "  processed",
			old_db_pos/1000000,
			"Mbytes", old_db_csize/1000000);
	if (grey_on)
		dbclean_msg("expired %d records and %d checksums in %s",
			    expired_rcds, expired_cks, cur_db_nm);
	else
		dbclean_msg("expired %d records and %d checksums,"
			    " obsoleted %d checksums in %s",
			    expired_rcds, expired_cks, obs_rcds, cur_db_nm);
	return i;
}



/* Open the current database if it is not already open, adopt the
 *	parameters recorded in its header, and then run expire() on it.
 *	The result of expire() is returned unchanged. */
static u_char
copy_db(void)
{
	/* The old database is deliberately left unlocked so that the
	 * daemon can keep answering requests. */
	if (old_db_fd < 0) {
		old_db_fd = open(cur_db_nm, O_RDONLY, 0);
		if (old_db_fd == -1)
			dcc_logbad(EX_IOERR, "open(%s): %s",
				   cur_db_nm, ERROR_STR());
	}

	/* insist on a readable header carrying the expected version */
	if (!read_db_hdr(dcc_emsg, &old_db_hdr, old_db_fd, cur_db_nm))
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
	if (0 != memcmp(old_db_hdr.p.version, def_db_hdr.p.version,
			sizeof(old_db_hdr.p.version)))
		dcc_logbad(EX_IOERR, "%s has the wrong magic \"%.*s\"",
			   cur_db_nm,
			   ISZ(old_db_hdr.p.version), old_db_hdr.p.version);

	/* take the working parameters from the old header */
	memcpy(db_parms.sn, old_db_hdr.p.sn, sizeof(db_parms.sn));
	memcpy(&db_parms.ex_secs, &old_db_hdr.p.ex_secs,
	       sizeof(db_parms.ex_secs));
	memcpy(&db_parms.ex_ts, &old_db_hdr.p.ex_ts,
	       sizeof(db_parms.ex_ts));
	memcpy(db_parms.flod_tholds, old_db_hdr.p.flod_tholds,
	       sizeof(db_parms.flod_tholds));
	db_parms.nokeep_cks = old_db_hdr.p.nokeep_cks;
	db_parms.flags = old_db_hdr.p.flags;

	/* keep separate copies of the old settings;
	 * build_hash() puts some of them back into db_parms */
	old_db_nokeep_cks = db_parms.nokeep_cks;
	old_db_flags = db_parms.flags;
	memcpy(old_db_flod_tholds, old_db_hdr.p.flod_tholds,
	       sizeof(old_db_flod_tholds));

	return expire(old_db_hdr.p.db_csize);
}



/* Append to the new database the records that were added to the old
 *	file while the new file was being built.
 *	The old header is re-read and the tail of the old file copied
 *	repeatedly until a round adds nothing.
 *	Returns 1 if all went well and 0 after any failure. */
static u_char
catchup(void)
{
	DB_RCD rcd;
	int rcd_len;
	u_char ok;
	int added, added_before;

	/* Because the old file should still be unlocked, the daemon
	 * will have been keeping its magic number block accurate */
	ok = 1;
	added = 0;
	for (;;) {
		added_before = added;

		if (!read_db_hdr(dcc_emsg, &old_db_hdr,
				old_db_fd, old_db_nm)) {
			dcc_error_msg("%s", dcc_emsg);
			ok = 0;
			break;
		}
		if (old_db_hdr.p.db_csize < old_db_pos) {
			dcc_error_msg("%s mysteriously truncated", old_db_nm);
			ok = 0;
			break;
		}
		if (lseek(old_db_fd, old_db_pos, SEEK_SET)
		    != (off_t)old_db_pos) {
			dcc_error_msg("lseek(%s, "L_HPAT"): %s",
				      old_db_nm, old_db_pos, ERROR_STR());
			ok = 0;
			break;
		}
		read_rcd_invalidate();

		/* copy everything between our position and the size
		 * currently advertised in the old header */
		for (;
		     old_db_pos < old_db_hdr.p.db_csize;
		     old_db_pos += rcd_len, ++added) {
			rcd_len = read_rcd(0, &rcd,
					   old_db_fd, old_db_pos, old_db_nm);
			if (rcd_len == 0)
				dcc_error_msg("premature EOF in %s"
					      " at "L_HPAT
					      " instead of "L_HPAT,
					      old_db_nm,
					      old_db_pos,
					      old_db_hdr.p.db_csize);
			if (rcd_len <= 0) {
				ok = 0;
				break;
			}
			/* If something bad happens, we may not be able to
			 * go back to the old file.  Carry on to get as much
			 * data as we can although we know the dccd daemon
			 * may croak when we release it */
			if (!db_add_rcd(0, &rcd)) {
				ok = 0;
				break;
			}
		}

		/* stop after a failure or a round that added nothing */
		if (!ok || added == added_before)
			break;
	}

	if (db_debug >= 1 && added > 0)
		dbclean_msg("added %d late reports", added);

	return ok;
}



/* try to compress old report pointed to by db_sts.rcd with a predecessor */
static void
compress_old(void)
{
	DB_PTR prev, prev1;
	DB_RCD_CK *new_ck, *prev_ck;
	int new_ck_num, prev_ck_num;
	DCC_TGTS new_tgts, prev_tgts;
	DCC_CK_TYPES new_type, prev_type;
	DCC_SRVR_ID new_srvr, prev_srvr;

	/* Before spending the time to map a preceding checksum,
	 * find at least one checksum worth keeping and that might
	 * be combined or compressed with its predecessor. */
	if (DB_RCD_TRIMMED(db_sts.rcd.d.r))
		new_srvr = DCC_ID_COMP;
	else
		new_srvr = DB_RCD_ID(db_sts.rcd.d.r);
	prev = DB_PTR_NULL;
	prev_type = DCC_CK_INVALID;
	for (new_ck_num = DB_NUM_CKS(db_sts.rcd.d.r),
	     new_ck = db_sts.rcd.d.r->cks;
	     new_ck_num != 0;
	     --new_ck_num, ++new_ck) {
		new_type = DB_CK_TYPE(new_ck);
		if (DB_TEST_NOKEEP(db_parms.nokeep_cks, new_type))
			continue;
		/* all of the checksums in this record must be old */
		if (new_srvr != DCC_ID_COMP
		    && !DCC_TS_OLDER_TS(db_sts.rcd.d.r->ts,
					new_ex_ts[new_type].all))
			return;
		/* you can compress reports only if you have >=2 */
		prev1 = DB_PTR_EX(new_ck->prev);
		if (prev1 != DB_PTR_NULL) {
			prev = prev1;
			prev_type = new_type;
		}
	}
	if (prev_type == DCC_CK_INVALID)
		return;

	/* having picked a checksum,
	 * map the record with its predecessor */
	prev_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, prev, prev_type);
	if (!prev_ck)
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);

	/* The current and previous records must be old
	 * and contain the same useful checksums. */
	new_ck_num = DB_NUM_CKS(db_sts.rcd.d.r);
	new_ck = db_sts.rcd.d.r->cks;
	if (DB_RCD_TRIMMED(db_sts.rcd2.d.r))
		prev_srvr = DCC_ID_COMP;
	else
		prev_srvr = DB_RCD_ID(db_sts.rcd2.d.r);
	prev_ck_num = DB_NUM_CKS(db_sts.rcd2.d.r);
	prev_ck = db_sts.rcd2.d.r->cks;
	for (;;) {
		/* we must run out of checksums in the two reports at the
		 * same time */
		if (prev_ck_num == 0 || new_ck_num == 0) {
			if (prev_ck_num == new_ck_num)
				break;
			return;
		}

		/* ignore paths */
		prev_type = DB_CK_TYPE(prev_ck);
		if (DB_TEST_NOKEEP(db_parms.nokeep_cks, prev_type)) {
			--prev_ck_num;
			++prev_ck;
			continue;
		}
		new_type = DB_CK_TYPE(new_ck);
		if (DB_TEST_NOKEEP(db_parms.nokeep_cks, new_type)) {
			--new_ck_num;
			++new_ck;
			continue;
		}

		/* because the checksums are ordered,
		 * give up at the first difference in checksums */
		if (new_type != prev_type
		    || memcmp(new_ck->sum, prev_ck->sum, sizeof(new_ck->sum)))
			return;

		/* Give up at the first recent and valuable checksum. */
		if ((new_srvr != DCC_ID_COMP
		     && !DCC_TS_OLDER_TS(db_sts.rcd.d.r->ts,
					 new_ex_ts[new_type].all))
		    || (prev_srvr != DCC_ID_COMP
			&& !DCC_TS_OLDER_TS(db_sts.rcd2.d.r->ts,
					    new_ex_ts[new_type].all)))
			return;

		--prev_ck_num;
		++prev_ck;
		--new_ck_num;
		++new_ck;
	}

	/* The current and previous records are compatiable.
	 * Add the count of the previous record to the current record
	 * and mark the previous record useless.
	 * The individual totals in the current record are already correct,
	 * so postpone worrying about the deleted record. */
	new_tgts = DB_TGTS_RCD_RAW(db_sts.rcd.d.r);
	if (new_tgts < DCC_TGTS_TOO_MANY) {
		prev_tgts = DB_TGTS_RCD(db_sts.rcd2.d.r);
		if (prev_tgts > DCC_TGTS_TOO_MANY)
			return;
		if (prev_tgts == DCC_TGTS_TOO_MANY) {
			new_tgts = DCC_TGTS_TOO_MANY;
		} else {
			new_tgts += prev_tgts;
			if (new_tgts > DCC_TGTS_TOO_MANY)
				new_tgts = DCC_TGTS_TOO_MANY;
		}
		DB_TGTS_RCD_SET(db_sts.rcd.d.r, new_tgts);
	}

	/* mark one of the records to be deleted next time */
	DB_TGTS_RCD_SET(db_sts.rcd2.d.r, 0);

	db_sts.rcd.d.r->srvr_id_auth = DCC_ID_COMP;
	db_sts.rcd.d.r->fgs_num_cks &= ~(DB_RCD_FG_TRIM
					 | DB_RCD_FG_SUMRY
					 | DB_RCD_FG_DELAY);
	/* use the newest timestamp */
	if (DCC_TS_OLDER_TS(db_sts.rcd.d.r->ts, db_sts.rcd2.d.r->ts))
		memcpy(db_sts.rcd.d.r->ts, db_sts.rcd2.d.r->ts,
		       sizeof(db_sts.rcd.d.r->ts));
	++comp_rcds;
}



/* Write one parsed whitelist checksum to the new database.
 *	This is handed to dcc_parse_whitefile() by parse_white().
 *
 *	emsg	receives the explanation when the entry is ignored
 *	wf	whitelist file state; its line and file numbers are
 *		saved in the record
 *	type	type of the checksum
 *	sum	the checksum itself
 *	tgts	target count or whitelist value for the entry
 *
 *	Returns 1 if a record was written, 0 if the entry was ignored,
 *	and -1 if writing the record failed. */
static int
white_write(DCC_EMSG emsg, DCC_WF *wf,
	    DCC_CK_TYPES type, DCC_SUM sum, DCC_TGTS tgts)
{
	DB_RCD rcd;
	int rcd_len;
	char buf[30];
	DCC_FNM_LNO_BUF fnm_buf;

	/* ignore checksums that clients are never supposed to send
	 * to the server or for some other reason cannot be whitelisted */
	switch (type) {
	case DCC_CK_INVALID:
	case DCC_CK_ENV_TO:
	case DCC_CK_G_MSG_R_TOTAL:
	case DCC_CK_G_TRIPLE_R_BULK:
	case DCC_CK_SRVR_ID:
		dcc_pemsg(EX_DATAERR, emsg,
			  "%s checksum cannot be used%s",
			  dcc_type2str_err(type, 0, 0, grey_on),
			  wf_fnm_lno(fnm_buf, wf));
		return 0;

	case DCC_CK_IP:
	case DCC_CK_ENV_FROM:
	case DCC_CK_FROM:
	case DCC_CK_MESSAGE_ID:
	case DCC_CK_RECEIVED:
	case DCC_CK_SUB:
	case DCC_CK_BODY:
	case DCC_CK_FUZ1:
	case DCC_CK_FUZ2:
		break;			/* these are ok */
	}

	if (tgts == DCC_TGTS_OK_MX || tgts == DCC_TGTS_OK_MXDCC) {
		dcc_pemsg(EX_DATAERR, emsg,"\"%s\" ignored%s",
			  dcc_tgts2str(buf, sizeof(buf), tgts, 0),
			  wf_fnm_lno(fnm_buf, wf));
		return 0;
	}

	/* greylist whitelist entries cannot involve blacklisting
	 * and use DCC_TGTS_GREY_WHITE to signal whitelisting */
	if (grey_on) {
		/* ignore anything except whitelisting */
		if (tgts != DCC_TGTS_OK) {
			dcc_pemsg(EX_DATAERR, emsg, "\"%s\" ignored%s",
				  dcc_tgts2str(buf, sizeof(buf), tgts, 0),
				  wf_fnm_lno(fnm_buf, wf));
			return 0;
		}
		tgts = DCC_TGTS_GREY_WHITE;
	}

	memset(&rcd, 0, sizeof(rcd));
	dcc_timeval2ts(rcd.ts, &start, 0);
	rcd.srvr_id_auth = DCC_ID_WHITE;
	DB_TGTS_RCD_SET(&rcd, tgts);

	/* The first entry is a DCC_CK_FLOD_PATH entry that holds the line
	 * number followed by the file number of the whitelist entry.
	 * check_white() reads them back to report conflicts. */
	rcd.cks[0].type_fgs = DCC_CK_FLOD_PATH;
	memcpy(rcd.cks[0].sum, &wf->lno, sizeof(wf->lno));
	rcd.cks[0].sum[sizeof(wf->lno)] = wf->fno;

	/* The second entry is the whitelisted checksum.
	 * Copy only as many bytes as the sum member holds.  The size of
	 * the whole entry would read past the caller's checksum and
	 * write past the sum member. */
	rcd.cks[1].type_fgs = type;
	memcpy(rcd.cks[1].sum, sum, sizeof(rcd.cks[1].sum));

	rcd_len = sizeof(rcd) - sizeof(rcd.cks) + 2*sizeof(rcd.cks[0]);
	rcd.fgs_num_cks = 2;

	++white_cks;
	if (!write_new_rcd(&rcd, rcd_len))
		return -1;

	return 1;
}



/* Add the whitelist of certified non-spam senders and otherwise
 *	start the database */
static void
parse_white(void)
{
	int white_fd;

	white_cks = 0;

	if (!keep_white) {
		memset(&dbclean_white_tbl, 0,sizeof(dbclean_white_tbl));
		dcc_wf_init(&dbclean_wf, 0);
		fnm2path_good(dbclean_wf.ascii_nm, WHITELIST_NM(grey_on), 0);
		dbclean_wf.info = &dbclean_white_tbl;
		white_fd = open(dbclean_wf.ascii_nm, O_RDONLY, 0);
		if (white_fd < 0) {
			/* worry only if the file exists but can't be used */
			if (errno != ENOENT) {
				dcc_error_msg("open(%s): %s",
					      dbclean_wf.ascii_nm, ERROR_STR());
				keep_white = 1;
			}
		} else {
			if (0 > dcc_parse_whitefile(0, &dbclean_wf, white_fd,
						    white_write, 0))
				keep_white = 1;
			if (0 > close(white_fd))
				dcc_error_msg("close(%s): %s",
					      dbclean_wf.ascii_nm, ERROR_STR());
		}
	}
	if (keep_white) {
		/* If the whitelist was bad, purge the new database of
		 * the bad new whitelist.  We will use the existing
		 * whitelist */
		write_new_flush(1);
		new_db_csize = DB_PTR_BASE;
		if (0 > ftruncate(new_db_fd, DB_PTR_BASE))
			dcc_logbad(EX_IOERR, "truncate(%s, %d): %s",
				   new_db_nm, DB_PTR_BASE, ERROR_STR());
		new_db_fsize = DB_PTR_BASE;
		white_cks = 0;
	}

	/* update the counts in the database file */
	write_new_hdr(1);
}



/* Complain if the whitelist record mapped at db_sts.rcd disagrees with
 *	the preceding record for the same checksum.
 *	A conflict is a difference in the target values of the two
 *	records.  The pair of lines most recently reported is not
 *	reported again, and nothing is checked once more than 20
 *	complaints have been made. */
static void
check_white(void)
{
	static int complaints;
	static int last_old_lno, last_new_lno;
	static int last_old_fno, last_new_fno;
	const DB_RCD_CK *ck, *older_ck;
	int old_lno, new_lno;
	int old_fno, new_fno;
	DCC_TGTS old_tgts, new_tgts;
	char old_tgts_buf[30], new_tgts_buf[30];
	const char *old_fnm, *new_fnm, *of;
	DCC_CK_TYPES ck_type;
	DB_PTR older_pos;

	/* we have already complained enough */
	if (complaints > 20)
		return;

	/* Line numbers are needed.  They are lacking only if the old
	 * whitelist entries were saved because the file is broken. */
	if (DB_NUM_CKS(db_sts.rcd.d.r) != 2)
		return;
	ck = db_sts.rcd.d.r->cks;
	if (DB_CK_TYPE(ck) != DCC_CK_FLOD_PATH)
		return;

	/* a checksum seen on only one line cannot conflict */
	++ck;
	older_pos = DB_PTR_EX(ck->prev);
	if (older_pos == DB_PTR_NULL)
		return;

	ck_type = DB_CK_TYPE(ck);
	older_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, older_pos, ck_type);
	if (!older_ck)
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);

	old_tgts = DB_TGTS_RCD(db_sts.rcd2.d.r);
	new_tgts = DB_TGTS_RCD(db_sts.rcd.d.r);
	if (old_tgts == new_tgts)
		return;			/* no conflict */

	/* recover the line and file numbers stored by white_write() */
	memcpy(&old_lno, db_sts.rcd2.d.r->cks[0].sum, sizeof(old_lno));
	old_fno = db_sts.rcd2.d.r->cks[0].sum[sizeof(old_lno)];
	memcpy(&new_lno, db_sts.rcd.d.r->cks[0].sum, sizeof(new_lno));
	new_fno = db_sts.rcd.d.r->cks[0].sum[sizeof(new_lno)];

	/* do not repeat the complaint just made */
	if (old_lno == last_old_lno && old_fno == last_old_fno
	    && new_lno == last_new_lno && new_fno == last_new_fno)
		return;

	/* name the first file only if it differs from the second */
	old_fnm = wf_fnm(&dbclean_wf, old_fno);
	new_fnm = wf_fnm(&dbclean_wf, new_fno);
	if (old_fnm != new_fnm)
		old_fnm = path2fnm(old_fnm);
	else
		old_fnm = "";
	of = (*old_fnm != '\0') ? " of " : "";

	dcc_error_msg("\"%s\" in line %d%s%s conflicts with \"%s\""
		      " in line %d of %s",
		      dcc_tgts2str(old_tgts_buf, sizeof(old_tgts_buf),
				   old_tgts, grey_on),
		      old_lno,
		      of, old_fnm,
		      dcc_tgts2str(new_tgts_buf, sizeof(new_tgts_buf),
				   new_tgts, grey_on),
		      new_lno,
		      new_fnm);

	++complaints;
	last_old_lno = old_lno;
	last_old_fno = old_fno;
	last_new_lno = new_lno;
	last_new_fno = new_fno;
}



/* rebuild the hash table and the totals and links within the database file
 *	finish with the file locked
 *
 *	If new_hash_len is 0, a hash table size is chosen from
 *	old_db_hash_used and the counts kept_cks and white_cks.
 *	The database file is then scanned once per window of hash
 *	addresses [haddr_lo, haddr_hi).  Each record is handed to
 *	db_link_rcd(), whitelist records are checked by check_white(),
 *	and every record is offered to compress_old().
 *	Failure to open the database, map a record, or relink a record
 *	is reported with dcc_logbad(). */
static void
build_hash(void)
{
	DB_PTR rcd_pos;
	DB_HADDR haddr_window, haddr_lo, haddr_hi;
	int pass, total_passes;
	u_int rcd_len;
	int rcd_cks, rcd_sums, rcds, sums;
	const DB_RCD_CK *rcd_ck;
	int progress_rpt_cnt;
	DB_HADDR guess_hash_len;

	db_buf_init(new_db_page_size);

	if (!new_hash_len) {
		/* Try to choose a hash table size now so that when it
		 * is next time to rebuild after 24 hours of incoming
		 * checksums, the alpha or load factor will still be 0.9.
		 * We probably ran 24 hours ago, so the old hash size
		 * is a good estimate of the size tomorrow. */
		new_hash_len = old_db_hash_used;

		/* Take what turns out to be a good guess if we do not
		 * have a good measure of the old hash table size. */
		guess_hash_len = kept_cks+white_cks;
		guess_hash_len += guess_hash_len/10;
		if (new_hash_len < guess_hash_len)
			new_hash_len = guess_hash_len;

		/* go for alpha 0.9 in 24 hours */
		new_hash_len += new_hash_len/10;

		/* only a message; the size is not reduced here */
		if (new_hash_len > db_max_hash_entries)
			dbclean_msg("default hash table size of %d"
				    " larger than maximum %d",
				    new_hash_len, db_max_hash_entries);

		/* enforce the minimum and, except for greylisting,
		 * the default size as lower bounds */
		if (new_hash_len < MIN_HASH_ENTRIES)
			new_hash_len = MIN_HASH_ENTRIES;
		if (new_hash_len < DEF_HASH_ENTRIES
		    && !grey_on)
			new_hash_len = DEF_HASH_ENTRIES;
	}

	/* Open and lock the new database */
	unlink(new_hash_nm);
	new_hash_created = 1;
	if (!db_open(0, new_db_nm, new_hash_len,
		     DB_OPEN_LOCK_NOWAIT | dbclean_db_mode)) {
		dcc_logbad(dcc_ex_code, "could not start database %s",
			   new_db_nm);
	}
	/* put back the settings saved from the old database header */
	db_parms.nokeep_cks = old_db_nokeep_cks;
	memcpy(db_parms.flod_tholds, old_db_flod_tholds,
	       sizeof(db_parms.flod_tholds));

	/* add every record in the database file to the hash table and
	 * fix its accumulated counts and reverse links */
	comp_rcds = 0;
	sums = 0;
	rcds = 0;
	progress_rpt_cnt = 0;
	progress_rpt_started = 0;
	/* each pass handles a window of hash addresses sized from
	 * three quarters of the buffers, so the whole file is read
	 * total_passes times */
	haddr_window = db_hash_page_len*((db_buf_total*3)/4);
	total_passes = (db_hash_len+haddr_window-1)/haddr_window;
	for (haddr_lo = 0, pass = 1;
	     haddr_lo < db_hash_len;
	     haddr_lo = haddr_hi, ++pass) {
		/* the final window is open ended */
		if (haddr_lo > db_hash_len-haddr_window)
			haddr_hi = MAX_HASH_ENTRIES;
		else
			haddr_hi = haddr_lo+haddr_window;
		for (rcd_pos = DB_PTR_BASE;
		     rcd_pos < db_csize;
		     rcd_pos += rcd_len) {
			/* skip reports crossing page boundaries;
			 * advance by one record header at a time */
			if (rcd_pos%db_page_size > db_page_max) {
				rcd_len = DB_RCD_HDR_LEN;
				continue;
			}
			/* report progress every 1000 records */
			if (--progress_rpt_cnt <= 0) {
				report_progress(0, "  hash rebuilt",
						sums/total_passes,
						"checksums", kept_cks);
				progress_rpt_cnt = 1000;
			}

			/* db_map_rcd() sets rcd_len for the loop step */
			db_end_pg_num = rcd_pos / db_page_size;
			if (!db_map_rcd(0, &db_sts.rcd, rcd_pos, &rcd_len)) {
				dcc_logbad(dcc_ex_code,
					   "hash build failed reading"
					   " record at "L_HPAT,
					   rcd_pos);
			}

			/* skip end of page padding */
			if (db_sts.rcd.d.r->fgs_num_cks == 0)
				continue;

			/* rcds and sums grow on every pass, which is why
			 * sums is divided by total_passes when reported */
			++rcds;

			/* count the checksums we'll link in this record */
			rcd_cks = DB_NUM_CKS(db_sts.rcd.d.r);
			rcd_sums = 0;
			for (rcd_ck = db_sts.rcd.d.r->cks;
			     rcd_ck < &db_sts.rcd.d.r->cks[rcd_cks];
			     ++rcd_ck) {
				if (!DB_TEST_NOKEEP(db_parms.nokeep_cks,
						    DB_CK_TYPE(rcd_ck)))
					++rcd_sums;
			}
			sums += rcd_sums;

			if (!db_link_rcd(0, haddr_lo, haddr_hi)) {
				dcc_logbad(dcc_ex_code,
					   "failed to relink record at "L_HPAT,
					   rcd_pos);
			}

			/* check for conflicts in the whitelist file */
			if (DB_RCD_ID(db_sts.rcd.d.r) == DCC_ID_WHITE)
				check_white();

			/* try to combine this record with a predecessor */
			compress_old();
		}
	}

	report_progress(1, "  hash rebuilt", sums/total_passes,
			"checksums", kept_cks);
	dbclean_msg("hashed %d records containing %d checksums,"
		    " compressed %d records",
		    rcds, sums/total_passes, comp_rcds);
	dbclean_msg("%d hash entries total, %d or %d%% used",
		    db_hash_len,
		    HASH_LEN_EXT(db_hash_used),
		    (HASH_LEN_EXT(db_hash_used)*100)
		    / HASH_LEN_EXT(db_hash_len));
}



/* Write buflen bytes from buf at absolute offset pos of the new database
 * file, and raise new_db_fsize when the write ends beyond it.
 *	buf	data to write
 *	buflen	number of bytes in buf
 *	pos	file offset at which to write
 *	fatal	nonzero to report failures with dcc_logbad(EX_IOERR,...)
 *		    instead of dcc_error_msg()
 * Returns 1 after a complete write, and 0 after a failed seek or a failed
 * or short write.
 * NOTE(review): the lseek messages always say offset 0 although the seek
 * is to pos. */
static u_char
write_new_db(const void *buf, int buflen, off_t pos, u_char fatal)
{
	int wrote;

	if (lseek(new_db_fd, pos, SEEK_SET) != pos) {
		if (!fatal) {
			dcc_error_msg("lseek(%s, 0): %s",
				      new_db_nm, ERROR_STR());
		} else {
			dcc_logbad(EX_IOERR, "lseek(%s, 0): %s",
				   new_db_nm, ERROR_STR());
		}
		return 0;
	}

	wrote = write(new_db_fd, buf, buflen);
	if (wrote != buflen) {
		/* distinguish a system error from a short write */
		if (wrote < 0) {
			if (fatal)
				dcc_logbad(EX_IOERR, "write(%s): %s",
					   new_db_nm, ERROR_STR());
			else
				dcc_error_msg("write(%s): %s",
					      new_db_nm, ERROR_STR());
		} else {
			if (fatal)
				dcc_logbad(EX_IOERR,
					   "write(%s)=%d instead of %d",
					   new_db_nm, wrote, buflen);
			else
				dcc_error_msg("write(%s)=%d instead of %d",
					      new_db_nm, wrote, buflen);
		}
		return 0;
	}

	/* remember the farthest byte written so far */
	if (pos+buflen > new_db_fsize)
		new_db_fsize = pos+buflen;
	return 1;
}



/* bytes waiting to be written to the new database file */
static u_char write_new_db_buf[64*1024];
/* number of bytes currently held in write_new_db_buf */
static u_int write_new_db_buflen = 0;
/* file offset at which the contents of write_new_db_buf will be written */
static DB_PTR write_new_base;

/* Write whatever is buffered to the new database file at write_new_base.
 *	fatal	passed to write_new_db() to choose how a failure is reported
 * The buffer is emptied and its base moved to new_db_csize whether or not
 * the write worked.
 * Returns 1 if nothing was buffered or the write succeeded, and 0 if the
 * write failed. */
static u_char
write_new_flush(u_char fatal)
{
	u_char ok;

	ok = (write_new_db_buflen == 0
	      || write_new_db(write_new_db_buf, write_new_db_buflen,
			      write_new_base, fatal));

	/* start over with an empty buffer at the current end of the data */
	write_new_db_buflen = 0;
	write_new_base = new_db_csize;
	return ok;
}


/* Append bytes to the buffer of data for the new database file, first
 * flushing the buffer with non-fatal error reporting if the bytes would
 * not fit.
 *	buf	bytes to append
 *	buflen	number of bytes in buf
 * Returns 1 after the bytes have been buffered and 0 if the flush failed,
 * in which case the bytes are not buffered.
 * NOTE(review): nothing here checks that buflen alone fits in
 * write_new_db_buf; callers are presumably limited to record-sized
 * chunks -- confirm. */
static u_char
write_new_buf(const void *buf, int buflen)
{
	if (write_new_db_buflen + buflen > ISZ(write_new_db_buf)) {
		/* no room, so push out what has been collected */
		if (!write_new_flush(0))
			return 0;
	}

	memcpy(write_new_db_buf + write_new_db_buflen, buf, buflen);
	write_new_db_buflen += buflen;
	return 1;
}



/* Add a record to the new file by way of the write buffer.
 * If the record would end on a later page than the one where it would
 * start, zeros are first added up to the start of that page.
 *	buf	the record
 *	buflen	length of the record in bytes
 * new_db_csize is advanced past the padding and the record.
 * Returns 1 on success and 0 if buffering failed. */
static u_char
write_new_rcd(const void *buf, int buflen)
{
	static const u_char zeros[128] = {0};
	DB_PTR end_page;
	int pad, chunk;
	u_char ok;

	/* pad across page boundaries */
	end_page = (new_db_csize + buflen)/new_db_page_size;
	if (end_page != new_db_csize/new_db_page_size) {
		/* distance to the start of the page, rounded up to
		 * a multiple of DB_RCD_HDR_LEN */
		pad = end_page*new_db_page_size - new_db_csize;
		pad = (((pad + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN)
		       * DB_RCD_HDR_LEN);
		do {
			/* at most one block of zeros at a time */
			chunk = (pad < (int)sizeof(zeros)
				 ? pad : (int)sizeof(zeros));
			if (!write_new_buf(zeros, chunk))
				return 0;
			new_db_csize += chunk;
			pad -= chunk;
		} while (pad != 0);
	}

	/* write_new_buf() can flush, and the flush uses new_db_csize as
	 * the file offset of the bytes buffered next, so new_db_csize
	 * is advanced only after the record has been buffered */
	ok = write_new_buf(buf, buflen);
	new_db_csize += buflen;
	return ok;
}



/* Write the magic string and the rest of the header at the head of the
 * new database file.
 * Buffered records are flushed first.  The header starts as a copy of
 * def_db_hdr and is then given the start time as its serial number, the
 * current data size, flags, expiration and flooding thresholds, and the
 * page size.  Finally the file is extended so that its last page is
 * complete.
 *	emptied	   nonzero to use the default flooding thresholds and
 *		    default "no keep" checksums and to mark the database
 *		    cleared, instead of taking them from the old database
 * Write and extension failures are reported with dcc_logbad(). */
static void
write_new_hdr(u_char emptied)
{
	DB_HDR new_hdr;
	DCC_CK_TYPES type;
	DCC_TGTS thold;
	int partial;

	write_new_flush(1);

	new_hdr = def_db_hdr;
	dcc_timeval2ts(new_hdr.p.sn, &start, 0);
	new_hdr.p.db_csize = new_db_csize;
	new_hdr.p.page_size = new_db_page_size;

	if (grey_on)
		new_hdr.p.flags |= DB_PARM_FG_GREY;
	if (repair || dccd_started_us) {
		new_hdr.p.flags |= DB_PARM_FG_SELF_CLEAN;
		/* note a second consecutive self-cleaning */
		if (old_db_flags & DB_PARM_FG_SELF_CLEAN)
			new_hdr.p.flags |= DB_PARM_FG_SELF_CLEAN2;
	}

	for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
		thold = new_ex_secs[type].clean_thold;
		if (thold == 0) {
			/* nothing was chosen for this type, so use defaults */
			new_hdr.p.ex_secs[type].clean_thold
					= DEF_FLOD_THOLDS(grey_on, type);
			new_hdr.p.ex_secs[type].all = def_expire_secs;
			new_hdr.p.ex_secs[type].spam = (DCC_CK_LONG_TERM(type)
							? def_expire_spamsecs
							: def_expire_secs);
		} else {
			new_hdr.p.ex_secs[type].clean_thold = thold;
			new_hdr.p.ex_secs[type].all = new_ex_secs[type].all;
			new_hdr.p.ex_secs[type].spam = new_ex_secs[type].spam;
			new_hdr.p.ex_ts[type] = new_ex_ts[type];
		}

		if (!emptied)
			new_hdr.p.flod_tholds[type] = old_db_flod_tholds[type];
		else
			new_hdr.p.flod_tholds[type] = DEF_FLOD_THOLDS(grey_on,
							type);
	}

	/* the cleared flag survives from the old database */
	if (!emptied && !(old_db_flags & DB_PARM_FG_CLEARED))
		new_hdr.p.flags &= ~DB_PARM_FG_CLEARED;
	else
		new_hdr.p.flags |= DB_PARM_FG_CLEARED;
	new_hdr.p.nokeep_cks = emptied ? def_nokeep_cks() : old_db_nokeep_cks;

	write_new_db(&new_hdr, sizeof(new_hdr), 0, 1);

	/* ensure that the last page of the file is complete */
	if (new_db_page_size == 0)
		return;
	partial = new_db_fsize % new_db_page_size;
	if (partial == 0)
		return;
	if (!db_extend(dcc_emsg, new_db_fd, new_db_nm,
		       new_db_fsize + (new_db_page_size - partial),
		       new_db_fsize))
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
	new_db_fsize += (new_db_page_size - partial);
}



/* Remove a file and complain, without stopping, if that fails.
 *	nm	path of the file to remove */
static void
unlink_whine(const char *nm)
{
	/* report the file that could not be removed, which is not
	 * necessarily the current database */
	if (0 > unlink(nm))
		dcc_error_msg("unlink(%s): %s",
			      nm, ERROR_STR());
}



/* Rename a file, reporting a failure with dcc_logbad(EX_IOERR,...).
 *	from	existing path
 *	to	new path */
static void
rename_bail(const char *from, const char *to)
{
	if (rename(from, to) >= 0)
		return;

	dcc_logbad(EX_IOERR, "rename(%s, %s): %s",
		   from, to, ERROR_STR());
}



/* Try to wait until the server is willing to listen.
 * The server is polled every 5 seconds with a flood check request until
 * it answers DCC_OP_OK or until more than RESTART_DELAY seconds have
 * passed or the clock has jumped backwards.
 *	aop	the operation the caller is about to request
 *	val1	the first value of that operation
 * aop and val1 are used only to name the pending operation in the
 * diagnostic message; the request sent from here is always
 * DCC_AOP_FLOD with DCC_AOP_FLOD_CHECK.
 * A failure is logged on the first try, on every try when db_debug is
 * set, and when giving up. */
static void
server_listening(DCC_AOPS aop, u_int32_t val1)
{
	struct timeval s, now;
	DCC_OPS result;
	char respbuf[DCC_OPBUF];
	u_char first;

	gettimeofday(&s, 0);
	first = 1;
	for (;;) {
		/* Ask about status to ensure the server has caught up
		 * and to wait the full measure.  We wait the full
		 * measure instead of giving up early if the server
		 * is persistently sick with the args to dcc_clnt_rdy */
		dcc_ctxts_lock();
		dcc_clnt_rdy(dcc_emsg, ctxt,
			     (grey_on ? DCC_CLNT_FG_GREY : 0)
			     | DCC_CLNT_FG_NO_FAIL);
		dcc_ctxts_unlock();

		result = dcc_aop(dcc_emsg, ctxt, grey_on, DCC_NO_SRVR,
				 DCC_AOP_FLOD, DCC_AOP_FLOD_CHECK,
				 0, 0, 0, 0, 0, &aop_resp, 0);
		/* DCC_OP_OK is the answer that the other dcc_aop() calls
		 * in this file treat as success */
		if (result == DCC_OP_OK)
			return;

		gettimeofday(&now, 0);

		/* eventually give up and try to open the new database */
		if (db_debug || first
		    || now.tv_sec > s.tv_sec + RESTART_DELAY
		    || now.tv_sec < s.tv_sec) {
			if (result ==  DCC_OP_ERROR
			    || result == DCC_OP_INVALID) {
				dcc_error_msg("%s", dcc_emsg);
			} else {
				dcc_error_msg("%s for %s: %s",
					      dcc_aop2str(0, 0,
							DCC_AOP_FLOD,
							DCC_AOP_FLOD_CHECK),
					      dcc_aop2str(respbuf,
							sizeof(respbuf),
							aop,
							val1),
					      aop_resp.resp.val.string);
			}
			/* stop after waiting too long or if the clock
			 * has gone backwards */
			if (now.tv_sec > s.tv_sec + RESTART_DELAY
			    || now.tv_sec < s.tv_sec)
				return;
		}
		sleep(5);
		first = 0;
	}
}



/* tell the daemon to switch to the new database */
static void
dccd_new_db(const char *msg)
{
	if (!dccd_unlocked)
		return;

	/* Send a round of NOPs and ask about status to ensure the server
	 * has dealt with requests that arrived while we had the database
	 * locked and otherwise caught up.  We want to try to ensure that
	 * the server is listening when we re-open the database so that
	 * it does not leave flooding off.
	 * On some systems with lame mmap() support including BSD/OS, the
	 * the daemon can stall for minutes in close(). */
	server_listening(DCC_AOP_DB_NEW, 0);

	dccd_unlocked = 0;
	if (DCC_OP_OK == dcc_aop(dcc_emsg, ctxt, grey_on,
				 DCC_NO_SRVR, DCC_AOP_DB_NEW,
				 0, 0, 0, 0, 0, 0, &aop_resp, 0)) {
		/* the daemon assumes a resumption of flooding with
		 * a new database */
		if (flods_off > 0)
			--flods_off;
	} else {
		/* cannot be a fatal error,
		 * lest we leave the database broken */
		dcc_error_msg("%s: %s", msg, dcc_emsg);
	}
}



/* Clean up.
 * Files that are still marked as created are removed, the database and
 * lock file descriptors are closed, the lock file is removed, the
 * database is closed, the daemon is told to use the new database, and
 * flooding is resumed once for each time recorded in flods_off.
 * Nothing here is fatal; problems are only reported. */
static void
finish(void)
{
	int bailing = 0;

	/* delete the new files */
#ifndef DCC_DBCLEAN_KEEP_NEW
	if (new_db_created) {
		unlink_whine(new_db_nm);
		new_db_created = 0;
		bailing = -1;
	}
	/* we don't really know if the new hash file was created,
	 * so don't worry about problems */
	if (new_hash_created) {
		unlink(new_hash_nm);
		new_hash_created = 0;
		bailing = -1;
	}
#endif
	if (cur_db_created) {
		unlink_whine(cur_db_nm);
		/* as with the new hash file, failing to remove
		 * the hash file is ignored */
		unlink(cur_hash_nm);
		cur_db_created = 0;
		bailing = -1;
	}

	if (new_db_fd >= 0) {
		if (0 > close(new_db_fd))
			dcc_error_msg("close(%s): %s",
				      new_db_nm, ERROR_STR());
		new_db_fd = -1;
	}
	if (old_db_fd >= 0) {
		if (0 > close(old_db_fd))
			dcc_error_msg("close(%s): %s",
				      old_db_nm, ERROR_STR());
		old_db_fd = -1;
	}
	flod_unmap(0, 0);

	if (lock_db_fd >= 0) {
		unlink_whine(lock_db_nm);
		close(lock_db_fd);
		lock_db_fd = -1;
	}

	/* release the daemon, but if the database is still open, it's bad */
	db_close(bailing);
	dccd_new_db("finish");

	/* undo each time that flooding was turned off */
	while (flods_off > 0) {
		server_listening(DCC_AOP_FLOD, DCC_AOP_FLOD_RESUME);
		--flods_off;
		if (DCC_OP_OK != dcc_aop(dcc_emsg, ctxt, grey_on, DCC_NO_SRVR,
					 DCC_AOP_FLOD, DCC_AOP_FLOD_RESUME,
					 0, 0, 0, 0, 0, &aop_resp, 0)) {
			dcc_error_msg("%s", dcc_emsg);
		}
	}
}



/* Terminate with a signal.
 * The interruption is logged and the process exits with the negated
 * signal number as its status.
 *	s	number of the signal that was caught */
static void NRATTRIB
sigterm(int s)
{
	const int status = -s;

	dcc_error_msg("interrupted");
	exit(status);
}
