/*************************************************************************
 *
 *  $RCSfile: hashtab.cxx,v $
 *
 *  $Revision: 1.3 $
 *
 *  last change: $Author: rt $ $Date: 2004/06/16 10:23:29 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/
/*
	$Id: hashtab.cxx,v 1.3 2004/06/16 10:23:29 rt Exp $

	btw: tabs are 4

	@doc HashTab

	@module hashtab.cxx | General purpose hash table base class
	<c HashTabBase> and derived classes <c CharHashTab> and <c StringHashTab>


	@topic Who did it? |

	General purpose self-sorting hash table class

	Developed and enhanced by Eike Rathke (erAck) in October 1995 and July
    1996. For comments and suggestions write an eMail to er@erack.de

	Hash bucket elements and separate chaining collision-resolution are based
	on hash.c by Allen I. Holub, keyed by J. R. Van Zandt on 31 Jan 87 from
	listing in Dr. Dobb's Journal #124 (Feb 87). (C) 1986, Allen I. Holub.
	All rights reserved.

	Class layout, implementation, Sieve of Eratosthenes and self-sorting
	algorithm by erAck

	AutoDuck 2.10 style documentation added by erAck

	AutoDuck is a "Documentation Extraction and Formatting Tool"
	by Eric Artzt, which produces RTF output for the Microsoft (tm) HC Windows
	help compiler as well as printable RTF and HTML output. For more
	information see the accompanying hashtab.hlp file or contact the author
	at erica@microsoft.com

	AutoDuck v2.10 for win95/NT including MFC v4.x compliant C++ Source
    is also available in the net, look for autoduck.zip

	AutoDuck v1.09.03 for DOS and winNT (without sources and no HTML support)
	is available as AUTO444B.*

	AutoDuck 2.10 source patches by erAck (written for MFC 4.x under win95/NT
	it still runs now with MFC 2.5 and PharLap under DOS and parses pure
	virtual member function declarations correctly :-)

    If you are interested in the modified sources contact me at er@erack.de
    but be warned: it's a very Quick'n'Dirty Hack, and I won't structure it
    since I don't use it anymore.

	P.S.: excuse the strange English, it's not my native language..

	EnJoy!


	@topic The hash table |

	The hash table structures are defined in hashtab.hxx. A <c HashTabBase>
	is an object that contains, among others, the table size and a pointer
	<md .ppTable> to the table itself, this last an array of <t HASHBUCKET>
	pointers. Collisions are resolved by putting the HASHBUCKETs into a
	doubly linked list. In each HASHBUCKET, the <e .pNext> field points at
	the next bucket in the chain or is NULL if there isn't another bucket.
	The <e .ppPrev> field points at the <e .pNext> field
	of the previous bucket. In the case of the first HASHBUCKET in the chain,
	it will point at the position in <md .ppTable> where the first hash-table
	element resides.

	The HASHBUCKET itself is actually a header, similar to the one used by
	malloc(). The pointer returned by <mf HashTabBase::AddSym> or
	<mf HashTabBase::FindSym> points behind this header and may be used as a
	structure pointer by the applications program.

	More frequently accessed elements are moved towards the top of a chain
	and will be found faster the next time. See <mf HashTabBase::FindSym> in
	module hashtab.cxx for how this is accomplished.


	@object what?!? |
	You don't want to make a hash table become an OLE object, do you? ;-)

*/

#include <string.h>		// strcmp(), memset()
#include <math.h>		// sqrt()
#include <limits.h>		// CHAR_BIT

#ifndef _STRING_HXX //autogen
#include <tools/string.hxx>
#endif
#include "hashtab.hxx"

#ifdef HASH_DEBUG
#include <stdlib.h>		// qsort()
#include <stdio.h>		// printf()
#define DBG_OUT(s) printf(s)
#define DBG_OUT2(s,v) printf(s,v)
#else
#define DBG_OUT(s)
#define DBG_OUT2(s,v)
#endif

/* @const Maximum new'able memory:
	sizeof(HASHTYPE) \< sizeof(ULONG)
	? (HASHTYPE)~0x1F - 1
	: (((ULONG)~0x1F - 1) / CHAR_BIT) / 2;	<nl>
	leave 32 bytes room for header, on 64kB segmented PCs the maximum number
	of new'able chars is represented, else the maximum number of new'able
	chars wherein the bits can be addressed by an ULONG value
	<t nHashMaxNewBits> * 2, which is necessary for the prime table used in
	<mf HashTabBase::GetNearPrime>.

 */
static const HASHTYPE nHashMaxNewSize =
	( sizeof(HASHTYPE) < sizeof(ULONG) )
		? (HASHTYPE)~0x1F - 1
		: (((ULONG)~0x1F - 1) / CHAR_BIT) / 2;
// @xref <t HASHTYPE> <t HASHMAXNEWABLE( xType )>

// @type HASHMAXNEWABLE( xType ) | Maximum number of new'able elements of
// type xType: (<t nHashMaxNewSize> / sizeof(<p xType>))
#define HASHMAXNEWABLE( xType ) (nHashMaxNewSize / sizeof(xType))

// @const Maximum number of bits available for the prime sieve:
// (ULONG)HASHMAXNEWABLE(char) * CHAR_BIT;
static const ULONG nHashMaxNewBits = (ULONG)HASHMAXNEWABLE(char) * CHAR_BIT;
// @xref <t HASHMAXNEWABLE( xType )>

// @const Size of <t HASHBUCKET> rounded up to the next multiple of
// sizeof(void*); the symbol bucket data starts at this offset.
static const size_t nAlignedHashBucket =
	((sizeof(HASHBUCKET) + sizeof(void*) - 1) / sizeof(void*)) * sizeof(void*);

// Bit masks for the sieve in GetNearPrime, most significant bit first:
// entry n selects bit (7 - n) of a byte.
static const unsigned char nPrimeBitMask[8] =
	{ 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };

// --- CharHashTab ----------------------------------------------------------

// @mfunc Compute the raw hash value of a NUL terminated char string by
// shifting the running value left one bit and adding each character.
// Const virtual member function.
// @rdesc raw <t HASHTYPE> value, not yet reduced to the table size
HASHTYPE CharHashTab::Hash(
		const HASHNAMEPTR pName		// @parm Pointer to symbol name for which
									// to compute the hash value
	) const
{
	HASHTYPE nResult = 0;
	for ( const char* pCh = (const char*)pName; *pCh; ++pCh )
		nResult = (nResult << 1) + *pCh;
	return nResult;
}
// @comm Note that the MOD table_length is not done here,
// it's done in <mf HashTabBase::AddSym> and <mf HashTabBase::FindSym>.


// @mfunc Compare two symbol names as NUL terminated char strings.
// Const virtual member function.
// @rdesc The unmodified <f strcmp>() return value
// @flag negative | name 1 less than name 2
// @flag  0       | both names equal
// @flag positive | name 1 greater than name 2
int CharHashTab::NameCmp(
		const HASHNAMEPTR pName1,	// @parm Pointer to symbol name 1
		const HASHNAMEPTR pName2    // @parm Pointer to symbol name 2
	) const
{
	const char* const pLeft = (const char*) pName1;
	const char* const pRight = (const char*) pName2;
	return strcmp( pLeft, pRight );
}


// @mfunc Duplicate a symbol name given as NUL terminated char string into
// a new'ed char array. Const virtual member function.
// @rdesc Pointer to new'ed memory, 0 if new failed.
HASHNAMEPTR CharHashTab::NameDup(
		const HASHNAMEPTR pName		// @parm Pointer to symbol name
	) const
{
	const char* const pSource = (const char*) pName;
	const size_t nBytes = strlen( pSource ) + 1;	// including the terminator
	char* pCopy = new char[ nBytes ];
	if ( pCopy )
		memcpy( pCopy, pSource, nBytes );
	return pCopy;
}


// @mfunc Release a symbol name; the name is deleted as a char array, which
// matches the allocation done in <mf .NameDup>. Virtual member function.
void CharHashTab::NameDel( HASHNAMEPTR pName )
{
	char* pChars = (char*)pName;
	delete [] pChars;
}

// @mfunc | CharHashTab | ~CharHashTab | Dtor
// The destructor body comes from the HASH_IMPL_DTOR macro, which is not
// defined in this file. NOTE(review): presumably it calls
// <mf HashTabBase::DtorCall> so that <mf .NameDel> can still be dispatched;
// confirm against hashtab.hxx.
// @xref <t HASH_IMPL_DTOR( ClassName )>
HASH_IMPL_DTOR( CharHashTab );		// don't forget it!


// --- StringHashTab --------------------------------------------------------

#ifndef HASH_NO_STRING_CLASS

// Character type of the String buffer.
// No Unicode String? Then use: typedef char HASHSTRINGDATATYPE;
typedef sal_Unicode HASHSTRINGDATATYPE;

// @mfunc Compute the raw hash value of a String by shifting the running
// value left one bit and adding each character of the buffer.
// Const virtual member function.
// @rdesc raw <t HASHTYPE> value
// @comm Note that the MOD table_length is not done here,
// it is done in <mf HashTabBase::AddSym> and <mf HashTabBase::FindSym>.
HASHTYPE StringHashTab::Hash(
		const HASHNAMEPTR pName		// @parm Pointer to symbol name for which
									// to compute the hash value
	) const
{
	const String* const pString = (const String*)pName;
	const HASHSTRINGDATATYPE* pCh = pString->GetBuffer();
	const HASHSTRINGDATATYPE* const pStop = pCh + pString->Len();
	HASHTYPE nResult = 0;
	for ( ; pCh < pStop; ++pCh )
		nResult = (nResult << 1) + *pCh;
	return nResult;
}


// @mfunc Symbol name compare function. Const virtual member function.
// @rdesc <f strcmp>() return value
// @flag -1 | name 1 less than name 2
// @flag  0 | both names equal
// @flag  1 | name 1 greater than name 2
int StringHashTab::NameCmp(
		const HASHNAMEPTR pName1,	// @parm Pointer to symbol name 1
		const HASHNAMEPTR pName2    // @parm Pointer to symbol name 2
	) const
{
	switch ( ((const String*)pName1)->CompareTo( *(const String*)pName2 ) )
	{
		case COMPARE_LESS:
			return -1;
		case COMPARE_EQUAL:
			return 0;
		case COMPARE_GREATER:
			return 1;
		default:
			return 0;
	}
}


// @mfunc Duplicate a symbol name given as String pointer by copy
// constructing a new'ed String. Const virtual member function.
// @rdesc Pointer to new'ed memory, 0 if new failed.
HASHNAMEPTR StringHashTab::NameDup(
		const HASHNAMEPTR pName		// @parm Pointer to symbol name
	) const
{
	const String& rSource = *(const String*)pName;
	String* pCopy = new String( rSource );
	return pCopy;
}
// @comm Preferably the copy-ctor should increment a reference counter first
// instead of copying the String data.


// @mfunc Release a symbol name; the name is deleted as a String, which
// matches the allocation done in <mf .NameDup>. Virtual member function.
void StringHashTab::NameDel( HASHNAMEPTR pName )
{
	String* pString = (String*)pName;
	delete pString;
}


// @mfunc Set <md HashTabBase::aDataDelLink> to <mf .StringDel>.
// Symbol bucket data is assumed to be String pointers, the String
// will be deleted on calls to <mf HashTabBase::DelSym> or
// <mf HashTabBase::DtorCall>.
void StringHashTab::SetStringDataDelLink()
{
	SetDataDelLink( STATIC_LINK( this, StringHashTab, StringDel ) );
}
// @xref <mf HashTabBase::SetDataDelLink>


// @mfunc long | StringHashTab | StringDel |
// Delete the String whose pointer is stored in the symbol bucket data,
// done as delete *(String**)pSymbolData, and reset the stored pointer to 0.
// Does nothing if <p pSymbolData> or the stored pointer is 0.
// @parm StringHashTab* | pThis | pointer to instance of link
// @parm HASHSYMPTR | pSymbolData | pointer to symbol bucket data
// @rdesc always 1
IMPL_STATIC_LINK( StringHashTab, StringDel, HASHSYMPTR, pSymbolData )
{
	// @comm You could pass the link to a non-static member function as:
	// if (<p pThis>)
	//   ((StringHashTab*)<p pThis>)->Method( (HASHSYMPTR)<p pSymbolData> );
	if ( pSymbolData )
	{
		String** const ppString = (String**)pSymbolData;
		if ( *ppString )
		{
			delete *ppString;
			*ppString = 0;
		}
	}
	return 1;
}
// @xref <mf HashTabBase::SetDataDelLink> <mf .SetStringDataDelLink>


// @mfunc Unlink any Link stored in <md HashTabBase::aDataDelLink>
void StringHashTab::ClearDataDelLink()
{
	SetDataDelLink( Link() );
}
// @xref <mf HashTabBase::SetDataDelLink>


// @mfunc | StringHashTab | ~StringHashTab | Dtor
// The destructor body comes from the HASH_IMPL_DTOR macro, which is not
// defined in this file. NOTE(review): presumably it calls
// <mf HashTabBase::DtorCall> so that <mf .NameDel> can still be dispatched;
// confirm against hashtab.hxx.
// @xref <t HASH_IMPL_DTOR( ClassName )>
HASH_IMPL_DTOR( StringHashTab );		// don't forget it!


#endif	// HASH_NO_STRING_CLASS


// --- HashTabBase ----------------------------------------------------------

// Bit manipulation routines for GetNearPrime.
// Even in 16bit environments offset b>>3 works fine because we limited
// the value in GetNearPrime.
// Set bit number b in the bit array a; bit 0 is the most significant bit
// of a[0] (see nPrimeBitMask).
inline void SetBit( char* a, ULONG b )
{
	char& rByte = a[ b >> 3 ];
	rByte |= nPrimeBitMask[ b & 7 ];
}

// Test bit number b in the bit array a, returns TRUE if it is set.
inline FASTBOOL GetBit( char* a, ULONG b )
{
	if ( a[ b >> 3 ] & nPrimeBitMask[ b & 7 ] )
		return TRUE;
	return FALSE;
}

// @mfunc static member function. Get a prime number near or below <p nVal>.
// It's an improved version of the old Sieve of Eratosthenes: only odd
// numbers are stored, one bit each, where bit index i stands for the
// number 2*i+1.
// @rdesc The returned prime number <p n> will be n==nVal or n==nVal+1 or
// n\<nVal and is guaranteed to be small enough to represent a new'able amount
// of <t HASHBUCKET> pointers, thus may be used as a parameter in a
// <mf .HashTabBase> Ctor call.
// @comm Small values are special-cased without sieving: 0 yields 1 and
// 1, 2 and 3 are returned unchanged.
HASHTYPE HashTabBase::GetNearPrime(
		HASHTYPE nVal		// @parm value for which to get the near prime number
	)
{
	if ( !nVal )
		return 1;
	if ( nVal <= 3 )
		return nVal;

	if ( nVal >= HASHMAXNEWABLE( HASHBUCKET* ) )
		nVal = HASHMAXNEWABLE( HASHBUCKET* ) - 1;
		// ppTable must fit into memory, -1 because found prime could be nVal+1
	// the algorithm itself would work without limiting nVal

	// s2 is first the sieve's upper root bound, later (below) its square
	ULONG s2 = (ULONG) sqrt( (double)nVal ) + 1;
	// sieve must fit into memory, we don't need even numbers
	if ( s2 * s2 > (nHashMaxNewBits - CHAR_BIT) * 2 )
		s2 = (ULONG) sqrt( (double)((nHashMaxNewBits - CHAR_BIT) * 2) );
	ULONG s = s2;		// largest candidate divisor
	s2 = s2 * s2;		// largest number covered by the sieve
	// one bit per odd number up to s2, rounded up to whole bytes
	size_t siz = (size_t)(s2/2 / CHAR_BIT) + 1;
	char* a = new char[siz];
	memset( a, 0, siz );
	ULONG j;
	// c is the odd candidate, i its bit index (c == 2*i+1)
	for ( ULONG c=3, i=1; c<=s; c+=2, i++ )
	{	// all odd numbers
		if ( !GetBit( a, i ) )
		{	// not a multiple of a previous run
			ULONG x = c * 2;	// step between odd multiples of c
			ULONG c2 = c * c;	// first multiple not marked by a smaller c
			// j is the bit index of c2; advancing c2 by 2*c advances j by c
			for ( j=c2/2; c2<=s2; c2+=x, j+=c )
				SetBit( a, j );
			// numbers above s have been processed when c==s is reached
		}
	}
	// start at the bit for nVal (odd) or nVal+1 (even), clamped to the sieve
	j = (nVal <= s2 ? nVal/2 : s2/2);		// don't run wild
	// find the prime: walk down to the first unmarked bit, stopping at
	// index 1 (the number 3) at the latest
	while ( j > 1 && GetBit( a, j ) )
		j--;
	delete [] a;
	return (HASHTYPE)(j*2+1);	// cast is ok because nVal has been limited
}


// @mfunc Convert a <t HASHBUCKET>* into the <t HASHSYMPTR> of its symbol
// bucket data by advancing <t nAlignedHashBucket> bytes. A byte offset is
// used instead of pB+1 to prevent strange compiler alignment behavior.
inline HASHSYMPTR HashTabBase::IncBucketPtr(
		HASHBUCKET* pB		// @parm Pointer to HASHBUCKET
	) const
{
	char* const pHeaderBytes = (char*)pB;
	return (HASHSYMPTR)( pHeaderBytes + nAlignedHashBucket );
}


// @mfunc Convert a <t HASHSYMPTR> back into the <t HASHBUCKET>* of its
// header by stepping back <t nAlignedHashBucket> bytes; the inverse of
// <mf .IncBucketPtr>.
inline HASHBUCKET* HashTabBase::DecBucketPtr(
		HASHSYMPTR pSD		// @parm pointer to symbol bucket data
	) const
{
	char* const pDataBytes = (char*)pSD;
	return (HASHBUCKET*)( pDataBytes - nAlignedHashBucket );
}


// @mfunc Make <p pB> the current element: clears <md .bActOnNext> and
// stores <p pB> in <md .pActBucket>.
inline void HashTabBase::SetActBucket( HASHBUCKET* pB )
{
	bActOnNext = FALSE;
	pActBucket = pB;
}


// @mfunc Class <c HashTabBase> Ctor, construct a hash table with
// <p nMaxSlots> slots (at least 1). If the table can be allocated, all
// slots are cleared and <p rDelLink> is assigned to <md .aDataDelLink>;
// otherwise <md .ppTable> stays 0.
HashTabBase::HashTabBase(
		HASHTYPE nMaxSlots,	// @parm Number of Slots, should be a prime number,
							// ex. 47 61 89 113 127 157 193 211 257 293 359 401
		const Link& rDelLink	// @parm DataDel function
	) :
	ppTable(0),
	pActBucket(0),
	nNumSyms(0),
	nSize( nMaxSlots ? nMaxSlots : 1 ),
	nLastFindSymHash(0),
	nActPos(0),
	bActOnNext( FALSE ),
	bDtorCalled( FALSE ),
	bOwnDtorCall( FALSE ),
	bToTop( TRUE )
{
	// make sure we won't blow away memory: too many slots, no table
	if ( nSize > HASHMAXNEWABLE( HASHBUCKET* ) )
		return ;

	ppTable = new HASHBUCKET* [ nSize ];
	if ( !ppTable )
		return ;

	memset( ppTable, 0, nSize * sizeof( HASHBUCKET* ) );
	SetDataDelLink( rDelLink );
}
/*	@comm
	It's a good idea to make <p nMaxSlots> a prime number (though that's not
	required). Some useful primes are: 47 61 89 113 127 157 193 211 257 293
	359 401. You may find a prime number with <mf .GetNearPrime>. Note that
	<p nMaxSlots> is NOT the maximum number of symbols the symbol table can
	hold. The number of symbols is limited only by heap space. However,
	access time gradually increases when the number of symbols gets much
	larger than <p nMaxSlots>.
	@xref <t HASHTYPE>

 */


// @mfunc Dtor.
// A derived class is expected to delete all <t HASHBUCKET>s together with
// their symbol names and symbol bucket data by calling <mf .DtorCall> in
// its own Dtor. If that did not happen, DtorCall is invoked from here with
// <md .bOwnDtorCall> set, in which case only the symbol name data won't
// get deleted.
// @xref <mf CharHashTab::~CharHashTab> <mf StringHashTab::~StringHashTab>
HashTabBase::~HashTabBase()
{
	if ( bDtorCalled )
		return ;		// the derived class already cleaned up
	bOwnDtorCall = TRUE;
	DtorCall();
}


/*  @mfunc Function must be called by the derived classes Dtor if that class
	implements an own <mf .NameDel>.
	In case no Dtor didn't call it, <mf .~HashTabBase> will call it and
	it will free the table memory but not the symbol name data,
	because it can't call <mf .NameDel> anymore.
	If a link is set via <mf .SetDataDelLink>, this link will be called
	for each element with the symbol bucket data pointer as the argument.

	@xref <t HASH_IMPL_DTOR( ClassName )> <mf CharHashTab::~CharHashTab>,
	<mf StringHashTab::~StringHashTab>

 */
void HashTabBase::DtorCall()
{
	if ( bDtorCalled )
		return ;		// all done
	bDtorCalled = TRUE;
	if ( ppTable )
	{
		HASHBUCKET *p1, *p2;
		register HASHBUCKET** pp = ppTable;
		register HASHTYPE j;
		for ( j=nSize; j-- >0; ++pp )
		{
			if ( *pp )
			{
				p1 = *pp;
				do
				{
					p2 = p1->pNext;
					// call the symbol bucket data delete function
					if ( aDataDelLink.IsSet() )
						aDataDelLink.Call( IncBucketPtr(p1) );
					// can't call NameDel anymore if derived object already destroyed
					if ( !bOwnDtorCall && p1->pName )
						NameDel( p1->pName );
					delete [] p1;
					p1 = p2;
				} while ( p1 );
			}
		}
		delete [] ppTable;
	}
}


/*	@mfunc Add a symbol whose associated data is a single pointer.
	Calls <mf .AddSym> with a data size of sizeof(void*) and, on success,
	stores <p pSymbol> in the symbol bucket data as
	*((HASHSYMPTR*)pSym) = pSymbol where <p pSym> is the pointer
	returned by AddSym.
	@rdesc Pointer to the symbol bucket data holding <p pSymbol>
	or <t 0> if <mf .AddSym> failed (symbol is not added).
	@xref <t HASHSYMPTR> <t HASHNAMEPTR>

 */
HASHSYMPTR HashTabBase::AddPtrSym(
		const HASHNAMEPTR pName,	// @parm Pointer to symbol name
		void* pSymbol,				// @parm Pointer to associated data
		BOOL bUseLastFindSymHash	// @parm If TRUE the hash value from the
									// last <mf .FindSym> is used instead of
									// calculating it again
	)
{
	HASHSYMPTR pData = AddSym( pName, sizeof( void* ), bUseLastFindSymHash );
	if ( pData )
	{
		HASHSYMPTR* const ppSlot = (HASHSYMPTR*)pData;
		*ppSlot = pSymbol;
	}
	return pData;
}
/*	@ex Usage of
	HASHSYMPTR AddPtrSym( const HASHNAMEPTR <p pName>,
	void* <p pSymbol>, BOOL <p bUseLastFindSymHash> = FALSE ) |

typedef struct
{
	char	str[12];
	int		count;
}
DATA;

int foo( HashTabBase* pTab )
{
	DATA* p = new DATA;
	strcpy( p->str, "hello world");
	p->count = 1;
	if ( !pTab->AddPtrSym( "MyFirst", p ) )
	{
		delete p;
		return -1;		// error
	}
}

// use as
void bar( HashTabBase* pTab )
{
	DATA* p;
	if ( p = (DATA*) FindPtrSym( "MyFirst" ) )
	{
		int i = p->count;
		//...
	}
}

*/


/*	@mfunc Add a symbol to the hash table.
	A zero filled block holding the <t HASHBUCKET> header followed by
	<p nDataSize> bytes of symbol bucket data is allocated, the name is
	duplicated via <mf .NameDup> and the new bucket is linked in at the top
	of the chain of its slot. The new bucket becomes the current element.
	@rdesc Pointer to memory for associated data of size <p nDataSize>
	or <t 0> if there is no table, the requested size is too large or
	there isn't enough memory (symbol is not added).

*/
HASHSYMPTR HashTabBase::AddSym(
		const HASHNAMEPTR pName,	// @parm Pointer to name of symbol
		size_t nDataSize,			// @parm Size of associated data
		BOOL bUseLastFindSymHash	// @parm If TRUE the hash value from the
									// last <mf .FindSym> call is used
									// instead of calculating it again
	)
{
	HASHBUCKET **pp, *tmp, *pSym;
	if ( !ppTable )
		return 0;
	// header size plus data size must not wrap around
	if ( nDataSize > ~(size_t)0 - nAlignedHashBucket )
		return 0;
	const size_t nBytes = nAlignedHashBucket + nDataSize;
	if ( !(pSym = (HASHBUCKET*) new char[ nBytes ] ) )
		return 0;
	memset( pSym, 0, nBytes );

	// copy symbol name
	pSym->pName = NameDup( pName );
	if ( !pSym->pName )
	{
		// the bucket was allocated as a char array, release it as such
		delete [] (char*)pSym;
		return 0;
	}

	// nLastFindSymHash has already been reduced modulo nSize by FindSym
	nActPos = ( bUseLastFindSymHash ? nLastFindSymHash :
		Hash( pName ) % nSize );
	SetActBucket( pSym );

	pp = &ppTable[ nActPos ];
	pSym->pNext = tmp = *pp;	// @comm Assigns the pointer stored in
								// <md .ppTable>[ <mf .Hash>( <p pName> ) % <md .nSize> ]
								// to <e HASHBUCKET.pNext>.
	*pp	= pSym;					// @comm The entry in <md .ppTable> becomes
								// the pointer to this <t HASHBUCKET>.
	pSym->ppPrev	= pp;		// @comm The address of the position in
								// <md .ppTable> is assigned to
								// <e HASHBUCKET.ppPrev>.
								// Thus, the position in ppTable acts like a
								// HASHBUCKET.pNext field,
								// *(HASHBUCKET.ppPrev) == this HASHBUCKET.
	if( tmp )
		tmp->ppPrev = &pSym->pNext;	// @comm If the entry in <md .ppTable>
								// was pointing at an existing <t HASHBUCKET>,
								// the next <e HASHBUCKET.ppPrev> is assigned
								// the address of this <e HASHBUCKET.pNext>.
	nNumSyms++;
	// the symbol bucket data area starts nAlignedHashBucket bytes behind pSym
	return IncBucketPtr( pSym );
}
/*	@ex Usage of
	HASHSYMPTR AddSym( const HASHNAMEPTR <p pName>,
	size_t <p nDataSize>, BOOL <p bUseLastFindSymHash> = FALSE ) |

typedef struct
{
	char	str[12];
	int		count;
}
DATA;

int foo( HashTabBase* pTab )
{
	DATA* sp;
	if ( sp = (DATA *) pTab->AddSym( "MyFirst", sizeof(DATA) ) )
	{
		strcpy( sp->str, "hello world");
		sp->count = 1;
		return 0;		// ok
	}
	else
		return -1;		// error
}

int bar( HashTabBase* pTab )
{
	DATA** sp;
	if ( sp = (DATA **) pTab->AddSym( "MyFirst", sizeof(DATA*) ) )
	{
		DATA* p = new DATA;
		strcpy( p->str, "hello world");
		p->count = 1;
		*sp = p;
		return 0;		// ok
	}
	else
		return -1;		// error
}

*/


/*	@mfunc Find a symbol.
	The calculated hash value (already reduced modulo <md .nSize>) is stored
	in <md .nLastFindSymHash> for quicker access in a subsequent
	<mf .AddSym> call with the third parameter <p bUseLastFindSymHash> set
	to TRUE.
	If <md .bToTop> is set to TRUE (which is the default unless you change it
	via <mf .SetToTop>) the element will be moved to the top of the chain in
	this slot if <e HASHBUCKET.nAccess> of this element exceeds nAccess of
	the top element. If nAccess of this element is somewhere below the top
	nAccess, the element will slide towards the top (see source code).
	The chain is not strictly sorted but a good guess. This way more
	frequently accessed symbols are found faster without the expense of a
	full sort.
	A found element becomes the current element (<md .pActBucket>,
	<md .nActPos>).

	@rdesc Returns a pointer to the applications symbol bucket data of the
	hash table element having the indicated name or NULL if the name isn't in
	the table (or there is no table). If more than one such entry is in the
	table, the most-recently added one is found.

 */
HASHSYMPTR HashTabBase::FindSym(
		const HASHNAMEPTR pName		// @parm Symbol name to find
	)
{
// change to #if 1 and define HASH_DEBUG if you wanna have a glance..
#if 0
#define TOP_OUT DBG_OUT
#define TOP_OUT2 DBG_OUT2
#else
#define TOP_OUT(s)
#define TOP_OUT2(s,v)
#endif
	if ( !ppTable )
		return 0;
	// p walks the chain of the slot the name hashes to
	register HASHBUCKET* p =
		ppTable[ (nLastFindSymHash = Hash( pName ) % nSize) ];
	// lowest access count remembered so far, starts at the USHORT maximum
	register USHORT nLastAccess = (USHORT)~0;
	// pT is the candidate insert position, initially the top of the chain
	register HASHBUCKET* pT = p;
	TOP_OUT2("slot %lu: ", (ULONG)nLastFindSymHash);
	while( p && NameCmp( pName, p->pName ) )
	{
		if ( bToTop )
		{
			TOP_OUT2("%u ",p->nAccess);
			if ( p->pNext && p->pNext->nAccess < nLastAccess )
			{	// remember the last element with a higher access count
				pT = p;
				nLastAccess = p->nAccess;
			}
		}
		p = p->pNext;
	}
	if ( p )
	{
		nActPos = nLastFindSymHash;
		// the access counter saturates at the USHORT maximum
		if ( bToTop && p->nAccess < (USHORT)~0 )
		{	// don't wrap to 0, and it shouldn't be moved anymore
			p->nAccess++;
			// if p==pT it's the very top element, no move
			if ( p != pT )
			{
				// Insert position is on top or one behind or one before
				// the last higher order element.
				// The chain is not strictly sorted
				// but a much better guess than unsorted.
				// To really sort it you would have to scan it from
				// top until you find the proper position, which is slow.
				if ( p->nAccess > (ppTable[nActPos])->nAccess )
					pT = ppTable[nActPos];		// on top
				else if ( p->nAccess <= pT->nAccess )
					pT = pT->pNext;
					// behind the last higher or same value which could
					// be the same position
				// else before the last higher value
				if ( pT != p )
				{
#ifdef HASH_DEBUG
					// assumes HASHBUCKET* == &HASHBUCKET.pNext !!!
					if ( pT->ppPrev != &ppTable[nActPos] )
						TOP_OUT2("insert behind %u: ",((HASHBUCKET*)(pT->ppPrev))->nAccess);
#endif
					TOP_OUT2("insert before %u: ",pT->nAccess);
					TOP_OUT2("%u",p->nAccess);
					// remove: let the predecessor's link skip p and fix
					// the back link of the successor, if any
					*(p->ppPrev) = p->pNext;
					if( *(p->ppPrev) )
						p->pNext->ppPrev = p->ppPrev;
					// insert: p takes over pT's incoming link, then pT is
					// hooked in directly behind p
					*(p->ppPrev = pT->ppPrev) = p;
					*(pT->ppPrev = &p->pNext) = pT;
				}
			}
		}
		TOP_OUT("\n");
		SetActBucket(p);
		return IncBucketPtr(p);
	}
	TOP_OUT("NOT FOUND\n");
	return 0;
}


// @mfunc Unlink a symbol from its chain, release its data (via the data
// delete link, if set), its name and the bucket memory. Does nothing if
// <p pSymbolData> is 0.
void HashTabBase::DelSym(
		HASHSYMPTR pSymbolData	/* @parm Pointer to symbol bucket data returned
								by a previous <mf .AddSym> or <mf .FindSym>
								call (it will actually be pointing just below
								the <t HASHBUCKET> header).	 */

	)
{
	if ( !pSymbolData )
		return ;
	// step back to the header in front of the data
	HASHBUCKET* const pBucket = DecBucketPtr( pSymbolData );

	// @comm If this is the current element the next element, if any,
	// becomes the current element, <md .bActOnNext> set to TRUE.
	if ( pBucket == pActBucket )
	{
		bActOnNext = FALSE;		// force Next() to really advance
		Next();
		bActOnNext = TRUE;
	}

	// @comm Assigns pointer to next symbol to <e HASHBUCKET.pNext> of
	// previous symbol. Remember that when there is no previous symbol
	// then <e HASHBUCKET.ppPrev> points to the position of this
	// <t HASHBUCKET> in <md .ppTable>, thus pNext is stored there.
	HASHBUCKET* const pFollowing = pBucket->pNext;
	*(pBucket->ppPrev) = pFollowing;
	// @comm If there is a next symbol that <e HASHBUCKET.ppPrev>
	// will point to the previous <e HASHBUCKET.pNext>.
	if ( pFollowing )
		pFollowing->ppPrev = pBucket->ppPrev;

	// @comm If a link is set via <mf .SetDataDelLink>, this link will be
	// called with the symbol bucket data pointer as the argument.
	if ( aDataDelLink.IsSet() )
		aDataDelLink.Call( pSymbolData );

	if ( pBucket->pName )
		NameDel( pBucket->pName );
	// allocated as a char array in AddSym
	delete [] (char*)pBucket;
	--nNumSyms;
}


/*	@mfunc Retrieve the first element of the hash table, that is the top
	element of the first non-empty slot, and make it the current element.
	Sets <md .pActBucket> to 0 if there isn't any element or if the table
	could not be allocated.
	@rdesc <t HASHSYMPTR> pointer to the symbol bucket data or <t 0> if there
	isn't any element.

 */
HASHSYMPTR HashTabBase::First()
{
	nActPos = 0;
	if ( !ppTable )
	{	// no table (allocation failed or too large), hence no elements
		SetActBucket( 0 );
		return (HASHSYMPTR)0;
	}
	HASHTYPE nTmp = 0;
	while ( nTmp < nSize && !ppTable[ nTmp ] )
		nTmp++;
	// if every slot is empty park on the last slot
	nActPos = (nTmp < nSize ? nTmp : nSize - 1);
	// pActBucket will become 0 here if there isn't any element.
	SetActBucket( ppTable[ nActPos ] );
	if ( pActBucket )
		return IncBucketPtr( pActBucket );
	return (HASHSYMPTR)0;
}


/*	@mfunc Retrieve the next element of the hash table: the successor in the
	current chain or, at the end of a chain, the top element of the next
	non-empty slot. If <md .bActOnNext> is set (see <mf .DelSym>) the
	current element itself is returned once and the flag is cleared.
	Sets <md .pActBucket> to 0 if there is no next element.
	@rdesc <t HASHSYMPTR> pointer to the symbol bucket data or <t 0> if there
	is no next element.

 */
HASHSYMPTR HashTabBase::Next()
{
	if ( !pActBucket )
	{	// no current element, nothing to advance from
		bActOnNext = FALSE;
		return (HASHSYMPTR)0;
	}

	if ( bActOnNext )
	{	// the current element already is the "next" one
		bActOnNext = FALSE;
		return IncBucketPtr( pActBucket );
	}

	if ( pActBucket->pNext )
	{	// still inside the chain
		pActBucket = pActBucket->pNext;
		return IncBucketPtr( pActBucket );
	}

	HASHTYPE nSlot = nActPos + 1;
	if ( nSlot < nSize )
	{	// we haven't been at the last slot, look for a non-empty one
		for ( ; nSlot < nSize; ++nSlot )
		{
			if ( ppTable[ nSlot ] )
				break;
		}
		if ( nSlot < nSize )
		{
			nActPos = nSlot;
			pActBucket = ppTable[ nSlot ];
			return IncBucketPtr( pActBucket );
		}
		nActPos = nSize - 1;	// ran off the end, park on the last slot
	}

	pActBucket = 0;
	bActOnNext = FALSE;
	return (HASHSYMPTR)0;
}


/*	@mfunc
	Advance the current element to its successor within the same chain, if
	there is one. Useful only if you have the same symbol name twice in the
	table with different symbol data. The current element is left untouched
	if there is no successor.
	@rdesc Pointer to next symbol bucket data or <t 0> if there is no next
	element.

 */
HASHSYMPTR HashTabBase::NextInChain()
{
	if ( !pActBucket || !pActBucket->pNext )
		return (HASHSYMPTR)0;

	SetActBucket( pActBucket->pNext );
	return IncBucketPtr( pActBucket );
}
/*	@ex Usage of NextInChain |

int foo( HashTabBase* pTab )
{
	DATA* p = (DATA*) pTab->FindSym( "bar" );
	while ( p )
	{
		if ( p->IsExpectedData && !strcmp( GetName( p ), "bar" ) )
		{
			// ...
			return 0;		// ok
		}
		p = (DATA*) pTab->NextInChain();
	}
	return -1;		// error
}

*/


// @mfunc const member function. Look up the name that belongs to a symbol
// bucket data pointer.
// @rdesc The name field of the <t HASHBUCKET> header in front of the
// symbol bucket data, <e HASHBUCKET.pName>
const HASHNAMEPTR HashTabBase::GetName(
		const HASHSYMPTR pSymbolData	// @parm Pointer to symbol bucket data
	) const
{
	HASHBUCKET* const pHeader = DecBucketPtr( pSymbolData );
	return (const HASHNAMEPTR) (pHeader->pName);
}


// @mfunc const member function. Look up the access counter that belongs to
// a symbol bucket data pointer.
// @rdesc The access counter of the symbol, <e HASHBUCKET.nAccess>
inline USHORT HashTabBase::GetTimesAccessed(
		const HASHSYMPTR pSymbolData	// @parm Pointer to symbol bucket data
	) const
{
	HASHBUCKET* const pHeader = DecBucketPtr( pSymbolData );
	return pHeader->nAccess;
}


/*	@mfunc const member function.
	Get various statistics showing the lengths of the chains
	(number of collisions) along with the mean depth of non-empty
	chains, variance, etc.
	All fields of <p pStat> written here are computed from scratch on each
	call, including the histogram <e HashStats.nLengths> and the overflow
	counter <e HashStats.nLonger>. A table that could not be allocated is
	reported as consisting of empty chains only. Does nothing if
	<p pStat> is 0.
 */
void HashTabBase::GetStats(
		HashStats* pStat		// @parm Pointer to <t HashStats> structure
	) const
{
	if ( !pStat )
		return ;

	HASHTYPE			nChainsU = 0;	// chains used
	HASHSYMCOUNTTYPE	nMaxLen = 0;	// maximum chain length
	HASHSYMCOUNTTYPE	nMinLen = (HASHSYMCOUNTTYPE)~0; // minimum chain length
	double	sumlen = 0;			// sum of all chain lengths
	double	sumqlen = 0;		// sum of all squared chain lengths
	double	usumlen = 0;		// same, used chains only
	double	usumqlen = 0;
	memset( pStat->nLengths, 0, sizeof(pStat->nLengths) );
	// reset as well, otherwise the count would accumulate across calls
	pStat->nLonger = 0;
	for ( HASHTYPE i=nSize; i-- >0; )
	{
		HASHSYMCOUNTTYPE chain_len = 0;		// chain length of this slot
		for ( HASHBUCKET* p = (ppTable ? ppTable[i] : 0); p; p=p->pNext )
			chain_len++;
		// chains too long for the histogram are only counted
		if ( chain_len >= nHashChainMaxLen )
			++pStat->nLonger;
		else
			++pStat->nLengths[chain_len];
		if ( nMinLen > chain_len )
			nMinLen = chain_len;
		if ( nMaxLen < chain_len )
			nMaxLen = chain_len;
		// square in double so that long chains can't overflow the integer type
		const double fLen = (double)chain_len;
		const double fQLen = fLen * fLen;
		sumlen += fLen;
		sumqlen += fQLen;
		if ( chain_len )
		{
			++nChainsU;
			usumlen += fLen;
			usumqlen += fQLen;
		}
	}
	pStat->nNumSyms = nNumSyms;
	pStat->nSize = nSize;
	pStat->nMinLen = nMinLen;
	pStat->nMaxLen = nMaxLen;
	pStat->nMeanLen = ( nSize ? (sumlen / nSize) : 0 );
	pStat->nVariance = ( nSize > 1 ?
		((sumqlen - sumlen * sumlen / nSize) / (nSize-1)) :
		0 );
	pStat->nChainsU = nChainsU;
	pStat->nMeanLenU = ( nChainsU ? usumlen / nChainsU : 0 );
	pStat->nVarianceU = ( nChainsU > 1 ?
		(usumqlen - usumlen * usumlen / nChainsU) / (nChainsU-1) :
		0 );
}
/*	@comm
	A word on statistics: a perfectly occupied hash table would be completely
	filled with chains of length one, so that no memory would be wasted and
	the access would be as quick as possible. Unfortunately you probably will
	never see this happen. Anyhow, the statistics for a perfect table would be
	a running mean length of all chains of 1.0000 with a deviation
	(=sqrt(fabs(variance))) of 0.0000 AND a running mean length of all used
	chains of 1.0000 with a deviation of 0.0000

 */


#ifdef HASH_DEBUG

// @func Symbol name (ASCIIZ char*) compare function for use with qsort,
// only defined ifdef HASH_DEBUG.
// qsort is called by <mf HashTabBase::PrintTab>.
// @rdesc <f strcmp>() return value
// @xref <t HASHBUCKET>
inline static int HashNameCmp(
		const void* s1,		// @parm HASHBUCKET**
		const void* s2		// @parm HASHBUCKET**
	)
{
	return strcmp(	(const char*)((*(HASHBUCKET**)s1)->pName),
					(const char*)((*(HASHBUCKET**)s2)->pName));
}


// @type void (* HashPrintFunc)( const HASHNAMEPTR, const HASHSYMPTR ) |
// Print routine that is passed a pointer to the name and a
// pointer to the applications data of a <t HASHBUCKET>

/*	@mfunc const member function, only defined ifdef HASH_DEBUG. Print the
		hash table, optionally sorted by key. Nothing is printed if the
		table could not be allocated.
	@parm HashPrintFunc | Print |
		Print routine that is passed a pointer to the name and a
		pointer to the applications data of a <t HASHBUCKET>
	@parm BOOL | bSort | TRUE if output is to be sorted on symbol names,
		requires enough memory for one pointer per symbol and can only be
		done on pure ASCIIZ char* symbol names
	@rdesc Error code
	@flag  0 | Everything ok
	@flag -1 | Insufficient memory for sort
 */
int HashTabBase::PrintTab(
		void (* Print)( const HASHNAMEPTR, const HASHSYMPTR ),
		BOOL bSort
	) const
{
	HASHBUCKET **outtab, **outp, *sym, **symtab;
	HASHTYPE i;
	if ( !ppTable )
		return 0;		// no table, nothing to print
	if ( bSort )
	{
		/*	Allocate memory for the outtab, an array of pointers to HASHBUCKET,
			and initialize it.	The outtab is different from the actual
			hash table in that every outtab element points to a single
			HASHBUCKET structure, rather than to a linked list of them.	 */
		size_t nNumSyms_size_t;
		// @comm Warning! With parameter <p bSort> set to TRUE,
		// <t HASHMAXNEWABLE( xType )> might be less than needed by
		// <md .nNumSyms> if <t unsigned long> differs from <t size_t> but new
		// and qsort won't accept more.
		if ( nNumSyms <= HASHMAXNEWABLE( HASHBUCKET* ) )
			nNumSyms_size_t = nNumSyms;
		else
			nNumSyms_size_t = HASHMAXNEWABLE( HASHBUCKET* );
		DBG_OUT2( "nNumSyms: %lu, ", (ULONG)nNumSyms );
		DBG_OUT2( "nNumSyms_size_t: %lu\n", (ULONG)nNumSyms_size_t );
		// new[] takes an element count, not a byte count
		if ( !(outtab = new HASHBUCKET* [ nNumSyms_size_t ] ) )
			return -1;
		size_t j;
		for( symtab=ppTable, outp=outtab, i=0, j=0;
				i < nSize && j < nNumSyms_size_t; i++, symtab++ )
		{
			for ( sym=*symtab; sym && j < nNumSyms_size_t;
					sym=sym->pNext, j++, outp++ )
				*outp=sym;
		}
		// only the entries actually collected may be sorted and printed
		const size_t nCollected = j;
		/*	Sort the outtab and then print it. IncBucketPtr in the print
			call moves the pointer past the header part of the HASHBUCKET
			structure */
		qsort( outtab, nCollected, sizeof( HASHBUCKET* ), HashNameCmp );
		for ( outp=outtab, j=0; j < nCollected; j++, outp++ )
			(*Print)((*outp)->pName, IncBucketPtr( *outp ));
		delete [] outtab;
	}
	else
	{
		// unsorted: slot by slot, each chain from top to bottom
		for( symtab=ppTable, i=nSize; i-- >0; symtab++)
		{
			for (sym=*symtab; sym; sym=sym->pNext)
				(*Print)(sym->pName, IncBucketPtr( sym ) );
		}
	}
	return 0;
}


// @mfunc Debug function: print hash table, only defined ifdef HASH_DEBUG.
// Just for simple CharHashTab! unless you don't mind a garbled
// HASHBUCKET.pName output..
void HashTabBase::dPrintTab()
{
	HASHBUCKET	**p, *bukp;
	HASHTYPE	i;
	printf("HashTabBase at %p (%lu element table, %lu symbols)\n",
		this, (ULONG)nSize, (ULONG)nNumSyms );
	for (p=ppTable, i=0; i<nSize; ++p, ++i)
	{
		if(!*p) continue;
		printf("Htab[%3d]   %p :", i, p);
		for (bukp=*p; bukp; bukp=bukp->pNext)
		{
			printf("= %p %-17s p=%p, n=%p, a=%u, user=%p\n",
				bukp, bukp->pName, bukp->ppPrev, bukp->pNext, bukp->nAccess,
				IncBucketPtr( bukp ) );
			printf("                       ");
		}
		putchar('\r');
	}
}

#endif


