/*
  Copyright Mission Critical Linux, 2000

  Kimberlite is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  Kimberlite is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Kimberlite; see the file COPYING.  If not, write to the
  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
  MA 02139, USA.
*/
#include "hash.h"
#include "allocate.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/* hash.c

   author: Ron Lawrence <lawrence@missioncriticallinux.com>
*/

/* When defined, tables and bucket arrays are obtained with malloc() and
   released with free(); otherwise the MEM_pool allocator is used (see
   the #ifdef USE_MALLOC sections in this file). */
#define USE_MALLOC


/* RCS identification string.  Marked unused so the compiler does not
   warn about a variable that nothing reads. */
static const char *version __attribute__ ((unused)) = "$Id: hash.c,v 1.6 2000/11/10 20:26:27 lawrence Exp $";

/* Number of buckets given to every table created by hash_new(). */
#define INITIAL_BUCKET_COUNT 127
/* Number of entry slots held by each bucket. */
#define BUCKET_CHAIN_LENGTH 7

/* Candidate bucket counts, in strictly increasing order.  nextSize()
   walks this list to pick the size a table grows to.  According to
   the original author the values were chosen to be prime, to give
   reasonable incremental growth, and to never become excessively
   large.  The list is read-only, hence const. */
static const long primesizes[] = {
    127, 163, 211, 269, 337, 431, 541, 677,
    853, 1069, 1361, 1709, 2137, 2677, 3347,
    4201, 5261, 6577, 8231, 10289, 12889, 16127,
    20161, 25219, 31531, 39419, 49277, 61603,
    77017, 96281, 120371, 150473, 188107, 235159,
    293957, 367453, 459317, 574157, 717697,
    897133, 1121423, 1401791, 1752239, 2190299,
    2737937, 3422429, 4278037, 5347553, 6684443,
    8355563, 10444457, 13055587, 16319519
};

/* One key/value slot inside a bucket. */
typedef struct hash_entry {
    int active;   /* non-zero while the slot holds a live key/value pair */
    void *key;    /* key pointer exactly as passed to hash_insert() */
    void *value;  /* value pointer exactly as passed to hash_insert() */
} hash_entry;

/* A bucket: a fixed-size array of entry slots that share a bucket
   index. */
typedef struct hash_bucket {
    long length;  /* slot count used as the scan bound; private_hash_new()
                     sets it to BUCKET_CHAIN_LENGTH */
    hash_entry entries[BUCKET_CHAIN_LENGTH];  /* the slots themselves */
} hash_bucket;

/* The table proper: an array of buckets plus the caller-supplied
   hashing and equality callbacks. */
typedef struct hash_table {
    /* Non-zero when `buckets` came from malloc() rather than from the
       MEM_pool allocator.  Declared unsigned: a signed one-bit
       bit-field can only represent 0 and -1, so storing 1 in it is an
       implementation-defined conversion. */
    unsigned int used_malloc:1;
    /* Number of elements in `buckets`. */
    long bucket_count:31;
    /* Array of bucket_count buckets. */
    struct hash_bucket *buckets;
    /* Maps a key pointer to its hash value. */
    unsigned int (*hash_function)(void*);
    /* Returns non-zero when the two keys are considered equal. */
    int (*same_function)(void*,void*);
} hash_table;

/* Incremented by every call to hash_new(); printed by hash_dump().
   Nothing in this file decrements it. */
long table_count=0;

/* Memory pool used by the non-malloc allocation path.  Created on
   first use by private_hash_new(). */
struct MEM_pool * mp=0;

/* Private: private_hash_new -- Create a new, empty hash table and
   return a pointer to it.

   hash_function        maps a key pointer to its hash value.
   same_function        returns non-zero when two key pointers refer
                        to equal keys.
   initial_bucket_count number of buckets to allocate; must be
                        positive.

   Returns NULL when initial_bucket_count is not positive or when
   memory cannot be obtained.  Nothing is leaked on failure: a table
   shell that was already allocated is released before returning. */

/*@null@*/
static struct hash_table *
private_hash_new(
  unsigned int (*hash_function)(void *),
  int (*same_function)(void *,void*),
  long initial_bucket_count)
{
  struct hash_table* new_table;
  long bucket, entry;

  if(NULL == mp) {
    mp = MEM_create_pool(1000000L);
  }

  /* Bucket indexes are computed modulo the bucket count, so a table
     without buckets could never be used. */
  if(initial_bucket_count <= 0) {
    return NULL;
  }

#ifdef USE_MALLOC
  new_table = (struct hash_table*)malloc(sizeof(struct hash_table));
  if(new_table == 0) { return 0; }
  /* Do not leave the flag uninitialised, even though the malloc
     build never consults it. */
  new_table->used_malloc = 1;

  new_table->buckets = (struct hash_bucket*)malloc(initial_bucket_count *
                                                   sizeof(struct hash_bucket));
  if(new_table->buckets == 0) {
#ifndef NDEBUG
    fprintf(stderr,"Table too large\n");
#endif
    /* Release the shell; it used to be leaked here. */
    free(new_table);
    return NULL;
  }
#else
  new_table = (struct hash_table*)
    MEM_pool_allocate_string(mp,sizeof(struct hash_table));
  if(new_table == 0) { return 0; }
  memset(new_table,0,sizeof(struct hash_table));

  new_table->buckets = (struct hash_bucket*)
    MEM_pool_allocate_string(mp,
                             initial_bucket_count * 
                             sizeof(struct hash_bucket));
  if(new_table->buckets == 0) {
    /* Try to allocate the buckets using malloc instead */
    new_table->used_malloc = 1;
    new_table->buckets = (struct hash_bucket*)
      malloc(initial_bucket_count *
             sizeof(struct hash_bucket));
    if(new_table->buckets == 0) {
#ifndef NDEBUG
      fprintf(stderr,"Table too large\n");
#endif
      /* Give the shell back to the pool; it used to be leaked here. */
      MEM_pool_release_string(mp,(char*)new_table);
      return NULL;
    }
  }
  else {
    new_table->used_malloc = 0;
  }
#endif

  memset(new_table->buckets,0,
         initial_bucket_count * sizeof(struct hash_bucket));

  new_table->bucket_count = initial_bucket_count;
  for(bucket=0; bucket < initial_bucket_count; ++bucket)
  {
    /* Every bucket starts with all of its slots free. */
    new_table->buckets[bucket].length=BUCKET_CHAIN_LENGTH;
    for(entry=0; entry < BUCKET_CHAIN_LENGTH; ++entry) 
    {
      new_table->buckets[bucket].entries[entry].active=0;
    }
  }
  new_table->hash_function = hash_function;
  new_table->same_function = same_function;
  return new_table;
}

/* hash_new -- Public constructor.  Builds a table with
   INITIAL_BUCKET_COUNT buckets that uses hash_function to hash the
   key pointers and same_function to compare them.  Returns whatever
   private_hash_new() returns, which is NULL on failure.  The global
   table_count is bumped on every call, successful or not. */

/*@null@*/
struct hash_table*
hash_new(
  unsigned int (*hash_function)(void *),
  int (*same_function)(void *,void*))
{
  struct hash_table *created;

  table_count += 1;
  created = private_hash_new(hash_function,
                             same_function,
                             INITIAL_BUCKET_COUNT);
  return created;
}

/* hash_destroy -- Destroy a hash table.  Releases the bucket array
   and the table structure itself.  The keys and values referenced by
   the entries are not touched; they remain the caller's
   responsibility.  Passing NULL is allowed and does nothing, in the
   same way free(NULL) does nothing. */
void hash_destroy(struct hash_table* table)
{
  if(NULL == table) {
    return;
  }
#ifdef USE_MALLOC
  free(table->buckets);
  free(table);
#else
  /* The buckets may have come from either allocator; the shell always
     comes from the pool. */
  if(table->used_malloc) {
    free(table->buckets);
  }
  else {
    MEM_pool_release_string(mp,(char*)table->buckets);
  }
  MEM_pool_release_string(mp,(char*)table);
#endif
}

/* hash_find -- Look a key up.  The key is hashed to select a bucket,
   and the bucket's slots are scanned in order for an active entry
   whose key matches according to the table's same_function.  Returns
   the value pointer stored with that entry, or a null pointer when
   no active entry matches. */
void *hash_find(
  struct hash_table *table, 
  void *key)
{
  unsigned long hashed, index, slot;

  hashed = table->hash_function(key);
  index = hashed % table->bucket_count;

  slot = 0;
  while(slot < (unsigned long)table->buckets[index].length)
  {
    /* The equality callback is only consulted for active slots. */
    if(table->buckets[index].entries[slot].active && 
       table->same_function(table->buckets[index].entries[slot].key,key))
    {
      return table->buckets[index].entries[slot].value;
    }
    ++slot;
  }
  return 0;
}

/* hash_remove -- Take a key out of the table.  The first active
   entry in the key's bucket that matches according to same_function
   is marked inactive; neither the key nor the value is freed.
   Returns the key pointer that was stored in the entry (which need
   not be the pointer passed in), or NULL when no active entry
   matches. */
void* hash_remove(
  struct hash_table* table,
  void * key)
{
  unsigned long hashed, index, slot;

  hashed = table->hash_function(key);
  index = hashed % table->bucket_count;

  slot = 0;
  while(slot < (unsigned long)table->buckets[index].length)
  {
    if(table->buckets[index].entries[slot].active && 
       table->same_function(table->buckets[index].entries[slot].key, key))
    {
      /* Deactivate the slot; its key and value pointers stay as they are. */
      table->buckets[index].entries[slot].active = 0;
      return table->buckets[index].entries[slot].key;
    }
    ++slot;
  }
  return NULL;
}

/* Look up a new size for a hash table when it needs to be resized.
   The primesizes table near the top of this file lists the candidate
   sizes in increasing order; the first entry strictly greater than
   `size` is returned.  Returns 0 when `size` is already at or beyond
   the last entry.  If tuning is required, it is probably best to
   adjust the entries of that table directly. */
static long nextSize(long size)
{
  /* Element count, not byte count: bounding the loop with
     sizeof(primesizes) alone would read past the end of the array. */
  const size_t count = sizeof(primesizes) / sizeof(primesizes[0]);
  size_t i;

  for(i=0; i < count; ++i)
  {
    if(primesizes[i] > size)
      return primesizes[i];
  }
  /* No more prime numbers in the table: */
  return 0;
}

/* Private: hash_resize -- Resize a hash table.  Expects a pointer to
   to a hash table.  The original hash table's guts are destroyed and
   replaced with larger guts that have been rehashed.  If the new size
   argument is zero, the newly created hash table has a number of
   buckets equal to the size of the next prime number that is 1 1/4
   times as large as the last size.  If the new size argument is not
   zero, the new table has that many buckets. */
static void hash_resize(
  struct hash_table *table, 
  long new_size)
{
  long computed_new_size;
  long computed_chain_length;
  struct hash_table* new_table;
  long bucket, entry;

/*  static int count = 0;
  count++;*/

  if(0==new_size)
  {
    computed_new_size = nextSize(table->bucket_count);
    if(0==computed_new_size)
    {
      computed_new_size = table->bucket_count;
      /* We have run out of prime numbers in our table.  From now on,
         increase the length of the chain of entries in the bucket by
         doubling the size of the buckets, instead of increasing the
         breadth of the table with a new prime number.  This should
         avoid problems with some compilers that only allow 32k maximum
         array indexes. */

      /* N.B. This is untested, and may not be a good idea. */
      computed_chain_length = table->buckets[0].length * 2;
    }
    else
    {
      computed_chain_length = table->buckets[0].length;
    }
  }
  else
  {
    computed_new_size = new_size;
    computed_chain_length = table->buckets[0].length;
  }
  new_table = private_hash_new(table->hash_function, 
                               table->same_function,
                               computed_new_size);

  /* Copy the contents of the original table into the new table by
     inserting each element.*/
  for(bucket=0; bucket < table->bucket_count; ++bucket)
  {
    for(entry=0; entry < table->buckets[bucket].length; ++entry)
    {
      if(table->buckets[bucket].entries[entry].active)
      {
        hash_insert(new_table,
                    table->buckets[bucket].entries[entry].key,
                    table->buckets[bucket].entries[entry].value);
      }
    }
  }
  /* free the guts of the old table. */
#ifdef USE_MALLOC
  free(table->buckets);
#else
  MEM_pool_release_string(mp,(char*)table->buckets);
#endif
  /* copy the guts of the new table into the guts of the old table. */
  table->bucket_count = new_table->bucket_count;
  table->buckets = new_table->buckets;
  /* free the outer shell of the new table. */
#ifdef USE_MALLOC
  free(new_table);
#else
  MEM_pool_release_string(mp,(char*)new_table);
#endif
  return;
}

/* hash_insert -- Insert an entry into a hash table.  If there are no
   more available entries in the selected bucket, the the table is
   resized, at potentially enormous expense. */
void hash_insert(
  struct hash_table *table, 
  void *key,
  void *value)
{
  unsigned long hash_value, bucket, entry;

  hash_value = table->hash_function(key);
  bucket = hash_value % table->bucket_count;

  /* Is the key already in the table?  If so, replace the value
     associated with the key. */
  for(entry=0; entry < (unsigned long)table->buckets[bucket].length; ++entry)
  {
    if(table->buckets[bucket].entries[entry].active &&
       table->same_function(table->buckets[bucket].entries[entry].key,key))
    {
      table->buckets[bucket].entries[entry].value = value;
      return;
    }
  }
  /* Didn't find the key actively in the table.  Go back and find the
     first entry that isn't active.  If we find one, set the key and
     value, then return. */
  for(entry=0; entry < (unsigned long)table->buckets[bucket].length; ++entry)
  {
    if(!table->buckets[bucket].entries[entry].active)
    {
      table->buckets[bucket].entries[entry].key   = key;
      table->buckets[bucket].entries[entry].value = value;
      table->buckets[bucket].entries[entry].active = 1;
      return;
    }
  }
  /* Oh no! There wasn't an empty slot in the bucket, we need to
     resize the table and then rehash the whole thing. */
  hash_resize(table,0);
  hash_insert(table,key,value);
  return;
}

/* Some hash functions that will most likely be generally useful. */

/* Rotating hash function for NUL-terminated strings.  key must point
   to a NUL-terminated string.  The hash starts as the string length
   and, for every character, is shifted left by five bits and XORed
   with its own top five bits and with the character.

   The arithmetic is carried out on unsigned int.  This function used
   to shift a signed int, which is undefined behaviour once the high
   bits are set.  To keep the hash values that code produced on
   compilers that sign-fill a right shift of a negative int, the sign
   fill is reproduced explicitly below.  The 5/27 split and the masks
   assume a 32-bit unsigned int. */
unsigned int hash_string(void *key)
{
  const char *s = key;
  const size_t len = strlen(s);
  unsigned int hash = (unsigned int)len;
  size_t i;

  for (i = 0; i < len; ++i) {
    unsigned int high = hash >> 27;

    if (hash & 0x80000000u) {
      /* Same bits an arithmetic shift of the negative value gives. */
      high |= 0xFFFFFFE0u;
    }
    /* A plain char is converted exactly as the int promotion did:
       sign-extended where char is signed. */
    hash = (hash << 5) ^ high ^ (unsigned int)s[i];
  }
  return hash;
}

/* Hash function for keys that point to an int.  All sizeof(int)
   bytes of the pointed-to value are mixed with a rotate-left-by-five
   and XOR step, starting from sizeof(int).

   This function used to copy the int into a NUL-terminated buffer
   and hash it as a string.  That stopped at the first zero byte, so
   many distinct values (for instance every multiple of 256) received
   the same hash.  Because a bucket holds a fixed number of entries,
   enough such keys could never be spread out by growing the table.
   Hash values for ints that contain a zero byte therefore differ
   from the ones the string-based version returned (and so do values
   with bytes >= 0x80 where plain char is signed).  The 5/27 split
   assumes a 32-bit unsigned int. */
unsigned int hash_integer(void *i)
{
  unsigned char bytes[sizeof(int)];
  unsigned int hash = (unsigned int)sizeof(int);
  size_t n;

  memcpy(bytes, i, sizeof(int));
  for (n = 0; n < sizeof(int); ++n) {
    hash = ((hash << 5) | (hash >> 27)) ^ bytes[n];
  }
  return hash;
}

/* Some equality functions that will most likely be useful. */

/* Equality callback for keys that are NUL-terminated strings.
   Returns 1 when both strings have identical contents and 0
   otherwise. */
int same_string(void *a,void*b)
{
  const char *lhs = a;
  const char *rhs = b;

  return strcmp(lhs, rhs) == 0;
}

/* Equality callback for keys that point to int.  Returns 1 when the
   two pointed-to values are equal and 0 otherwise. */
int same_integer(void *a,void*b)
{
  const int *lhs = a;
  const int *rhs = b;

  if (*lhs != *rhs) {
    return 0;
  }
  return 1;
}

/* Private: count how many slots in the whole table are currently
   marked active.  Every bucket is scanned over BUCKET_CHAIN_LENGTH
   slots. */
static int count_active_entries(struct hash_table *table) {
  int total_buckets = table->bucket_count;
  int bucket_index = 0;
  int active_total = 0;

  while (bucket_index < total_buckets) {
    int slot = 0;

    while (slot < BUCKET_CHAIN_LENGTH) {
      if (table->buckets[bucket_index].entries[slot].active) {
        active_total += 1;
      }
      slot++;
    }
    bucket_index++;
  }
  return active_total;
}

/* hash_keys -- Collect the keys of every active entry.

   Returns an array obtained from malloc() that holds the stored key
   pointers in bucket order, followed by a terminating NULL, and
   stores the number of keys in *count.  Nothing in this file frees
   the array, so that is left to the caller; the keys themselves are
   the pointers held by the table and are not copied.

   Returns NULL and sets *count to 0 when the array cannot be
   allocated. */
char** hash_keys(struct hash_table* table, int *count) {
  int entries_count = count_active_entries(table);
  int b = table->bucket_count;
  int i, j, c;

  char ** result;

  result = (char **) malloc(((size_t)entries_count + 1) * sizeof(char*));
  if(NULL == result) {
    /* The array used to be written to without checking the
       allocation. */
    *count = 0;
    return NULL;
  }
  c=0;
  for(i=0; i<b; ++i) {
    for(j=0; j<BUCKET_CHAIN_LENGTH; ++j) {
      if(table->buckets[i].entries[j].active) {
        result[c] = table->buckets[i].entries[j].key;
        c++;
      }
    }
  }
  result[entries_count] = NULL;
  *count = entries_count;
  return result;
}

#ifndef NDEBUG

/* Debugging routines.  These could be improved considerably. */

/* Debug helper: write one character to stderr for a slot, "*" when
   it is active and "-" when it is not.  Returns 1 for an active slot
   and 0 otherwise so the caller can total the slots in use. */
static int hash_entry_dump(hash_entry *e) {
  const int in_use = e->active ? 1 : 0;

  fputs(in_use ? "*" : "-", stderr);
  return in_use;
}

/* Debug helper: write one line to stderr describing a bucket -- its
   length, one marker character per slot (see hash_entry_dump), and
   the number of active slots. */
static void hash_bucket_dump(hash_bucket* b) {
  int slot = 0;
  int in_use = 0;

  fprintf(stderr, "length: %ld (", b->length);
  while (slot < b->length) {
    in_use += hash_entry_dump(&(b->entries[slot]));
    slot++;
  }
  fprintf(stderr, ") used: %d\n", in_use);
}

/* hash_dump -- Debug aid.  Writes the global table count, the
   table's bucket count, and one line per bucket (index followed by
   the hash_bucket_dump output) to stderr. */
void hash_dump(struct hash_table* table) {
  int i;
  fprintf(stderr, "table count = %ld\n",table_count);
  /* bucket_count is a bit-field and may be promoted to int when it
     is passed through "...", which would not match %ld; convert it
     explicitly. */
  fprintf(stderr, "bucket_count = %ld\n", (long)table->bucket_count);
  for(i=0; i< table->bucket_count; ++i) {
    fprintf(stderr,"%05d ",i);
    hash_bucket_dump(&(table->buckets[i]));
  }
}

#endif

