#include <bench/Benchmark.h>
#include <tagcoll/stringf.h>
#include <tagcoll/InputMerger.h>
#include <tagcoll/TextFormat.h>
#include <tagcoll/BasicStringDiskIndex.h>
#include <tagcoll/TDBIndexer.h>
#include <tagcoll/TDBReadonlyDiskIndex.h>
#include <tagcoll/CardinalityStore.h>

#include <vector>
#include <iostream>
#include <math.h>

using namespace std;
using namespace stringf;
using namespace Tagcoll;

namespace bench_collection {

/* Some utility random generator functions */

/**
 * Return a pseudo-random integer in the half-open range [min, max).
 *
 * The random fraction rand() / (RAND_MAX + 1.0) lies in [0, 1); it is scaled
 * by the width of the range (max - min) rather than by max alone, so that the
 * result cannot go past max when min is not zero.  For min == 0 this yields
 * exactly the same values as scaling by max.
 *
 * @param min  lowest value that can be returned
 * @param max  exclusive upper bound; when max == min, min is returned
 * @return a value v with min <= v < max (assuming min <= max)
 */
static inline int rnd(int min, int max)
{
	// Compute the width in double so that extreme bounds cannot overflow int
	double width = (double)max - (double)min;
	return min + (int) (width * (rand() / (RAND_MAX + 1.0)));
}

/**
 * Return a pseudo-random real number in the half-open range [min, max).
 *
 * The random fraction rand() / (RAND_MAX + 1.0) lies in [0, 1) and is scaled
 * by the width of the range.  The result is kept as a double: truncating it
 * to int (as the integer overload does) would only ever produce whole-number
 * offsets from min.
 *
 * @param min  lowest value that can be returned
 * @param max  exclusive upper bound; when max == min, min is returned
 * @return a value v with min <= v < max (assuming min <= max)
 */
static inline double rnd(double min, double max)
{
	return min + (max - min) * (rand() / (RAND_MAX + 1.0));
}

/**
 * Common base class of the collection benchmarks.
 *
 * It holds the sample data shared by all benchmarks as static members.  The
 * data is generated once, by the first CollectionBench that gets constructed
 * (detected by the tag vector still being empty), and reused by every later
 * instance.
 */
class CollectionBench : public Benchmark
{
protected:
	// Vector of available tags.  The tags at the beginning are the most
	// popular ones.
	static vector<string> tags;
	// Vector of available items.  The items at the beginning are the ones with
	// most tags.
	static vector<string> items;
	// Sample collection
	static InputMerger<string, string> coll;
	// Precomputed tagsets
	static vector< OpSet<string> > tagsets;
	// Precomputed itemsets
	static vector< OpSet<string> > itemsets;

	/**
	 * Compute a random number between min and max (inclusive), skewed
	 * towards min.
	 *
	 * The value is min + (max - min) * log(1 - U) / log(p), with U taken
	 * from drand48(), rounded to the nearest integer with rint() and then
	 * clamped to [min, max].  For 0 < p < 1 a smaller p concentrates the
	 * results closer to min.
	 *
	 * @param min  lowest value returned
	 * @param max  highest value returned
	 * @param p    parameter of the distribution; meaningful for 0 < p < 1
	 * @return a value in [min, max] (assuming min <= max); if the
	 *         computation yields NaN (for example with a negative p), max
	 *         is returned
	 */
	int rndGeom(int min, int max, double p)
	{
		double scaled = rint((double)min + ((double)max-(double)min)*(log(1.0 - drand48()) / log(p)));
		// Clamp while the value is still a double: the logarithm ratio is
		// unbounded, and converting a double that does not fit in an int is
		// undefined behaviour.  The negated comparison also catches NaN.
		if (!(scaled < (double)max))
			return max;
		// Cannot happen for 0 < p < 1, but a p greater than 1 would push the
		// value below min, and callers use the result as a count or an index
		if (scaled < (double)min)
			return min;
		return (int)scaled;
	}
	
	// Send the shared sample collection to the given consumer
	void outputROCollection(Tagcoll::Consumer<string, string>& cons);
	// Time the read-only query methods of the given collection
	void benchROCollection(const Tagcoll::ReadonlyCollection<string, string>& coll);
	// Benchmarks of the read-write collection interface
	void benchCollection(Tagcoll::Collection<string, string>& coll);

	/**
	 * Build a random tagset.
	 *
	 * The number of draws is rndGeom(1, 20, p); each draw picks a tag index
	 * with rndGeom(0, tags.size() - 1, 0.01), which favours the tags at the
	 * beginning of the vector.  The same tag can be drawn more than once
	 * (presumably OpSet then keeps a single copy of it -- confirm).
	 *
	 * Requires the tag vector to be non-empty.
	 */
	OpSet<string> makeTagset(double p)
	{
		OpSet<string> res;
		size_t ntags = rndGeom(1, 20, p);
		for (size_t j = 0; j < ntags; j++)
		{
			int idx = rndGeom(0, tags.size() - 1, 0.01);
			res += tags[idx];
		}
		return res;
	}

	/**
	 * Build a random itemset.
	 *
	 * The number of draws is rndGeom(1, 10, p); each draw picks an item
	 * index with rndGeom(0, items.size() - 1, 0.001), which favours the
	 * items at the beginning of the vector.
	 *
	 * Requires the item vector to be non-empty.
	 */
	OpSet<string> makeItemset(double p)
	{
		OpSet<string> res;
		size_t nitems = rndGeom(1, 10, p);
		for (size_t j = 0; j < nitems; j++)
		{
			int idx = rndGeom(0, items.size() - 1, 0.001);
			res += items[idx];
		}
		return res;
	}

	/**
	 * Create the benchmark with the given name and, if no earlier instance
	 * has done it yet, generate the shared sample data.
	 */
	CollectionBench(const std::string& name)
		: Benchmark(name)
	{
		// The static data is shared: only the first instance generates it
		if (tags.empty())
		{
			// Create the tag vocabulary
			for (int i = 0; i < 500; i++)
				tags.push_back("tag" + fmt(i));

			// Create the package set
			for (int i = 0; i < 10000; i++)
				items.push_back("pkg" + fmt(i));

			// Create the test collection.  The parameter passed to
			// makeTagset goes from 0.5 for the first item down towards 0 for
			// the last one, so the first items get the most tags.
			for (size_t i = 0; i < items.size(); i++)
				coll.consume(
						items[i],
						makeTagset((((double)items.size() - (double)i) / (double)items.size())/2)
						);
			
			// Precompute tagsets used for benchmarking
			for (size_t i = 0; i < 100; i++)
				tagsets.push_back(makeTagset(0.02));
			
			// Precompute itemsets used for benchmarking
			for (size_t i = 0; i < 100; i++)
				itemsets.push_back(makeItemset(0.02));
		}
	}
};

// Definitions of the static data members declared in CollectionBench.  They
// are default-constructed here and filled in by the CollectionBench
// constructor the first time it finds the tag vector empty.
vector<string> CollectionBench::tags;
vector<string> CollectionBench::items;
InputMerger<string, string> CollectionBench::coll;
vector< OpSet<string> > CollectionBench::tagsets;
vector< OpSet<string> > CollectionBench::itemsets;

// Send the contents of the shared sample collection to the consumer `cons`.
// Benchmarks use this to populate the collection or indexer under test.
void CollectionBench::outputROCollection(Tagcoll::Consumer<string, string>& cons)
{
	CollectionBench::coll.output(cons);
}

// Exercise the read-only query interface of the given collection, one timer
// per method.  Every query result is discarded: only the time spent in the
// calls is of interest.
//
// Note that the parameter `coll` hides the static sample collection of the
// same name; everything below queries the collection passed by the caller.
void CollectionBench::benchROCollection(const Tagcoll::ReadonlyCollection<string, string>& coll)
{
	typedef vector<string>::const_iterator StringIter;
	typedef vector< OpSet<string> >::const_iterator SetIter;

	// Read-only views of the shared test data
	const vector<string>& allTags = tags;
	const vector<string>& allItems = items;
	const vector< OpSet<string> >& someTagsets = tagsets;
	const vector< OpSet<string> >& someItemsets = itemsets;

	// Each timer lives in its own scope, next to the calls it measures
	{
		Timer t = mktimer("hasTag");
		for (StringIter tag = allTags.begin(); tag != allTags.end(); ++tag)
			coll.hasTag(*tag);
	}
	{
		Timer t = mktimer("getTags[item]");
		for (StringIter item = allItems.begin(); item != allItems.end(); ++item)
			coll.getTags(*item);
	}
	{
		Timer t = mktimer("getTags[items]");
		for (SetIter is = someItemsets.begin(); is != someItemsets.end(); ++is)
			coll.getTags(*is);
	}
	{
		Timer t = mktimer("getItems[tag]");
		for (StringIter tag = allTags.begin(); tag != allTags.end(); ++tag)
			coll.getItems(*tag);
	}
	{
		Timer t = mktimer("getItems[tags]");
		for (SetIter ts = someTagsets.begin(); ts != someTagsets.end(); ++ts)
			coll.getItems(*ts);
	}
	{
		Timer t = mktimer("getTaggedItems");
		coll.getTaggedItems();
	}
	{
		Timer t = mktimer("getAllTags");
		coll.getAllTags();
	}
	{
		Timer t = mktimer("getCardinality");
		for (StringIter tag = allTags.begin(); tag != allTags.end(); ++tag)
			coll.getCardinality(*tag);
	}
	{
		Timer t = mktimer("getCompanionTags");
		for (SetIter ts = someTagsets.begin(); ts != someTagsets.end(); ++ts)
			coll.getCompanionTags(*ts);
	}
	// TODO: getRelatedItems

	// Consumer receiving the output of the two output benchmarks
	Sink<string, string> sink;
	{
		Timer t = mktimer("output");
		// The whole collection is output once per precomputed tagset, even
		// though the tagsets themselves are not used here
		for (size_t rounds = someTagsets.size(); rounds > 0; --rounds)
			coll.output(sink);
	}
	{
		Timer t = mktimer("outputHavingTags");
		for (SetIter ts = someTagsets.begin(); ts != someTagsets.end(); ++ts)
			coll.outputHavingTags(*ts, sink);
	}
}

// Placeholder for benchmarks of the read-write Collection interface.  The body
// is empty, so nothing is measured, and none of the benchmarks visible in this
// file calls it.
void CollectionBench::benchCollection(Tagcoll::Collection<string, string>& coll)
{
}

class benchInputMerger : public CollectionBench
{
protected:
	virtual void main()
	{
		Timer main = mktimer("total");

		Timer t1 = mktimer("instantiating collection");
		InputMerger<string, string> coll;
		t1.done();

		Timer t2 = mktimer("populating collection");
		outputROCollection(coll);
		t2.done();

		{
			Timer t = mktimer("read only collection");
			benchROCollection(coll);
		}
	}

public:
	benchInputMerger() : CollectionBench("InputMerger") {}
};

class benchBasicStringDiskIndex : public CollectionBench
{
protected:
	virtual void main()
	{
		Timer main = mktimer("total");
		{
			BasicStringDiskIndexer indexer;

			Timer t1 = mktimer("indexing collection");
			outputROCollection(indexer);
			t1.done();
			
			Timer t2 = mktimer("writing indexed collection");
			indexer.write("bench-basicstringdiskindex.tmp");
			t2.done();
		}

		Timer t3 = mktimer("instantiating indexed collection");
		BasicStringDiskIndex coll("bench-basicstringdiskindex.tmp");
		t3.done();

		{
			Timer t = mktimer("read only collection");
			benchROCollection(coll);
		}
	}

public:
	benchBasicStringDiskIndex() : CollectionBench("BasicStringDiskIndex") {}
	~benchBasicStringDiskIndex() {
		unlink("bench-basicstringdiskindex.tmp");
	}
};


class benchTDBReadonlyDiskIndex : public CollectionBench
{
protected:
	virtual void main()
	{
		Timer main = mktimer("total");
		TrivialConverter<string, string> conv;
		{
			TDBIndexer<string, string> indexer;

			Timer t1 = mktimer("indexing collection");
			outputROCollection(indexer);
			t1.done();
			
			Timer t2 = mktimer("writing indexed collection");
			indexer.writeIndex(conv, conv,
					"bench-TDBReadonlyDiskIndex1.tmp",
					"bench-TDBReadonlyDiskIndex2.tmp");
			t2.done();
		}

		Timer t3 = mktimer("instantiating indexed collection");
		TDBReadonlyDiskIndex<string, string> coll(
				"bench-TDBReadonlyDiskIndex1.tmp",
				"bench-TDBReadonlyDiskIndex2.tmp",
				conv, conv, conv, conv);
		t3.done();

		{
			Timer t = mktimer("read only collection");
			benchROCollection(coll);
		}
	}

public:
	benchTDBReadonlyDiskIndex() : CollectionBench("TDBReadonlyDiskIndex") {}
	~benchTDBReadonlyDiskIndex() {
		unlink("bench-TDBReadonlyDiskIndex1.tmp");
		unlink("bench-TDBReadonlyDiskIndex2.tmp");
	}
};

class benchTDBIndexer : public CollectionBench
{
protected:
	virtual void main()
	{
		Timer main = mktimer("total");
		TDBIndexer<string, string> coll;

		Timer t1 = mktimer("indexing collection");
		outputROCollection(coll);
		t1.done();
			
		{
			Timer t = mktimer("read only collection");
			benchROCollection(coll);
		}
	}

public:
	benchTDBIndexer() : CollectionBench("TDBIndexer") {}
};


class benchCardinalityStore : public CollectionBench
{
protected:
	virtual void main()
	{
		Timer main = mktimer("total");
		CardinalityStore<string, string> coll;

		Timer t1 = mktimer("indexing collection");
		outputROCollection(coll);
		t1.done();
			
		{
			Timer t = mktimer("read only collection");
			benchROCollection(coll);
		}
	}

public:
	benchCardinalityStore() : CollectionBench("CardinalityStore") {}
};

/* Parent benchmark, named "collection", grouping all the benchmarks above */
class top : public Benchmark
{
public:
	top();
};

// Create one instance of every collection benchmark and add it as a child.
// The first child constructed is also the one that generates the sample data
// shared through CollectionBench.
// NOTE(review): addChild presumably takes ownership of the pointers -- confirm
// against the Benchmark class.
top::top()
	: Benchmark("collection")
{
	addChild(new benchInputMerger());
	addChild(new benchBasicStringDiskIndex());
	addChild(new benchTDBReadonlyDiskIndex());
	addChild(new benchTDBIndexer());
	addChild(new benchCardinalityStore());
}

// Build the whole "collection" benchmark tree during static initialisation
// and hand it to RegisterRoot.  This definition comes after the static data
// members of CollectionBench in this file, so they are already constructed
// when the benchmark constructors fill them in.
// NOTE(review): RegisterRoot is declared elsewhere; presumably it registers
// the tree with the benchmark runner -- confirm.
static RegisterRoot r(new top());

}

/* vim:set ts=4 sw=4: */
