/***************************************************************************
 Mutella - A commandline/HTTP client for the Gnutella filesharing network.

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 gnusearch.cpp  -  Representation of a search in progress

    begin                : Wed May 30 2001
    copyright            : (C) 2001 by 
    email                : maksik@gmx.co.uk
 ***************************************************************************/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <time.h>

#include "mutella.h"
#include "structures.h"

#include "asyncsocket.h"
#include "packet.h"
#include "gnusearch.h"
#include "gnudirector.h"
#include "property.h"
#include "preferences.h"
#include "packet.h"
#include "conversions.h"
#include "common.h"
#include "gnumarkedfiles.h"

// Builds a search object bound to the given director.
//   pControl             - owning director; also the source of the preferences pointer
//   search               - initial search text (normalised by SetSearchString)
//   sha1                 - optional hash to look for; ignored unless it is valid
//   type                 - stored as m_nType
//   size, sizeMode       - size filter value and LIMIT_* mode
//   bForceStrictGrouping - when true, strict SHA1 grouping is used regardless of preferences
MGnuSearch::MGnuSearch(MGnuDirector* pControl, const CString& search, const SHA1Hash& sha1, int type, int size/*=0*/, int sizeMode/*=LIMIT_NONE*/, bool bForceStrictGrouping /*=false*/)
{
	// collaborators
	m_pDirector = pControl;
	m_pPrefs = m_pDirector->GetPrefs();

	// identity and bookkeeping
	m_nType = type;
	m_dwID = 0;
	m_nFileHandle = 0;
	m_dwChangeTime = xtime();

	// result counters and flags
	m_nHits = 0;
	m_nGroups = 0;
	m_bUpdated = false;
	m_bAutoget = false;

	// size filter
	m_SizeFilterValue  = size;
	m_SizeFilterMode   = sizeMode;

	// grouping policy: a forced strict mode overrides the preference
	m_bForceStrictGrouping = bForceStrictGrouping;
	if (!bForceStrictGrouping)
		m_bStrictSha1Grouping = m_pPrefs->m_bStrictSha1Grouping;
	else
		m_bStrictSha1Grouping = true;

	// outgoing query buffer (256 bytes, filled by SendSpecificQuery)
	m_Packet = new BYTE[256];
	ASSERT(m_Packet);
	m_nPacketLength = 0;

	// only remember hashes that are actually valid
	if (sha1.isValid())
		m_setSha1.insert(sha1);

	// m_szSearch must be NULL before SetSearchString() runs, because it
	// frees the previous buffer; no locking needed during construction
	m_szSearch = NULL;
	SetSearchString(search, false);
}

// Releases the two heap buffers owned by the search:
// the lower-cased search text and the query packet.
MGnuSearch::~MGnuSearch()
{
	delete [] m_szSearch;
	delete [] m_Packet;
}

void MGnuSearch::SetSearchString(const CString& s, bool bLockMutex /*=true*/)
{
	if (bLockMutex)
		m_mutex.lock();
	m_Search   = StripWhite(s);
	if (m_szSearch)
		delete [] m_szSearch;
	m_szSearch = new char[m_Search.length()+1];
	ASSERT(m_szSearch);
	strcpy(m_szSearch, m_Search.c_str());
	MakeLower(m_szSearch);
	m_PlusWords.clear();
	m_MinusWords.clear();
	MakeWordList(m_szSearch, m_PlusWords, m_MinusWords);
	if (bLockMutex)
		m_mutex.unlock();
}

// Updates the search text and the associated file name under one lock,
// so both change together.
void MGnuSearch::SetSearchStringAndFilename(const CString& s, const CString& sFilename)
{
	MLock guard(m_mutex);
	// the mutex is already held here, so the nested call must not lock again
	const bool bLockAgain = false;
	SetSearchString(s, bLockAgain);
	m_Filename = sFilename;
}

// Stores the file name associated with this search (guarded by m_mutex).
void MGnuSearch::SetFilename(const CString& sFilename)
{
	MLock guard(m_mutex);
	m_Filename = sFilename;
}

// Returns a copy of the current search text, taken while holding m_mutex.
CString MGnuSearch::GetSearchString()
{
	MLock guard(m_mutex);
	CString sSnapshot = m_Search;
	return sSnapshot;
}

// Returns a copy of the file name associated with this search,
// taken while holding m_mutex.
CString MGnuSearch::GetFilename()
{
	MLock guard(m_mutex);
	CString sSnapshot = m_Filename;
	return sSnapshot;
}

void MGnuSearch::AddSha1Hash(const SHA1Hash& sha1)
{
	if (sha1.isValid()) {
		m_mutex.lock();
		m_setSha1.insert(sha1);
		m_mutex.unlock();
	}
}

void MGnuSearch::RemoveSha1Hash(const SHA1Hash& sha1)
{
	m_mutex.lock();
	m_setSha1.erase(sha1);
	m_mutex.unlock();
}

// Tells whether the given hash is one of the SHA1 values this search looks for.
bool MGnuSearch::IsInList(const SHA1Hash& sha1)
{
	MLock guard(m_mutex);
	// a set holds each key at most once, so count() is either 0 or 1
	return m_setSha1.count(sha1) != 0;
}

void MGnuSearch::SendQuery()
{
	// Send query through network
	// but dont send queries smaller than 4 chars
	CString sQuery;
	int nWords = m_PlusWords.size();
	for (int i = 0; i<nWords; ++i)
	{
		sQuery += m_PlusWords[i];
		if (i+1<nWords)
			sQuery += " ";
	}
	if (sQuery.length() >= 4)
	{
		SendSpecificQuery(sQuery, SHA1Hash());
	}
	m_mutex.lock();
	for (set<SHA1Hash>::iterator it = m_setSha1.begin(); it != m_setSha1.end(); ++it)
		SendSpecificQuery("", *it);
	m_mutex.unlock();
}

// Assembles one query in m_Packet and hands it to the director for broadcast.
//   sText - search text; may be empty for a hash-only query. It is truncated
//           if it would not fit into the 256-byte packet buffer.
//   sha1  - when valid, the query carries a "urn:sha1:<hash>" extension,
//           otherwise a bare "urn:" extension.
// Buffer layout written here, starting at offset 25:
//   [text]['\0'][extension including its terminating '\0']
// The first 25 bytes are not written by this function (the original comment
// calls them the packet header; presumably filled in downstream -- confirm).
void MGnuSearch::SendSpecificQuery(const CString& sText, const SHA1Hash& sha1)
{
	const int nBufferSize = 256; // size of m_Packet, see constructor
	const int nHeaderLen  = 25;
	const bool bHaveSha1  = sha1.isValid();
	// "urn:sha1:" (9) + hash string with its trailing 0 (33), or "urn:" with trailing 0 (5)
	const int nExtensionLen = bHaveSha1 ? 9 + 33 : 5;
	// Room left for the text after header, text terminator and extension.
	// Computing the budget up front keeps the length non-negative; subtracting
	// the extension size from an already short text length could go below zero
	// and turn into a huge memcpy size.
	const int nMaxTextLen = nBufferSize - nHeaderLen - 1 - nExtensionLen;
	int nSearchTextLen = sText.length();
	if (nSearchTextLen > nMaxTextLen)
		nSearchTextLen = nMaxTextLen;

	m_nPacketLength = nHeaderLen;
	if (nSearchTextLen > 0)
	{
		memcpy(m_Packet + m_nPacketLength, sText.c_str(), nSearchTextLen);
		m_nPacketLength += nSearchTextLen;
	}
	m_Packet[m_nPacketLength] = '\0';
	++m_nPacketLength;
	if (bHaveSha1)
	{
		ASSERT(m_nPacketLength <=256-9-33 );
		memcpy(m_Packet + m_nPacketLength, "urn:sha1:", 9);
		m_nPacketLength += 9;
		// 33 bytes: assumes toStr() yields a 32-character string plus its trailing 0
		memcpy(m_Packet + m_nPacketLength, sha1.toStr().c_str(), 33);
		m_nPacketLength += 33;
	}
	else
	{
		// 5 bytes: the literal "urn:" together with its trailing 0
		memcpy(m_Packet + m_nPacketLength, "urn:", 5);
		m_nPacketLength += 5;
	}
	ASSERT(m_nPacketLength <=256 );
	//
	m_pDirector->Broadcast_LocalQuery(m_Packet, m_nPacketLength);
}

// Tells whether the number of stored results has reached the
// per-search maximum taken from the preferences.
bool MGnuSearch::IsFull()
{
	MLock guard(m_mutex);
	const bool bLimitReached = (m_vecResults.size() >= m_pPrefs->m_nMaxPerSearchResults);
	return bLimitReached;
}

// Drops all collected results together with the grouping indexes and
// resets the counters. The search criteria themselves are left alone.
void MGnuSearch::Clear()
{
	MLock guard(m_mutex);
	// results and groups
	m_vecResults.clear();
	m_vecGroups.clear();
	// lookup indexes used for duplicate detection and grouping
	m_mapGroupIndex.clear();
	m_mapSha1.clear();
	m_mapSize.clear();
	m_mapStrings.clear();
	// counters and state
	m_nGroups = 0;
	m_nHits = 0;
	m_bUpdated = false;
	m_dwChangeTime = xtime();
	// re-read the grouping preference unless strict grouping was forced at construction
	if (!m_bForceStrictGrouping)
		m_bStrictSha1Grouping = m_pPrefs->m_bStrictSha1Grouping;
}

bool MGnuSearch::UpdateExisting(const Result & result)
{
	m_mutex.lock();
	// build reduced set of indexes
	tIndexSet setSimilar;
	if (result.Sha1.isValid())
	{
		const tIndexSet& setBySha1 = m_mapSha1[result.Sha1];
		const tIndexSet& setBySize = m_mapSize[result.Size];
		set_intersection(setBySize.begin(), setBySize.end(),
						 setBySha1.begin(), setBySha1.end(),
						 insert_iterator<tIndexSet>(setSimilar, setSimilar.begin()));
	}
	else
		setSimilar = m_mapSize[result.Size];
	// iterate through the set
	Result* pRes;
	for (tIndexSet::iterator it = setSimilar.begin(); it != setSimilar.end(); ++it)
	{
		ASSERT(*it < m_vecResults.size());
		pRes = &m_vecResults[*it];
		if (result.Port        == pRes->Port        &&
		    result.Host.S_addr == pRes->Host.S_addr &&
		    result.Name        == pRes->Name        )
		{
			// we have a hit, but we are not going to add it again
			// we not only update its index and change time
			pRes->FileIndex = result.FileIndex;
			pRes->ChangeTime = xtime();
			// but also a GUID and Origin
			pRes->PushID   = result.PushID;
			pRes->OriginID = result.OriginID;
			m_bUpdated = true;
			m_mutex.unlock();
			return true;
		}
	}
	m_mutex.unlock();
	return false;
}

// Predicate for the word filter: true when 'needle' does NOT occur in
// 'haystack', i.e. when find() reports a negative position.
static bool CheckNotSubstr(const CString& haystack, const CString& needle) {
	return haystack.find(needle) < 0;
}

// Tests an incoming result against this search and, if it matches,
// stores it (or refreshes the stored copy) and updates the grouping data.
//
// Returns false when the result is rejected by the size filter, the word
// filter, the strict SHA1 rule or the word lists. Returns true when the
// result was added or an existing entry was refreshed. When the search is
// already full, the return value is whatever UpdateExisting() reports.
bool MGnuSearch::CheckAgainstResult(const Result & result)
{
	// first check the size filter: it is cheap and does not get changed by the user
	// then check if we have found this result already. It's not cheap
	// and then continue with casual checking
	// TODO: optimize what we really should do first
	//
	// Size Filter
	if(m_SizeFilterMode != LIMIT_NONE)
	{
		if(!CheckLimit(m_SizeFilterMode, m_SizeFilterValue, result.Size))
			return false;
	}
	// if the search is full
	// NOTE(review): m_nHits is read here without holding m_mutex -- confirm this is acceptable
	if (m_nHits >= m_pPrefs->m_nMaxPerSearchResults)
	{
		// the search is full but we might still update the results
		return UpdateExisting(result);
	}

	// lower-cased copy of the name, used by the word filter and the word lists
	CString sNameLower = result.Name;
	MakeLower(sNameLower);
	// Word Filter
	if (m_pPrefs->m_bWordFilterActive)
	{
		/*set<CString> BadWords;
		m_pPrefs->m_setWordFilter.CopyTo(BadWords); // TODO: make it more efficient to eliminate copying of the set
		// POSSIBLE BUG: we assume that BadWords are all lower case
		for (set<CString>::iterator itWord = BadWords.begin(); itWord != BadWords.end(); ++itWord)
			if(result.NameLower.find(*itWord) != -1)
				return false;*/
		// presumably ForEach() returns false as soon as CheckNotSubstr does,
		// i.e. when a filter word occurs in the name -- verify against ForEach
		if (!m_pPrefs->m_setWordFilter.ForEach(sNameLower, CheckNotSubstr))
			return false;
	}

	// Sha1 Check:
	// important: based on preferences we either consider sha1 as a way to
	// broaden the search, or to narrow it
	m_mutex.lock();
	bool bSha1Match = false;
	// strict mode only applies to results that actually carry a hash
	bool bStrict = m_bStrictSha1Grouping && result.Sha1.isValid();
	if (result.Sha1.isValid())
		bSha1Match = (m_setSha1.find(result.Sha1) != m_setSha1.end());
	m_mutex.unlock();

	if (!bSha1Match)
	{
		// in strict mode a result with a hash we are not looking for is rejected
		if (bStrict)
			return false;
		// name Check
		// match against the exclusive word list, then against the inclusive word list
		if (MatchWordList(sNameLower, m_MinusWords, false) ||
			!MatchWordList(sNameLower, m_PlusWords)        )
			return false;
	}
	// now we have a definite hit
	// if it's an old hit -- just update the respective result
	if (UpdateExisting(result))
		return true;
		
	// nope, it's new -- add it
	// NOTE(review): the mutex was released inside UpdateExisting() and is taken
	// again here, so the "not yet known" check and the insertion are two
	// separate critical sections
	m_mutex.lock();
	m_vecResults.push_back(result);
	m_nHits = m_vecResults.size();
	m_dwChangeTime = xtime();
	m_vecResults.back().dwID = GenID();
	m_vecResults.back().ChangeTime = xtime();
	
	// "group" the result with the others
	// three candidate sets of existing result indexes are looked up: results
	// whose names yield the same word set, results of the same size, and
	// results with the same hash
	CString sSearch = MakeSearchOfFilename(result.Name);
	set<CString> setWords;
	// scratch copy on the stack; MakeWordList takes a non-const char buffer
	char * szSearch = (char*) alloca(sSearch.length()+1);
	strcpy(szSearch, sSearch.c_str());
	MakeWordList(szSearch, setWords);
	tIndexSet setByWords = m_mapStrings[setWords];

	tIndexSet setBySize = m_mapSize[result.Size];

	tIndexSet setBySha1;
	if (result.Sha1.isValid())
		setBySha1 = m_mapSha1[result.Sha1];

	// setGroup collects the existing results the new one should be grouped with:
	// same size AND same words (unless strict), plus same size AND same hash
	tIndexSet setGroup;
	insert_iterator<tIndexSet> iit(setGroup, setGroup.begin());

	if (!m_bStrictSha1Grouping)
		set_intersection(setBySize.begin(), setBySize.end(), setByWords.begin(), setByWords.end(), iit);
	if (result.Sha1.isValid())
		set_intersection(setBySize.begin(), setBySize.end(), setBySha1.begin(), setBySha1.end(), iit);
	if (setGroup.size())
	{
		// got a group!
		// but, it might be not complete because of 2 grouping mechanisms
		// on the other hand it might be that the result we have now will
		// cause "linking" of two (or more) groups
		// setInvGroups: indexes (into m_vecGroups) of the groups that members of setGroup already belong to
		tIndexSet setInvGroups;
		tIndexMap::iterator itGMI;
		for (tIndexSet::iterator itGI = setGroup.begin(); itGI != setGroup.end(); ++itGI)
			if ( (itGMI = m_mapGroupIndex.find(*itGI)) != m_mapGroupIndex.end())
				setInvGroups.insert(itGMI->second);
		//
		// when at least one group is involved, the first element of setInvGroups is the one that survives
		int nGrpIndex;
		if (setInvGroups.size())
			nGrpIndex = *setInvGroups.begin();
		//
		switch (setInvGroups.size()) 
		{
			case 0: {
					// none of the similar results is grouped yet: create a new
					// group holding them and the new result (index m_nHits-1)
					setGroup.insert(m_nHits-1);
					tGroup g;
					g.dwID = GenID();
					g.set  = setGroup;
					nGrpIndex = m_vecGroups.size();
					m_vecGroups.push_back(g);
					for (tIndexSet::iterator itGI = setGroup.begin(); itGI != setGroup.end(); ++itGI)
						m_mapGroupIndex[*itGI] = nGrpIndex;
				}
				break;
			default: {
				// two or more groups are involved: they get joined into one
				// the most complex case, which requires m_verArray
				// and consequent modification m_mapGroupIndex update
				//TRACE("Group Joining Activated!");
				//cout << "==== grouping diagnostics (pre-regrouping)\n";
				//cout << "m_nHits = " << m_nHits << endl;
				//cout << "-------------- group ---------------------\n";
				//copy(setGroup.begin(), setGroup.end(), std::ostream_iterator<int>(cout, " "));
				//cout << endl;
				//cout << "-ID:------ groups -------------------------\n";
				//for (tGroupVec::iterator itG = m_vecGroups.begin(); itG != m_vecGroups.end(); ++itG)
				//{
					//cout << itG->dwID << ": ";
					//copy(itG->set.begin(), itG->set.end(), std::ostream_iterator<int>(cout, " "));
					//cout << endl;
				//}
				//cout << "=========== end ==========================\n";
				// first remove the affected groups except the first one
				// (walking in reverse, presumably so that erasing does not
				// shift the positions still to be erased, nor nGrpIndex)
				for (tIndexSet::reverse_iterator itGR = setInvGroups.rbegin(); itGR != setInvGroups.rend(); ++itGR)
					if (nGrpIndex != *itGR)
						m_vecGroups.erase(m_vecGroups.begin() + *itGR);
				// update the remaining entry
				// NOTE(review): the surviving group's members are replaced by
				// setGroup only; members of the joined groups that are not in
				// setGroup appear to drop out of any group -- confirm this is intended
				m_vecGroups[nGrpIndex].set = setGroup;
				//cout << "==== grouping diagnostics (post-regrouping)\n";
				//cout << "-ID:------ groups -------------------------\n";
				//for (tGroupVec::iterator itG = m_vecGroups.begin(); itG != m_vecGroups.end(); ++itG)
				//{
					//cout << itG->dwID << ": ";
					//copy(itG->set.begin(), itG->set.end(), std::ostream_iterator<int>(cout, " "));
					//cout << endl;
				//}
				//cout << "=========== end ==========================\n";

				// now rebuild the groups index
				m_mapGroupIndex.clear();
				int i = 0;
				for (tGroupVec::iterator itG = m_vecGroups.begin(); itG != m_vecGroups.end(); ++itG)
				{
					for (tIndexSet::iterator itI = itG->set.begin(); itI != itG->set.end(); ++itI)
					{
						//cout << "*itI = " << *itI << "  m_mapGroupIndex.size() = " << m_mapGroupIndex.size() << endl;
						ASSERT(m_mapGroupIndex.find(*itI)==m_mapGroupIndex.end());
						m_mapGroupIndex[*itI] = i;
					}
					++i;
				}
			}
			// NOTE(review): there is no break above, so control falls through
			// into case 1, which adds the new result to the surviving group;
			// this looks intentional -- confirm
			case 1:{
				// a standard case of the element addition
				ASSERT(nGrpIndex < m_vecGroups.size());
				m_vecGroups[nGrpIndex].set.insert(m_nHits-1);
				m_mapGroupIndex[m_nHits-1] = nGrpIndex;
			}
		}
	}
	// update indexes for grouping
	m_mapStrings[setWords].insert(m_nHits-1);
	m_mapSize[result.Size].insert(m_nHits-1);
	if (result.Sha1.isValid())
		m_mapSha1[result.Sha1].insert(m_nHits-1);
	// update groups counter:
	// ungrouped results (hits not present in the group index) count as one group each
	m_nGroups = m_nHits - m_mapGroupIndex.size() + m_vecGroups.size();

	// hard to believe, but it's finished
	m_mutex.unlock();
	// NOTE(review): m_bUpdated is written after the mutex has been released
	m_bUpdated = true;
	return true;
}

// Evaluates a size limit.
//   Limit   - one of the LIMIT_* modes
//   Value   - the reference value of the filter
//   Compare - the value being tested
// Returns true when Compare satisfies the limit. Modes not handled here
// are accepted, i.e. the function is tolerant of unknown limits.
bool MGnuSearch::CheckLimit(int Limit, DWORD Value, DWORD Compare)
{
	if (Limit == LIMIT_MORE)
		return (Compare >= Value);
	if (Limit == LIMIT_EXACTLY)
		return (Compare == Value);
	if (Limit == LIMIT_LESS)
		return (Compare <= Value);
	if (Limit == LIMIT_APPROX)
		return (Compare >= 0.9 * Value && Compare <= 1.1 * Value); // +- 10%
	// unknown Limit parameter: be tolerant
	return true;
}

// Copies the SGnuSearch view of this object into 'gs' while holding m_mutex.
void MGnuSearch::GetSGnuSearch(SGnuSearch& gs) const
{
	m_mutex.lock();
	{
		// only the SGnuSearch part of the search is assigned to the caller's object
		gs = *this;
	}
	m_mutex.unlock();
}

// Copies all currently stored results into 'vecRes', replacing its
// previous contents. The copy is made while holding m_mutex.
void MGnuSearch::GetResults(vector<Result>& vecRes)
{
	// scoped lock, as used elsewhere in this file: the vector copy allocates
	// and may throw, in which case the mutex is still released
	MLock lock(m_mutex);
	vecRes = m_vecResults;
}

// Produces a consistent snapshot of the search under a single lock.
//   pGS - if not NULL, receives the SGnuSearch view of this object
//   pRV - if not NULL, receives a copy of all results
//   pGV - if not NULL, receives one ResultGroup per group; results that
//         belong to no group are returned as single-element groups
// The nKnownFlags field of every stored result is refreshed first,
// whichever outputs are requested.
void MGnuSearch::GetAllResults(SGnuSearch* pGS, vector<Result>* pRV, vector<ResultGroup>* pGV)
{
	m_mutex.lock();
	// update "known" flags
	MGnuMarkedFiles* pMF = m_pDirector->GetMarkedFiles();
	ASSERT(pMF);
	for (ResultVec::iterator itr = m_vecResults.begin(); itr != m_vecResults.end(); ++itr)
		itr->nKnownFlags = pMF->MatchFile(itr->Size, itr->Sha1, MakeSearchOfFilename(itr->Name));
	//
	if (pGS)
		*pGS = *this;
	if (pRV)
		*pRV = m_vecResults;
	if (pGV)
	{
		//cout << "==== grouping diagnostics\n";
		//cout << "m_nHits = " << m_nHits << endl;
		//cout << "m_nGroups = " << m_nGroups << endl;
		//cout << "----------- map ---------\n";
		//copy(m_mapGroupIndex.begin(), m_mapGroupIndex.end(), std::ostream_iterator<pair<const int,int>>(cout, " "));
		//cout << endl;
		//cout << "-ID:------ groups -------\n";
		//for (tGroupVec::iterator itG = m_vecGroups.begin(); itG != m_vecGroups.end(); ++itG)
		//{
			//cout << itG->dwID << ": ";
			//copy(itG->set.begin(), itG->set.end(), std::ostream_iterator<int>(cout, " "));
			//cout << endl;
		//}
		//cout << "=========== end =========\n";
		// have to build groups
		// the output vector is sized up front; nG is the next free slot
		pGV->clear();
		pGV->resize(m_nGroups);
		ASSERT(m_nHits == m_vecResults.size());
		int nG = 0;
		for (int i=0; i<m_nHits; ++i)
		{
			// either create a real group or just copy the current result data
			tIndexMap::iterator itGI;
			if ((itGI = m_mapGroupIndex.find(i)) != m_mapGroupIndex.end())
			{
				// lets see if it's the first groups item
				// (a group is emitted only once, when i equals the first
				// element of its index set; other members are skipped here)
				ASSERT(m_vecGroups.size() > itGI->second);
				tIndexSet::iterator itI;
				if (*(itI = m_vecGroups[itGI->second].set.begin())==i)
				{
					// now we have a set, go create a group
					ASSERT(nG < m_nGroups);
					ResultGroup& rg = (*pGV)[nG];
					++nG;
					//
					tIndexSet& setInd = m_vecGroups[itGI->second].set;
					//
					rg.dwID = m_vecGroups[itGI->second].dwID;
					rg.ResultSet = setInd;
					// preinit the group with the first item values
					ASSERT(*itI < m_vecResults.size());
					rg.Name = m_vecResults[*itI].Name;
					rg.bAllHaveSha1 = true;
					if (m_vecResults[*itI].Sha1.isValid())
						rg.m_setSha1.insert(m_vecResults[*itI].Sha1);
					else
						rg.bAllHaveSha1 = false;
					rg.Size = m_vecResults[*itI].Size;
					rg.nKnownFlags = m_vecResults[*itI].nKnownFlags;
					rg.AvgSpeed = m_vecResults[*itI].Speed;
					// for all other
					for (++itI; itI != setInd.end(); ++itI)
					{
						ASSERT(*itI < m_vecResults.size());
						// the group takes the shortest name found among its members
						if (rg.Name.length() > m_vecResults[*itI].Name.length())
							rg.Name = m_vecResults[*itI].Name;
						if (m_vecResults[*itI].Sha1.isValid())
							rg.m_setSha1.insert(m_vecResults[*itI].Sha1);
						else
							rg.bAllHaveSha1 = false;
						ASSERT(rg.Size == m_vecResults[*itI].Size);
						// when exactly one side is MFT_NoMatch the members disagree,
						// so the group is additionally flagged MFR_Possible
						if ((MFT_NoMatch == rg.nKnownFlags || MFT_NoMatch == m_vecResults[*itI].nKnownFlags) &&
							rg.nKnownFlags != m_vecResults[*itI].nKnownFlags )
							rg.nKnownFlags |= MFR_Possible;
						rg.nKnownFlags |= m_vecResults[*itI].nKnownFlags;
						// speeds are summed here and divided by the member count below
						rg.AvgSpeed += m_vecResults[*itI].Speed;
					}
					rg.AvgSpeed /= setInd.size();
				}
			}
			else
			{
				// create a trivial single-element group
				ASSERT(nG < m_nGroups);
				ResultGroup& rg = (*pGV)[nG];
				++nG;
				// fill in the fields
				rg.Name = m_vecResults[i].Name;
				if (m_vecResults[i].Sha1.isValid())
					rg.m_setSha1.insert(m_vecResults[i].Sha1);
				// true exactly when the single result carried a valid hash
				rg.bAllHaveSha1 = rg.m_setSha1.size();
				rg.Size = m_vecResults[i].Size;
				rg.AvgSpeed = m_vecResults[i].Speed;
				rg.ResultSet.insert(i);
				rg.nKnownFlags = m_vecResults[i].nKnownFlags;
				// a single-element group reuses the ID of its result
				rg.dwID = m_vecResults[i].dwID;
			}
		}
		// every slot reserved by resize() above must have been filled
		ASSERT(nG == m_nGroups);
	}
	m_mutex.unlock();
}

