
#include "filereader.h"

#include <unistd.h>
#include "fileio.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>
#include <limits>

#include "md5.h"
#include "sha1.h"
#include "csmapping.h"

#define FLAG_EOF 8
#define FLAG_ERROR 16
#define FLAG_COMPEOF 32

#include <iostream>

#define BUFSIZE 20000

#ifdef MINIBUILD
#undef HAVE_LIBBZ2
#undef HAVE_ZLIB
#undef HAVE_LZMA
#endif

using namespace MYSTD;


// Common interface of the stream decompressors used by filereader.
// The base class itself acts as the "algorithm not available" fallback:
// it never reports itself as initialised and every decompression attempt
// yields FLAG_ERROR.
class IDecompressor
{
public:
	// set by subclasses once their backend library was set up successfully
	bool inited;

	IDecompressor() : inited(false)
	{
	}

	virtual ~IDecompressor()
	{
	}

	// Decompresses input starting at szInBuf[nBufPos] into the free space of
	// UncompBuf. Returns a combination of FLAG_* bits (0 means "ok, more to come").
	virtual UINT UncompMore(char *szInBuf, size_t nBufSize, size_t &nBufPos, acbuf &UncompBuf)
	{
		return FLAG_ERROR;
	}
};

#ifdef HAVE_LIBBZ2
#include <bzlib.h>
// bzip2 decompressor built on the low-level bz_stream interface of libbz2.
class tBzDec : public IDecompressor
{
	bz_stream strm;
public:
	tBzDec()
	{
		::memset(&strm, 0, sizeof(strm));
		const int nInitRes = BZ2_bzDecompressInit(&strm, 1, EXTREME_MEMORY_SAVING);
		inited = (BZ_OK == nInitRes);
	}

	~tBzDec()
	{
		BZ2_bzDecompressEnd(&strm);
		inited = false;
	}

	// Feeds the not yet consumed input (szInBuf[nBufPos..nBufSize)) to libbz2 and
	// lets it write into the free space of UncompBuf.
	// On success nBufPos is advanced by the consumed input, the produced bytes
	// are committed to UncompBuf and 0 (or FLAG_COMPEOF at the end of the
	// stream) is returned. Any other libbz2 result yields FLAG_COMPEOF|FLAG_ERROR.
	virtual UINT UncompMore(char *szInBuf, size_t nBufSize, size_t &nBufPos, acbuf &UncompBuf)
	{
		const size_t nInputLeft = nBufSize-nBufPos;

		strm.next_in = szInBuf+nBufPos;
		strm.avail_in = nInputLeft;
		strm.next_out = UncompBuf.wptr();
		strm.avail_out = UncompBuf.freecapa();

		const int nRes = BZ2_bzDecompress(&strm);

		// or corrupted data?
		if (nRes != BZ_STREAM_END && nRes != BZ_OK)
			return (FLAG_COMPEOF|FLAG_ERROR);

		// account for what the library consumed and produced
		nBufPos += (nInputLeft - strm.avail_in);
		unsigned int nProduced = UncompBuf.freecapa() - strm.avail_out;
		UncompBuf.got(nProduced);

		if (nRes == BZ_STREAM_END)
			return FLAG_COMPEOF;
		return 0;
	}
};
#else
#define tBzDec IDecompressor
#endif

#ifdef HAVE_ZLIB
#include <zlib.h>
class tGzDec : public IDecompressor
{
	z_stream strm;
public:
	tGzDec()
	{
		::memset(&strm, 0, sizeof(strm));
		inited = (Z_OK == inflateInit2(&strm, 47));
	}
	~tGzDec()
	{
		deflateEnd(&strm);
		inited = false;
	}
	virtual UINT UncompMore(char *szInBuf, size_t nBufSize, size_t &nBufPos, acbuf &UncompBuf)
	{
		strm.next_in = (uint8_t*) szInBuf + nBufPos;
		strm.avail_in = nBufSize - nBufPos;
		strm.next_out = (uint8_t*) UncompBuf.wptr();
		strm.avail_out = UncompBuf.freecapa();

		int ret = inflate(&strm, Z_NO_FLUSH);
		if (ret == Z_STREAM_END || ret == Z_OK)
		{
			nBufPos += ((nBufSize - nBufPos) - strm.avail_in);
			unsigned int nGotBytes = UncompBuf.freecapa() - strm.avail_out;
			UncompBuf.got(nGotBytes);
			return ret == Z_STREAM_END ? FLAG_COMPEOF : 0;
		}
		// or corrupted data?
		return (FLAG_COMPEOF | FLAG_ERROR);
	}
};

#else
#define tGzDec IDecompressor
#endif

#ifdef HAVE_LZMA
#include <lzma.h>

// Decompressor for .xz streams and, optionally, the legacy .lzma ("alone")
// format, built on liblzma.
class tXzDec : public IDecompressor
{
	lzma_stream strm;
public:
	// @param lzmaFormat  true selects the legacy .lzma decoder, false the .xz
	//                    stream decoder (which accepts concatenated streams)
	tXzDec(bool lzmaFormat=false)
	{
		::memset(&strm, 0, sizeof(strm));
		if(lzmaFormat)
			inited = (LZMA_OK == lzma_alone_decoder(&strm,
					EXTREME_MEMORY_SAVING ? 32000000 : MAX_VAL(uint64_t)));
		else
			inited = (LZMA_OK == lzma_stream_decoder(&strm,
				EXTREME_MEMORY_SAVING ? 32000000 : MAX_VAL(uint64_t),
						LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED));
	}
	~tXzDec()
	{
		lzma_end(&strm);
		inited = false;
	}

	// Feeds the not yet consumed input (szInBuf[nBufPos..nBufSize)) to liblzma
	// and lets it write into the free space of UncompBuf.
	// On success nBufPos is advanced by the consumed input, the produced bytes
	// are committed to UncompBuf and 0 (or FLAG_COMPEOF at the end of the
	// stream) is returned. Any other liblzma result yields FLAG_COMPEOF|FLAG_ERROR.
	virtual UINT UncompMore(char *szInBuf, size_t nBufSize, size_t &nBufPos, acbuf &UncompBuf)
	{
		strm.next_in = (uint8_t*) szInBuf + nBufPos;
		strm.avail_in = nBufSize - nBufPos;
		strm.next_out = (uint8_t*) UncompBuf.wptr();
		strm.avail_out = UncompBuf.freecapa();

		// The caller always hands over the complete remaining input (the whole
		// file is mapped), so no further data can ever follow. Tell liblzma so:
		// a decoder created with LZMA_CONCATENATED reports LZMA_STREAM_END only
		// when driven with LZMA_FINISH. With LZMA_RUN it would keep returning
		// LZMA_OK and the end of the stream would never be detected.
		lzma_ret ret=lzma_code(&strm, LZMA_FINISH);
		if (ret == LZMA_STREAM_END || ret == LZMA_OK)
		{
			nBufPos += ((nBufSize - nBufPos) - strm.avail_in);
			unsigned int nGotBytes = UncompBuf.freecapa() - strm.avail_out;
			UncompBuf.got(nGotBytes);
			return ret == LZMA_STREAM_END ? FLAG_COMPEOF : 0;
		}
		// or corrupted data?
		return (FLAG_COMPEOF | FLAG_ERROR);
	}
};
#else
// Placeholder used when liblzma support is not compiled in. Nothing is
// overridden, so instances keep the base class behaviour: "inited" stays
// false and UncompMore reports FLAG_ERROR.
class tXzDec : public IDecompressor
{
public:
	tXzDec()
	{
	}

	// accepts (and ignores) the format selector of the real implementation
	tXzDec(bool)
	{
	}
};
#endif

// Creates a reader with no file attached: the error and EOF flags are set,
// there is no mapping (MAP_FAILED), no descriptor (-1), and all counters are zero.
filereader::filereader() :
	flags(FLAG_ERROR|FLAG_EOF),
	m_szFileBuf((char*)MAP_FAILED),
	m_nBufSize(0),
	m_nBufPos(0),
	m_nCurLine(0),
	m_fd(-1),
	m_nEofLines(0)
{
}

// Leading bytes that identify the supported compressed formats; they are
// compared against the start of a file whose name has no known suffix.
static const uint8_t gzMagic[] = { 0x1f, 0x8b, 0x8 };                // gzip
static const uint8_t bz2Magic[] = { 'B', 'Z', 'h' };                 // bzip2
static const uint8_t xzMagic[] = { 0xfd, '7', 'z', 'X', 'Z', 0x0 };  // xz
static const uint8_t lzmaMagic[] = { 0x5d, 0, 0, 0x80 };             // selects the legacy .lzma decoder

/**
 * Opens a file read-only, maps it into memory and optionally prepares
 * transparent decompression.
 *
 * A previously opened file is closed first. Unless bNoMagic is set, a
 * decompressor is selected by the file name suffix (.bz2, .gz, .xz, .lzma);
 * for any other name the first bytes of the file are compared against the
 * known magic signatures and the content is treated as plain data when none
 * of them matches.
 *
 * @param sFilename path of the file to open
 * @param bNoMagic  true to skip compression detection and read the raw content
 * @return true on success. false if the file cannot be opened, stat'ed or
 *         mapped, if it is too large to be mapped (errno is set to EFBIG), or
 *         if the selected decompressor failed to initialise. On failure the
 *         error/EOF flags set by Close() remain in place.
 */
bool filereader::OpenFile(const string & sFilename, bool bNoMagic)
{
	Close(); // reset to clean state

	m_fd = open(sFilename.c_str(), O_RDONLY);

	if (m_fd < 0)
		return false;

	if (bNoMagic)
		m_Dec.reset();
	else if (endsWithSzAr(sFilename, ".bz2"))
		m_Dec.reset(new tBzDec);
	else if (endsWithSzAr(sFilename, ".gz"))
		m_Dec.reset(new tGzDec);
	else if(endsWithSzAr(sFilename, ".xz"))
		m_Dec.reset(new tXzDec);
	else if (endsWithSzAr(sFilename, ".lzma"))
		m_Dec.reset(new tXzDec(true));
	else // unknown... ok, probe it
	{
		// peek at the raw content through a second, non-decompressing reader
		filereader fh;
		if (fh.OpenFile(sFilename, true) && fh.GetSize() >= 10)
		{
			if (0 == memcmp(gzMagic, fh.GetBuffer(), _countof(gzMagic)))
				m_Dec.reset(new tGzDec);
			else if (0 == memcmp(bz2Magic, fh.GetBuffer(), _countof(bz2Magic)))
				m_Dec.reset(new tBzDec);
			else if (0 == memcmp(xzMagic, fh.GetBuffer(), _countof(xzMagic)))
				m_Dec.reset(new tXzDec);
			else if (0 == memcmp(lzmaMagic, fh.GetBuffer(), _countof(lzmaMagic)))
				m_Dec.reset(new tXzDec(true));
		}
	}

	if (m_Dec.get())
	{
		// also hit when the algorithm was not compiled in (fallback classes
		// never report themselves as initialised)
		if(!m_Dec->inited)
		{
			//aclog::err("Unable to uncompress file, algo not available");
			return false;
		}
		m_UncompBuf.init(BUFSIZE);
	}

	struct stat statbuf;
	if(0!=fstat(m_fd, &statbuf))
		return false;

	// LFS on 32bit? That's not good for mmap. Don't risk incorrect behaviour.
	if(uint64_t(statbuf.st_size) >  MAX_VAL(size_t))
	{
		errno=EFBIG;
		return false;
	}

	if(statbuf.st_size>0)
	{
		m_szFileBuf = (char*) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, m_fd, 0);
		if(m_szFileBuf==MAP_FAILED)
			return false;
		m_nBufSize = statbuf.st_size;

#ifdef HAVE_MADVISE
		// if possible, prepare to read that; only meaningful for a real
		// mapping, an empty file has neither an address nor a length to advise on
		posix_madvise(m_szFileBuf, statbuf.st_size, POSIX_MADV_SEQUENTIAL);
#endif
	}
	else
	{
		// zero-length mappings are not possible, represent the empty file as NULL/0
		m_szFileBuf = NULL;
		m_nBufSize = 0;
	}

	m_nBufPos=0;
	m_nCurLine=0;
	flags = 0;
	return true;
}

// Reports whether the reader is free of errors.
// @param bErrorsConsiderFatal  when true, an error state is reported on stderr
//                              and the process exits with EXIT_FAILURE
// @param reportFilePath        optional file name to include in that message
// @return true if FLAG_ERROR is not set, false otherwise
bool filereader::CheckGoodState(bool bErrorsConsiderFatal, cmstring *reportFilePath) const
{
	if (!(flags & FLAG_ERROR))
		return true;

	if (bErrorsConsiderFatal)
	{
		cerr << "Error opening file";
		if (reportFilePath)
			cerr << " " << *reportFilePath;
		cerr << ", terminating." << endl;
		exit(EXIT_FAILURE);
	}

	return false;
}

// Releases the mapping, the file descriptor and the decompressor and puts the
// reader back into the "nothing opened" state (error and EOF flags set).
void filereader::Close()
{
	if (MAP_FAILED != m_szFileBuf)
	{
		munmap(m_szFileBuf, m_nBufSize);
		m_szFileBuf = (char*) MAP_FAILED;
	}
	m_nBufSize = 0;
	m_nCurLine = 0;

	checkforceclose(m_fd);
	m_Dec.reset();

	// will be cleared in open method
	flags = (FLAG_ERROR|FLAG_EOF);
}

// Releases whatever is still held (mapping, descriptor, decompressor).
filereader::~filereader()
{
	Close();
}

// Reads the next text line (from the plain mapping or from the decompressed
// window) into sOut, without its line terminator.
//
// A line ends at '\r' or '\n'; a directly following byte that completes a
// "\r\n" or "\n\r" pair is dropped together with the first one.
//
// Returns false if FLAG_EOF or FLAG_ERROR was already set when the call
// started, or - in decompressing mode - if the window is empty after the
// stream ended or failed and no m_nEofLines are left to report. In plain mode
// the call that first detects the end of the data sets FLAG_EOF but still
// returns true with an empty sOut.
//
// TODO: can split up a line when it doesn't fit into one buffer
bool filereader::GetOneLine(string & sOut) {
	
	sOut.clear();
	
	// stop flags set in previous run
	if(flags & (FLAG_EOF|FLAG_ERROR))
		return false;
	
	//cout<< "wtf, m_pos: " << m_pos <<endl;
	
	const char *rbuf; // start of the data to scan
	size_t nRest; // number of bytes available at rbuf
	bool bCanRetry=true; // one attempt to decompress more data is allowed per call

	// re-entered (once) after UncompressMoreData() may have refilled the window
	maybe_got_more:
	
	if(!m_Dec.get())
	{
		// plain mode: everything is in the mapping already, nothing to retry
		bCanRetry=false;
		
		if(m_nBufPos>=m_nBufSize)
			flags|=FLAG_EOF;
		// detect eof and remember that, for now or later calls
		nRest = (flags&FLAG_EOF) ? 0 : m_nBufSize-m_nBufPos;
		rbuf=m_szFileBuf+m_nBufPos;
	}
	else 
	{
		nRest=m_UncompBuf.size();
		
		// window drained and the stream cannot deliver more
		if(nRest==0 && (flags&(FLAG_COMPEOF|FLAG_ERROR|FLAG_EOF)))
		{
			// report an empty line while the m_nEofLines counter is positive;
			// note that the counter is decremented in either case
			if(m_nEofLines-- >0)
			{
				sOut.clear();
				return true;
			}
			return false;
		}
		
		rbuf=m_UncompBuf.rptr();
	}
	
	// look for end in the rest of buffer (may even be nullsized then it fails implicitly, newline decides), 
	// on miss -> try to get more, check whether the available size changed, 
	// on success -> retry
	
	//const char *newline=mempbrk(rbuf, "\r\n", nRest);
  //const char *crptr=(const char*) memchr(rbuf, '\r', nRest);
  //const char *lfptr=(const char*) memchr(rbuf, '\n', nRest);
  //const char *newline = (crptr&&lfptr) ? MYSTD::min(crptr,lfptr) : MYSTD::max(crptr,lfptr);
	const char *newline=0;
	for(const char *x=rbuf; x<rbuf+nRest; ++x)
	{
		if('\r' == *x || '\n' == *x) // that's what compilers like most :-(
		{
			newline=x;
			break;
		}
	}
	
	// nLineLen: bytes copied to sOut; nDropLen: bytes consumed from the source
	tStrPos nLineLen, nDropLen;
	
	if(newline)
	{
		nLineLen=newline-rbuf;
		nDropLen=nLineLen+1;
		// cut optional \r or \n but only when it's from another kind
		// (the sum matches only for the pairs "\r\n" and "\n\r"; the size check
		// makes sure that newline[1] is still inside the available data)
		if(nRest > nDropLen &&  newline[0]+newline[1]== '\r'+'\n')
			nDropLen++;
	}
	else
	{
		if(bCanRetry)
		{
			// decompressing mode, first miss: try to get more data and scan again
			bCanRetry=false;
			UncompressMoreData();
			goto maybe_got_more;
		}
		
		// otherwise can continue to the finish 
		// (all remaining data is returned as one line, this is where an
		// overlong line gets split)
		nDropLen=nLineLen=nRest;
	}
	
	sOut.assign(rbuf, nLineLen);
	
	// consume the line including its terminator from the active source
	if(!m_Dec.get())
		m_nBufPos+=nDropLen;
	else
		m_UncompBuf.drop(nDropLen);
	
	m_nCurLine++;
	return true;
}

//! @return: new text buffer size
inline void filereader::UncompressMoreData() {

	// work with uncompressed buffer/window...
	m_UncompBuf.move(); // get unused space if possible
		
	if(	(flags&FLAG_COMPEOF) // cannot uncompress more
		|| m_UncompBuf.freecapa()==0 )
		return;
	
	if(m_nBufPos>=m_nBufSize )
	{
		// shouldn't be here. Decompressor's errors or eof must have been handled before. Undiscovered somehow?
		flags|=(FLAG_ERROR|FLAG_EOF);
		return;
	}
	
	UINT wtf=m_Dec->UncompMore(m_szFileBuf, m_nBufSize, m_nBufPos, m_UncompBuf);
	flags |= wtf;

}

/*
// TODO: make this use a string?
bool filereader::GetMd5String(const MYSTD::string & sFileName, char out[])
{
	uint8_t buf[16];
	if(!GetMd5Sum(sFileName, buf))
		return false;
	
	for(UINT i=0;i<16;i++)
		sprintf(&out[2*i], "%02x", buf[i]);
	
	return true;
}
*/
/*
bool filereader::GetMd5Sum(const MYSTD::string & sFileName, uint8_t out[], bool bTryUnpack,
		off_t &scannedSize)
{
	md5_state_s ctx;
	md5_init(&ctx);
	filereader f;
	scannedSize=0;
	if (!f.OpenFile(sFileName, !bTryUnpack))
		return false;
	if (f.flags&FLAG_PLAIN)
	{
		md5_append(&ctx, (md5_byte_t*) f.m_szFileBuf, f.m_nBufSize);
		scannedSize=f.m_nBufSize;
	}
	else
	{
		while(true)
		{
			f._UncompressMoreData();
			if(f.flags&FLAG_ERROR)
				return false;
			UINT nRest=f.m_UncompBuf.size();
			if(nRest==0)
				break;
			md5_append(&ctx, (md5_byte_t*) f.m_UncompBuf.rptr(), nRest);
			scannedSize+=nRest;
			f.m_UncompBuf.clear();
		}
		
	}
	
	md5_finish(&ctx, out);
	return f.CheckGoodState(false);
}

bool filereader::GetSha1Sum(const MYSTD::string & sFileName, uint8_t out[], 
		bool bTryUnpack, off_t &scannedSize, FILE *fDump)
{
	filereader f;
	return (f.OpenFile(sFileName, !bTryUnpack)
			&& f.GetSha1Sum(out, scannedSize, fDump));
}
*/

#ifndef MINIBUILD
// SHA1 implementation of the csumBase interface; the object itself serves as
// the SHA_INFO context handed to the sha_* functions.
class csumSHA1 : public csumBase, public SHA_INFO
{
public:
	csumSHA1()
	{
		sha_init(this);
	}

	// feeds "size" bytes starting at "data" into the running digest
	void add(const char *data, size_t size)
	{
		sha_update(this, (SHA_BYTE*) data, size);
	}

	// completes the computation and stores the digest in "ret"
	void finish(uint8_t* ret)
	{
		sha_final(ret, this);
	}
};
// MD5 implementation of the csumBase interface; the object itself serves as
// the md5_state_s context handed to the md5_* functions.
class csumMD5 : public csumBase, public md5_state_s
{
public:
	csumMD5()
	{
		md5_init(this);
	}

	// feeds "size" bytes starting at "data" into the running digest
	void add(const char *data, size_t size)
	{
		md5_append(this, (md5_byte_t*) data, size);
	}

	// completes the computation and stores the digest in "ret"
	void finish(uint8_t* ret)
	{
		md5_finish(this, ret);
	}
};

// Creates the checksum calculator for the requested type: MD5 for CSTYPE_MD5,
// SHA1 for CSTYPE_SHA1 and (for now) for every other value.
auto_ptr<csumBase> csumBase::GetChecker(CSTYPES type)
{
	if (CSTYPE_MD5 == type)
		return auto_ptr<csumBase>(new csumMD5);

	// CSTYPE_SHA1 and, for now, everything else
	return auto_ptr<csumBase>(new csumSHA1);
}

// Convenience wrapper: opens sFileName with a temporary reader (decompressing
// it only if bTryUnpack is set) and computes the checksum of its content.
// Returns false if the file cannot be opened or the computation fails.
bool filereader::GetChecksum(const mstring & sFileName, int csType, uint8_t out[],
		bool bTryUnpack, off_t &scannedSize, FILE *fDump)
{
	filereader f;
	if (!f.OpenFile(sFileName, !bTryUnpack))
		return false;

	return f.GetChecksum(csType, out, scannedSize, fDump);
}

bool filereader::GetChecksum(int csType, uint8_t out[], off_t &scannedSize, FILE *fDump)
//bool filereader::GetSha1Sum(uint8_t out[], off_t &scannedSize, FILE *fDump)
{
	auto_ptr<csumBase> summer = csumBase::GetChecker(CSTYPES(csType));
	scannedSize=0;
	
	if(!m_Dec.get())
	{
		summer->add(m_szFileBuf, m_nBufSize);
		//sha_update(&ctx, (SHA_BYTE*) m_szFileBuf, m_nBufSize);
		if(fDump)
			fwrite(m_szFileBuf, sizeof(char), m_nBufSize, fDump);
		scannedSize=m_nBufSize;
	}
	else
	{
		while (true)
		{
			UncompressMoreData();
			if (flags&FLAG_ERROR)
				return false;
			UINT nRest=m_UncompBuf.size();
			if (nRest==0)
				break;
			summer->add(m_UncompBuf.rptr(), nRest);
			//sha_update(&ctx, (md5_byte_t*) m_UncompBuf.rptr(), nRest);
			if(fDump)
				fwrite(m_UncompBuf.rptr(), sizeof(char), nRest, fDump);
			scannedSize+=nRest;
			m_UncompBuf.clear();
		}

	}
	//sha_final(out, &ctx);
	summer->finish(out);
	
	return CheckGoodState(false);
}

// test checksum wrapper classes and their algorithms, also test conversion methods
void check_algos()
{
	const char testvec[]="abc";
	uint8_t out[20];
	auto_ptr<csumBase> ap = csumBase::GetChecker(CSTYPE_SHA1);
	ap->add(testvec, sizeof(testvec)-1);
	ap->finish(out);
	if(!CsEqual("a9993e364706816aba3e25717850c26c9cd0d89d", out, 20))
	{
		cerr << "Incorrect SHA1 implementation detected, check compilation settings!\n";
		exit(EXIT_FAILURE);
	}

	ap = csumBase::GetChecker(CSTYPE_MD5);
	ap->add(testvec, sizeof(testvec) - 1);
	ap->finish(out);
	if (BytesToHexString(out, 16) != "900150983cd24fb0d6963f7d28e17f72")
	{
		cerr << "Incorrect MD5 implementation detected, check compilation settings!\n";
		exit(EXIT_FAILURE);
	}
}


/*
foreach $b (0..255) {
   print "\n" if($b%16==0);
   if( $b>=48 && $b<58 ) { $b-=48;}
   elsif($b>=97 && $b<103) { $b-=87;}
   elsif($b>=65 && $b<71) { $b-=55;}
   else {$b= --$dummy}
   print "$b,";
}
print "\n";
*/


/*
bool filereader::CheckMd5Sum(const string & sFileName, const string & sReference)
{
	uint8_t sum[16];

	if(sFileName.length()!=16)
		return false;
	
	if(!GetMd5Sum(sFileName, sum))
		return false;
	
	return CsEqual( (unsigned char*) sReference.data(), sum, 16);
}

*/

#endif

#ifdef HAVE_LIBBZ2

/**
 * Compresses the file pathIn with bzip2 (block size 9) and writes the result
 * to pathOut.
 *
 * The input is read through a non-decompressing filereader and handed to
 * libbz2 in pieces that fit into the "int" length of BZ2_bzWrite. For an
 * empty input no write call is made at all (libbz2 rejects a NULL buffer), so
 * only an empty bzip2 stream is produced.
 *
 * @param pathIn  file to read
 * @param pathOut file to create or overwrite
 * @return true if the complete input was compressed and written without any
 *         libbz2 or stdio error, false otherwise
 */
bool Bz2compressFile(const char *pathIn, const char*pathOut)
{
	filereader reader;
	if(!reader.OpenFile(pathIn, true))
		return false;

	FILE *f = fopen(pathOut, "w");
	if(NULL == f)
		return false;

	bool bRet=false;

	if(!ferror(f))
	{
		int nError(0);
		BZFILE *bzf = BZ2_bzWriteOpen( &nError, f, 9, 0, 30);
		if(NULL != bzf)
		{
			if(BZ_OK == nError)
			{
				const char *pData = reader.GetBuffer();
				size_t nLeft = reader.GetSize();
				// BZ2_bzWrite takes the length as int, larger inputs need several calls
				const size_t nMaxChunk = std::numeric_limits<int>::max();

				while(nLeft > 0 && BZ_OK == nError)
				{
					const int nChunk = int(nLeft < nMaxChunk ? nLeft : nMaxChunk);
					BZ2_bzWrite(&nError, bzf, (void*) pData, nChunk);
					pData += nChunk;
					nLeft -= nChunk;
				}
				if(BZ_OK == nError)
					bRet=true;
			}
			// finishes the stream; errors here invalidate the result as well
			BZ2_bzWriteClose(&nError, bzf, 0, 0, 0);
			if(BZ_OK != nError)
				bRet=false;
		}
	}
	if(ferror(f))
		bRet=false;

	checkForceFclose(f);

	return bRet;
}

#endif

/*
 * Playing (z|bz|xz)cat
int main(int argc, char **argv)
{
	filereader fh;
	fh.OpenFile(argv[1]);
	mstring sLine;
	while(fh.GetOneLine(sLine))
	{
		cout << sLine <<endl;
	}
}
*/
