/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2005  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "pch.h"

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "HttpStateResponseHeaders.h"
#include "HttpResponseParser.h"
#include "HttpRequestMetadata.h"
#include "HttpResponseMetadata.h"
#include "HttpStatusLine.h"
#include "HttpHeader.h"
#include "HttpHeadersCollection.h"
#include "HttpHeaderStructure.h"
#include "HttpHeaderElement.h"
#include "HttpVersion.h"
#include "BString.h"
#include "SplittableBuffer.h"
#include "SBOutStream.h"
#include "StringUtils.h"
#include "InsensitiveEqual.h"
#include "ErrorDescriptor.h"
#include "ErrorResponse.h"
#include "ErrorCodes.h"
#include <memory>
#include <string>
#include <list>

using namespace std;

/*
Constructs the response-headers state for the given parser.
The parser is forwarded to the HttpStateHeaders base and is also
bound to m_rParser for use by this class.
*/
HttpStateResponseHeaders::HttpStateResponseHeaders(HttpResponseParser& parser)
	: HttpStateHeaders(parser)
	, m_rParser(parser)
{
}

/*
The destructor body is intentionally empty.
*/
HttpStateResponseHeaders::~HttpStateResponseHeaders()
{
	// nothing to do here
}

/*
Prepares this state for parsing the headers of a new response.

req         - the request this response belongs to; remembered in m_ptrRequest.
status_line - the already parsed status line; a fresh HttpResponseMetadata
              is created from it and kept in m_ptrMetadata.
*/
void
HttpStateResponseHeaders::activate(
	ConstRequestPtr const& req, HttpStatusLine const& status_line)
{
	this->m_ptrRequest = req;
	this->m_ptrMetadata.reset(new HttpResponseMetadata(status_line));
}

void
HttpStateResponseHeaders::cleanup()
{
	reset();
	m_ptrRequest.reset(0);
	m_ptrMetadata.reset(0);
}

/*
Records one parsed header in the metadata of the response being built.

name  - header name.
value - header value.

Headers for which isSingularHeader() is true go through setHeader(),
all other headers go through addHeader().
*/
void
HttpStateResponseHeaders::addHeader(BString const& name, BString const& value)
{
	bool const singular = isSingularHeader(name);
	HttpHeadersCollection& collection = m_ptrMetadata->headers();
	
	if (!singular) {
		collection.addHeader(name, value);
		return;
	}
	
	/*
	Some servers really do send Content-Length or Content-Type twice
	and get away with it. Keeping both would lead to parse errors,
	so only the most recent occurrence is retained.
	*/
	collection.setHeader(name, value);
}

/*
Called when the complete header block of a response has been received.
Works out how the response body is delimited, reports the metadata to the
parser through eventGotMetadata() / eventGotProvisionalResponse() and
returns the state that is to handle whatever follows the headers.

body_eof - set to true when the response is known to carry no body
           (204, 304, a response to HEAD, or Content-Length of zero).
           It is left untouched in all other cases.

Returns the next state, as produced by one of m_rParser.activateState*().

The order of the checks matters:
  1. successful CONNECT response  -> raw tunnel, unsized body
  2. 1xx provisional response     -> wait for the next status line
  3. 204 / 304 / HEAD             -> no body
  4. Transfer-Encoding            -> chunked or unsized body
  5. Content-Encoding workarounds
  6. Content-Length               -> sized body
  7. otherwise                    -> unsized body
*/
HttpState*
HttpStateResponseHeaders::headersReceived(bool& body_eof)
{
	int const status = m_ptrMetadata->statusLine().getCode();
	
	/*
	Any 2xx response to CONNECT (not just 200) means the tunnel has been
	established. Content-Length and Transfer-Encoding must be ignored
	in such a response, so we go straight to the unsized body state.
	*/
	if (isConnectResponse() && status >= 200 && status < 300) {
		m_ptrMetadata->setBodyStatus(HttpResponseMetadata::BODY_SIZE_UNKNOWN);
		m_rParser.eventGotMetadata(m_ptrMetadata, /* is_persistent = */ false);
		return m_rParser.activateStateUnsizedFlatBody();
	}
	
	// A provisional (1xx) response has no body and is followed by another response.
	if (status >= 100 && status < 200) {
		m_ptrMetadata->setBodyStatus(HttpResponseMetadata::BODY_FORBIDDEN);
		m_rParser.eventGotProvisionalResponse(m_ptrMetadata);
		return m_rParser.activateStateStatusLine(m_ptrRequest);
	}
	
	HttpHeadersCollection& headers = m_ptrMetadata->headers();
	
	bool const is_persistent = isPersistentConnection(
		headers, m_ptrMetadata->statusLine().getHttpVersion()
	);
	
	// These responses never have a body, whatever the headers say.
	if (status == 304 || status == 204 || isHeadResponse()) {
		m_ptrMetadata->setBodyStatus(HttpResponseMetadata::BODY_FORBIDDEN);
		m_rParser.eventGotMetadata(m_ptrMetadata, is_persistent);
		body_eof = true;
		return m_rParser.activateStateInactive();
	}
	
	// Default assumption; upgraded to BODY_SIZE_KNOWN below if Content-Length is usable.
	m_ptrMetadata->setBodyStatus(HttpResponseMetadata::BODY_SIZE_UNKNOWN);
	
	BString const chunked("chunked");
	BString const identity("identity");
	InsensitiveEqual const icomp;
		
	HttpHeader* tenc = headers.getHeaderPtr(BString("Transfer-Encoding"));
	if (tenc && !icomp(tenc->getValue(), identity)) {
		HttpHeaderStructure structure(*tenc);
		HttpHeaderElement const* last = structure.getLastElement();
		if (last && icomp(last->getName(), chunked)) {
			// Strip the trailing 'chunked' element from the header;
			// drop the header altogether if nothing else is left in it.
			structure.elements().pop_back();
			if (structure.empty()) {
				headers.removeHeader(BString("Transfer-Encoding"));
			} else {
				structure.commitChanges(*tenc);
			}
			m_ptrMetadata->setBodyStatus(HttpResponseMetadata::BODY_SIZE_UNKNOWN);
			m_rParser.eventGotMetadata(m_ptrMetadata, is_persistent);
			return m_rParser.activateStateChunkHeader();
		} else {
			// Content-Length is ignored if any transfer encoding except 'identity' has been applied
			m_rParser.eventGotMetadata(m_ptrMetadata, is_persistent);
			return m_rParser.activateStateUnsizedFlatBody();
		}
	}
	
	HttpHeader* clen = headers.getHeaderPtr(BString("Content-Length"));
	HttpHeader* cenc = headers.getHeaderPtr(BString("Content-Encoding"));
	if (cenc) {
		// Copy the value: handleWrongGzipEncoding() may remove the header.
		BString const cenc_val = cenc->getValue();
		if (icomp(cenc_val, BString("gzip"))) {
			if (handleWrongGzipEncoding(headers)) {
				// Content-Encoding has been removed from the headers.
				cenc = 0;
			}
		}
		if (cenc && clen) {
			HttpState* next_state = handleLengthPlusEncodingProblem(
				headers, cenc_val, is_persistent
			);
			if (next_state) {
				return next_state;
			}
		}
	}
	
	if (clen) {
		BString const cl(clen->getValue());
		// cl_end is updated by parseUnsigned(); it is expected to end up
		// where parsing stopped (see the check below).
		char const* cl_end = cl.end();
		uintmax_t body_size = StringUtils::parseUnsigned<uintmax_t>(cl.begin(), cl_end);
		/*
		An empty value has to be rejected explicitly: with an empty range
		cl_end has nowhere to move, so the second condition alone would
		let it through as if it were a valid length.
		*/
		if (cl.empty() || cl_end != cl.end()) {
			// the header's value is already trimmed, so the problem is elsewhere
			return m_rParser.activateStateError("Content-Length header is broken");
		}
		m_ptrMetadata->setBodyStatus(HttpResponseMetadata::BODY_SIZE_KNOWN);
		m_ptrMetadata->setBodySize(body_size);
		m_rParser.eventGotMetadata(m_ptrMetadata, is_persistent);
		if (body_size == 0) {
			body_eof = true;
			return m_rParser.activateStateInactive();
		} else {
			return m_rParser.activateStateSizedFlatBody(body_size);
		}
	}
	
	// Neither Transfer-Encoding nor Content-Length: the body size is unknown.
	m_rParser.eventGotMetadata(m_ptrMetadata, is_persistent);
	return m_rParser.activateStateUnsizedFlatBody();
}

bool
HttpStateResponseHeaders::isHeadResponse() const
{
	return m_ptrRequest->requestLine().getMethod() == BString("HEAD");
}

bool
HttpStateResponseHeaders::isConnectResponse() const
{
	return m_ptrRequest->requestLine().getMethod() == BString("CONNECT");
}

/*
Workaround for servers that wrongly label gzip files with
"Content-Encoding: gzip". The caller invokes this when the response
carries "Content-Encoding: gzip".

headers - the response headers; modified in place when the workaround applies.

Returns true if the response was recognized as a gzip file being served
as is. In that case Content-Type has been rewritten and Content-Encoding
has been removed. Returns false, leaving the headers untouched, otherwise.
*/
bool
HttpStateResponseHeaders::handleWrongGzipEncoding(HttpHeadersCollection& headers)
{
	/*
	Have you ever had a problem of downloading a .tar.gz file
	and having your browser mistakenly uncompress it to a simple .tar,
	while leaving the .tar.gz extension?
	Guess what, it's not the browser's fault, it's Apache to blame here.
	They set "Content-Encoding: gzip" whenever they serve a .tar.gz file.
	That's very wrong, because "Content-Encoding: gzip" indicates that the
	requested content has been compressed and that the user agent has to
	decompress it.
	If you've never had such experience, it's not because Apache stopped
	setting "Content-Encoding: gzip" for .tar.gz files, it's just because
	your browser applies the same workaround as we do here.
	*/
	BString const content_type("Content-Type");
	BString const content_encoding("Content-Encoding");
	// Media type names are case-insensitive, so "Application/X-Gzip" must match as well.
	InsensitiveEqual const icomp;
	if (icomp(headers.getHeader(content_type).getValue(), BString("application/x-gzip"))) {
		headers.setHeader(HttpHeader(content_type, BString("application/octet-stream")));
	} else {
		// Content-Type didn't give it away; fall back to the extension
		// of the requested path (compared case-insensitively).
		static char const gz[] = { '.','g','z' };
		static char const tgz[] = { '.','t','g','z' };
		static char const tar_gz[] = { '.','t','a','r','.','g','z' };
		BString req_path = m_ptrRequest->requestLine().getURI().getDecodedPath();
		if (StringUtils::ciEndsWith(req_path.begin(), req_path.end(), tgz, tgz+sizeof(tgz)) ||
		    StringUtils::ciEndsWith(req_path.begin(), req_path.end(), tar_gz, tar_gz+sizeof(tar_gz))) {
			headers.setHeader(HttpHeader(content_type, BString("application/x-tgz")));
		} else if (StringUtils::ciEndsWith(req_path.begin(), req_path.end(), gz, gz+sizeof(gz))) {
			headers.setHeader(HttpHeader(content_type, BString("application/octet-stream")));
		} else {
			return false;
		}
	}
	// The body is to be passed through as is, so it must not be advertised as encoded.
	headers.removeHeader(content_encoding);
	return true;
}

/*
Deals with responses that carry both Content-Length and Content-Encoding.

headers       - the response headers; Content-Length may be removed from them.
cenc          - the value of the Content-Encoding header.
is_persistent - whether the connection is persistent.

Returns the next state if the situation was handled here (Content-Length
dropped, metadata reported, unsized body state activated), or 0 if the
caller should carry on with its normal processing.
*/
HttpState*
HttpStateResponseHeaders::handleLengthPlusEncodingProblem(
	HttpHeadersCollection& headers, BString const& cenc, bool is_persistent)
{
	// An empty or 'identity' encoding means the body is not encoded at all.
	if (cenc.empty()) {
		return 0;
	}
	if (InsensitiveEqual()(cenc, BString("identity"))) {
		return 0;
	}
	
	/*
	Some buggy implementations compress the body but put the original
	(uncompressed) size into Content-Length, so Content-Length can't be
	trusted when Content-Encoding is present.
	On a persistent connection there is nothing we can do about it:
	Content-Length has to be honoured [chunked transfers are handled
	by the caller before we get here].
	*/
	if (is_persistent) {
		return 0;
	}
	
	// On a non-persistent connection we simply discard Content-Length
	// and let the closing of the connection mark the end of data.
	headers.removeHeader(BString("Content-Length"));
	m_rParser.eventGotMetadata(m_ptrMetadata, is_persistent);
	return m_rParser.activateStateUnsizedFlatBody();
}
