############################################################################
##
## Copyright (c) 2000, 2001, 2002 BalaBit IT Ltd, Budapest, Hungary
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##
##
## $Id: Http.py,v 1.31.2.8 2003/04/25 10:03:43 bazsi Exp $
##
## Author  : Bazsi
## Auditor : 
## Last audited version:
## Notes:
##
############################################################################

"""Module exporting the Http proxy interface.

This module defines the interface to the Http proxy as implemented by the
Http Zorp module.
"""

from Zorp import *
from Plug import PlugProxy
from Proxy import Proxy, proxyLog
from Session import StackedSession

# Generic (unprefixed) action codes.  The HTTP_REQ_*, HTTP_RSP_* and
# HTTP_HDR_* constants below reuse the same numeric values.
HTTP_PASS = 1
HTTP_ACCEPT = 1
HTTP_DROP = 5
HTTP_POLICY = 6
HTTP_CHANGE_NAME = 100
HTTP_CHANGE_VALUE = 101
HTTP_CHANGE_BOTH = 102
HTTP_CHANGE_REGEXP = 103
HTTP_INSERT = 104
HTTP_REPLACE = 105

# Action codes used as the first item of tuples in the 'request' hash.
HTTP_REQ_ACCEPT = 1
HTTP_REQ_DENY = 2
HTTP_REQ_REJECT = 3
HTTP_REQ_ABORT = 4
HTTP_REQ_POLICY = 6

# Action codes used as the first item of tuples in the 'response' hash.
HTTP_RSP_ACCEPT = 1
HTTP_RSP_DENY = 2
HTTP_RSP_REJECT = 3
HTTP_RSP_ABORT = 4
HTTP_RSP_POLICY = 6

# Action codes used as the first item of tuples in the header hashes.
HTTP_HDR_ACCEPT = 1
HTTP_HDR_ABORT = 4
HTTP_HDR_DROP = 5
HTTP_HDR_POLICY = 6
HTTP_HDR_CHANGE_NAME = 100
HTTP_HDR_CHANGE_VALUE = 101
HTTP_HDR_CHANGE_BOTH = 102
HTTP_HDR_CHANGE_REGEXP = 103
HTTP_HDR_INSERT = 104
HTTP_HDR_REPLACE = 105

# Values of the 'connection_mode' attribute.
HTTP_CONNECTION_CLOSE = 0
HTTP_CONNECTION_KEEPALIVE = 1

# Identifiers passed to proxyLog() as its second argument.
HTTP_REQUEST = "http.request"
HTTP_ACCOUNTING = "http.accounting"

class HttpProxy(Proxy):
	"""Wrapper class for the http proxy
	
	HttpProxy is a wrapper class for the built in Http proxy implemented
	in Zorp. It features both transparent and non-transparent modes
	of operation, advanced filtering and more.

	Usage

	  The Http proxy core denies all requests by default, and provides
	  some attributes for the administrator to override this default.

	  Setting policy for requests
	  
	    Changing the default behaviour of requests can be done using the
	    hash named 'request'. This hash is indexed by the method name
	    (e.g: GET or POST). Each item in this hash is a tuple, specifying
	    action to take with the given request. Interpretation of 
	    the tuple itself and the action to take is specified by the
	    first item in this tuple holding one of the 'HTTP_REQ_*'
	    constants as described below.
	    
	    Possible values for the first item (Possible action codes for requests in HTTP):

	      HTTP_REQ_ACCEPT   -- [] allow request without modification
	      
	      HTTP_REQ_REJECT   -- [QSTRING] reject the request, reject reason can be 
	                           specified as the second item in the tuple
	      
	      HTTP_REQ_ABORT 	-- [] reject the request, and abort the connection immediately

	      HTTP_REQ_DENY	-- [] same as HTTP_REQ_ABORT
	      
	      HTTP_REQ_POLICY   -- [METHOD] call the given function to decide what to do
	                           this value uses an additional tuple item,
			           which must be a callable Python function.
			           The function must take four parameters:
			             self, method, url, version

	      Example (Sample for URL filtering in HTTP proxy)

	        class DmzHTTP(HttpProxy):
	      
	          def config(self):
	  	    HttpProxy.config(self)
		    self.request["GET"] = (HTTP_REQ_POLICY, self.filterURL)
		  
		  def filterURL(self, method, url, version):
		    if (url == "http://www.balabit.hu"):
		      return HTTP_REQ_ACCEPT
		    self.error_info = 'Content denied by policy.'
	            return HTTP_REQ_REJECT

	  Setting policy for responses
	  
	    You can change the default behaviour of the proxy when the
	    response for a request is received. To do this, you have to
	    add items in the hash named 'response'. This hash is indexed
	    by a response tuple comprising the request method (or '*')
	    and the response code (might be partial by leaving digits
	    from the end). The most specific match is used in the order:
	    
	      1. Full match: 'GET', '404'
	      
	      2. Partial match: 'GET', '4'
	      
	      3. Wildcard with full response code match: '*', '404'
	      
	      4. Wildcard with partial response code match: '*', '4'
	      
	      5. Default: '*', ''
	    
	    The value of each entry in this hash should be a tuple similar
	    to the 'request' hash: it has an action value describing the
	    action to take and optional parameters.

	    Possible values for the first item (Possible action codes for responses in HTTP):

	      HTTP_RSP_ACCEPT   -- [] allow response without modification (default)
	      
	      HTTP_RSP_DENY     -- [] deny response, return a policy violation page to the client instead
	      
	      HTTP_RSP_ABORT    -- [] same as HTTP_RSP_DENY
	      
	      HTTP_RSP_REJECT   -- [QSTRING] reject response, return a policy violation page to the client, with
	       	                   error information optionally specified as the second item
				   in the tuple
	      
	      HTTP_RSP_POLICY   -- [METHOD] call the given function to decide what to do.
	                           This value uses an additional tuple item,
	                           which must be a callable Python function.
			     	   The function must take five parameters:
			             self, method, url, version, response

	      Example (Sample for 404 response filtering in HTTP proxy)

	        class DmzHTTP(HttpProxy):
	      
	          def config(self):
	  	    HttpProxy.config(self)
		    self.response["GET", "404"] = (HTTP_RSP_POLICY, self.filter404)
		  
		  def filter404(self, method, url, version, response):
		    self.error_status = 404
		    self.error_info = "This page was not accessible."
	            return HTTP_RSP_REJECT

	  Changing headers in requests or responses

	    Both request headers and response headers can be modified
	    during transit. New header lines can be inserted, entries
	    can be modified or deleted. To change headers in the
	    request use the request_headers hash, or for response headers
	    use the response_headers hash.

	    Similarly to the request hash, these hashes contain a
	    variable-length tuple, where the first item determines
	    the interpretation of the remaining items. The hash index
	    is the name of the header to be modified.

	    Headers are not touched by the proxy by default, except the 
	    "Host:", "Connection:" and "Proxy-Connection" headers. However
	    the way these are modified can be changed here.

	    Possible values for the first item (Possible action codes for headers in HTTP):

	      HTTP_HDR_ACCEPT        -- [] accept header without change, default for all headers
	      
	      HTTP_HDR_DROP          -- [] remove this header
	      
	      HTTP_HDR_POLICY        -- [METHOD] call a Python function specified in the 
	                                second tuple item. It must be
	                                a Python function taking 3 parameters:
		                        self, hdr_name, hdr_value
		                    
	      HTTP_HDR_CHANGE_NAME   -- [QSTRING] set header name to the second tuple item
	      
	      HTTP_HDR_CHANGE_VALUE  -- [QSTRING] set header value to the second tuple item
	      
	      HTTP_HDR_CHANGE_BOTH   -- [QSTRING,QSTRING] set header name and value to the second and third tuple items respectively
	                            
	      HTTP_HDR_INSERT        -- [QSTRING] insert a new header defined by the next 
	                                tuple item (value)
	                            
	      HTTP_HDR_REPLACE       -- [QSTRING] remove all existing occurrences of this header
	                                and insert this one instead.

	      Example (Sample for header filtering in HTTP)

	        class MyHttp(HttpProxy):

	          def config(self):
	  	    HttpProxy.config(self)
		    self.request_headers["User-Agent"] = (HTTP_HDR_CHANGE_VALUE, "Lynx 2.4.1")
		    self.request_headers["Cookie"] = (HTTP_HDR_POLICY, self.processCookies)
		    self.response_headers["Set-Cookie"] = (HTTP_HDR_DROP,)
		    
		  def processCookies(self, name, value):
		    # you could change the current header in self.current_header_name
		    # or self.current_header_value, the current request url
		    # in self.request_url
		    return HTTP_HDR_DROP
	      
	  Redirecting urls

	    You can choose to either reject a set of urls, or redirect them to a local
	    mirror by changing some attributes during request processing.

	    As a HTTP request comes in, normative policy chains are
	    processed (self.request, self.request_headers), where you
	    can install policy callbacks for certain events with the 
	    HTTP_REQ_POLICY or HTTP_HDR_POLICY directive. Any of these 
	    callbacks may change the request_url attribute which may 
	    result in a completely different url to be fetched.

	    Example (Sample for URL redirection in HTTP)

	      class MyHttp(HttpProxy):
	      
	        def config(self):
	          HttpProxy.config(self)
		  self.request["GET"] = (HTTP_REQ_POLICY, self.filterURL)
		  
		def filterURL(self, method, url, version):
		  self.request_url = "http://www.balabit.hu/"
		  return HTTP_REQ_ACCEPT

          Request types

            Zorp differentiates between two request types. One is referred
            to as server request, the other as proxy request. Server
            requests are sent by browsers directly communicating with HTTP
            servers. This request includes an URL relative to the server
            root (e.g. /index.html), and a 'Host' header indicating which
            virtual server to use. 
            
            Proxy requests on the other hand are used when the browser
            communicates with a HTTP proxy. This request includes a fully
            specified URL (e.g. http://www.something.com/index.html).
            
            As there is no clear distinction between the two request types,
            autodetecting their type is not always correct, though all
            common cases are covered.
            
            Requests are handled differently in transparent and
            non-transparent mode. 
            
            A transparent HTTP proxy (transparent_mode attribute is TRUE) is
            meant to be installed in front of a network where clients do not
            know about the presence of the firewall. In this case the proxy
            expects to see server type requests only. If clients communicate
            with a real HTTP proxy through the firewall, proxy type requests
            must be explicitly enabled using the 'permit_proxy_requests'
            attribute.
            
            The use of non-transparent HTTP proxy (transparent_mode
            attribute is FALSE) must be configured in web browsers behind
            the firewall. In this case Zorp expects proxy requests only, and
            emits server requests (assuming 'parent_proxy' is not set).
            
	  Using parent proxies

	    There are two things to be checked to use parent proxies. First
	    you have to pick a router which makes the proxy connect to the
	    parent proxy. The two possibilities are InbandRouter(), or
	    DirectedRouter().

	    The second thing to set is the parent_proxy and
	    parent_proxy_port attribute in the HttpProxy instance. Setting
	    these attributes results in proxy requests to be emitted to the
	    target server in either transparent or non-transparent mode.

	    The parent proxy attributes can be set in both the
	    configuration phase (e.g. config() event), and later on a
	    per-request basis.

	    Example (Sample for using parent proxies in HTTP)
	
	      class MyHttp(HttpProxy):
	      
	        def config(self):
		  HttpProxy.config(self)
		  self.parent_proxy = "proxy.example.com"
		  self.parent_proxy_port = 3128

          Error messages

            Error messages are stored as files in the directory specified by
            the 'error_files_directory' attribute. Each file may contain
            plain HTML with the following macros, which are expanded as the
            error message is sent to the client.

	      @INFO@ -- further error information as provided by the proxy

              @VERSION@ -- Zorp version number

              @DATE@ -- current date

	    It is generally advisable not to display error messages to
	    untrusted clients, as they may leak confidential information. To
	    turn error messages off, set 'error_silent' to 'TRUE', or
	    strip error files down to a minimum.

	Attributes
	
	  transparent_mode            -- [BOOLEAN:TRUE:RW:R] TRUE for 
					 transparent proxy, FALSE otherwise

	  permit_server_requests      -- [BOOLEAN:TRUE:RW:R] allow server
	                                 type requests in transparent mode.

	  permit_proxy_requests       -- [BOOLEAN:FALSE:RW:R] allow proxy 
					 type requests in transparent mode

          permit_unicode_url          -- [BOOLEAN:FALSE:RW:R] allow unicode
					 characters in urls encoded as %u.
                                         This is an IIS extension to HTTP.

	  connection_mode             -- [ENUM;HTTP_CONNECTION:n/a:-:RW]
					 This value reflects the state of the
					 session. If the value equals to
					 'HTTP_CONNECTION_CLOSE', the
					 session will be closed after
					 servicing the current
					 request, otherwise if the
					 value is
					 'HTTP_CONNECTION_KEEPALIVE'
					 another request will be
					 fetched from the client. This
					 attribute can be used to
					 forcibly close a keepalive
					 connection.

	  parent_proxy                -- [QSTRING:"":RW:RW] address or hostname
	                                 of the parent proxy to connect to.
					 You have to use DirectedRouter or
					 InbandRouter for this option
					 to take effect.

	  parent_proxy_port           -- [INTEGER:3128:RW:RW] the port of the
                                         parent proxy to connect to.

	  default_port                -- [INTEGER:80:RW:RW] this value is used
					 in non-transparent mode when the 
					 requested URL does not contain a
					 port number. The default should be
					 80, otherwise your proxy may not
					 function properly.

	  rewrite_host_header         -- [BOOLEAN:TRUE:RW:RW] rewrite Host 
					 header in request when URL
					 redirection is performed
	
	  require_host_header         -- [BOOLEAN:TRUE:RW:R] require the 
					 presence of the Host: header. If set
					 to FALSE the real URL cannot be
					 recovered from some requests, which
					 might cause problems with URL 
					 filtering.

	  strict_header_checking      -- [BOOLEAN:TRUE:RW:R] require RFC
	                                 conformant HTTP headers. Some
	                                 webservers return unparseable
	                                 responses, which result in
					 an error message from Zorp. To
	                                 communicate with these servers
	                                 turn this check off. RFC
	                                 incompliant headers are
	                                 filtered and are not sent to
	                                 the client in any case.
	                                 (default: TRUE)
	                               
	  permit_null_response	      -- [BOOLEAN:TRUE:RW:R] permit RFC
					 incompliant responses with
					 headers not terminated by
					 CRLF and not contain entity body.

	  max_hostname_length         -- [INTEGER:256:RW:RW]

	  max_line_length             -- [INTEGER:4096:RW:R] maximum length of
	                                 lines in requests and
	                                 responses. This value does
	                                 not affect data transfer, as
	                                 they are transmitted in
	                                 binary mode.  (default: 4096)
					 
	  max_url_length              -- [INTEGER:4096:RW:RW] maximum length 
					 of an URL in a request. Note that this
					 directly affects forms using the 'GET'
					 method to pass data to CGI scripts.

	  max_body_length             -- [INTEGER:0:RW:RW] maximum length 
					 of an HTTP request or response body.

          max_chunk_length	      -- [INTEGER:262144:RW:RW] maximum
                                         length of a single chunk when 
                                         using chunked transfer-encoding.

	  max_header_lines	      -- [INTEGER:50:RW:RW] maximum number of
	                                 header lines in a request or
	                                 response (default: 50)

	  max_keepalive_requests      -- [INTEGER:0:RW:RW] maximum number
	                                 of requests in a single
	                                 session or 0 for infinite.
	                                 After the next request session
	                                 will be closed.

	  request_count               -- [INTEGER:0:-:R] the number of kept 
					 alive requests within this
					 session

	  timeout                     -- [INTEGER:300000:RW:RW] general I/O 
					 timeout in milliseconds. When
					 there is no specific timeout
					 for a given operation, this
					 value is used.

	  timeout_request             -- [INTEGER:10000:RW:RW] time to wait
					 for a request to arrive from the 
					 client.

	  request                     -- [HASH;QSTRING;HTTP_REQ:empty:RW:RW] normative policy 
					 hash, directing the proxy to
					 do something with requests,
					 without the need to call
					 Python. This hash is indexed
					 by the method (e.g. "GET",
					 "PUT" etc), and contains an action
					 tuple describing the action to
					 take on the appropriate request.

	  request_header              -- [HASH;QSTRING;HTTP_HDR:empty:RW:RW] normative policy 
				      	 hash, directing the proxy to
	                                 do something with request
	                                 headers. It is indexed by the
	                                 header name
	                                 (e.g. "Set-cookie"), and contains
					 an action tuple describing the
					 action to take on the appropriate
					 header.

	  response                    -- [HASH;QSTRING;HTTP_RSP:empty:RW:RW] normative policy 
					 hash directing the proxy to
	                                 do something with responses.

	  response_header             -- [HASH;QSTRING;HTTP_HDR:empty:RW:RW] similar to 
				         request_headers but applied
	                                 to response headers.

	  request_url                 -- [STRING:n/a:-:RW] request url string, 
				      	 can be changed to redirect
				      	 the current request.

	  request_url_proto           -- [STRING:n/a:-:R] protocol specifier 
					 of the url

	  request_url_username        -- [STRING:n/a:-:R] username if 
					 specified in the url

	  request_url_passwd          -- [STRING:n/a:-:R] password if 
					 specified in the url

	  request_url_host            -- [STRING:n/a:-:R] remote hostname in 
					 the url

	  request_url_port            -- [INTEGER:n/a:-:R] port number as 
					 specified in the url

	  request_url_file            -- [STRING:n/a:-:R] filename of the url

	  current_header_name         -- [STRING:n/a:-:RW] defined during 
					 header processing functions,
	                                 and can be changed to
	                                 actually change a header in
	                                 the request or response.

	  current_header_value        -- [STRING:n/a:-:RW] similar to
	                                 current_header_name but contains
					 the header value

	  error_status                -- [INTEGER:500:RW:RW] if an error occurs,
					 Zorp uses this value as the
					 status code of the HTTP
					 response it generates.  

	  error_info                  -- [STRING:n/a:-:RW] a string included
	                                 in error message.

          error_silent                -- [BOOLEAN:FALSE:RW:RW] turn off verbose error
                                         reporting to the HTTP client (hide firewall version)

	  error_files_directory       -- [QSTRING:"/usr/share/zorp/http":RW:RW] the location 
                                         for http error messages

	  auth_inband_supported       -- [INTEGER:1:R:R]

	  auth                        -+ [OBJECT:n/a:W:R]

	  auth_realm                  -- [QSTRING:"Zorp HTTP auth":W:R]

	  target_port_range           -- [QSTRING:"80,443":RW:RW]


	Attribute aliases 

	  The attributes below are for compatibility with Zorp
	  versions prior to 1.0. They will be removed in further
	  revisions so their use is deprecated.

	  (Compatibility aliases in HTTP)

	    transparent_server_requests -- permit_server_requests

	    transparent_proxy_requests  -- permit_proxy_requests
	 
	    request_timeout             -- timeout_request

	    request_headers             -- request_header

	    response_headers		-- response_header

	    url_*			-- request_url_*

	    error_response		-- error_status
	                                 
	"""

	name = "http"

	def __init__(self, session):
		"""Initializes a HttpProxy instance.

		Stores PlugProxy as the default 'connect_proxy' class (the
		class instantiated by connectMethod()) and then delegates to
		the Proxy base class initializer.
		
		Arguments
		
		  self -- this instance
		
		  session -- the session this instance participates in
		"""
		# NOTE(review): connect_proxy is assigned before the base
		# initializer runs, presumably so that it is already set if
		# Proxy.__init__ triggers event handlers -- confirm before
		# reordering these two statements.
		self.connect_proxy = PlugProxy
		Proxy.__init__(self, session)
		
	def config(self):
		"""Install the default request policy.

		Registers an accept-without-modification action in the
		'request' hash for the GET, POST and HEAD methods, giving a
		usable configuration without further customization.

		Arguments

		  self -- this instance
		"""
		for method in ("GET", "POST", "HEAD"):
			self.request[method] = (HTTP_REQ_ACCEPT,)
		
	def connectMethod(self):
		"""Start a connect_proxy instance on a stacked session.

		Wraps self.session in a StackedSession, instantiates the
		class stored in self.connect_proxy with it, and accepts
		the request.

		NOTE(review): presumably invoked by the proxy core when a
		CONNECT request is processed -- confirm against the caller.

		Arguments

		  self -- this instance

		Returns

		  HTTP_REQ_ACCEPT
		"""
		stacked_session = StackedSession(self.session)
		self.connect_proxy(stacked_session)
		return HTTP_REQ_ACCEPT


# HttpProxy operates in transparent mode by default, so the explicitly
# named transparent variant is simply an alias for it.
HttpProxyTransparent = HttpProxy

class HttpProxyNonTransparent(HttpProxy):
	"""HttpProxy variant operating in non-transparent mode.

	Behaves exactly like HttpProxy, except that the config event
	handler switches the 'transparent_mode' attribute off.
	"""

	def config(self):
		"""Apply the HttpProxy defaults, then disable transparent mode.

		Arguments

		  self -- this instance
		"""
		HttpProxy.config(self)
		self.transparent_mode = FALSE

class HttpProxyURIFilter(HttpProxy):
	"""Class encapsulating an URL filter capable HTTP proxy.

	This class is derived from HttpProxy extending it with URL filtering
	capability. The matcher attribute should be initialized to refer to
	a Matcher object. The initialization should be done in the class
	body. Requests whose URL is matched by the matcher are rejected;
	when no matcher is set every request is accepted.

	Attributes

	  matcher -- [INST_matcher:n/a:RW:RW] Matcher instance

	"""
	def config(self):
		"""Route GET, POST and HEAD requests through checkURL.

		Applies the HttpProxy defaults first, then replaces the
		entries of the 'request' hash for GET, POST and HEAD with a
		policy callback pointing to checkURL(). If no 'matcher'
		attribute is defined on the instance or its class, it is
		set to None.

		Arguments

		  self -- this instance
		"""
		HttpProxy.config(self)
		# Use the request-specific constant, consistently with the
		# HTTP_REQ_* codes returned by checkURL(); it has the same
		# numeric value as the generic HTTP_POLICY.
		for method in ("GET", "POST", "HEAD"):
			self.request[method] = (HTTP_REQ_POLICY, self.checkURL)
		if not hasattr(self, "matcher"):
			self.matcher = None
		
	def checkURL(self, method, url, version):
		"""Policy callback deciding whether a request may pass.

		Logs an accounting message for every request. If a matcher
		is set and its checkMatch() returns a true value for the
		URL, the rejection is logged, self.error_info is set and
		the request is rejected.

		Arguments

		  self -- this instance

		  method -- request method

		  url -- requested URL, passed to the matcher

		  version -- version string of the request

		Returns

		  HTTP_REQ_REJECT if the matcher matched the URL,
		  HTTP_REQ_ACCEPT otherwise
		"""
		request_line = "%s %s %s" % (method, url, version)
		proxyLog(self, HTTP_ACCOUNTING, 4, "http accounting; request='%s'" % request_line)
		if self.matcher and self.matcher.checkMatch(url):
			proxyLog(self, HTTP_REQUEST, 6, "request administratively prohibited; request='%s'" % request_line)
			self.error_info = 'Accessing this content was administratively prohibited.'
			return HTTP_REQ_REJECT
		return HTTP_REQ_ACCEPT

class HttpProxyURIFilterNonTransparent(HttpProxyURIFilter):
	"""URL filtering HTTP proxy operating in non-transparent mode.

	Behaves exactly like HttpProxyURIFilter, except that the config
	event handler switches the 'transparent_mode' attribute off.
	"""

	def config(self):
		"""Apply the HttpProxyURIFilter setup, then disable transparent mode.

		Arguments

		  self -- this instance
		"""
		HttpProxyURIFilter.config(self)
		self.transparent_mode = FALSE
