############################################################################
##
## Copyright (c) 2000, 2001, 2002 BalaBit IT Ltd, Budapest, Hungary
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##
##
## $Id: Matcher.py,v 1.6.2.2 2002/11/25 11:57:07 sasa Exp $
##
## Author  : Bazsi
## Auditor :
## Last audited version:
## Notes:
##
############################################################################

from Zorp import *
import os, re, string

class AbstractMatcher:
    """Base interface for string matchers.

    A matcher answers a single question: does a given string match some
    backend database of patterns?  This class only defines the interface;
    concrete behaviour is supplied by subclasses such as
    'RegexpFileMatcher', which keeps its regular expressions in flat files.
    """

    def __init__(self):
        """Initialize an AbstractMatcher instance.

        There is no state to set up at this level, so the constructor
        does nothing.

        Arguments

          self -- this instance
        """

    def checkMatch(self, str):
        """Entry point deciding whether a string matches (to be overridden).

        Subclasses must override this method; the base implementation
        always raises NotImplementedError.

        Arguments

          self -- this instance

          str  -- string to check
        """
        raise NotImplementedError()


class RegexpMatcher(AbstractMatcher):
    """Abstract class using regular expressions for string matching.

    This class is still abstract in the sense that it has no means of
    loading regular expressions on its own; it is meant to be derived
    from, as RegexpFileMatcher does, by classes that fill the pattern
    lists.

    Attributes

      match  -- a list of compiled regular expressions which result in
                a positive match

      ignore -- a list of compiled regular expressions which define
                which strings are to be ignored even if 'match' resulted
                in a positive match
    """

    def __init__(self):
        """Constructor to initialize a RegexpMatcher instance.

        Sets the 'match' and 'ignore' attributes to empty lists.

        Arguments

          self -- this instance
        """
        AbstractMatcher.__init__(self)
        self.match = []
        self.ignore = []

    def checkMatch(self, str):
        """Function to determine if a given string actually matches.

        The string matches when at least one pattern in 'match' is found
        in it (using search(), so the pattern may hit anywhere in the
        string) and no pattern in 'ignore' is found in it.

        Arguments

          self -- this instance

          str -- string to check

        Returns TRUE when the string matches, FALSE otherwise (both
        constants are expected to be provided by the module-level
        'from Zorp import *').
        """
        for pattern in self.match:
            if pattern.search(str):
                # The first positive hit decides the outcome: it stands
                # unless one of the ignore patterns vetoes it.
                for veto in self.ignore:
                    if veto.search(str):
                        return FALSE
                return TRUE
        return FALSE


class RegexpFileMatcher(RegexpMatcher):
    """Class using regular expressions stored in files for string matching.

    This is an actually usable AbstractMatcher derived class which keeps
    its regular expressions in files, one pattern per line. The files are
    checked for modification on every checkMatch() call and reloaded
    when they changed. Instances of this class can be used for URL
    filtering for example.

    Attributes

      match_file -- filename where positive matches are stored

      match_date -- modification time (unix timestamp) of match_file as
                    of the last successful load, 0 if never loaded

      ignore_file -- filename where ignore patterns are stored; an empty
                     string disables the ignore list

      ignore_date -- modification time (unix timestamp) of ignore_file as
                     of the last successful load, 0 if never loaded
    """

    def __init__(self, match_fname, ignore_fname=''):
        """Constructor to initialize a RegexpFileMatcher instance.

        Only records the filenames; the files themselves are loaded
        lazily by the first checkMatch() call.

        Arguments

          self -- this instance

          match_fname -- [FILENAME] file containing positive patterns

          ignore_fname -- [FILENAME] file containing patterns to be ignored
        """
        RegexpMatcher.__init__(self)
        self.match_file = match_fname
        self.match_date = 0
        self.ignore_file = ignore_fname
        self.ignore_date = 0

    def readFile(self, filename, array):
        """Function to read the contents of a file to an array of regular expressions.

        The file is read line by line. Trailing whitespace is stripped
        from each line and the remainder is compiled as a case
        insensitive regular expression and appended to 'array'. Blank
        lines are skipped, and so are lines which do not compile.

        Arguments

          self -- this instance

          filename -- file to read

          array -- array to place compiled regular expressions into

        Raises whatever open() raises when the file cannot be opened.
        """
        f = open(filename, 'r')
        try:
            for line in f:
                line = line.rstrip()
                if not line:
                    # A blank line is not a pattern (an empty regexp
                    # would match every string); skip it instead of
                    # treating it as the end of the file.
                    continue
                try:
                    array.append(re.compile(line, re.IGNORECASE))
                except re.error:
                    # Best effort: one malformed pattern must not
                    # prevent the remaining ones from being loaded.
                    pass
        finally:
            # Always release the file handle, even if reading fails.
            f.close()

    def checkMatch(self, str):
        """Function to determine if a string matches.

        This function is part of the AbstractMatcher interface, and is
        called when the fate of a given string is to be determined.
        The implementation here checks if the pattern files have been
        changed, loads them if necessary and decides if the given string
        matches.

        A file which cannot be examined or read is reported through
        log() and the previously loaded patterns stay in effect; the
        load is attempted again on the next call.

        Arguments

          self -- this instance

          str -- string to check

        Returns the result of RegexpMatcher.checkMatch().
        """
        try:
            # Index 8 of the stat tuple is the modification time.
            mtime = os.stat(self.match_file)[8]
            if self.match_date < mtime:
                patterns = []
                self.readFile(self.match_file, patterns)
                # Swap in the new list and remember the timestamp only
                # after the whole file was read successfully.
                self.match = patterns
                self.match_date = mtime
        except (OSError, IOError):
            # os.stat() raises OSError, open() raises IOError on Python 2.
            log(None, CORE_POLICY, 3, "Error opening matchfile; filename='%s'" % self.match_file)

        if self.ignore_file:
            try:
                mtime = os.stat(self.ignore_file)[8]
                if self.ignore_date < mtime:
                    patterns = []
                    self.readFile(self.ignore_file, patterns)
                    self.ignore = patterns
                    self.ignore_date = mtime
            except (OSError, IOError):
                log(None, CORE_POLICY, 3, "Error opening ignore file; filename='%s'" % self.ignore_file)

        return RegexpMatcher.checkMatch(self, str)
