###############################################################################
# Local Security Check Automation Framework
#
# Authors:
# Veerendra GG <veerendragg@secpod.com>
#
# Revision 1.0
# Date: 2009/04/09
#
# Copyright:
# Copyright (c) 2009 SecPod , http://www.secpod.org
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
# (or any later version), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
###############################################################################

import re
import os
import sys
import string

from common import utils


## Mandriva releases this parser generates checks for. Keys are the product
## names looked up from the advisory text; values are the release strings as
## used in gather-package-list.nasl to set "ssh/login/release".
##
## Known products that are currently disabled:
##     'Corporate 3.0'              -> 'Corp3.0'
##     'Corporate 4.0'              -> 'Corp4.0'
##     'Multi Network Firewall 2.0' -> 'MuNetFW2.0'
os_map = {
    'Mandriva Linux 2009.0': 'MNDK_2009.0',
    'Mandriva Linux 2008.1': 'MNDK_2008.1',
    'Mandriva Linux 2008.0': 'MNDK_2008.0',
    'Mandriva Linux 2007.1': 'MNDK_2007.1',
    'Mandriva Linux 2007.0': 'MNDK_2007.0',
    'Mandriva Linux 2006.0': 'MNDK_2006.0',
}

## RPM file-name suffixes (architecture + extension) that are removed from
## the package names found in an advisory
strip_val = [
    '.i586.rpm',
    '.x86_64.rpm',
    '.noarch.rpm',
    '.i386.rpm',
    '.src.rpm',
]

## Base URL that the collected year/month and message links are appended to
append_url = 'http://lists.mandriva.com/security-announce/'

## Entries listed here are not advisories
skip_list = ['']


class Parser:
    """
    Mandriva security advisory parser: parses one advisory page and
    populates the parser attributes declared below.
    """

    ## Parsed advisory fields; these class-level defaults are replaced
    ## per instance by parser()
    AdvID = ''
    Description = ''
    Packages = {}
    CVEs = ''
    Name = ''
    Summary = ''
    Platforms = ''
    Product = []
    Html_content = ''
    XREF = []
    FileName = ''


    def _getYearLinks(self, link, year, debug=0):
        """
        Gets the advisory links for the given year
        """
        year_links = []
        year = str(year)
        data = utils.getHTMLCon(link)
        links = re.findall('href="/security-announce.*', data)

        if links:
            for i in links[0].split(':'):
                for j in i.split(','):
                   j = re.findall('security-announce/(' + year + '.*)' + '/"', j)
                   if j:
                       year_links.append(append_url + j[0])

            if debug:
                if year_links:
                    print "\nAdvisory Links for (%s) year" %(year)
                    for i in year_links:
                        print i

            return year_links

        return []


    def _getEachAdvLink(self, link, debug=0):
        """
        Get security advisory links.
        """
        month_links = []
        data = utils.getHTMLCon(link)
        links = re.findall('href="(msg.*)"', data)
        year_month = os.path.basename(link)

        if links:
            for i in links:
                month_links.append(append_url + year_month + '/' + i)

            if debug:
                print "\nMandriva Advisories for (%s) year-month" %(year_month)
                print "Total (%s) Mandriva Advisories : " %(len(month_links))

            return month_links

        return []


    def fetchHTML(self, year, debug=0):
        """
        Retrive Mandriva Advisories locally
        """

        try:
            all_adv_links = []

            year_links = self._getYearLinks(self.main_url, year, debug)

            if not year_links:
                print "ERROR: Din't find mentioned (%s) year in Mandriva "+ \
                                             "Advisories..." %(year)
                print "Exiting ..."
                sys.exit(0)

            for link in year_links:
                month_links = self._getEachAdvLink(link, debug)
                if not month_links:
                    if debug:
                        print "ERROR: No Mandriva Security Advisories for : \n", link
                    continue

                all_adv_links.extend(month_links)

            if not all_adv_links:
                print "ERROR: Din't find any Mandriva Security Advisories...", year
                print "Exiting ..."
                sys.exit(0)

            all_adv_links = utils.removeDups(all_adv_links)

            for adv_url in all_adv_links:

                base_name = adv_url.split('/')[-1]
                month_year = adv_url.split('/')[-2]
                file_name = self.html_cache + month_year + '_' + base_name

                if not os.path.isfile(file_name):
                    if debug:
                        print "\nFetching Mandriva Advisory..." + \
                                        os.path.basename(adv_url)
                    try:
                        utils.fetchFiles(adv_url, file_name, debug)
                    except Exception, msg:
                        print 'ERROR: Error fething the url %s' % msg

        except Exception, msg:
            print "Exception in : mandriva -> Parser(Class) -> fetchHTML method()"
            sys.exit(msg)


    def _findAll(self, regex):
        """
        Returns Matched data
        """
        return regex.findall(self.Html_content)


    def getCVE(self, debug=0):
        """
        Returns CVE list
        """
        if debug:
            print "\nGetting CVE List..."

        cve_regex = re.compile('CVE-[0-9]+-[0-9]+')
        can_regex = re.compile('CAN-[0-9]+-[0-9]+')

        cve_list = self._findAll(cve_regex)
        cve_list.extend(self._findAll(can_regex))

        cve_list = utils.removeDups(cve_list)

        if cve_list:
            cve_list = '", "'.join(cve_list)
        else:
            cve_list = ''

        if debug:
            print "CVE List : ", cve_list

        return cve_list


    def getAdvID(self, debug=0):
        """
        Returns Mandriva Security Advisory ID
        """

        if debug:
            print "\nGetting Advisory ID..."

        adv_id_regex =  re.compile('Subject:<.*\[Security Announce\].*\[(.*)\]')
        adv_id = self._findAll(adv_id_regex)

        if not adv_id:
            return ''

        if debug:
            print "Advisory ID : ", adv_id

        return adv_id[0].strip()


    def getAffectedPackage(self, debug=0):
        """
        Returns Affected Packages/RPM's
        """

        if debug:
            print "\nGetting Affected Packages/RPM List..."

        pkg_regex =  re.compile("Package\s?\s?\s?\s?:(.*)", re.IGNORECASE)
        pkg = self._findAll(pkg_regex)

        if pkg:
            pkg = pkg[0].strip()
        else:
            pkg = ''

        if debug:
            print "Affected Packages/RPMS : ", pkg

        return pkg


    def getDescription(self, debug=0):
        """
        Returns Vulnerability Description
        """
        description = ''

        if debug:
            print "\nGetting Vulnerability Description..."

        desc_regex =  re.compile("(?s)Description:(.*)____", re.IGNORECASE)
        desc = self._findAll(desc_regex)

        if desc:
            desc = desc[0].strip()
            for line in desc.split('\n'):
                line = line.strip()
                if "____" in line:
                    break

                if "<a" in line and "</a>" in line:
                    tmp = re.findall('href="(.*)">', line)
                    if tmp:
                        line = tmp[0]
                line = line.replace('"', "'")

                description += "  " + line + '\n'

        description = description.strip()

        return description


    def getAffectedProduct(self, debug=0):
        """
        Returns Affected Product/Platform
        """
        prod_list = []
        tmp_list = []

        ## Get Affected Product/Platform
        product =  re.findall("(?s)Updated Packages\s?\s?\s?:(.*)___", self.Html_content)

        if product:
            product = product[0].strip()
            for line in product.split('\n'):
                if '.rpm' in line and ":" not in line:
                    continue
                elif "_________" in line:
                    break
                else:
                    tmp_list.append(line)

        if tmp_list:
            for prod in tmp_list:
                prod = prod.strip(':').strip()
                if prod:
                    prod_list.append(prod)

        if debug:
            print "\nAffected Product are : (%s)" %(', '.join(prod_list))

        ## Don't include Product/Platform, If not in "os_map" Dict
        ref_list = []

        for prod in prod_list:
            if "X86_64" in prod:
                tmp_prod = prod.strip('/X86_64')
            else:
                tmp_prod = prod

            if os_map.has_key(tmp_prod):
                ref_list.append(prod)
            elif debug and prod:
                print "\nUPDATE: Not Generating Code for (%s) OS" %(prod)
                print "If Needed to generate code, then "+ \
                      "add into dict variable os_map in parser"

        if ref_list and debug:
            print "Generating Code for (%s) Products " %(ref_list)

        return ref_list


    def getRPM(self, prod_list, debug=0):
        """
        Returns OS Package Dictionary
        """

        if debug:
            print "\nGetting RPM List..."

        all_rpms = []
        os_rpm_dict = {}
        data = re.findall("(?s)Updated Packages:(.*)_____", self.Html_content)
        if data: 
            data = data[0].strip()
        else:
            print "Package List not found in the advisory."
            return {}

        for prod in prod_list:
            if 'X86_64' in prod:
                continue
            d = re.findall("(?s)" + prod +'.*:.*', data)
            if d:
                d = d[0]
            else:
                print "Pakage for %s Product not found" %(prod)

            rpm_list = []
            for j in d.split('\n'):
                j = j.strip()
    
                if not j or prod in j or prod+'/X86_64' in j:
                    continue

                if "____" in j:
                    break

                if not ".rpm" in j and ":" in j:
                    break

                import os
                rpm = os.path.basename(j)

                flag = 0
                for k in strip_val:
                    if k in rpm:
                      rpm = rpm.replace(k, '')
                      flag = 1
                      break

                if flag:
                    rpm_list.append(rpm)
                else:
                    print "Found rpm other then, %s in line :: %s" % (', '.join(strip_val), rpm)

            all_rpms = utils.stripIt(rpm_list, strip_val)
            all_rpms = utils.removeDups(all_rpms)

            if os_map.has_key(prod):
                os_rpm_dict[os_map[prod]] = all_rpms

            if not os_rpm_dict:
                print "No RPMs Found for any product ",

        return os_rpm_dict


    def formatReference(self, main_url, file_name):
        """
        Build the reference URL of an advisory.

        main_url gets a trailing '/' if it lacks one, and every '_' in
        file_name is turned into '/' before it is appended.
        """
        base = main_url
        if not base.endswith('/'):
            base += '/'

        return base + file_name.replace('_', '/')


    def parser(self, html_content, debug=0):
        """
        Main parser function, builds the parser object
        by invoking parse functions
        """

        try:
            if debug:
                print "Mandriva Parser Initiated..."

            self.Html_content = html_content.replace('\r\n', '\n')

            self.CVEs = self.getCVE(debug)

            self.Platforms = self.getAffectedProduct(debug)
            if not self.Platforms or self.Platforms == []:
                if debug:
                    print "ERROR: Required Products not found..."
                return False

            self.Packages = self.getRPM(self.Platforms, debug)
            if not self.Packages or self.Packages == '':
                if debug:
                    print "ERROR: Required Packages not found..."
                return False

            self.Description = self.getDescription(debug)
            if not self.Description or self.Description == '':
                if debug:
                    print "ERROR: Description not found..."
                return False

            self.AdvID = self.getAdvID(debug)
            if not self.AdvID or self.AdvID == '':
                if debug:
                    print "ERROR: Advisory ID not found..."
                return False

            self.Product = self.getAffectedPackage(debug)
            if not self.Product or self.Product == '':
                if debug:
                    print "ERROR: Required Products not found..."
                return False

            self.Platforms = ",\n  ".join(self.Platforms)

            self.Summary = self.Product

            self.Name = self.Product + " " + self.AdvID + " (" + self.Product + ")"

            self.Impact = '  '

            adv_id = self.AdvID.split('-')
            if len(adv_id) == 2:
                (name, value) = self.AdvID.split('-')
            elif len(adv_id) == 3:
                (name, value1, value2) = self.AdvID.split('-')
                value = value1 + '-' + value2
            else:
                if debug:
                    print "ERROR: Check the Advisory ID : ", self.AdvID
                return False

            ## Set XREF
            self.XREF = [name, value]

            ## Construct File Name
            tmp = value.replace('-','_')
            value = tmp.replace(':','_')
            self.FileName = "_".join(['mandriva', name + "_" + value])

            if debug:
                print "\nAll mandatory attributes are parsed: ", self.AdvID

            return True

        except Exception, msg:
            print 'Exception in Parser mandriva -> Parser -> parser() Method '
            sys.exit(msg)
