# -------------------------------------------------------------------------
#     This file is part of mMass - the spectrum analysis tool for MS.
#     Copyright (C) 2005-07 Martin Strohalm <mmass@biographics.cz>

#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     Complete text of GNU GPL can be found in the file LICENSE in the
#     main directory of the program
# -------------------------------------------------------------------------

# Function: Make protein digest and match to peaklist.

# load libs
import re
import string

# load modules
from nucleus import commfce
from modules.mformula.main import mFormula


class mCutCount:
    """Make protein digest and match to given peaklist"""

    # ----
    def __init__(self, config):
        """ Store the configuration and prepare the formula helper.

        config: configuration object; its 'elem' attribute is passed to
            mFormula here. Other methods of this class read its 'enz',
            'mod' and 'elem' attributes.
        """

        # digest/match settings; empty here, read by the other methods
        self.ctrlData = {}

        # keep config and build formula helper from its element table
        self.config = config
        self.mFormula = mFormula(config.elem)
    # ----


    # ----
    def digest(self, parsedSeq):
        """ Run the whole digest pipeline on a parsed sequence.

        parsedSeq: list of amino acid records; item 0 is the residue
            symbol, items 1 and 2 are masses and items from 3 on are
            modifications (as read by getPeptideData).

        Returns the list produced by makePrintable. Optional steps are
        switched by the 'notcleavemodif', 'partials', 'optmodif' and
        'uselimits' entries of self.ctrlData.
        """

        settings = self.ctrlData

        # find cleavage sites, optionally dropping those on modified residues
        cleavageSites = self.getDigestIndexes(parsedSeq)
        if settings['notcleavemodif']:
            cleavageSites = self.filterModified(parsedSeq, cleavageSites)

        # cut the sequence and optionally add miss-cleaved peptides
        digested = self.getMainPeptides(parsedSeq, cleavageSites)
        if settings['partials']:
            digested = self.getPartials(digested)

        # optionally expand combinations of modifications
        if settings['optmodif']:
            digested = self.makeModifCombinations(digested)

        # finish masses: terminal groups first, then charge
        digested = self.addTerminalGroups(digested, len(parsedSeq))
        digested = self.addCharge(digested)

        # optionally drop masses out of limits
        if settings['uselimits']:
            digested = self.filterOutLimits(digested)

        # format into printable version
        return self.makePrintable(digested)
    # ----


    # ----
    def getDigestIndexes(self, parsedSeq):
        """ Collect positions of the last residue of each digested peptide.

        The enzyme expression is taken from self.config.enz for the enzyme
        named in self.ctrlData['enzyme']. The sequence is scanned residue
        by residue; whenever the expression is found in the fragment
        collected so far, the previous position is recorded and a new
        fragment is started from the current residue.

        Returns list of indexes; the last item is always the index of the
        last residue (-1 for an empty sequence).
        """

        enzymeName = self.ctrlData['enzyme']
        cleavagePattern = re.compile(self.config.enz[enzymeName]['expr'])

        # make clean sequence from residue symbols only
        residues = ''.join([aa[0] for aa in parsedSeq])

        # scan sequence and remember where the enzyme can cleave
        sites = []
        fragment = ''
        for position, symbol in enumerate(residues):
            fragment += symbol
            if cleavagePattern.search(fragment):
                sites.append(position - 1)
                fragment = symbol

        # terminal peptide ends at the last residue
        sites.append(len(residues) - 1)

        return sites
    # ----


    # ----
    def filterModified(self, parsedSeq, digestIndexes):
        """ Remove cleavage sites placed on a modified amino acid.

        parsedSeq: list of amino acid records; a record longer than three
            items carries at least one modification.
        digestIndexes: list of indexes into parsedSeq, as produced by
            getDigestIndexes.

        Returns a new list of the indexes that are kept. The index of the
        last residue is always kept: it marks the end of the sequence, not
        a cleavage, and getMainPeptides needs it to emit the C-terminal
        peptide. Keeping it without a lookup also makes the [-1] index
        list of an empty sequence pass through unchanged.
        """

        lastIndex = len(parsedSeq) - 1

        filteredIndexes = []
        for index in digestIndexes:

            # keep sequence end and every unmodified residue
            if index == lastIndex or len(parsedSeq[index]) <= 3:
                filteredIndexes.append(index)

        return filteredIndexes
    # ----


    # ----
    def getMainPeptides(self, parsedSeq, digestIndexes):
        """ Cut the parsed sequence into peptides at given indexes.

        parsedSeq: list of amino acid records.
        digestIndexes: indexes of the last residue of each peptide.

        Returns list of peptides, each formatted as
        [start, stop, missed, mass, sequence, modifs], where start and stop
        are zero-based positions in parsedSeq and missed is always 0.
        Residues left after the last given index are returned as one final
        peptide instead of being dropped.
        """

        peptides = []
        peptide = []

        # set gives constant-time membership test for every residue
        cleavageSites = set(digestIndexes)

        # get peptide
        for x, aminoacid in enumerate(parsedSeq):
            peptide.append(aminoacid)

            # till index
            if x in cleavageSites:
                start = x - len(peptide) + 1
                peptides.append([start, x, 0] + self.getPeptideData(peptide))
                peptide = []

        # flush residues following the last cleavage site
        if peptide:
            stop = len(parsedSeq) - 1
            start = stop - len(peptide) + 1
            peptides.append([start, stop, 0] + self.getPeptideData(peptide))

        return peptides
    # ----


    # ----
    def getPeptideData(self, peptide):
        """ Summarize list of amino acid records into [mass, sequence, modifs].

        peptide: list of amino acid records; item 0 is the residue symbol,
            item 2 is used as mass when self.ctrlData['masstype'] is
            'amass' and item 1 otherwise, items from 3 on are modifications.
        """

        # select mass column of the amino acid record
        massColumn = 2 if self.ctrlData['masstype'] == 'amass' else 1

        symbols = []
        total = 0
        modifications = []

        # collect symbol, mass and modifications of each residue
        for residue in peptide:
            symbols.append(residue[0])
            total += residue[massColumn]
            modifications.extend(residue[3:])

        return [total, ''.join(symbols), modifications]
    # ----


    # ----
    def getPartials(self, peptides):
        """ Extend peptide list by miss-cleaved peptides.

        Each peptide is followed by its joins with up to
        self.ctrlData['partials'] consecutive following peptides. A joined
        peptide takes start from the first peptide, stop from the last one,
        the number of joined neighbours as its missed value and summed
        mass, sequence and modifications.

        Returns a new list; original peptide records are reused in it.
        """

        maxMissed = self.ctrlData['partials']
        count = len(peptides)
        result = []

        for first, current in enumerate(peptides):
            result.append(current)

            # grow current peptide by one following peptide at a time
            for missed in range(1, maxMissed + 1):

                # stop at the end of the sequence
                if first + missed >= count:
                    break

                following = peptides[first + missed]
                current = [
                    current[0],
                    following[1],
                    missed,
                    current[3] + following[3],
                    current[4] + following[4],
                    current[5] + following[5],
                ]
                result.append(current)

        return result
    # ----


    # ----
    def makeModifCombinations(self, peptides):
        """ Expand each peptide into variants carrying subsets of its modifications.

        For every peptide all distinct combinations of fewer than all of
        its modifications are generated (including the empty one) and the
        mass is recounted for each of them. The variants come sorted by
        their modification lists and are followed by the original, fully
        modified peptide record.

        Returns a new list of peptides.
        """

        expanded = []
        for peptide in peptides:
            original = peptide[5]

            # get distinct sorted combinations of every smaller size
            variants = []
            for size in range(len(original)):
                for combination in self.uniqueCombinations(original, size):
                    combination.sort()
                    if combination not in variants:
                        variants.append(combination)
            variants.sort()

            # get mass of the peptide without any modification
            bareMass = peptide[3] - self.countModifMass(original)

            # count mass for each combination
            for combination in variants:
                mass = bareMass + self.countModifMass(combination)
                expanded.append([peptide[0], peptide[1], peptide[2], mass, peptide[4], combination])

            # append full modified (original) peptide
            expanded.append(peptide)

        return expanded
    # ----


    # ----
    def uniqueCombinations(self, items, n):
        """ Generate all n-item combinations of items as lists.

        Items keep their original order inside each combination. Equal
        values on different positions are treated as different items, so
        equal combinations may be generated more than once.
        """

        # the only combination of no items is the empty one
        if n == 0:
            yield []
            return

        # take each item as head and combine it with items following it
        for position, head in enumerate(items):
            remaining = items[position + 1:]
            for tail in self.uniqueCombinations(remaining, n - 1):
                yield [head] + tail
    # ----


    # ----
    def countModifMass(self, modifs):
        """ Count total mass for list of given modifications.

        modifs: list of modifications; a float value (including instances
            of float subclasses) is added as it is, any other value is used
            as a key into self.config.mod and the mass stored there for
            self.ctrlData['masstype'] is added.

        Returns the summed mass, 0 for an empty list.
        Raises KeyError for a modification missing in self.config.mod.
        """

        massType = self.ctrlData['masstype']
        mass = 0

        # add exact mass or find modification mass
        for mod in modifs:

            # isinstance accepts float subclasses as exact masses too
            if isinstance(mod, float):
                mass += mod
            else:
                mass += self.config.mod[mod][massType]

        return mass
    # ----


    # ----
    def addTerminalGroups(self, peptides, length):
        """ Add masses of enzyme terminal groups to peptide masses.

        peptides: list of peptide records; masses are changed in place.
        length: number of residues in the whole sequence.

        The 'n-term' and 'c-term' formulas of the selected enzyme are
        converted to masses by self.mFormula.getMass. The n-term mass is
        added to peptides starting after the first residue, the c-term
        mass to peptides ending before the last residue.

        Returns the same list.
        """

        massType = self.ctrlData['masstype']
        enzymeDef = self.config.enz[self.ctrlData['enzyme']]

        # get terminus masses for selected enzyme
        nTermMass = self.mFormula.getMass(enzymeDef['n-term'], massType)
        cTermMass = self.mFormula.getMass(enzymeDef['c-term'], massType)

        # add terminal masses to ends made by cleavage only
        lastPosition = length - 1
        for peptide in peptides:
            if peptide[0] > 0:
                peptide[3] += nTermMass
            if peptide[1] < lastPosition:
                peptide[3] += cTermMass

        return peptides
    # ----


    # ----
    def addCharge(self, peptides):
        """ Recount peptide masses for the selected charge.

        For a non-zero self.ctrlData['charge'] each mass is replaced in
        place by (mass + charge * H) / abs(charge), where H is taken from
        self.config.elem['H'] for the selected mass type. Masses stay
        untouched for zero charge.

        Returns the same list.
        """

        hydrogen = self.config.elem['H'][self.ctrlData['masstype']]
        charge = self.ctrlData['charge']

        # nothing to count for neutral peptides
        if charge == 0:
            return peptides

        # count ion mass for peptides
        for peptide in peptides:
            ionMass = peptide[3] + charge * hydrogen
            peptide[3] = ionMass / abs(charge)

        return peptides
    # ----


    # ----
    def filterOutLimits(self, peptides):
        """ Remove peptides with mass out of the selected range.

        Only peptides with mass strictly between self.ctrlData['minmass']
        and self.ctrlData['maxmass'] are kept. The given list is changed
        in place and returned.
        """

        # rewrite list content in place so the caller's list is filtered too
        peptides[:] = [
            peptide for peptide in peptides
            if self.ctrlData['minmass'] < peptide[3] < self.ctrlData['maxmass']
        ]

        return peptides
    # ----


    # ----
    def makePrintable(self, peptides):
        """ Convert peptide records to rows ready to be shown.

        Each row is [number, range, missed, mass, sequence, modified, match]:
        number counts rows from 1, range is a '[start-stop]' string with
        one-based positions, sequence has formatted modifications appended,
        modified tells whether there are any and match is an empty string.
        """

        rows = []
        for position, peptide in enumerate(peptides):

            # show one-based positions
            pepRange = '[%d-%d]' % (peptide[0] + 1, peptide[1] + 1)

            # format modifications
            modifs = self.formatModifications(peptide[5])

            # add peptide
            rows.append([
                position + 1,
                pepRange,
                peptide[2],
                peptide[3],
                peptide[4] + modifs,
                bool(modifs),
                '',
            ])

        return rows
    # ----


    # ----
    def formatModifications(self, modifications):
        """ Format list of modifications into a short summary.

        Returns an empty string for an empty list, otherwise a string like
        ' (2x A; 1x B)' where each distinct modification is shown with the
        number of its occurrences.
        """

        # nothing to show
        if modifications == []:
            return ''

        # count modifications
        occurrences = {}
        for mod in modifications:
            occurrences[mod] = occurrences.get(mod, 0) + 1

        # format modifications
        parts = [str(occurrences[mod]) + 'x ' + str(mod) for mod in occurrences]

        return ' (' + '; '.join(parts) + ')'
    # ----


    # ----
    def matchDataToPeaklist(self, peaklist, peptides):
        """ Compare peaklist data with the list of digested peptides.

        peaklist: list of peaks; item 0 of each peak is compared with
            peptide masses.
        peptides: list of peptide rows; item 3 is the mass and item 6 is
            overwritten by indexes of matching peaks, each followed by ';'.

        The tolerance window of each peptide comes from
        commfce.countTolerance called with self.ctrlData['tolerance'] and
        self.ctrlData['errortype'].

        Returns tuple (peptides, matched), where matched tells whether any
        peak has matched.
        """

        tolerance = self.ctrlData['tolerance']
        errorType = self.ctrlData['errortype']
        anyMatched = False

        for peptide in peptides:
            peptideMass = peptide[3]

            # count tolerance from peptide mass and error
            massTolerance = commfce.countTolerance(peptideMass, tolerance, errorType)
            lowMass = peptideMass - massTolerance
            highMass = peptideMass + massTolerance

            # check peaklist
            hits = [
                str(index) + ';'
                for index, peak in enumerate(peaklist)
                if lowMass <= peak[0] <= highMass
            ]
            if hits:
                anyMatched = True
            peptide[6] = ''.join(hits)

        return peptides, anyMatched
    # ----


    # ----
    def getMatchInfo(self, peaklist, peptides, parsedSeq, errorType, digits):
        """ Get match-info for each peak in main peaklist.

        peaklist: list of peaks; item 0 of each peak is its mass.
        peptides: list of peptide rows; item 1 is a '[from-to]' range with
            one-based positions, item 3 the mass and item 6 a string of
            matched peak indexes separated by ';'.
        parsedSeq: parsed sequence; only its length is used.
        errorType: value stored under data['hidden']['errortype'].
        digits: not used by this method.

        Returns dictionary with:
            'params' - list of [label, value] string pairs,
            'errors' - list of [peak mass, peak mass - peptide mass],
            'hidden' - dictionary with 'errortype'.
        Sequence coverage is reported as 0 % for an empty sequence.
        """

        data = {}
        data['params'] = []
        data['errors'] = []
        data['hidden'] = {}

        matchedPeaks = set()
        matchedRanges = []
        errorList = []

        # get range and error for each matched peptide
        for peptide in peptides:
            if peptide[6] == '':
                continue

            # get matched range
            rangeFromTo = peptide[1][1:-1].split('-')
            matchedRanges.append([int(rangeFromTo[0]), int(rangeFromTo[1])])

            # get error list
            for peakIndex in peptide[6].split(';'):
                if peakIndex == '':
                    continue

                # remember peak; set keeps each one only once
                matchedPeaks.add(peakIndex)

                # get error
                peakMass = peaklist[int(peakIndex)][0]
                errorList.append([peakMass, peakMass - peptide[3]])

        # make matched sequence marker
        seqMarker = ['-'] * len(parsedSeq)
        for matched in matchedRanges:
            for x in range(matched[0]-1, matched[1]):
                seqMarker[x] = 'x'

        # str.join works in both Python 2 and 3, string.join is gone in 3
        seqMarker = ''.join(seqMarker)
        seqLength = len(seqMarker)
        matchedAminos = seqMarker.count('x')

        # get sequence coverage; avoid division by zero for empty sequence
        if seqLength:
            seqCoverage = int(round(100 * float(matchedAminos) / float(seqLength)))
        else:
            seqCoverage = 0

        # append data
        data['params'].append(['Tolerance: ', str(self.ctrlData['tolerance']) + ' ' + self.ctrlData['errortype']])
        data['params'].append(['Peaks in peaklist: ', str(len(peaklist))])
        data['params'].append(['Matched peaks: ', str(len(matchedPeaks))])
        data['params'].append(['Missed peaks: ', str(len(peaklist) - len(matchedPeaks))])
        data['params'].append(['Sequence length: ', str(seqLength)])
        data['params'].append(['Sequence coverage: ', str(seqCoverage) + ' %'])
        data['params'].append(['Matched amino acids: ', str(matchedAminos)])
        data['params'].append(['Missed amino acids: ', str(seqMarker.count('-'))])
        data['errors'] = errorList
        data['hidden']['errortype'] = errorType

        return data
    # ----
