#!/usr/bin/python
# -*- Mode: python -*-
#
# Copyright (C) 2000 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://www.lyra.org/viewcvs/license-1.html.
#
# Contact information:
#   Greg Stein, PO Box 760, Palo Alto, CA, 94302
#   gstein@lyra.org, http://www.lyra.org/viewcvs/
#
# -----------------------------------------------------------------------
#
# updates SQL database with new commit records
#
# -----------------------------------------------------------------------
#

#########################################################################
#
# INSTALL-TIME CONFIGURATION
#
# These values will be set during the installation process. During
# development, they will remain None.
#

# Directory to put at the front of sys.path; while it is None the
# relative '../lib' fallback below is used instead.
LIBRARY_DIR = None
# Not read anywhere in the visible part of this file.
# NOTE(review): presumably the configuration file path filled in by the
# installer -- confirm against the install script.
CONF_PATHNAME = None

# Adjust sys.path to include our library directory
import sys

if LIBRARY_DIR:
  # an explicit library location takes precedence over everything
  # already on sys.path
  sys.path.insert(0, LIBRARY_DIR)
else:
  # no library location configured: prepend a path that is relative to
  # the current working directory
  sys.path[:0] = ['../lib']	# any other places to look?

#########################################################################

import os
import string
import getopt
import re
import cvsdb
import rlog
import config
import vclib.bincvs

DEBUG_FLAG = 0

## output functions
def debug(text):
    if DEBUG_FLAG:
        print 'DEBUG(viewcvs-loginfo):', text

def warning(text):
    print 'WARNING(viewcvs-loginfo):', text

def error(text):
    print 'ERROR(viewcvs-loginfo):', text
    sys.exit(1)

class FileData:
    """One file entry from a loginfo argument.

    Stores the file name, its directory and the old/new revision strings,
    and derives self.ctype from the revisions:
      "added"   -- old_version is the literal string 'NONE'
      "removed" -- otherwise, new_version is the literal string 'NONE'
      "changed" -- neither revision is 'NONE'
    """
    def __init__(self, file, directory, old_version, new_version):
        self.file = file
        self.directory = directory
        self.old_version = old_version
        self.new_version = new_version

        ## classify the commit; the old revision is checked first, so an
        ## entry with both revisions 'NONE' counts as "added"
        ctype = "changed"
        if old_version == 'NONE':
            ctype = "added"
        elif new_version == 'NONE':
            ctype = "removed"
        self.ctype = ctype

def CommitFromFileData(repos, file_data):
    """Build the commit object for one added or changed file.

    repos     -- repository object; its rootpath attribute is the base
                 of the RCS file path
    file_data -- FileData describing the file and its new revision

    Returns the first commit that cvsdb.RLogDataToCommitList() produces
    from the rlog data of file_data.new_version, with its type set
    according to file_data.ctype.
    """
    ## full path of the RCS file inside the repository
    rcs_path = os.path.join(repos.rootpath, file_data.directory, file_data.file)

    ## rlog just the new revision and turn the result into commit objects;
    ## only the first one is used
    revision_log = rlog.GetRLogData(repos, rcs_path, file_data.new_version)
    commit = cvsdb.RLogDataToCommitList(repos.rootpath, revision_log)[0]

    ## carry the change type over from the file_data classification; any
    ## other ctype value leaves the commit's type untouched
    ctype = file_data.ctype
    if ctype == "added":
        commit.SetTypeAdd()
    elif ctype == "removed":
        commit.SetTypeRemove()
    elif ctype == "changed":
        commit.SetTypeChange()

    return commit

def GetUnrecordedCommitList(repos, file_data):
    """Return the result of cvsdb.GetUnrecordedCommitList() for the RCS
    file that file_data names below repos.rootpath."""
    path_parts = (repos.rootpath, file_data.directory, file_data.file)
    rcs_path = os.path.join(*path_parts)
    return cvsdb.GetUnrecordedCommitList(repos, rcs_path)

# Matches the ",OLD,NEW" revision pair that follows a file name in the
# loginfo argument.  Each revision is either the literal NONE or a dotted
# number with an even count of components (1.2, 1.2.4.1, ...).  The pair
# must be followed by a space or by the end of the string.
_re_revisions = re.compile(
  r",(?P<old>(?:\d+\.\d+)(?:\.\d+\.\d+)*|NONE)"  # comma and first revision number
  r",(?P<new>(?:\d+\.\d+)(?:\.\d+\.\d+)*|NONE)"  # comma and second revision number
  r"(?:$| )"                                # space or end of string
)

def CleanDirectory(dir):
    """Return dir normalized with os.path.normcase() and with all leading
    and trailing path separators (os.sep) removed.

    An empty string, or a string made up only of separators, yields ''.
    """
    ## clean up the directory the following way: we don't want it
    ## to begin with a path separator, and we don't want it to end
    ## with a path separator
    directory = os.path.normcase(dir)

    ## compare one-character slices rather than directory[0] and
    ## directory[-1]: indexing raises IndexError once the string is (or
    ## has been stripped down to) empty, a slice just yields ''
    while directory[:1] == os.sep:
        directory = directory[1:]
    while directory[-1:] == os.sep:
        directory = directory[:-1]

    return directory

def HeuristicArgParse(s, repository):
  """Current versions of CVS (except for CVSNT) do not escape spaces in file
  and directory names that are passed to the loginfo handler. Since the input
  to loginfo is a space separated string, this can lead to ambiguities. This
  function attempts to guess intelligently which spaces are separators and
  which are part of file or directory names. It disambiguates spaces in
  filenames from the separator spaces between files by assuming that every
  space which is preceded by two well-formed revision numbers is in fact a
  separator. It disambiguates the first separator space from spaces in the
  directory name by choosing the longest possible directory name that actually
  exists in the repository.

  s is the loginfo argument string and repository the repository root used
  to check which directory names exist.

  Returns None for the "New directory" and "Imported sources" notifications,
  otherwise a list of FileData objects, one per file found in s. If no
  revision pair or no existing directory is found at the start of s, error()
  is called, which terminates the process. If text after the first file
  cannot be interpreted, a warning is printed and the files parsed up to
  that point are returned."""

  # notifications that carry no per-file revision information
  if s[-16:] == ' - New directory' or s[-19:] == ' - Imported sources':
    return None

  file_data_list = []
  start = 0

  while 1:
    m = _re_revisions.search(s, start)

    if start == 0:
      # first entry: everything before the revision pair is the directory
      # name followed by the first file name
      if m is None:
        error('Argument "%s" does not contain any revision numbers' % s)

      directory, filename = HeuristicArgParseDirectory(s[:m.start()], repository)
      if directory is None:
        error('Argument "%s" does not start with a valid directory' % s)

      directory = CleanDirectory(directory)

      debug('Directory name is "%s"' % directory)

    else:
      if m is None:
        # No further revision pair: report the leftover text and stop.
        # Without the break the code below would dereference m, which is
        # None at this point.
        warning('Failed to interpret past position %i in the loginfo argument, '
          'leftover string is "%s"' % (start, s[start:]))
        break

      # later entries: the file name is the text between the previous
      # revision pair and this one
      filename = s[start:m.start()]

    old_version, new_version = m.group('old', 'new')

    file_data = FileData(os.path.normcase(filename), directory, old_version, new_version)
    file_data_list.append(file_data)

    debug('File "%s", old revision %s, new revision %s'
      % (filename, old_version, new_version))

    # continue right after the revision pair (and its separator space)
    start = m.end()

    if start == len(s): break

  return file_data_list
    
def HeuristicArgParseDirectory(s, repository):
  """Splits the first part of the argument string into a directory name
  and a file name, either of which may contain spaces. Returns the longest
  possible directory name that actually exists.

  s is the text in front of the first revision pair and repository is the
  path the candidate directory names are joined to before they are tested
  with os.path.isdir().

  Returns a (directory, filename) tuple. Both parts contain at least one of
  the space separated pieces of s. Returns (None, None) when no split yields
  an existing directory, which includes the case of s containing no space."""
  parts = s.split(" ")

  # try the split points from right to left so that the longest directory
  # name is tested first; i never reaches 0 or len(parts), which keeps both
  # the directory and the file name non-empty lists of pieces
  for i in range(len(parts)-1, 0, -1):
    directory = " ".join(parts[:i])
    filename = " ".join(parts[i:])
    if os.path.isdir(os.path.join(repository, directory)):
      return directory, filename

  return None, None

# Matches one complete "FILENAME,OLD,NEW" item of a CVSNT loginfo
# argument.  The file name group is greedy, so it keeps any commas of its
# own; the two revisions (dotted numbers with an even count of components,
# or the literal NONE) must be the last thing in the string.
_re_cvsnt_revisions = re.compile(
  r"(?P<filename>.*)"                      # file name (may contain commas)
  r",(?P<old>(?:\d+\.\d+)(?:\.\d+\.\d+)*|NONE)"  # comma and first revision number
  r",(?P<new>(?:\d+\.\d+)(?:\.\d+\.\d+)*|NONE)"  # comma and second revision number
  r"$"                                       # end of string
)

def CvsNtArgParse(s, repository):
  """CVSNT escapes all spaces in filenames and directory names with
  backslashes, so the argument can be split unambiguously with NextFile().

  s is the loginfo argument string. repository is not used by this parser;
  it is accepted so that all parsers can be called the same way.

  Returns None for the "New directory" and "Imported sources" notifications,
  otherwise a list of FileData objects. Items that do not look like
  "filename,old,new" are skipped with a warning. If s does not even contain
  a directory name, error() is called, which terminates the process."""

  if s[-18:] == r' -\ New\ directory':
    return None

  if s[-21:] == r' -\ Imported\ sources':
    return None

  file_data_list = []

  # the first item is the directory, every following item describes a file
  directory, pos = NextFile(s)
  if directory is None:
    # NextFile() found nothing at all (empty argument); report it instead
    # of failing inside CleanDirectory()
    error('Argument "%s" does not start with a directory name' % s)

  debug('Directory name is "%s"' % directory)

  directory = CleanDirectory(directory)

  while 1:
    fileinfo, pos = NextFile(s, pos)
    if fileinfo is None:
      break

    # diagnostic output only; goes through debug() so that it is not shown
    # on every commit
    debug('File information is "%s"' % fileinfo)

    m = _re_cvsnt_revisions.match(fileinfo)
    if m is None:
      warning('Can\'t parse file information in "%s"' % fileinfo)
      continue

    filename, old_revision, new_revision = m.group('filename', 'old', 'new')

    debug('File "%s", old revision %s, new revision %s'
      % (filename, old_revision, new_revision))

    file_data = FileData(os.path.normcase(filename), directory, old_revision, new_revision)
    file_data_list.append(file_data)

  return file_data_list

def NextFile(s, pos = 0):
  """Read the next space separated item of s, starting at index pos.

  A backslash makes the character after it part of the item, whatever it
  is, and is itself dropped; an unescaped space ends the item.

  Returns (item, next_pos). When an unescaped space ends the item,
  next_pos is the index right after that space and the item may be ''.
  When the end of s is reached, the item is the collected text, or None if
  nothing was collected, and next_pos lies past the end of s so that a
  further call yields None."""
  collected = []
  after_backslash = 0

  for idx in range(pos, len(s)):
    ch = s[idx]
    if after_backslash:
      # escaped character: taken literally
      collected.append(ch)
      after_backslash = 0
    elif ch == '\\':
      after_backslash = 1
    elif ch == ' ':
      return ''.join(collected), idx + 1
    else:
      collected.append(ch)

  # ran off the end of the string without meeting a separator
  item = ''.join(collected)
  next_pos = max(pos, len(s)) + 1
  if item:
    return item, next_pos
  return None, next_pos

def BrokenCvsNtArgParse(s, repository):
  """Some earlier versions of CVSNT mistakenly escaped filenames twice, see
  http://cvs.cvsnt.org/cgi-bin/viewcvs.cgi/cvsnt/src/logmsg.c
  http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=13
  http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=33

  One level of backslash escaping is removed from s here and the result
  is handed to CvsNtArgParse(), whose return value is returned.
  """
  unescaped = []
  i = 0
  length = len(s)
  while i < length:
    if s[i] == '\\':
      # drop the backslash and keep the character it protects; a
      # backslash at the very end of s protects nothing and just vanishes
      i += 1
      if i < length:
        unescaped.append(s[i])
    else:
      unescaped.append(s[i])
    i += 1
  return CvsNtArgParse(''.join(unescaped), repository)

def ProcessLoginfo(rootpath, file_data_list):
    """Turn the parsed FileData objects into commits and hand them to the
    database.

    rootpath       -- repository root path given to BinCVSRepository
    file_data_list -- list of FileData objects from one of the parsers
    """
    ## XXX This is a somewhat dirty hack:
    ## cvsdb has already loaded the configuration file and exposes it as
    ## cvsdb.cfg, so borrow it from there to pass it on to the repository
    ## object (which rlog is given further down the call chain).
    repos = vclib.bincvs.BinCVSRepository(
        None, rootpath, cvsdb.cfg.general)

    ## collect one or more Commit objects per FileData
    commits = []
    for entry in file_data_list:
        if entry.ctype != "removed":
            commits.append(CommitFromFileData(repos, entry))
            continue

        ## XXX: this is nasty: for a removed file we are not given enough
        ##      information to find the revision in the rlog output!
        ##      So instead everything in the removed file is rlog'ed and
        ##      every commit not yet in the database is added
        commits = commits + GetUnrecordedCommitList(repos, entry)

    ## add to the database
    database = cvsdb.ConnectDatabase()
    database.AddCommitList(commits)


## MAIN
if __name__ == '__main__':
    ## the repository root is taken from the environment
    repository = os.environ.get('CVSROOT')
    if repository is None:
        error('CVSROOT not in environment')
    repository = os.path.normcase(repository)

    debug('Repository name is "%s"' % repository)

    ## clean up the repository string: remove any trailing path separator
    while repository[-1] == os.sep:
        repository = repository[:-1]

    ## parse arguments
    argc = len(sys.argv)

    if argc > 1:
        # the first argument should contain file version information
        arg = sys.argv[1]
    else:
        # without arguments the version information is read from the first
        # line of input, just like previous versions of viewcvs did
        arg = sys.stdin.readline().rstrip()

    if argc > 2:
        # an optional second argument names the parser that should
        # interpret the version information
        parsers = {
            'cvs': HeuristicArgParse,
            'cvsnt': CvsNtArgParse,
            'brokencvsnt': BrokenCvsNtArgParse,
        }
        fun = parsers.get(sys.argv[2])
        if fun is None:
            error('Bad arguments')
    elif sys.platform == "win32":
        # no second argument: guess the parser from the operating system.
        # Since CVSNT now runs on Windows and Linux, the guess isn't
        # necessarily correct
        fun = CvsNtArgParse
    else:
        fun = HeuristicArgParse

    if argc > 3:
        error('Bad arguments')

    file_list = fun(arg, repository)

    if file_list is not None:
        ProcessLoginfo(repository, file_list)
    else:
        debug('Directory was added, nothing to do')

    sys.exit(0)
