###########################################################################
# clive, video extraction utility
# Copyright (C) 2007 Toni Gundogdu
#
# clive is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
###########################################################################

import os
import re
import random
import string
import math
import stat

from urlparse import *
from urllib import *


# Names made available by a star-import of this module.
# NOTE: embed_to_vpageurl() is defined in this module but is not listed
#       here, so a star-import does not bring it into scope.
__all__ = [
  # Functions
  'normalize_url',
  'is_embed_url',
  'is_direct_vurl',
  'parse_fname',
  'parse_fname_direct',
  'parse_url',
  'parse_fromto',
  'pathjoin',
  'readfile',
  'readbatchfile',
  'bytesfile',
  'bytesmb',
  'byteshuman',
  'percent',
  'list_rmdup',
  # Classes
  'CliveError'
]


def normalize_url(url):
  """
  'Normalizes' the given URL.

  Steps, in order:
    1. unquote the URL and replace any '&amp;' with '&'
    2. if the URL carries a 'vidurl=' parameter (long video.google.com
       hyperlinks), keep only the video URL stored in that parameter
    3. insert http:// if the URL has no scheme
    4. convert scheme and host to lower case
    5. convert an embed URL to a video page URL

  Returns the normalized URL. An empty URL is returned unchanged.
  """
  if len(url) == 0: return url

  url = unquote(url)

  # Replace any &amp;
  url = url.replace('&amp;','&')

  # Google: deal with the absurdly long hyperlinks used all over
  # video.google.com. Extract video URL from the given URL.
  # NOTE: Users should always put these long URLs in quotes
  #       at command prompt.
  # The parameter name is located case-insensitively, so the test and
  # the extraction can never disagree (e.g. on 'VidUrl=').
  found = re.search('vidurl=', url, re.IGNORECASE)
  if found:
    url = unquote(url[found.end():]).split('&')[0]

  # Insert http:// if needed. This has to happen before the URL is parsed:
  # without a scheme the host ends up in the path component, so it would
  # be neither lower-cased nor matched by the 'http://youtube.com' rewrite.
  # A URL that already names a scheme (any case, e.g. https://) is kept.
  if not re.match('[a-z][a-z0-9+.-]*://', url, re.IGNORECASE):
    url = 'http://' + url

  # Convert scheme and host to lower case, then re-construct the URL
  (scheme,host,path,params,query,fragment) = urlparse(url)
  url = urlunparse((scheme.lower(),host.lower(),path,params,query,fragment))

  # Embed -> video page if applicable
  return embed_to_vpageurl(url)

def embed_to_vpageurl(url):
  """
  Rewrites an embedded-player URL as the matching video page URL.

  YouTube URLs get '/v/' replaced with '/watch?v=' and a bare
  'http://youtube.com' prefix replaced with 'http://www.youtube.com'.
  Google Video URLs get '/googleplayer.swf?' replaced with '/videoplay?'.
  Any other URL is returned untouched.
  """
  lowered = url.lower()

  # YouTube specific
  if 'youtube.com' in lowered:
    page = url.replace('/v/','/watch?v=')
    return page.replace('http://youtube.com','http://www.youtube.com')

  # Google Video specific
  if 'video.google.' in lowered:
    return url.replace('/googleplayer.swf?','/videoplay?')

  return url

def is_embed_url(url):
  """
  Checks if URL should be parsed for <embed> tags.

  Returns False when the URL contains one of the known video page
  markers, True otherwise.
  """
  vpage_markers = ('/watch?v=', '/videoplay?', 'dailymotion.com')
  for marker in vpage_markers:
    if marker in url:
      return False
  return True

def is_direct_vurl(url):
  """
  Checks if the URL is a direct link to a video file.

  Returns True when the URL contains any of the known fingerprints,
  False otherwise.
  """
  fingerprints = (
    '/videofile/', # gv
    '/get_video?video_id=', # yt
  )
  hits = [fp for fp in fingerprints if fp in url]
  return len(hits) > 0

def _url_param(url, marker):
  """
  Returns the text following the first `marker` in `url`, up to the
  next '&'. Returns '' if `marker` does not occur in `url`.
  """
  pieces = url.split(marker,1)
  if len(pieces) < 2:
    return ''
  return pieces[1].split('&',1)[0]

def parse_fname(title, url, opts):
  """
  Tries to use <title/> for output filename, otherwise
  a random string is used for filename. Appends also file
  extension to a filename, based on the title.

  The title is stripped of the site decorations ('YouTube -',
  '- Google Video', Dailymotion's 'Video ... - ...'), of every character
  not matched by `opts.filename_chars`, and cut to 64 characters.
  The result is substituted into `opts.filename_format`, where

    %t = title, %i = video id, %h = host, %e = file extension

  A video id that cannot be found in `url` is substituted as ''.

  NOTE: Uses `--output-file' if specified (`opts.output_file` is
        returned as is)
  """

  if opts.output_file:
    return opts.output_file

  if title.find('- Google Video') != -1:
    ext = 'avi'
  else:
    # youtube, dailymotion
    ext = 'flv'

  title = title.replace('YouTube -','')
  title = title.replace('- Google Video','')
  if title.find('- Dailymotion') != -1:
    title = title.split(' - ',1)[0]
    # Keep what follows 'Video '; a title without it is used as is
    # instead of failing with IndexError
    pieces = title.split('Video ',1)
    if len(pieces) == 2:
      title = pieces[1]
  title = re.sub('[^%s]' % opts.filename_chars,'',title)

  if len(title) == 0:
    # ascii_letters, unlike string.letters, does not depend on the locale
    title = ''.join([random.choice(string.ascii_letters) for _ in range(8)])

  if len(title) > 64:
    title = title[:64]

  host = ''
  vid = ''
  lowered = url.lower()
  if lowered.find('youtube.com') != -1:
    host = 'youtube'
    vid = _url_param(url,'watch?v=')
  elif lowered.find('video.google.') != -1:
    host = 'vgoogle'
    vid = _url_param(url,'docid=')
  elif lowered.find('dailymotion.com') != -1:
    host = 'dmotion'
    vid = url.rsplit('/',1)[-1].split('_',1)[0]

  fields = {
    '%t': title,
    '%i': vid,
    '%h': host,
    '%e': ext
  }

  # Replace all string identifiers in a single pass, so that an identifier
  # which happens to occur inside the title is not expanded a second time
  return re.sub('%[tihe]', lambda m: fields[m.group(0)],
                opts.filename_format)

def parse_fname_direct(url, opts, say):
  """
  Extract video filename from a direct video URL.

  For GV URLs the filename is the text between '/videofile/' and the
  following '?'. Direct YT URLs carry no filename, so CliveError is
  raised for them. Returns None if no filename can be extracted.

  NOTE: if --output=FILENAME is used, then FILENAME is returned
  NOTE(review): this reads `opts.output` while parse_fname() reads
                `opts.output_file` -- confirm which attribute the
                option parser actually sets. `say` is not used here.
  """
  if opts.output:
    return opts.output

  marker = '/videofile/' # GV
  pos = url.find(marker)
  if pos != -1:
    tail = url[pos+len(marker):]
    qmark = tail.find('?')
    if qmark != -1:
      return tail[:qmark]

  if '/get_video?video_id=' in url: # YT
    raise CliveError('error : use --output with direct youtube video urls')

  return None

def parse_url(url):
  """
  Parses URL, returns host,path

  The returned path is the URL path with the query string appended
  ('/watch?v=abc'); scheme, params and fragment are left out.
  """
  # NOTE: Python >= 2.5 offer less cumbersome way to do this
  (scheme,host,path,params,query,fragment) = urlparse(url)

  # Reconstruct the path with path+query. The unused components are passed
  # as empty strings: urlunparse() expects strings, and mixing None with
  # str components is rejected (TypeError) by newer Python versions.
  path = urlunparse(('','',path,'',query,''))
  return (host,path)

def parse_fromto(data, _from, _to,skip_from=False):
  """
  Basic text parser, extracts text between from-to

  Finds the first `_from` in `data` and the first `_to` that follows it.
  Returns the text starting at `_from` (or right after it, if `skip_from`
  is true) and ending just before `_to`. Returns '' if either marker is
  not found.
  """
  start = data.find(_from)
  if start == -1:
    return ''

  # Look for the end marker only after the start marker. Searching from
  # `start` itself would match a `_to` that occurs inside `_from`
  # (e.g. _from='="', _to='"') and yield an empty or truncated result.
  after_from = start + len(_from)
  end = data.find(_to,after_from)
  if end == -1:
    return ''

  if skip_from: start = after_from
  return data[start:end]

def pathjoin(path1,path2):
  """
  Wrap os.path.join(): joins path1 and path2, but raises CliveError
  first if path1 is non-empty and does not exist.
  """
  missing = len(path1) > 0 and not os.path.exists(path1)
  if missing:
    raise CliveError('error : cannot open %s' % path1)
  return os.path.join(path1,path2)

def readfile(fn):
  """
  Read a file into memory, return file data

  Raises CliveError if `fn` does not exist.
  """
  if not os.path.exists(fn):
    raise CliveError('error : %s not found' % fn)
  f = open(fn,'r')
  try:
    return f.read()
  finally:
    # Close the file even if read() fails
    f.close()

def readbatchfile(fn):
  """
  Read batch file lines into a list

  Every line is stripped of surrounding whitespace and passed through
  normalize_url(). Raises CliveError if `fn` does not exist.

  NOTE: Ignores duplicates and empty lines
  """
  if not os.path.exists(fn):
    raise CliveError('error : %s not found' % fn)
  f = open(fn,'r')
  try:
    lines = f.readlines()
  finally:
    # The file is no longer needed once its lines are in memory
    f.close()
  # strip() rather than rstrip('\r\n'): a whitespace-only line must end up
  # as an empty string, so that it is dropped instead of being turned
  # into the URL 'http:// '
  return list_rmdup([normalize_url(ln.strip()) for ln in lines])

def bytesfile(fn):
  """Return the size of the file `fn`, as reported by os.stat()"""
  info = os.stat(fn)
  return info.st_size

def bytesmb(bytes):
  """Format a byte count as megabytes with three decimals, e.g. '1.500MB'"""
  megabytes = float(bytes) / (1024*1024)
  return "%.3fMB" % megabytes

def byteshuman(bytes):
  """
  Convert bytes to 'human readable' size

  Returns the size with two decimals followed by the largest fitting unit
  out of bytes, KB, MB, GB and TB (1024-based), e.g. '1.50KB'. Sizes
  beyond the TB range are still reported in TB. Zero, negative and
  non-numeric input gives '0.00bytes'.
  """
  units = ['bytes','KB','MB','GB','TB']
  try:
    size = float(bytes)
  except (TypeError, ValueError):
    size = 0.0

  # Also catches NaN, for which every comparison is false
  if not size > 0:
    return '%.2f%s' % (0.0,units[0])

  # Scale down step by step instead of using math.log(): dividing by 1024
  # is exact in floating point, and the index can neither run past the
  # last unit (huge sizes) nor go negative (sizes below one byte)
  i = 0
  while size >= 1024 and i < len(units)-1:
    size = size / 1024
    i += 1
  return '%.2f%s' % (size,units[i])

def percent(i,j):
  """
  Return percent: how many percent `i` is of `j`, as an integer truncated
  towards zero. Returns 0 if either value is 0.
  """
  if i == 0 or j == 0: return 0
  # Multiply before dividing. float(i)/float(j)*100 rounds the quotient
  # first, which can land just below the exact result
  # (29/100*100 == 28.999999999999996) and then truncates to 28.
  return int(float(i)*100/float(j))

def list_rmdup(lst):
  """
  Removes duplicates from the list. Empty strings (more generally: items
  whose length is zero) are removed as well.

  Returns a new list with the remaining items in their original order;
  `lst` itself is not modified. Items that have no length, such as
  numbers, are only checked for duplicates, so 0 is kept.
  """
  unique = []
  for item in lst:
    try:
      empty = len(item) == 0
    except TypeError:
      # No len(): a numeric value, which cannot be 'empty'
      empty = False
    if not empty and item not in unique:
      unique.append(item)
  return unique

class CliveError(Exception):
  """
  Class for Clive specific errors

  The error message is available as the `text` attribute.
  """
  def __init__(self, text):
    # Initialise the base class as well, so that the message is also
    # carried in `args` and shown by str() and in tracebacks
    Exception.__init__(self, text)
    self.text = text
