#! /usr/bin/perl
#* ============================================================
# * File        : wwitv.pl
# * Version     : 0.1
# * Author      : Eric
# * Date        : 2006-09-11
# * Description: parser for http://wwitv.com/portal.htm,
# *              station list frame
# *
# *
# *  
# * ============================================================ */

use English;
use XML::DOM;
use HTML::Entities;

#------------------------------------------------------------------------------
# Init
#------------------------------------------------------------------------------

&read_parse();    # get commandline parameters into @in
$source = $in[0]; # source filename from command line

my $doc = XML::DOM::Document->new;
my $head = $doc->createXMLDecl ('1.0');
my $root = $doc->createElement('items');

sub newNode
{
  local $name  = shift;
  local $value = shift;
  local $node = $doc->createElement($name);
  local $text = $doc->createTextNode($value);
  $node->appendChild($text);
  
  return $node;
}

#------------------------------------------------------------------------------
# read file into $data
#------------------------------------------------------------------------------

$datafile = $source;
open( INFO, "<$datafile" );      # Open file for reading
undef $/;
$data = <INFO>;                 # Read all
close(INFO);

#------------------------------------------------------------------------------
# Parse playlist
#------------------------------------------------------------------------------


# <tr><td class="name" width="120"><font class="new">.</font><a class="travel" href="http://europa.eu.int/comm/avservices/ebs/welcome_en.cfm" target="TV">EBS</a></td><td class="qr">

#<a class="r" href="javascript:listen('http://europa.eu.int/comm/avservices/ebs/welcome_en.cfm','http://wwitv.com/online/7168.ram',0)">100K</a></td>

#</td><td class="q"><center><font class="hd2">Y</td><td class="qe"><font class="hd2">The European Union's TV news agency.</td></tr>

sub dump_lines
{
  # get rid of newlines
  $data =~ s/\n/ /g;
  # multiple spaces
  $data =~ s/\s\s+/ /g;
  # remove all up to Live? column name
  $data =~ s/^.*Live\?//g;
  # newline for <tr  (stations are listed in table rows)
  $data =~ s/<tr/\n/gi;
  # remove some formatting
  $data =~ s/<\/?font[^>]*>//gi;
  $data =~ s/<\/?b[^>]*>//gi;
  
  @lines = split ( "\n", $data);
  
  foreach $line(@lines)
  {

    @cells = split ( "<td", $line);

    $name = "";
    
    if (@matches = ( $cells[1] =~ m/href[^>]+>([^<]*)<\/a>/gi ) )
    {
      $name = @matches[0];
    }

    if ( ($name ne "") && ($cells[2] =~ m/href="[^,]+,'([^']+'[^>]+>[^<]+)<\/a>/gi ) )
    {

        $url = $1;
        $url =~ m/([^']+)'[^>]+>(.*)/;

        $url = $1;
        $url_name = $2;
    }
    
    if ( ($name ne "") && ($cells[4] =~ m/>([^<]+)<\/td>\s*<\/tr>.*/gi ) )
    {
        $item = $doc->createElement('item');
        $root->appendChild($item);

        $idname = decode_entities($name . ", $1 ($url_name)");
        
        $item->appendChild( newNode('name', "$idname") );
        $item->appendChild( newNode('url', $url) );
        $item->appendChild( newNode('descr', "$idname") );
        $item->appendChild( newNode('handler', "default") );
      
    }

  }
}

#------------------------------------------------------------------------------
# search url's in $data and place them in special format
#------------------------------------------------------------------------------

&dump_lines();

print $head->toString;
print $root->toString;
print "\n";

#--------------------------------------------------------------------------------
# get command line parameters
#--------------------------------------------------------------------------------

sub read_parse 
{
  local (*in) = @_ if @_;
  local ($i);
  push(@in, @ARGV);
  foreach $i (0 .. $#in) { $in[$i] =~ s/\+/ /g;}
  return scalar(@in);
}



