#!/usr/bin/perl

#  Copyright (C) 2012 Morten Grouleff
#
#  Derived from "tv_grab_dk_dr_2009" by Thomas Horsten <thomas@horsten.com>
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

######################################################################
# Udover XMLTV kræves Parse::RecDescent og DateTime, som på
# Debian / Ubuntu kan installeres med:
#   sudo aptitude install libparse-recdescent-perl libdatetime-perl
#
# Kun testet på Linux. Beta version :)
# Kommentarer til: Thomas Horsten <thomas@horsten.com>
#
# History
#   Per Baekgaard <baekgaard@b4net.dk> 2009-08-13 20:39 CEST
#      - Minor changes supporting the released version on dr.dk
#
#   Morten Grouleff (morten@grouleff.com) 
#      - Rewritten to use the new JSON api instead of screenscraping from the html.
#


use strict;
use warnings;

use open qw/:std :utf8/;

use JSON;
use XMLTV;
use XMLTV::ProgressBar;
use XMLTV::Options qw/ParseOptions/;
use XMLTV::Configure::Writer;

use LWP::UserAgent;
use IO::Scalar;

use DateTime;

# TODO: This ought be in a config file, I suppose. But for now, it lives here.
my @title_fixups = (); # regexp => [ $title, $subtitle ]. Or when prefixed with "#", regexp => [ $title, #$episode-num ].
# DR renames stuff...:
push @title_fixups, ['^Historien om\s+(.+)$', [ '\'Historien om\'', '$1']];
push @title_fixups, ['^DR2\s*Premiere\s+(.+)$', ['\'DR2 Premiere\'', '$1']];
# Extract episode number if possible:
push @title_fixups, ['^(.+)\s*\(\s*(\d{1,3})\s*\)(\s*.*)$', ['$1 . $3', '#$2']];
push @title_fixups, ['^(.+)\s*(\(\d+\:\d+\))(\s*.*)$', ['$1 . $3', '#$2']]; # episode num copy
# Move to sub-title, when " - " or " : " is in title.
push @title_fixups, ['^(.+)\s*\:\s+(.+)$', ['$1', '$2']];
push @title_fixups, ['^(.+)\s+\-\s+(.+)$', ['$1', '$2']];

my $debug = 0; # Plenty of extra output.

my $grabber_name = 'tv_grab_dk_dr';
my $id_prefix = '.dr.dk';

# FR#109  my $default_root_url = 'http://www.dr.dk/tv/oversigt/json/guide/';
my $default_root_url = 'http://www.dr.dk/tjenester/program-guide/json/guide/';

my %grabber_tags = ( 'source-info-url'	   =>
		     # FR109  'http://www.dr.dk/tv/oversigt/json/guide/',
				 'http://www.dr.dk/tjenester/program-guide/json/guide/',
		     'source-info-name'	   =>
		     'DR TV Oversigt',
		     'generator-info-name' =>
		     'XMLTV',
		     'generator-info-url'  =>
		     'http://niels.dybdahl.dk/xmltvdk/',
    );

# Time zone the server uses
my $server_tz = 'Europe/Copenhagen';
my $LocalTZ = DateTime::TimeZone->new( name => $server_tz );

sub config_stage
{
    my( $stage, $conf ) = @_;
    my $result;

    $stage eq "start" || die "Unknown stage $stage";

    my $writer = new XMLTV::Configure::Writer( OUTPUT => \$result,
					       encoding => 'utf-8' );
    if( $stage eq 'start' ) {
	$writer->start( { grabber => $grabber_name } );
	$writer->start_selectone( {
	    id => 'accept-copyright-disclaimer',
	    title => [ [ 'Acceptér ansvarsfraskrivelse', 'da'],
		       [ 'Accept disclaimer', 'en'] ],
	    description => [ [ "Data fra DR's programoversigt er " .
			       "beskyttet af loven om ophavsret, " .
			       "og må kun anvendes til personlige, " .
			       "ikke-kommercielle formål. " .
			       "Dette programs forfatter(e) kan ikke " .
			       "holdes ansvarlig for evt. misbrug.", 'da' ],
			     [ "Data from DR's program guide is " .
			       "protected by copyright law and may " .
			       "only be used for personal, non-commercial " .
			       "purposes. The author(s) " .
			       "of this program accept no responsibility " .
			       "for any mis-use.",
			       'en' ] ] } );
	$writer->write_option( {
	    value=>'reject',
	    text=> [ [ 'Jeg accepterer IKKE betingelserne', 'da'],
		     [ 'I do NOT accept these conditions', 'en'] ] } );
	$writer->write_option( {
	    value=>'accept',
	    text=> [ [ 'Jeg accepterer betingelserne', 'da'],
		     [ 'I accept these conditions', 'en'] ] } );
	$writer->end_selectone();
	$writer->start_selectone( {
	    id => 'include-radio',
	    title => [ [ 'Medtag radio-kanaler', 'da'],
		       [ 'Include radio channels', 'en'] ],
	    description => [ [ "DR's programoversigt indeholder " .
			       "radiokanaler, du kan her vælge " .
			       "om de skal medtages i listen.", 'da' ],
			     [ "DR's program guide includes radio " .
			       "channels, here you can choose whether " .
			       "to include them.", 'en' ] ] } );
	$writer->write_option( {
	    value=>'0',
	    text=> [ [ 'Udelad radio-kanaler', 'da'],
		     [ 'Exclude radio channels', 'en'] ] } );
	$writer->write_option( {
	    value=>'1',
	    text=> [ [ 'Medtag radio-kanaler', 'da'],
		     [ 'Include radio channels', 'en'] ] } );
	$writer->end_selectone();

	$writer->write_string( {
	    id => 'root-url',
	    title => [ [ 'Root URL for grabbing data', 'en' ],
		       [ 'Grund-URL for grabberen', 'da' ] ],
	    description => [ [ 'Provide the URL of DR\'s program guide ' .
			       'data data engine, ' .
			       'including the trailing slash.', 'en' ],
			     [ 'Indtast URL\'en på DR\'s tv-oversigs data ' .
			       'engine, inklusive den ' .
			       'efterfølgende skråstreg.', 'da' ] ],
	    default => $default_root_url } );

	$writer->write_string( {
	    id => 'episode-in-subtitle',
	    title => [ [ 'Should we include the episode number as default subtitle', 'en' ],
		       [ 'Indsæt afsnits-nr som undertitel?', 'da' ] ],
	    description => [ [ '  When set, insert the episode number as a subtitle with the configured string as prefix. ' .
			       '  when there is a subtitle already, prepend the episode number, ' .
			       '  Leave empty to disable this feature.', 'en' ],
			     [ '  Denne tekst vælger hvad der skal indsættes i undertitlen foran afsnitsnummeret. ' .
			       '  Når der er en undertitel i forvejen, indsættes dette blot før denne.' .
			       '  Sæt til tom for at slå indsættelsen fra. ', 'da' ] ],
	    default => '' } );

    }
    $writer->end( 'select-channels' );

    return $result;
}

sub getUrl($) {
    my ( $url ) = @_;
    my $ua = LWP::UserAgent->new;
    $ua->agent("xmltv/$XMLTV::VERSION");
    my $req = HTTP::Request->new(GET => $url);
    $req->header('Accept' => 'Accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
    my $encoding = 'utf-8';
    $req->header('Accept-Charset' => $encoding);
    my $res = $ua->request($req);
    if ($res->is_success) {
	if ($res->header('Content-Type') && $res->header('Content-Type') =~ /[^\/]+\/[^\/]+\;\s*charset=(.*)$/) {
	    $encoding = $1;
	}
	return $res->decoded_content(charset => $encoding);
    }
    else {
	print STDERR "Error: " . $res->status_line . " on url: " . $url . "\n";
	return 0;
    }
}

sub list_channels($$)
{
    my( $conf, $opt ) = @_;
    my $chanlist = &get_channel_list($conf);
    #print Dumper $chanlist;
    my $result="";
    my $fh = new IO::Scalar \$result;
    my $oldfh = select( $fh );
    my $writer = new XMLTV::Writer(OUTPUT => $fh, encoding => 'utf-8');
    $writer->start(\%grabber_tags);
    $writer->write_channels($chanlist);
    $writer->end();
    select( $oldfh );
    $fh->close();

    #print "RESULT:\n$result\n";
    return $result;
}

sub get_channel_list($)
{
    my ( $conf ) = @_;
    my $drlist = get_dr_channel_list($conf);
    #print Dumper $drlist;
    my %chanlist = ();
    foreach my $chan (@$drlist) {
	my $shortid = $chan->{'Id'};
	$shortid =~ s/^[wr]\_//;
	my $id = $shortid . $id_prefix;
	# tv_validate_file barfs if ID contains + as for d3+.dr.dk
	$id =~ s/\+/plus/g;
	$chanlist{$id}->{'id'} = $id;
	$chanlist{$id}->{'_dr_listing_id'} = $chan->{'Id'};
#	$chanlist{$id}->{'icon'} = [{ 'src'=>$conf->{'root-url'}->[0] .
#					 "Images/Logos/" . $shortid .
#					 ".gif" }];
	$chanlist{$id}->{'_source_url'} = $chan->{'sourceUrl'};
	my $chan_lang; # = $dr_language_codes{$chan->{'country_code'}};
	$chan_lang = 'da'; # unless $chan_lang;
	$chanlist{$id}->{'_lang'} = $chan_lang;
	$chanlist{$id}->{'display-name'} =
	    [ [ $chan->{'Name'}, $chan_lang ]];
	$chanlist{$id}->{'_name'} = $chan->{'name'};
    }

    return \%chanlist;
}

sub get_dr_channel_list($)
{
    my ( $conf ) = @_;
    my @types = ('tv');
    if ($conf->{'include-radio'}[0] eq '1') {
	push (@types, 'radio');
    }
    my @results = ();
    foreach my $type (@types) {
	print STDERR "TYPE: $type\n" if $debug;
	my $url = $conf->{'root-url'}->[0] . 'channels?mediaType=' . $type;
	#  http://www.dr.dk/tjenester/program-guide/json/guide/channels?mediaType=tv
	print STDERR "Get: $url\n" if $debug;
	my $content = getUrl($url) || return 0;
	print STDERR "Got: $content\n"  if $debug;
	my $channels = ();
	eval {
	    $channels = from_json( $content, { utf8  => 1 } );
			1;
	} or do {
	    warn "Failed to get channels: $_";
	};
	#print Dumper $parsed;
	my $c = $channels->{'Channels'};
	push(@results, @$c);
    }
    return \@results;
}

sub json_date_to_xmltv($)
{
    # Format: \/Date(1352178000000)\/ -> 20090613123456 +0100
    my ($d) = @_;
    if ($d =~ m/Date\((\d+)([+-]\d\d\d\d)?\)/ ) {
	my ( $epoch_milliseconds, $timezone ) = ( $1, $2 );
	my $dt = DateTime->from_epoch( epoch => $epoch_milliseconds / 1000 );
	if ($timezone) {
	    $dt->set_time_zone($timezone);
	}
	$dt->set_time_zone($LocalTZ);
	return $dt->format_cldr('yyyyMMddHHmmss ZZZ');
    } else {
	return 0;
    }
}

sub get_schedules($$$)
{
    my ($conf,  $chan, $date ) = @_;

    my @schedules = ();

    my $url = $conf->{'root-url'}->[0] .
	'schedule?startTimesectionId=1&days=' . $date . '&channelid=' . $chan->{'_dr_listing_id'} . '&oneTimesectionOnly=false&mediaType=tv';
	 # http://www.dr.dk/tjenester/program-guide/json/guide/schedule?startTimesectionId=1&days=0&channelid=w_DR2&oneTimesectionOnly=false&mediaType=tv
    print STDERR "Get: $url\n" if $debug;
    my $content = getUrl($url) || return 0;
    print STDERR "Got: $content\n" if $debug;

    my @results = ();
    
    my $parsed = ();
    eval {
	$parsed = from_json( $content, { utf8  => 1 } );
	1;
    } or do {
	# Ignoring failed json parses.
    };

    if (!$parsed || ("HASH" ne ref $parsed))  {
	&warning("(BUG?) Parser barfed while processing channel " .
		 $chan->{'_name'}. " date ". $date . " (empty result?) URL: $url\n");
	print STDERR "Content: $content\n";
	print STDERR Dumper $parsed;
	return \@results;
    } elsif ( ("ARRAY" ne ref ($parsed->{'TimeSection'} )) ) {
	    &warning("(BUG?) Parser barfed while processing channel " .
		    $chan->{'_name'}. " date ". $date . " (empty result?) URL: $url\n");
	    &warning("Result type: " . ref ($parsed) . "\n");
	    print STDERR "Content: $content\n";
	    print STDERR Dumper $parsed;
	return \@results;
    }

    foreach my $section (@{$parsed->{'TimeSection'}}) {
	foreach my $listing (@{$section->{'Programs'}}) {
	    my %p = ();
	    # attributes
	    $p{'channel'} = $chan->{'id'};
	    $p{'start'} = json_date_to_xmltv($listing->{'StartDateTime'});
	    $p{'stop'} = json_date_to_xmltv($listing->{'EndDateTime'});
	    
	    my $title = $listing->{'Title'};
	    if (@title_fixups && defined $title) {
		my $subtitle = '';
		my $episodenum;
		for my $fixup (@title_fixups) {
		    my $match = $fixup->[0];
		    #print "Inspecting '$title' for match $match\n";
		    if ($title =~ m/$match/i) {
			# The fixups contains backrefs to the regexp result. Don't make a match here, it will ruin it.
			my $placements = $fixup->[1];
			#print "Matched, applying '" . $placements->[0] . "', '" . $placements->[1] . "'\n";
			$title = eval $placements->[0] if defined $placements->[0];
			if (defined $placements->[1]) {
			    my $str = $placements->[1];
			    if (substr($str, 0, 1) eq '#') {
				my $e = eval substr($str, 1);
				if ($e =~ m/(\d+)\:(\d+)/) {
				    $episodenum = $1;
				    $p{'episode-num'} = [ [ " . " . ($1 - 1) . "/" . $2 . " . ", "xmltv_ns" ] ];
				} else {
				    $episodenum = $e;
				    $p{'episode-num'} = [ [ " . " . ($e - 1) . " . ", "xmltv_ns" ] ];
                               }
			    } else {
				$subtitle .= eval $placements->[1];
			    }
			}
		    }
		}

		########################################
		# Sæt afsnitsnummer ind først i subtitle, hvis ønsket.
		my $episode_in_subtitle = $conf->{'episode-in-subtitle'};
		if (defined $episode_in_subtitle && defined $episode_in_subtitle->[0]) {
		    if (defined $episodenum) {
			if ($subtitle eq '') {
			    $subtitle = $episode_in_subtitle->[0] . ' ' . $episodenum;
			} else {
			    $subtitle = $episode_in_subtitle->[0] . ' ' . $episodenum . ': ' . $subtitle;
			}
		    }
		}

		if ($subtitle ne '') {
		    $p{'sub-title'} = [ [ $subtitle,
					  $chan->{'_lang'} ] ];
		}
	    }	

	    my @title;
	    push (@title, [ $title, $chan->{'_lang'} ]);
	    if ($listing->{'OriginalTitle'}) {
		my $original_lang = 'en'; # guess_original_language($listing);
		if (!$original_lang) {
		    $original_lang = 'en';
		}
		push (@title, [ $listing->{'OriginalTitle'}, $original_lang ]);
	    }
	    $p{'title'} = \@title;

	    my $description;
	    if ($listing->{'HasDescription'} eq 'true') {
		my $desc_url = $conf->{'root-url'}->[0] . 'ProgramDetails/?id=' . $listing->{'Id'} . '&days=' . $date . '&channelid=' . $chan->{'_dr_listing_id'} . '&mediaType=tv';
		# http://www.dr.dk/tjenester/program-guide/json/guide/ProgramDetails/?id=dr.dk/mas/whatson/216871786813&days=0&channelid=w_DR2&mediaType=tv
		print STDERR "Get: $desc_url\n" if $debug;
		my $desc_content = getUrl($desc_url);
		if ($desc_content) {
		    my $json;
		    eval {
			$json = from_json( $desc_content, { utf8  => 1 } );
			$description = $json->{'Description'} . ' ';
			$description =~ s/\<br\>/. /g;
			$description =~ s/\s*\n\s*/. /g;
			1;
		    } or do {
			# Ignoring failed json parses, leaving desc empty.
		    };
		}
	    }

	    ########################################
	    # punchline som ekstra beskrivelse.
	    if ($listing->{'Punchline'}) {
		my $pl = $listing->{'Punchline'};
		# Der er nogle gange linjeskift i
		# punchlines, hvilket XMLTV ikke bryder
		# sig om. 
		$pl =~ s/\s*\n\s*/. /g;
		$pl =~ s/\<br\>/. /g;
		$description .= $pl;
	    }

	    if ($description) {
		$p{'desc'} = [ [ $description, $chan->{'_lang'} ] ];
	    }

	    ########################################
	    # Genudsendelse, HD, etc.
	    if ($listing->{'IsRerun'} eq 'true') {
		$p{'previously-shown'} = {};
	    }
	    if ($listing->{'DisplayHD'}) {
		$p{'video'}{'quality'} = 'HDTV';
	    } 
	    
	    if ($listing->{'Display16_9'}) {
		$p{'video'}{'aspect'} = '16:9';
	    }

	    ########################################
	    # Genre/kategori
	    # Her bruges genre_text, vi kunne også
	    # bruge genre_code og have en tabel
	    # til at få i det mindste den generelle
	    # kategori på engelsk (farver i MythTV!)
	    # TODO: Fix engelske kategorier
	    if ($listing->{'Category'} &&
		$listing->{'Category'} ne 'Ukategoriseret' &&
		$listing->{'Category'} ne 'Ukendt' &&
		$listing->{'Category'} ne 'Andre') {
		$p{'category'} = [ [ $listing->{'Category'}, 'da']];
	    }

	    ########################################
	    # URL
	    if ($listing->{'ProgramSeriesSiteUrl'}) {
		$p{'url'} = [ $listing->{'ProgramSeriesSiteUrl'} ];
	    }

	    # Sanity checks..
	    if (!$p{'start'}) { warning("No 'START' attribute"); next; }
	    if (!$p{'stop'}) { warning("No 'START' attribute"); next; }
	    if (!$p{'title'}) { warning("No 'TITLE' attribute"); next; }

	    #print Dumper \%p;
	    push(@results, \%p);
	}
    }
    return \@results;
}

my $opt;
my $conf;
( $opt, $conf ) = ParseOptions( {	
    grabber_name => $grabber_name,	
    capabilities => [qw/baseline manualconfig tkconfig apiconfig/],
    stage_sub => \&config_stage,
    listchannels_sub => \&list_channels,
    #load_old_config_sub => \&load_old_config,
    version => '$Id: tv_grab_dk_dr,v 1.11 2014/06/04 14:07:30 bilbo_uk Exp $',
    description => "TV Oversigten fra Danmarks Radios (2012) ".
	"(www.dr.dk/tjenester/programoversigt)",
				} );



my %writer_args = ( encoding => 'utf-8' );
if (defined $opt->{'output'}) {
    my $fh = IO::File->new($opt->{'output'}, ">:utf8");
    die "Cannot write to $opt->{'output'}" if not $fh;
    $writer_args{'OUTPUT'} = $fh;
}
my $writer = new XMLTV::Writer(%writer_args);

$writer->start(\%grabber_tags);

#print "Grabbing channel list\n";
my $chanlist = &get_channel_list($conf) || die "Couldn't get channel list";

# Check channels specified are valid
my @channels = ();
foreach my $cid (@{$conf->{'channel'}}) {
    my $chan = $chanlist->{$cid};
    if (!$chan) {
	warn("Unknown channel ".$cid." in config file\n");
    } else {
	$writer->write_channel($chan);
	push (@channels, $cid);
    }
}

# data uses offset from today in days.
for (my $c=0; $c<$opt->{'days'}; $c++) {
    foreach my $cid (@channels) {
	my $chan = $chanlist->{$cid};
	if (!$chan) {
	    &warning("Unknown channel $cid\n");
	} else {
	    #print "ID: $cid Name: " .
	    #$chan->{'display-name'}[0][0]."\n";
	    my $day = $c;
	    $day += $opt->{offset} if ($opt->{offset});
	    my $schedules = get_schedules($conf, $chan, $day);
	    if ("ARRAY" ne ref($schedules)) {
		warn("Schedules for $cid on day $c not valid - empty?\n");
		next;
	    }
	    foreach my $s (@$schedules) {
		#print Dumper $s;
		if ("HASH" ne ref($s)) {
		    warn("Weird listing:\n");
		    print STDERR Dumper $s;
		} else {
		    $writer->write_programme($s);
		}
	    }
	}
    }
}
$writer->end();
