#!/usr/bin/perl -w

=pod

=head1 NAME

tv_grab_in - Grab TV listings for India using WhatsOn.

=head1 SYNOPSIS

    tv_grab_in --help

    tv_grab_in --version

    tv_grab_in --capabilities

    tv_grab_in --configure [--config-file FILE]
                                [--auto-config add|ignore]
                              [--gui OPTION]

    tv_grab_in --list-lineups [--config-file FILE] [--dd-data FILE]
                                 [--reprocess]

    tv_grab_in [--config-file FILE] [--dd-data FILE]
                  [--auto-config add|ignore]
                  [--days N] [--offset N] [--quiet] [--notrim]
                  [--output FILE]
                  [--list-channels]

=head1 DESCRIPTION

This script downloads TV listings using What's On's XMLTV data service.

=head1 Stand-alone options

=over

=item --help

Print a help message and exit.

=item --version

Show the version of the grabber.

=item --capabilities

Show which capabilities the grabber supports. For more
information, see L<http://xmltv.org/wiki/xmltvcapabilities.html>

=back

=head1 Mode selection (default is grab mode)

=over

=item --configure

Activates configure mode.  If a config file already exists the values
are used as defaults.

=item --gui OPTION

Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

=item --list-lineups

Lists available lineups.  No settings from the config file are required. Used
by programs that automate the L</--configure> process.

=back

=head1 General Options

=over

=item --config-file
I<file>

Set the name of the configuration file, the default is B<~/.xmltv/tv_grab_in.conf>.
This is the file created during L</--configure> mode.

=item --auto-config I<add|ignore>

When used in --configure mode, updates the config file, removing old channels, and adding or
ignoring new channels.  Prompts are skipped if defaults are available in the current config file.

When used in grab mode, appends new channels to the config file.

=back

=head1 Grabber Mode options

=over

=item --days I<n>

Grab I<n> days.  The default is 7.

=item --offset I<n>

Start N days after the default.

=item --quiet

Suppress some messages normally written to standard error.

=item --output I<file>

Write xml to I<file> rather than standard output.

=item --list-channels

Same as B<--days> 0 (only the channel list is written).

=back

=head1 SEE ALSO

L<xmltv>.

=head1 Author

Author/Maintainer: Robert Eden, rmeden@yahoo.com

=cut

#################################################################
# initializations

use strict;
use XMLTV::Version '$Id: tv_grab_in,v 1.5 2010/03/31 06:27:34 rmeden Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig/;
use XMLTV::Description 'India (Whats On)';
use Data::Dumper;
use Date::Manip;
use Getopt::Long;
use IO::Scalar;
use XML::LibXML;

use XMLTV;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::Get_nice;
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Gunzip;

# Usage text handed to XMLTV::Usage; it is what usage() prints for --help
# and when option parsing fails.  The option names listed here must match
# the ones registered with GetOptions (the channel-listing option is
# spelled --list-channels).
use XMLTV::Usage <<END
$0: get listings via Whats On
in XMLTV format

    tv_grab_in --help

    tv_grab_in --version

    tv_grab_in --capabilities

    tv_grab_in --configure [--config-file FILE] [--auto-config add|ignore]
                              [--gui OPTION]

    tv_grab_in --list-lineups [--config-file FILE]

    tv_grab_in [--config-file FILE] [--auto-config add|ignore]
                  [--days N] [--offset N] [--quiet]
                  [--output FILE]
                  [--list-channels]

END
;

#
# XMLTV version strings cannot be checked with "use XMLTV VERSION",
# so compare them as plain strings instead.
#
unless ( $XMLTV::VERSION ge '0.5.32' ) {
    die "ERROR: XMLTV.PM 0.5.32 required\n";
}

#
# File-wide state
#
my @messages;                   # DD warnings
my %chan_config;                # channel id => 1 (active) / 0 (inactive)
my %chan_id;                    # quick channel id lookup
my %station;                    # DD station data
my %lineups;                    # lineup name => lineup URL
my %program;                    # DD program data
my %crew;                       # DD crew data
my %programGenre;               # DD genre data
my @schedules;                  # DD schedule list

my $bar;                        # progress bar handle
my $DEBUG       = 0;            # debug mode
my $config_file;                # config file name
my $tz_offset   = 0;
my $start_time  = time();       # used for the elapsed-time summary
my $sched_count = 0;            # number of programmes written
my %old_lineups;                # used for DD schema 1.2 -> 1.3 migration
my %icons;                      # holds icons (if present)

#
# Command line options and their defaults
#
my $opt_help;                   # ask for help
my $opt_configure;              # configure mode
my $opt_config_file;            # config file name
my $opt_gui;                    # use a gui for configuration
my $opt_output;                 # output name
my $opt_days          = 7;      # days to fetch
my $opt_offset        = 0;      # day to start
my $opt_quiet         = 0;      # suppress messages
my $opt_lineup        = '';     # lineup name
my $opt_list_channels = '';
my $opt_list_lineups  = '';
my $opt_auto_config   = '';
my $lineup_url        = '';     # lineup url from config file
my $opt_lineup_url    = '';     # override url from config file

my $lineups    = {};
my $guide_data = {};

# Unbuffer both standard streams; STDOUT is left as the selected handle.
foreach my $stream ( \*STDERR, \*STDOUT ) {
    select $stream;
    $| = 1;
}

$XMLTV::Get_nice::Delay = 0;
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');

# All times are Indian Standard Time (by definition).
Date_Init("TZ=+05:30");

#
# Process command line
#
# Older releases spelled options with underscores (--config_file).  Map
# those onto the dashed spelling, but only within the option NAME: anything
# after an '=' is the option's value (e.g. --output=my_file.xml) and must
# be left untouched.
#
foreach my $arg (@ARGV) {
    next unless $arg =~ /^(--[^=]*)(.*)\z/s;
    my ( $name, $value ) = ( $1, $2 );
    $name =~ tr/_/-/;
    $arg = $name . $value;    # $arg aliases the @ARGV element
}

# NOTE(review): --auto-config is advertised in the usage text but is not
# registered here, so passing it ends in usage(0) -- confirm intent.
GetOptions(
    	   'config-file=s' => \$opt_config_file,
           'help'          => \$opt_help,
    	   'gui:s'         => \$opt_gui,
    	   'quiet'         => \$opt_quiet,
    	   'configure'     => \$opt_configure,
    	   'config=s'      => \$opt_config_file,
    	   'output=s'      => \$opt_output,
           'days=i'        => \$opt_days,
    	   'offset=i'      => \$opt_offset,
    	   'lineup=s'      => \$opt_lineup,
    	   'lineup-url=s'  => \$opt_lineup_url,
           'list-channels' => \$opt_list_channels,
           'list-lineups'  => \$opt_list_lineups,
           'debug'         => \$DEBUG,
	  )
  or usage(0);
usage(1) if $opt_help;
die "ERROR: number of days must not be negative\n"         if ($opt_days < 0);

# The non-grabbing modes never need programme data, so fetch zero days.
$opt_days    = 0 if $opt_configure || $opt_list_channels || $opt_list_lineups;
$config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_in' , $opt_quiet);
XMLTV::Ask::init($opt_gui);

########################################################################
#
# Read the configuration file.
#
# It is skipped when it does not exist, and also when --configure is
# running against an empty file.  Each line has the form "setting: value".
#
if ( -e $config_file and ( -s $config_file or !$opt_configure ) ) {
    for my $line ( XMLTV::Config_file::read_lines($config_file) ) {
        next unless defined $line;
        chomp $line;

        my ( $setting, $val ) = split( /:\s+/, $line, 2 );

        # "channel: ID" enables a channel, "not channel: ID" disables it.
        if ( my ($negated) = $setting =~ /^(not )?channel$/ ) {
            $chan_config{$val} = $negated ? 0 : 1;
        }
        elsif ( $setting eq 'lineup' ) {
            $opt_lineup = $val;
        }
        elsif ( $setting eq 'lineup_url' ) {
            $lineup_url = $val;
        }
        elsif ( $setting eq 'auto-config' ) {
            $opt_auto_config = $val;
        }
        else {
            warn "WARNING: Unknown setting: $setting, skipping.\n";
        }
    }
} # load config file

#-------------------------------------------------------------------------------
# get_nice_gzip
#-------------------------------------------------------------------------------
# desc          : get url with get_nice, check for gzip encoding, return
#                 decoded if necessary
# arguments     : url to fetch from
# returns       : document text as scalar (decompressed when it was gzipped)
# dies          : when get_nice returns no data at all
#-------------------------------------------------------------------------------
sub get_nice_gzip( $ ) {
    my $url = $_[0];

    printf STDERR "downloading %s\n",$url unless $opt_quiet;
    my $data = get_nice($url);
    die "ERROR: no data received from $url\n" unless defined $data;
    printf STDERR "downloaded %s bytes\n",length($data) unless $opt_quiet;

    # A gzip stream always starts with the magic bytes 0x1f 0x8b.  Check
    # that first: compressed data is arbitrary binary and can easily contain
    # byte runs such as "<tv" that would fool the text heuristic below.
    my $is_gzip = substr($data, 0, 2) eq "\x1f\x8b";

    # Otherwise use heuristics to check for plain html/xml
    if (!$is_gzip && $data =~ /<html|<head|<body|<table|<div|<tv|<lineup/i) {
        return $data;
    }

    # Either gzip for certain, or unrecognised content which (as before) is
    # handed to gunzip.
    print STDERR "content is gzipped, deflating\n" unless $opt_quiet;
    return gunzip($data);
}


#
# get lineups
#
# Downloads the lineup index and fills the file-level %lineups hash
# (lineup name => lineup URL).  Does nothing when %lineups is already
# populated.  Dies when the index cannot be fetched.
#
# returns : 0 when the cached list was reused, 1 after a fresh download
#
sub get_lineups {
    return 0 if (keys %lineups);
    my $content=get_nice_gzip("http://www.zipazap.com/videoportal/mediacenter/lineup.xml");
    die "Error fetching lineup list " unless $content;
    my $parser = XML::LibXML->new();
    my $doc = $parser->parse_string( $content );
    my $root = $doc->getDocumentElement;

    foreach my $lineup ($root->findnodes('url')) {
        my $name  = $lineup->getAttribute('name');
        my $child = $lineup->getFirstChild;

        # An entry without a name or without any content (<url name="x"/>)
        # is unusable; skip it instead of crashing on an undefined node.
        unless (defined $name && length $name && defined $child) {
            warn "WARNING: skipping malformed lineup entry\n" unless $opt_quiet;
            next;
        }
        $lineups{$name} = $child->getData;
    }

    return 1;
}

#
# get_stations
#
# Downloads the guide for the current lineup URL and records every
# channel's display name in $guide_data->{channel}{ID}{'display-name'}.
# Dies when no lineup URL is known or when the guide lists no channels.
#
# This needs to be replaced with a smaller download file for speed.
#
sub get_stations {
    # Only the URL is needed to fetch the guide.  Testing the lineup NAME
    # here made "--configure --lineup-url URL" fail when no name was set.
    die "Can't get stations without a lineup!" unless $lineup_url;

    my $guide_string=get_guide($lineup_url);

    my $parser = XML::LibXML->new();
    my $doc = $parser->parse_string( $guide_string );
    my $root = $doc->getDocumentElement;

    foreach my $station ($root->findnodes('channel')) {
        my $id = $station->getAttribute('id');
        next unless defined $id && length $id;

        # List context yields the matching elements themselves; a channel
        # may lack <display-name>, in which case fall back to its id.
        my ($name_node) = $station->getElementsByTagName('display-name');
        my $name = defined $name_node ? $name_node->textContent : '';
        $name = $id unless length $name;

        $guide_data->{channel}{$id}{'display-name'}=$name;
    }

    die "Error fetching channel list for lineup\n" unless $guide_data->{channel};
} # get_stations

#
# --list-lineups mode
#
# Print the name of every available lineup, one per line, then exit.
#
if ($opt_list_lineups)
{
    get_lineups();
    foreach my $name (sort keys %lineups) {
        # print, not printf: the name is data and must never be used as a
        # format string (a '%' in a lineup name would be misinterpreted).
        print STDOUT "$name\n";
    }
    exit 0;
}

#
# --configure
#
# Interactive configuration: choose a lineup, choose which of its channels
# to grab, choose how new channels are treated, then write the config file
# and exit.
#
if ($opt_configure) {

    if ($opt_lineup_url) {  #override default
        $lineup_url=$opt_lineup_url;
    } else {
        get_lineups();
        my @choices=sort keys %lineups;
        die "ERROR: no lineups available\n" unless @choices;

        # Offer the previously configured lineup as the default only while
        # it is still one of the choices; otherwise use the first one.
        my $val = ( $opt_lineup && exists $lineups{$opt_lineup} )
                ? $opt_lineup
                : $choices[0];
        $val = ask_choice("\nWhich Lineup? ($opt_lineup)",$val,@choices);
        $opt_lineup=$val;
        $lineup_url=$lineups{$opt_lineup};
    }

#
# get station names
#
    get_stations();
    my $channels = $guide_data->{channel};

    # Work per channel id, ordered by display name (id breaks ties), so two
    # channels that share a display name each get their own question.
    my @ids = sort {
                     $channels->{$a}{'display-name'} cmp $channels->{$b}{'display-name'}
                  || $a cmp $b
                   } keys %$channels;

# Construct the questions
    my @questions;
    foreach my $id (@ids) {
        push @questions, sprintf("Add channel %-20s :",$channels->{$id}{'display-name'});
    }

# Ask the questions
    my @answers = ask_many_boolean( 1, @questions );

# Save the answers, remembering which channels the lineup really contains
    my %chan_found=();
    foreach my $i (0 .. $#ids) {
        $chan_config{$ids[$i]} = $answers[$i];
        $chan_found{$ids[$i]}  = 1;
    }

#
# ask about auto-config for the config file
#
    my $config_autoconfig=0;
    if (ask_boolean("Lineups change periodically. The default for new stations is to notify you.\n".
                    "Do you want new stations to be automatically added?")) {
       $config_autoconfig="add";
    }
    elsif (ask_boolean("Do you want new stations to be ignored?")) {
       $config_autoconfig="ignore";
    }

#
# Write the config file
#
    open(my $conf, '>', $config_file)
        or die "ERROR: can't open config file: $config_file\n";
    print {$conf} "lineup:   $opt_lineup\n" if length $opt_lineup;
    print {$conf} "lineup_url:  $lineup_url\n";
    print {$conf} "auto-config: $config_autoconfig\n" if $config_autoconfig;

    # %chan_config may still hold channels from the previous config file.
    # Those missing from the current lineup are reported and dropped rather
    # than written back.
    foreach my $id (sort keys %chan_config) {
        unless ($chan_found{$id}) {
            print STDERR "Channel '$id' no longer exists\n";
            next;
        }
        my $prefix = $chan_config{$id} ? '' : 'not ';
        print {$conf} "${prefix}channel: $id\n";
    }

    # Buffered write errors only surface at close.
    close($conf) or die "ERROR: can't write config file: $config_file\n";
    say( 'Configuration complete!' );
    exit 0;
} # --configure channel list

#
# get_guide
# Until we get a standard filename for the guide, this subroutine checks
# the folder and gets the last link from the page.
#
# arguments : URL of the folder (index page) that lists the guide files
# returns   : the guide document as a scalar, decompressed if necessary
# dies      : when the index page cannot be downloaded or lists no guide
#
# No prototype on purpose: get_stations calls this sub before this point in
# the file, and a prototype would only yield a "called too early" warning.
#
sub get_guide {
  my $root=shift;
  use HTML::TokeParser;

  die "Error: no lineup URL given\n" unless defined $root && length $root;

  print STDERR "Checking $root\n" unless $opt_quiet;
  # Fetch the index from the URL we were given, not from a global.
  my $index_page=get_nice($root);
  die "Error downloading index_page from $root"
      unless defined $index_page && length($index_page)>100;

#
# we want the last URL on the page
#
  my $p = HTML::TokeParser->new(\$index_page);
  my $file="";
  while (my $token = $p->get_tag("a")) {
      # Lexical variable, so the caller's $_ is not clobbered.
      my $text = $p->get_trimmed_text("/a");
      # Dots are escaped and the match is anchored at both ends so that
      # only real guide files (NAME_XMLTV_n_n.xml.gz) qualify.
      next unless $text =~ /^\w+_XMLTV_\d+_\d+\.xml\.gz\z/;
      $file=$text;
  }

  # Without this check the index page itself would be fetched again and
  # returned as if it were guide data.
  die "Error: no XMLTV guide file listed at $root\n" unless length $file;

  my $base = $root;
  $base .= '/' unless $base =~ m{/\z};
  return get_nice_gzip($base.$file);
}

#
# Get data!
#
# Grab mode: download the guide, then copy to the output only the channels
# that are enabled in the config and the programmes that start inside the
# requested window [start, stop).
#
    my $start = DateCalc("00:00:00","+ $opt_offset days") || die "ERROR: Can't compute <$opt_offset> days\n";
    my $stop  = DateCalc($start    ,"+ $opt_days   days") || die "ERROR: Can't compute <$opt_days> days\n";
print STDERR "debug: got start=$start\n" if $DEBUG;
print STDERR "debug: got stop=$stop\n"   if $DEBUG;

    # DateCalc returns Date::Manip's internal format (YYYYMMDDHH:MN:SS),
    # while the digits captured from the guide are YYYYMMDDhhmmss.  Comparing
    # those two as strings goes wrong as soon as the first ':' is reached,
    # so convert the window limits to the same all-digit layout first.
    my $start_stamp = UnixDate($start, "%Y%m%d%H%M%S");
    my $stop_stamp  = UnixDate($stop,  "%Y%m%d%H%M%S");

    # --lineup-url overrides the URL stored in the config file.
    $lineup_url = $opt_lineup_url if $opt_lineup_url;
    die "ERROR: no lineup URL configured, run '$0 --configure' first\n"
        unless $lineup_url;

    $guide_data=get_guide($lineup_url);
    die "ERROR: no guide data received from $lineup_url\n"
        unless defined $guide_data && length $guide_data;

#
# open output file
# we probably should use XMLTV:Writer, but the data should already be in XMLTV
# format and this is so much more efficient than parsing it
#
    if (defined $opt_output){
        open(STDOUT, '>', $opt_output) or die "ERROR: cannot write to $opt_output\n";
    }
    print '<tv source-info-url="http://zipazap.com/" source-info-name="Whats on India" generator-info-name="XMLTV/$Id: tv_grab_in," generator-info-url="http://www.xmltv.org/">'."\n";

#
# ok, let's walk through the data
#
{
    my $total = length($guide_data)+1;
    unless ($opt_quiet) {
        $bar = XMLTV::ProgressBar->new('processing data',$total);
    }
    my $guide_fh = IO::Scalar->new(\$guide_data);
    my $skip=0;     # true while the current element must not be copied
    my $count=0;
    while (defined(my $line = $guide_fh->getline)) {
        $bar->update(tell($guide_fh)) if $bar && (++$count%1000==0);
        next unless $line =~ /^ /; # ignore the <xml> and <tv> tags

        # An opening <channel>/<programme> tag decides whether that whole
        # element is kept; following lines inherit the decision until the
        # next opening tag.
        if ($line =~ /<channel id="(.+?)"/) {
            $skip = $chan_config{$1} ? 0 : 1;
        } elsif ($line =~ /<programme start=.(\d+).+channel="(.+?)"/) {
            my ($when, $chan) = ($1, $2);
            $skip=0;
            $skip=1 if $when lt $start_stamp;
            $skip=1 if $when ge $stop_stamp;    # window is half-open
            $skip=1 unless $chan_config{$chan};
            $sched_count++ unless $skip;
        }

        print $line unless $skip;
    }
    $guide_fh->close;

    # Leave the bar at 100% before finishing it.
    $bar->update($total) if $bar;
}
#
# do some final clean-up
#
    print "</tv>\n";
    $bar->finish() if $bar;

    if (defined $opt_output) {
        # Buffered write errors only surface at close.
        close(STDOUT) or die "ERROR: cannot write to $opt_output\n";
    }

    printf STDERR "\nDownloaded %d programs in %d seconds\n",$sched_count,time()-$start_time
        unless $opt_quiet;

exit(0);

