#!/usr/bin/perl

use strict;
use warnings;
use feature qw(say);
use utf8;

use Getopt::Long;
use YAML::Any qw(LoadFile);
use LWP::UserAgent;
use XML::Feed;

##### variables
our $VERSION = '0.02';
my %opts = (
  config  => "$ENV{HOME}/.config/twatch-lite/config",
  state   => "$ENV{HOME}/.config/twatch-lite/state",
  verbose => 0,
);

##### functions
sub usage {
  print <<USAGE;
Usage: twatch-lite [-v] [-c <config>]
  -c, --config=FILE     Path to config file, mandatory
                        default: $opts{config}
  -s, --state=FILE      Path to file with feed positions, optional
                        default: $opts{state}
  -v, --verbose         Increase verbosity
USAGE
  exit shift // 1;
}

sub _log {
  my ($level, $message) = @_;
  return if ($level eq 'debug' and not $opts{verbose});
  print STDERR "[$level] ", $message, "\n";
  exit(1) if $level eq 'fatal';
}

sub load_config {
  my ($path) = @_;

  _log(debug => "Loading config");

  _log(fatal => "Can't find config: no such file")
    unless -f $path;

  my $config;
  eval { $config = LoadFile($path); 1 }
    or do { _log(fatal => "Can't load config: $@"); };

  _log(debug => "Sanity checks for config file");
  _log(fatal => "Missing section `feeds` in config file")
    unless (exists $config->{feeds} and ref $config->{feeds} eq 'ARRAY');
  _log(warn => "'outdir' not set in config, using /tmp"), $config->{outdir} = '/tmp'
    unless ($config->{outdir});

  my $i = 0;
  foreach my $feed (@{ $config->{feeds} }) {
    $i++;
    _log(debug => "Processing feed #$i: $feed->{url}");
    _log(fatal => "Feed is not a hashref")
      unless (ref $feed eq 'HASH');
    _log(fatal => "Missing 'url' in feed")
      unless $feed->{url} and $feed->{url} =~ m{https?://}oi;
    _log(fatal => "Missing 'matches' section in feed")
      unless $feed->{matches} and ref $feed->{matches} eq 'ARRAY';
    _log(fatal => "Empty 'matches' section in feed")
      unless (scalar @{ $feed->{matches} });

    $feed->{linktype} //= 'direct';
    $feed->{lookup}   //= 'title';
    # TODO: more
    ($feed->{hostname}) = ($feed->{url} =~ m{://([a-z0-9\.-]+)}oi);
  }

  return $config;
}

sub load_state {
  my ($path) = @_;
  my $state = {};

  _log(debug => "Loading file with feed positions");

  if (-f $path) {
    open my $FH, '<', $path;
    while (1) {
      last if eof($FH);
      my $line = <$FH>;
      chomp $line;
      next unless ($line =~ m{^([a-z0-9\.-]+),(\d+)}oi);
      $state->{$1} = $2; # hostname => lastseen
    }
    close $FH;
  } else {
    _log(warn => "Missing state file, will create one at exit");
  }

  return $state;
}

sub process_feed {
  my ($ua, $feedconf) = @_;
  my $urls = {};

  my $resp = $ua->get($feedconf->{url});
  unless ($resp->is_success) {
    _log(error => "Can't fetch feed");
    return $urls;
  }

  my $feed = XML::Feed->parse(\$resp->decoded_content);
  unless ($feed) {
    _log(error => "Can't parse feed: " . XML::Feed->errstr);
    return $urls;
  }

  my @entries = reverse $feed->entries;
  for my $entry (@entries) {
    my $time = (ref $entry->issued)
      ? $entry->issued->epoch
      : $entry->modified->epoch;
    if ($feedconf->{lastseen} >= $time) {
      _log(debug => "Already seen before: $time");
      next;
    }
    my $text = ($feedconf->{lookup} eq 'body')
      ? $entry->content->body
      : $entry->title;
    _log(info => "Processing: " . $entry->title);
    _log(debug => "Match text is: ". $text);
    foreach my $match (@{ $feedconf->{matches} }) {
      if ($match =~ m{^/(.*)/$}) {
      _log(debug => "Trying regex '$1'");
        next unless $text =~ qr/$1/i;
      } else {
      _log(debug => "Trying substr '$match'");
        next unless index(lc($text), lc($match)) >= 0;
      }
      # gotcha!
      my $msg = sprintf "Got it! URL: %s (%s)", $entry->link, $entry->title;
      _log(info => $msg);
      $urls->{$entry->link} = $entry->title;
    }
    $feedconf->{lastseen} = $time;
  }

  return $urls;
}

sub fetch_torrents {
  my ($ua, $outdir, $urls) = @_;

  while (my ($url, $title) = each(%{ $urls })) {
    my $resp = $ua->get($url);
    unless ($resp->is_success) {
      _log(error => "Can't fetch torrent file: $url");
      next;
    }
    my $file = sprintf "%s/%s", $outdir, $resp->filename;
    open my $T, '>', $file;
    unless ($T) {
      _log(error => "Can't save torrent to $file");
      next;
    }
    print $T $resp->decoded_content;
    close $T;
  }

  return 1;
}

sub save_state {
  my ($path, $state) = @_;

  open my $FH, '>', "$path.new";
  unless ($FH) {
    _log(fatal => "Can't open state file for write");
    return;
  }
  while (my ($host, $time) = each(%{ $state })) {
    printf $FH "%s,%s\n", $host, $time;
  }
  close $FH;
  rename("$path.new", $path)
    or _log(error => "Can't update state file: $!");

  return 1;
}

##### logic
GetOptions(
  "config=s" => \$opts{config},
  "state=s"  => \$opts{state},
  "verbose"  => \$opts{verbose},
) or usage(1);

my $config = load_config($opts{config});
my $state  = load_state ($opts{state});

my $ua = LWP::UserAgent->new;
$ua->agent("twatch-lite/$VERSION");
$ua->show_progress(1) if $opts{verbose};
if ($config->{proxy}) {
  $ua->proxy(['http', 'https'], $config->{proxy});
} else {
  $ua->env_proxy;
}
if ($config->{cookies}) {
  $ua->cookie_jar({ file => "$ENV{HOME}/.config/twatch-lite/cookies.txt" });
}

foreach my $feed (@{ $config->{feeds} }) {
  my $host = $feed->{hostname};
  $feed->{lastseen} = $state->{$host} // 0;
  my $urls = process_feed($ua, $feed);
  fetch_torrents($ua, $config->{outdir}, $urls)
    if scalar keys $urls;
  $state->{$host} = $feed->{lastseen};
}
save_state($opts{state}, $state);

exit 0;

__END__

=pod

=head1 NAME

twatch-lite -- periodically polls RSS/ATOM feeds and cherry-picks wanted torrent files

=head1 SYNOPSIS

  man twatch-lite
  mkdir -p ~/.config/twatch-lite
  vim ~/.config/twatch-lite/config
  twatch-lite --verbose # 1st time, debug

=head1 DESCRIPTION

This is very simple and specialized alternative to full-featured software called L<Twatch>. It works only with RSS/ATOM feeds.

=head1 Config file

File format is simple: YAML. Real config example:

  ---
  outdir: /tmp
  proxy: http://192.168.1.1:3128
  feeds:
    - url: http://torrent-tracker.org/rss.php
      linktype: direct
      lookup: title
      matches:
      - 'match substring'
      - '/match regex/'

Now let looks closer: we have a lot of global parameters and section 'feeds'

=head2 Global parameters

=head3 C<outdir>

Where to save downloaded torrent files. You may want configure your torrent-client to pickup files from this dir. If unset, you'll get warning, and '/tmp' will be used.

=head3 C<proxy>

If set - will use http proxy, when fetching rss and torrent-files. Otherwise will connect directly (default).

=head3 C<cookies>

If set to any non-empty value, will accept and send back cookies in http-requests. Default - unset.

=head2 Section C<feeds>

Configured feeds.

=head3 C<url>

URL with RSS/ATOM feed. This option must be set in each feed.

=head3 C<matches>

List of wanted 'keywords'. If keyword surrounded with slashes, like C</this/>,  it will be handled as regex, and as substring otherwise. Regex syntax is PCRE and described in L<perlre(3)>.

If feed entry matches ANY keyword, torrent file will be downloaded and stored to B<outdir>.

Matches list must contain at least one keyword. If you want all torrents from this feed, use '/.*/' regex keyword.

=head3 C<lookup>

Lookup this field in RSS/ATOM entry for wanted keywords. Acceptable values: 'body', 'title' (default).

=head3 C<linktype>

Type of link in RSS/ATOM entry. Possible values:

* direct -- link points directly to torrent file (default)
* page   -- link points to page with link to torrent file (not implemented yet)

=head1 SEE ALSO

L<Twatch>

=head1 AUTHORS

Alex 'AdUser' Z <aduser@cpan.org>

=cut
