#!/usr/bin/perl -w
use strict;
use warnings;
use lib './lib';
use base 'LEOCHARRE::CLI';
use PDF::OCR::Thorough::Cached;

# Script version, built from the digit groups found in the Revision keyword
# string below.
our $VERSION = sprintf "%d.%02d", q$Revision: 1.5 $ =~ /(\d+)/g;

# Optional system-wide configuration file. When it is absent, or when config()
# returns nothing usable, an empty configuration is used instead.
my $abs_conf = '/etc/pdf2ocr.conf';
my $conf     = -f $abs_conf ? ( config($abs_conf) || {} ) : {};

# Command line options. Per the OPTION FLAGS / PARAMETERS documentation below,
# -s, -n and -C are flags and -a takes a value (the cache directory).
my $o = gopts('sna:C');

# Values given on the command line take precedence over the config file.
$conf->{CACHE_BY_SUM} = 1       if $o->{s};
$conf->{abs_cache}    = $o->{a} if $o->{a};





# Collect the pdf paths given on the command line (argv_aspaths() comes from
# the CLI base module; presumably it returns an array ref of resolved paths).
my $pdfs = argv_aspaths();

# Show the manual when there is nothing to process. The reference itself is
# checked first: under strict refs, "scalar @$pdfs" dies with "Can't use an
# undefined value as an ARRAY reference" if argv_aspaths() returned undef
# instead of an empty array ref.
( $pdfs and @$pdfs ) or man();



# Apply the cache settings once, before any document object is constructed,
# so that the first pdf is handled with the same settings as all later ones.
$PDF::OCR::Thorough::Cached::ABS_CACHE_DIR = $conf->{abs_cache}
   if $conf->{abs_cache};
$PDF::OCR::Thorough::Cached::CACHE_BY_SUM = $conf->{CACHE_BY_SUM}
   if $conf->{CACHE_BY_SUM};

# Process each pdf in turn: report its cache location (-n / -C) or print its
# text to STDOUT.
PDF: for my $abs_pdf (@$pdfs) {

   # Direct class method call; the indirect "new Class(...)" form is ambiguous
   # to the parser. A pdf whose object cannot be built is skipped.
   my $p = PDF::OCR::Thorough::Cached->new($abs_pdf) or next PDF;

   # The package variables may be unset (their defaults are not visible from
   # this script); substitute '' so the debug lines cannot raise
   # uninitialized-value warnings.
   my $by_sum    = $PDF::OCR::Thorough::Cached::CACHE_BY_SUM;
   my $cache_dir = $PDF::OCR::Thorough::Cached::ABS_CACHE_DIR;
   debug( "cache by sum? "  . ( defined $by_sum    ? $by_sum    : '' ) );
   debug( "abs cache dir? " . ( defined $cache_dir ? $cache_dir : '' ) );

   my $abs_cache_file = $p->abs_cached;
   debug("cache file: $abs_cache_file");

   # -n: only report where the cache file would be (reported on STDERR).
   if ( $o->{n} ) {
      print STDERR "$abs_cache_file\n";
      next PDF;
   }

   # -C: report the cache file path if it exists on disk, otherwise 0
   # (reported on STDERR).
   if ( $o->{C} ) {
      print STDERR ( -f $abs_cache_file ? "$abs_cache_file\n" : "0\n" );
      next PDF;
   }

   # Default action: print the document text to STDOUT. What get_text returns
   # on failure is not visible here, so an undefined result is not printed.
   my $text = $p->get_text;
   print $text if defined $text;
}











__END__

=pod

=head1 NAME

pdf2ocr - get the text content of the images within a pdf document

=head1 DESCRIPTION

Arguments are one or more pdf files. The text of each one is printed to STDOUT.

This script assumes that each page in the pdf is ONE image of one 8.5x11 page;
that is what the calculations are set up for.

=head1 USAGE EXAMPLES

   pdf2ocr ./file.pdf
   pdf2ocr -s -a /tmp/cache ./file.pdf
   pdf2ocr -C ./file.pdf

=head1 OPTION FLAGS

   
   -h help
   -d debug
   -v version

   -s turn on caching by sum
   -n don't do anything, just show (on STDERR) where the cache file would be
   -C don't do anything, only show (on STDERR) where the cache file is if it
      exists, or 0 if it does not; basically checks if it's cached or not.

=head1 PARAMETERS

   -a absolute path to the cache directory
   
=head1 /etc/pdf2ocr.conf

   ---
   abs_cache: /tmp/cache
   CACHE_BY_SUM: 1

=head1 SEE ALSO

L<PDF::OCR> - parent package

L<PDF::OCR::Thorough::Cached>

L<LEOCHARRE::CLI>

=head1 AUTHOR

Leo Charre leocharre at cpan dot org

=cut
