#!/usr/bin/perl



use warnings;

use strict;



=head1 specs



Turns http://www.loc.gov/marc/bibliographic/ecbdist.html into the format

used by MARC::Lint.pm



Takes ecbdist.html as input.  Skips fixed fields and data marked

"[OBSOLETE]"  Also, the HTML file doesn't include the 841-88X tags,

so those are hardcoded here.



=head1 AUTHOR



Originally written by Colin Campbell at Sirsi, and taken over and modified

by Andy Lester.



=cut



open( my $fh, "../lib/MARC/Lint.pm" ) or die "Can't open module";

while ( <$fh> ) {

    print;

    last if /^__DATA__/;

}

close $fh;



local $/ = undef;

my $text = <>;

$text =~ s/(<BR>|\r|\n)+/\n/ig;

my @lines = split( /\n/, $text );





my $in_tag = undef;

my $i1;

my $i2;

my $curr_indicator;

my $ntags;

my $desc1;

my $desc2;



my $started = 0;

for ( @lines ) {

    unless ($started) {

	$started=1 if /Number and Code Fields/;

	next;

    }

    s/^\s+//;

    s/\s+$//;

    next if $_ eq "";



    if ( /^(\d\d\d)/ ) {

	my $tag = $1;

	if (/OBSOLETE/) { 

	    $in_tag = 0;

	    next; 

	}



	/$tag - (.+) \((N?R)\)/ or die "Tag $tag is invalid format";

	my $desc = $1;

	my $nr = $2;

	++$ntags;

	$in_tag = 1;

	print "\n" if $ntags > 1;

	print "$tag\t$nr\t$desc\n";

	$i1 = $i2 = "";

	next;

    }



    next unless $in_tag;

    next if /OBSOLETE/;

    

    if (/^First - (.+)/) {

	$curr_indicator = 1;

	$desc1 = $1;

    } elsif (/^Second - (.+)/) {

	print_indicator( 1, $i1, $desc1 );

	undef $desc1;

	$curr_indicator = 2;

	$desc2 = $1;

    } elsif (/^Subfield/) {

	print_indicator( 2, $i2, $desc2 );

	undef $desc2;

	$curr_indicator = 0;

    } else {

	if ($curr_indicator) {

	    my $data = '';

	    if (/^(\d-\d)/) {

		$data = $1;

	    } elsif (/^([#0123456789])/) {

		$data = $1;

	    }

	    $data = "b" if $data eq "#";

	    if ($curr_indicator == 1) {

		$i1 .= $data;

	    } elsif ($curr_indicator == 2) {

		$i2 .= $data;

	    }



	} else {

	    if ( /^\$(.) - (.+)\s*\((N?R)\)/ ) {

		my ($sub,$desc,$nr) = ($1,$2,$3);

		print "$sub\t$nr\t$desc\n";

	    } 

	}

    }

} # main while



sub print_indicator {

    my $n = shift;

    my $val = shift;

    my $desc = shift;



    $val = "blank" if $val eq "b";



    print "ind$n\t$val\t$desc\n";

}

