#!/usr/bin/perl -w
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;

my $NUM_SCORESETS = 4;

my $ZERO_MINISCULE_SCORES =     1;
my $MINISCULE_THRESHOLD =       0.1;      # points

my $UNZERO_META_PREDICATES =    1;

# scores are broken into three regions:
# 1. "pre" (stuff before generated mutable scores)
# 2. "gen" (first generated mutable scores section)
# 3. "end" (stuff after generated mutable scores)
# 4. "gen2" (any later generated mutable scores sections)

# options
my ($scoreset, $oldscores, $newscores) = @ARGV;
$scoreset = int($scoreset) if defined $scoreset;
if (!defined $newscores || $scoreset < 0 || $scoreset >= $NUM_SCORESETS ) {
  die "usage: rewrite-cf-with-new-scores scoreset oldscores.cf newscores.cf\n";
}

# variables filled-out in read_rules()
our %rules;			# rules data

# variables filled-out in read_gascores()
my %gascores = ();		# generated scores

# variables filled-out in read_oldscores()
my $pre = '';			# stuff before first "gen" section
my $end = '';			# stuff after first "gen" section
my %oldscores;			# old scores
my %comment;			# "gen" rule comments
my %fixed;			# scores that are fixed (non-gen)
my %gen2;			# scores that are gen in the $end string

# compiled output
my @gen_order = ();
my %gen_lines = ();

# read stuff in
read_rules();
read_gascores();
read_oldscores();

build_new_scores();

if ($ZERO_MINISCULE_SCORES) {
  fixup_miniscule_scores();
}
if ($UNZERO_META_PREDICATES) {
  fixup_meta_predicates();
}

$end = sub_gen2($end);

# write stuff out
print $pre;
print_gen();
print $end;
exit;


sub read_rules {
  system ("./parse-rules-for-masses -s $scoreset") and die;
  if (-e "tmp/rules.pl") {
    # note: the spaces need to stay in front of the require to work around
    # a RPM 4.1 problem
    require "./tmp/rules.pl";
  }
  else {
    die "parse-rules-for-masses had no error but no tmp/rules.pl";
  }
}

sub read_gascores {
  open (STDIN, "<$newscores") or die "cannot open $newscores";
  while (<STDIN>) {
    next unless /^score\s+(\S+)\s+(-?\d+(?:\.\d+)?)/;
    my $name = $1;
    my $score = $2;

    # various things we should be concerned about
    if (!exists $rules{$name}) {
      warn "$name is not defined in tmp/rules.pl\n";
      next;
    }
    if ($rules{$name}->{issubrule}) {
      warn "$name is an indirect sub-rule in tmp/rules.pl\n";
      next;
    }
    if ($rules{$name} =~ /^__/) {
      warn "$name has an indirect sub-rule \"__\" prefix\n";
      next;
    }
    if ($name eq '(null)') {
      warn "$name is (null)\n";
      next;
    }

    $gascores{$name} = $score;
  }
}

sub read_oldscores {
  open (IN, "<$oldscores") or die "cannot open $oldscores";

  # state of things
  my $where = "pre";		# region of original scores file that we're in
  my $seen_gen = 0;             # have we seen the first <gen:mutable> tag?

  # read everything in
  while (my $line = <IN>) {
    if ($line =~ /<\/gen:mutable>/) {
      $where = "end";
    }

    if ($where eq "pre") {
      readline_fix($line);
      $pre .= $line;
    }
    elsif ($where eq "gen") {
      readline_gen($line);
    }
    elsif ($where eq "gen2") {
      readline_gen2($line);
      $end .= $line;
    }
    elsif ($where eq "end") {
      readline_fix($line);
      $end .= $line;
    }

    if ($line =~ /<gen:mutable>/) {
      if ($seen_gen) {
        $where = "gen2";
      }
      else {
        $where = "gen";
        $seen_gen = 1;
      }
    }
  }
}

# used for both "pre" and "end"
sub readline_fix {
  my ($line) = @_;

  my $comment;
  if ($line =~ s/\s*#\s*(.*)//) {
    $comment = $1;
  }
  if ($line =~ /^\s*score\s+(\S+)\s/) {
    my (undef, $name, @scores) = split(' ', $line);
    $fixed{$name}++;
    $comment{$name} = $comment if $comment;
  }
}

sub readline_gen {
  my ($line) = @_;

  my $comment;
  if ($line =~ s/\s*#\s*(.*)//) {
    $comment = $1;
    $comment =~ s/ n=$scoreset//;
  }
  if ($line =~ /^\s*score\s+(\S+)\s/) {
    my (undef, $name, @scores) = split(' ', $line);
    for (my $i = 1; $i < $NUM_SCORESETS; $i++) {
      $scores[$i] = $scores[0] unless defined $scores[$i];
    }
    @{$oldscores{$name}} = @scores;
    $comment{$name} = $comment if $comment;
  }
}

sub readline_gen2 {
  my ($line) = @_;

  my $comment;
  if ($line =~ s/\s*#\s*(.*)//) {
    $comment = $1;
    $comment =~ s/ n=$scoreset//;
  }
  if ($line =~ /^\s*score\s+(\S+)\s/) {
    my (undef, $name, @scores) = split(' ', $line);
    for (my $i = 1; $i < $NUM_SCORESETS; $i++) {
      $scores[$i] = $scores[0] unless defined $scores[$i];
    }
    @{$oldscores{$name}} = @scores;
    $comment{$name} = $comment if $comment;
    $gen2{$name}++;
  }
}

sub build_new_scores {
  # we just consider scores for this set that are in the input or were in the
  # "gen" region from the old scores, tmp/rules.pl is not considered here
  my %gen;				# rules to be printed in "gen" region
  $gen{$_} = 1 for keys %gascores;	# scores for this set from GA
  $gen{$_} = 1 for keys %oldscores;	# original scores in "gen" region

  # remove fixed scores
  for (keys %fixed) {
    delete $gen{$_};
  }

  # sort all generated rules by name
  for my $name (sort keys %gen) {
    next if ($rules{$name}->{lang});	# "lang es" rules etc.
    next if ($rules{$name}->{issubrule});	# indirect sub-rules
    next if ($name eq 'AWL');		# dynamic score

    my @scores = ();
    my $comment = '';
    $comment = $comment{$name} if defined $comment{$name};
    
    # use the old scores if they existed
    @scores = @{$oldscores{$name}} if exists $oldscores{$name};
    
    # set appropriate scoreset value
    if (defined $gascores{$name}) {
      $scores[$scoreset] = $gascores{$name};
      delete $oldscores{$name};
    }
    else {
      # zero for current scoreset if there was no new score;
      # when the perceptron does this for mutable rules, it means
      # that score had a new score of 0
      $scores[$scoreset] = 0;

      if (defined $oldscores{$name}) {
	$comment .= " n=$scoreset";
	#warn "$name has no GA score, but had a score before\n";
      }
    }

    # sort and unique comment tags
    my %unique;
    $unique{$_} = 1 for split(' ', $comment);
    $comment = join(' ', sort keys %unique);

    push (@gen_order, $name);
    $gen_lines{$name} = {
      scores => \@scores,
      comment => $comment
    };
  }
}

sub new_score_line {
  my ($name) = @_;

  # create new score line
  my @scores = @{$gen_lines{$name}{scores}};
  my $comment = $gen_lines{$name}{comment};
  return sprintf("score %s %s%s", $name,
          join(" ", generate_scores($name, @scores)),
          ($comment) ? ' # ' . $comment : '');
}

sub print_gen {
  print "\n";
  foreach my $name (@gen_order) {
    next if ($gen2{$name});       # will do that separately
    print new_score_line($name), "\n";
  }
  print "\n";
}

sub sub_gen2 {
  my $end = shift;

  foreach my $name (keys %gen2) {
    if ($end !~ s/^\s*score\s+${name}\s.+?$/
            new_score_line($name);
        /em)
    {
      # we failed to sub it; output score in main gen:mutable block instead
      delete $gen2{$name};
    }
  }
  $end;
}

sub generate_scores {
  my ($name, @scores) = @_;

  my $isnet = 0;
  my $islearn = 0;
  if (defined $rules{$name}->{tflags}) {
    $isnet = ($rules{$name}->{tflags} =~ /\bnet\b/);
    $islearn = ($rules{$name}->{tflags} =~ /\blearn\b/);
  }

  # set defaults if not already set
  if (!defined $scores[0]) {
    warn "$name does not have a default score\n";
    $scores[0] ||= 0;
  }

  my $flag = 1;
  for (my $i = 1; $i < $NUM_SCORESETS; $i++) {
    $scores[$i] = $scores[0] unless defined $scores[$i];
    $flag = 0 if ($scores[$i] != $scores[$i-1]);
  };

  # enforce rule/scoreset rules.
  # net rules never have a non-zero score in sets 0 and 2
  for (my $i = 0; $i < $NUM_SCORESETS; $i++) {
    if ($isnet && ($i & 1) == 0) {
      $scores[$i] = 0;
      $flag = 0 if ($i > 0 && $scores[$i] != $scores[$i-1]);
    }
    if ($islearn && ($i & 2) == 0) {
      $scores[$i] = 0;
      $flag = 0 if ($i > 0 && $scores[$i] != $scores[$i-1]);
    }
  }

  if ($flag) {
    splice @scores, 1;
  }

  return @scores;
}

sub fixup_miniscule_scores {
  my $num_fixed = 0;

  foreach my $name (@gen_order) {
    my @scores = @{$gen_lines{$name}{scores}};
    if (abs($scores[$scoreset]) < $MINISCULE_THRESHOLD) {
      $scores[$scoreset] = 0;
      $num_fixed++;
    }
    @{$gen_lines{$name}{scores}} = @scores;
  }

  warn "zeroed $num_fixed scores for being 'miniscule'.\n";
}

sub fixup_meta_predicates {
  # this is the opposite of t/meta.t

  while (my ($name, $info) = each %rules)
  {
    my $type = $info->{type} || "unknown";
    # look at meta rules that are not disabled
    next unless ($type eq "meta" && ($name =~ /^__/ || $info->{score} != 0));

    next unless ($info->{depends});

    # test rules should not impose requirements on release rules; ignore
    # any dependency requirements caused by T_ rules
    next if $name =~ /^T_/;

    for my $depend (@{ $info->{depends} }) {
      if (!exists $rules{$depend}) {
        warn "$name depends on $depend which is nonexistent\n";
        next;
      }

      # if dependency is a predicate, it'll run
      next if $depend =~ /^__/;

      # not a generated rule?  not our problem, then; t/meta.t will catch it
      next unless (exists $gen_lines{$depend});

      # ignore "tflags net" and "tflags learn" rules -- it is OK
      # for those to have zero scores in some scoresets, for obvious
      # reasons.
      next if (defined $rules{$depend}->{tflags} &&
              $rules{$depend}->{tflags} =~ /\b(?:net|learn)\b/);

      # if dependency has a non-zero score, it'll run
      my $depscore = $gen_lines{$depend}{scores}[$scoreset];
      next if (defined $depscore && $depscore != 0);

      warn "dep failure: $name depends on $depend with 0 score; fixing at non-0\n";
      $gen_lines{$depend}{scores}[$scoreset] = 0.001;
    }
  }
}
