#!/usr/bin/perl -w

use strict;
use warnings;
use Getopt::Long;

# rsup - Upgrade RiveScript 1.x code to 2.x standards.
# Usage: rsup --out ./outdirectory <rs docs or directories>
our $VERSION = '0.01';
my $help = 0;
my $out  = '';
my $ext  = '.rs';
my $bak  = 0;
my $fixobj = 0;
my $opts = GetOptions (
	"help|h"        => \$help,
	"backup|bak|b"  => \$bak,
	"dontfixperl|p" => \$fixobj,
	"out|o=s"       => \$out,
	"ext|x=s"       => \$ext,
);
our @warnings = ();

# Asking for help?
if ($help) {
	&help();
}

# Verify that the output directory is writable.
if (length $out) {
	if (!-d $out) {
		die "Output directory $out doesn't exist!";
	}
	if (!-w $out) {
		die "Output directory $out is not writable!";
	}
}

# Collect the rest of the arguments.
my @in = @ARGV;
if (scalar(@in) == 0) {
	&usage();
}

# Process each argument.
foreach my $item (@in) {
	if (-d $item) {
		# This is a directory, so open it.
		opendir (DIR, $item);
		foreach my $file (sort(readdir(DIR))) {
			if ($file =~ /\~$/) {
				if ($bak == 0) {
					# Skip backup files~
					next;
				}
			}
			if ($file =~ /\Q$ext\E/i) {
				&parseFile("$item/$file");
			}
		}
		closedir (DIR);
	}
	elsif (-f $item) {
		# This is a file.
		if ($item =~ /\Q$ext\E/i) {
			&parseFile($item);
		}
	}
}

# Any warnings?
if (scalar(@warnings)) {
	print "\n";
	print "=" x 60;
	print "\n"
		. "The following warnings were found during execution:\n\n"
		. join("\n",@warnings) . "\n";
}

sub parseFile {
	my $file = shift;

	print "<= Reading $file\n";

	open (FILE, $file);
	my @read = <FILE>;
	close (FILE);
	chomp @read;

	# Create a buffer for the new file.
	my @new = (
		"// Converted to RiveScript 2 by rsup v. $VERSION",
		"// Generated on " . localtime(time()),
		'',
		"! version = 2.0",
		'',
	);

	my $lineno = 0;
	my $skippedLast = 0;
	my $inComment = 0;
	my $inObject = 0;
	foreach my $line (@read) {
		$lineno++;

		# See if we're inside an object.
		if ($inObject) {
			if ($line =~ /^<\s*object/i) {
				# Ends the object.
				$inObject = 0;
				push (@new, "< object");
				next;
			}

			# Attempt to fix up the Perl code if we can parse it.
			if ($fixobj == 0) {
				if ($line =~ /my \((.+?)\) = \@_/i) {
					$line =~ s/my \((.+?)\) = \@_/my (\$rs,$1) = \@_/ig;
					print "\tFixed obj. line: $line\n";
				}
			}
			push (@new,$line);
			next;
		}

		if ($inComment) {
			if ($line =~ /\*\//) {
				$inComment = 0;
				push (@new,$line);
				next;
			}
			push (@new,$line);
			next;
		}

		# Further chomp the line.
		$line =~ s/^(\t|\x0a|\x0d|\s)+//g;
		$line =~ s/(\t|\x0a|\x0d|\s)+$//g;

		# Blank lines?
		if (length $line == 0) {
			push (@new,$line);
			next;
		}

		if ($line =~ /^\#/) {
			# Single-line comment.
			push (@new,$line);
			next;
		}
		elsif ($line =~ /^\/\//) {
			# Single-line // comment.
			push (@new,$line);
			next;
		}
		elsif ($line =~ /^\/\*/) {
			# Start of a multi-line comment.
			if ($line =~ /\*\//) {
				# It ends on the same line.
				push (@new,$line);
				next;
			}
			push (@new,$line);
			$inComment = 1;
			next;
		}
		elsif ($line =~ /\*\//) {
			# End of a multi-line comment.
			push (@new,$line);
			$inComment = 0;
			next;
		}

		# Convert &object.syntax() to <call> syntax.
		if ($line !~ /^\&/) {
			while ($line =~ /\&([A-Za-z0-9\.\s]+)\((.+?)\)/) {
				my $before = '&' . $1 . '(' . $2 . ')';
				my (@cmds) = split(/\./, $1);
				my $cmd = join(" ",@cmds);
				my $args = $2;
				$line =~ s/\&(.+?)\((.+?)\)/<call>$cmd $args<\/call>/ig;
				$line =~ s/<call>(.+?)\s+?<\/call>/<call>$1<\/call>/ig;
				print "\tConverted object call format at $file line $lineno.\n"
					. "\t\t$before  =>  $line\n";
			}
		}

		# Separate the command from the data.
		my ($cmd) = $line =~ /^(.)/i;
		$line =~ s/^([^\s]+)\s+//i;

		# Skipping this line?
		my $skip = 0;

		# Process the command.
		if ($cmd eq '^') {
			# This is a continue command. If we've skipped the line it continues, skip this too.
			if ($skippedLast) {
				next;
			}
		}
		elsif ($cmd eq '!') {
			my @fields = split(/\s+/, $line);
			my $type = $fields[0];

			# Make sure this isn't a RS version line.
			if ($type =~ /version/i) {
				my $v = $fields[2];
				if (int($v) >= 2) {
					print "\tSkipping file: it's already RiveScript v. 2 or greater.\n";
					return;
				}
			}

			# Obsolete types:
			if ($type =~ /(addpath|include|syslib)/i) {
				print "\tRemoving obsolete definition type \"$type\" at $file line $lineno.\n";
				$skip = 1;
			}
		}
		elsif ($cmd eq '>') {
			my @fields = split(/\s+/, $line);
			my $type = $fields[0];

			# Objects are slightly different now.
			if ($type =~ /^object/i) {
				my $name = $fields[1];
				if (length $name) {
					my $before = $line;
					$line = "object $name perl";
					$inObject = 1;
					print "\tUpdated object declaration at $file line $lineno.\n"
						. "\t\t$before  ==>  $line\n";
				}
				else {
					print "\tWarning: found object at $file line $lineno but can't determine its name.\n";
					push (@warnings,"Found object at $file line $lineno but can't determine its name.\n"
						. "\t$cmd $line");
					$inObject = 1;
				}
			}
		}
		elsif ($cmd eq '*') {
			my ($cond,$do) = ('','');
			my $before = $line;
			if ($line =~ /=\>/) {
				($cond,$do) = split(/=\>/, $line, 2);
			}
			elsif ($line =~ /::/) {
				($cond,$do) = split(/::/, $line, 2);
			}
			else {
				print "\tWarning: can't parse conditionals at $file line $lineno.\n";
				push (@warnings,"Can't parse conditionals at $file line $lineno:\n"
					. "\t$cmd $line");
				next;
			}

			$cond =~ s/^\s+//g;
			$cond =~ s/\s+$//g;
			$do =~ s/^\s+//g;
			$do =~ s/\s+$//g;

			my ($left,$eq,$right) = ($cond =~ /^(.+?)\s*(=|\!=|\<|\<=|\>|\>=|\?)\s*(.+?)$/i);

			if ($eq eq '=') {
				if ($right =~ /^[0-9]+$/) {
					$line = "<get $left> == $right => $do";
				}
				else {
					$line = "<get $left> eq $right => $do";
				}
			}
			elsif ($eq eq '!=') {
				if ($right =~ /^[0-9]+$/) {
					$line = "<get $left> != $right => $do";
				}
				else {
					$line = "<get $left> ne $right => $do";
				}
			}
			elsif ($eq eq '?') {
				$line = "<get $left> != undefined => $do";
			}
			else {
				$line = "<get $left> $eq $right => $do";
			}

			print "\tConverted conditionals at $file line $lineno.\n"
				. "\t\tBefore: $before\n"
				. "\t\tAfter:  $line\n";
		}
		elsif ($cmd eq '&') {
			# This command is obsolete.
			print "\tSkipping obsolete Perl command (&) at $file line $lineno.\n";
			$skip = 1;
		}

		# Skipping this line?
		if ($skip) {
			$skippedLast = 1;
			next;
		}

		$skippedLast = 0;
		if ($cmd =~ /^(\!|>|\+|\-|\%|\^|\@|\*|\#)$/i) {
			push (@new,join(" ",$cmd,$line));
		}
		else {
			push (@new,join("",$cmd,$line));
		}
	}

	# Cut off the directory.
	my $name = $file;
	if (length $out) {
		my @parts = split(/(\/|\\)/, $file);
		$name = pop(@parts);
	}

	# Save the file.
	if (length $out) {
		print "=> Writing $out/$name\n";
		open (WRITE, ">$out/$name");
		print WRITE join("\n",@new);
		close (WRITE);
	}
	else {
		print "=> Writing $name\n";
		open (WRITE, ">$name");
		print WRITE join("\n",@new);
		close (WRITE);
	}
}

sub usage {
	print "Usage: rsup [--out --ext --backup --dontfixperl] <docs or directories>\n"
		. "Try `rsup --help` for more information.\n";
	exit(0);
}

sub help {
	print qq~Usage: rsup [--out --ext --backup --dontfixperl] <docs or directories>

NAME

	rsup - Upgrade RiveScript 1.x documents to RS 2.0 standards.
	Version: $VERSION

OPTIONS

	--out <directory>
	-o

		Specify a directory to output the new documents. If not
		specified, the files being read from will be replaced with the
		new documents.

	--ext <extension=.rs>
	-x

		For any arguments that are directories, all files in that
		directory ending with this extension are read. Default ".rs"

	--backup
	--bak
	-b

		Specify this flag if you want backup files (such as those
		created by Emacs and gEdit) to be processed. The default is to
		not read these files.

	--dontfixperl
	-p

		When reading in Perl objects, rsup will, by default, attempt to
		fix the \@_ line to include \$rs, the reference to the RS
		instance. Since this will modify the code of your object, you
		can specify this flag to disable this feature.

	<directories or documents>

		After specifying any command-line arguments, give rsup a list of
		directories or files to work on. For directories, they are
		opened and any RiveScript documents inside are automatically
		read. For individual files, just these files are read.

CAVEATS

	This program is still under development. It tries its best to upgrade
	old RiveScript code to the new standards, but it\'s not perfect. It will
	output everything it changes to the terminal, but you may need to go
	through and make some custom tweaks to fix anything that it didn\'t
	translate properly.

AUTHOR

	Casey Kirsle
	<rivescript.com>
~;

	exit(0);
}
