#!/usr/bin/env perl

use English;
use strict;
use warnings;
use open OUT => ":raw";
$OUTPUT_AUTOFLUSH = 1;

my $CHPL_HOME = $ENV{'CHPL_HOME'};
undef $ENV{'CHPL_DEVELOPER'};
my $msghead = ""; #adjust as desired, e.g. "convert: "
my ($numErrors, $numWarnings) = (0,0);


### help/usage

sub help {
   my $verbose = $_[0];
   $verbose and print <<'eom1;';

[Usage is listed at the end; run with -q to skip explanations.]

This script is provided so the Chapel users can semi-automatically
adjust their code to certain changes in the Chapel language presently
being implemented. A brief summary of changes is:
- 'extern' is now a reserved keyword and '_extern' is deprecated;
- 'ref' is now a reserved keyword;
- instead of the 'def' keyword, use:
   - 'proc' for non-iterator functions,
   - 'iter' for iterators;
- correspondingly, 'proc' and 'iter' are now keywords and may not
  be used as identifiers
  ('def' will not be a keyword after the transition is completed);
- iterators may not 'return' an expression -
  a 'yield EXPR; return;' sequence must be used instead.

This script attempts to convert Chapel source files by running the
compiler on each file and looking at the "deprecated" warnings that
result. Or, file/lines for def->proc replacement can be provided in a
".procs" file. For those files that are modified, the original file is
saved as ".bak" .

By default the script output shows which files the compiler is run on,
what compiler output is not recognized (and hence ignored) by the script,
which files are being modified, and what unexpected situations occur.
The latter are indicated as "errors".

The present limitations or the script are:
- the following idioms are flagged as "unimplemented" errors but not converted:
   - uses of 'iter' as an identifier,
   - uses of 'ref' as an identifier,
   - 'return EXPR' in iterators;
- source code may not be converted when:
   - there are compilation errors,
   - 'def' occurs on a different line than the function name,
   - 'def' is preceeded or followed by non-white space;
- the module code is not handled
  (but the Chapel distribution contains already-converted module code).

Therefore after running this script on your code base it may be
necessary to run the compiler again and follow up on its diagnostic messages.

eom1;

   print <<'eom2;';
Usage:  update-sources  FILE|DIR|OPTION...
  FILE - depending on the suffix:
    .chpl  - a Chapel source file to be converted
             based on diagnostics from running the compiler on that file
    .procs - a list of (grep-like) lines of the form  filename:lineno:whatever
             where 'def' should be converted to 'proc'
  DIR
    convert all .chpl files in this directory
  OPTIONs are:
    -d   show the modifications performed via 'diff'
    -q   be quiet
    -v, -vv, -vvv  show more and more progress details
    -h, --help     print this message and exit
eom2;

   $verbose and print <<'eom3;';

eom3;
}


### argument handling

my @SOURCE_ARGS = ();
my @SOURCE_FLAGS = ();
my @FILES;
my $verbose = 1;
my $modules2 = ();
my $showdiffs = "";
my $compiler = "";
my $compopts = "--no-codegen --baseline";  # --baseline seems to speed it up
my $needCompiler = 0;
my $needHelp = 0;

foreach my $arg (@ARGV) {
   if ($arg eq "-q") {        # quiet
      $verbose = 0;
   } elsif ($arg eq "-v") {   # verbose
      $verbose = 2;
   } elsif ($arg eq "-vv") {  # more verbose
      $verbose = 3;
   } elsif ($arg eq "-vvv") { # even more verbose
      $verbose = 4;
   } elsif ($arg eq "-m") {   # do modules, too (currently does not work)
      $modules2 = 1;
   } elsif ($arg eq "-d") {   # show diffs on the modified files
      $showdiffs = " ";
   } elsif ($arg eq "-h" or $arg eq "--help") {   # help
      $needHelp = 1;
   } elsif ($arg =~ /^-/) {        # others starting with '-' - unrecognized
      error("unknown option: '$arg'");
   } elsif (-d $arg) {
      if (-r $arg and -x $arg) {
         push @SOURCE_ARGS, $arg;
         push @SOURCE_FLAGS, "dir-find";
	 $needCompiler = 1;
      } else {
         error("could not read the directory '$arg'");
      }
   } elsif ($arg =~ /\.chpl$/) {
      if (-r $arg) {
	 push @SOURCE_ARGS, $arg;
	 push @SOURCE_FLAGS, "chpl-source";
	 $needCompiler = 1;
      } else {
	 error("could not read the file '$arg'");
      }
   } elsif ($arg =~ /\.procs$/) {
      if (-r $arg) {
	 push @SOURCE_ARGS, $arg;
	 push @SOURCE_FLAGS, "def2proc-file";
      } else {
	 error("could not read the file '$arg'");
      }
   } else {                        # any other arg
      error("unknown argument: '$arg' - not a directory or a recognized file");
   }
}


### argument post-processing

if ($needHelp) { help($verbose); exitIfErrors(); exit(0); }

@SOURCE_ARGS or error("no source files or directories to process");

if (! $CHPL_HOME) {
   if ($needCompiler) {
      error("\$CHPL_HOME is not defined");
   }
} elsif (!$compiler) {
   my $platform = `$CHPL_HOME/util/chplenv/platform`;
   if (!$platform) {
      error("could not determine the Chapel platform");
   } else {
      chop $platform;
      $compiler = "$CHPL_HOME/bin/$platform/chpl";
      -r $compiler && -x $compiler or
          error("could not read or execute the compiler '$compiler'");
   }
}

exitIfErrors(1);

my $reportedDidNotFindError = 0;

# run 'find' for directory arguments, combining with file arguments
# preserving their relative order
#
for my $i (0 .. $#SOURCE_ARGS) {
   my ($src, $flag) = ($SOURCE_ARGS[$i], $SOURCE_FLAGS[$i]);
   if (!$src or !$flag) {
      bug("empty source of flag: '$src':'$flag' [$i]");
      next;
   }
   if ($flag eq "chpl-source" or $flag eq "def2proc-file") {
      push @FILES, $src;
   } elsif ($flag eq "dir-find") {
      push @FILES, runFind($src);
   } else {
      bug("unknown flag in: '$src':'$flag' [$i]");
      next;
   }
}

@FILES or $reportedDidNotFindError or error("did not find any source files");
exitIfErrors(1);


### declare/initialize some globals

my ($curOpenFile, $curLine, $curModFile, $curBakFile,
    $curIgnModuleFile, $curIgnSkippedFile,
    ) =
   ("", -1, "", "", "", "");
my (%PROCESSED_FILES, %MODULE_FILES);
my ($numMessagesTotal, $numMessagesNotErrorFormat, $numIrrelevantMessages,
    $numModuleFiles, $numModuleMessages, $numSkippedFileMessages,
    $numFilesModified, $numDef2proc, $numDef2iter, $numOldExtern,
    $numRet2yield, $numRefAsIdent, $numIterAsIdent, $numMessagesBadFileOrLineno,
    $numIOskippedMessages, $numNonMatched, $numCompiles, $numDef2ProcFiles) =
   (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
# reset for each line of compiler output
my ($msgAll, $msgFile, $msgLineno, $msgMsg, $msgAction, $msgCounterVar);


### run the compiler on each file in the argument list

initHandleActions();

foreach my $file (@FILES) {
   if ($file =~ /\.procs$/) {
      verb(1, "reading def->proc information from '$file' ...");
      if (open(PROCS, $file)) {
	 $numDef2ProcFiles++;
      } else {
	 error("could not read the def->proc file '$file': $!");
	 next;
      }
      while (<PROCS>) {
	 chop;
	 handleOneInfoLine($_, "def2proc", \$numDef2proc);
      }
      close(PROCS);
   } else {
      verb(1, "applying compiler to '$file' ...");
      if (open(COMP_MSGS, "$compiler $compopts '$file' 2>&1 |")) {
	 $numCompiles++;
      } else {
	 error("could not run the compiler on '$file': $!");
	 next;
      }

      while (<COMP_MSGS>) {
	 chop;
	 handleOneCompMsg($_);
      }
      close(COMP_MSGS);
   }
   finalizeCurFile();
}

finishHandleActions();

printStats();
printUnhandledMessages();


### summarize and exit

exitIfErrors();

#verb(1, "\n");
verb(1, "DONE");
exit(0);


### handle one compiler message or info file line

sub handleOneCompMsg {
   $msgAll = $_[0];
   $msgFile = "";
   $msgLineno = -1;
   $msgMsg = "";
   $msgAction = "";
   $msgCounterVar = ();
   $numMessagesTotal++;

   if ($msgAll =~ /^([^:]+):([-1-9]\d*): (.+)$/) {
      ($msgFile, $msgLineno, $msgMsg) = ($1,$2,$3);
      verb(99, "got $msgFile:$msgLineno: $msgMsg");
   } else {
      verb(1, "ignored message c1: $msgAll");
      $numMessagesNotErrorFormat++;
      return;
   }

   # check for module or skipped files
   if ($msgFile eq $curIgnModuleFile) {
      verb(4, "ignored message (in modules): $msgAll");
      $numModuleMessages++;
      return;
   }
   if ($msgFile eq $curIgnSkippedFile) {
      verb(3, "ignored message (in a skipped file): $msgAll");
      $numSkippedFileMessages++;
      return;
   }
   if (!$modules2 and $msgFile ne $curIgnModuleFile and
       $msgFile =~ m@^\$CHPL_HOME/modules/@) {
      if (++$MODULE_FILES{$msgFile} > 1) {
         # already seen this one
      } else {
         verb(2, "file ignored (module): $msgFile");
         $numModuleFiles++;
      }
      $curIgnModuleFile = $msgFile;
      verb(4, "ignored message (in modules): $msgAll");
      $numModuleMessages++;
      return;
   }

   # see what the message is telling us to do, if at all
   if ($msgMsg eq "warning: the 'def' keyword is deprecated - replace it with 'proc'") {
      $msgAction = "def2proc";
      $msgCounterVar = \$numDef2proc;
   } elsif ($msgMsg eq "warning: the 'def' keyword is deprecated - replace it with 'iter'") {
      $msgAction = "def2iter";
      $msgCounterVar = \$numDef2iter;
   } elsif ($msgMsg eq "warning: returning a value in an iterator is deprecated; replace it with a yield of that value, followed by a return with no expression" or $msgMsg eq "error: returning a value in an iterator") {
      $msgAction = "unimplemented (return->yield)";
      $msgCounterVar = \$numRet2yield;
   } elsif ($msgMsg eq "syntax error: near 'ref'") {
      $msgAction = "unimplemented ('ref' identifier)";
      $msgCounterVar = \$numRefAsIdent;
   } elsif ($msgMsg eq "syntax error: near 'iter'") {
      $msgAction = "unimplemented ('iter' identifier)";
      $msgCounterVar = \$numIterAsIdent;
   } elsif ($msgMsg eq "warning: The _extern keyword is deprecated. Use extern (no leading underscore) instead.") {
     $msgAction = "trimExtern";
     $msgCounterVar = \$numOldExtern;
   } else {
      verb(1, "ignored message c2: $msgAll");
      $numIrrelevantMessages++;
      return;
   }

   if ($msgAction =~ "^unimplemented") {
      couldNotHandle($msgAction, $msgCounterVar);
      return;
   }

   if ($msgFile eq "<internal>" or $msgLineno <= 0) {
      couldNotHandle("unimplemented (bad file name or line number)",
                     \$numMessagesBadFileOrLineno);
      return;
   }

   handleAction();
}

sub handleOneInfoLine {
   $msgAll = $_[0];
   $msgFile = "";
   $msgLineno = -1;
   $msgMsg = "";
   $msgAction = $_[1];
   $msgCounterVar = $_[2];
   $numMessagesTotal++;

   if ($msgAll =~ /^([^:]+):([1-9]\d*):(.+)$/) { #NB diff than handleOneCompMsg
      ($msgFile, $msgLineno, $msgMsg) = ($1,$2,$3);
      verb(99, "got $msgFile:$msgLineno:$msgMsg");
   } else {
      verb(1, "ignored message c3: $msgAll");
      $numMessagesNotErrorFormat++;
      return;
   }

   handleAction();
}


### final printing

sub printStats {
   if ($verbose < 1) {
      return;
   }

   print "\n";
   print "Statistics:\n";
   printf "  compiler runs: %d\n", $numCompiles;
   printf "  def->iter info files: %d\n", $numDef2ProcFiles;
   printf "  modified files: %d\n", $numFilesModified;
   printf "%8d  def->proc\n", $numDef2proc;
   printf "%8d  def->iter\n", $numDef2iter;
   printf "%8d  _extern->extern\n", $numOldExtern;
if ($numRet2yield or $numRefAsIdent or $numIterAsIdent or $numMessagesBadFileOrLineno or
    $numNonMatched or $numIOskippedMessages)
{
   printf "  could not handle\n";
   printf "%8d  return->yield\n", $numRet2yield;
   printf "%8d  'iter' identifier\n", $numIterAsIdent;
   printf "%8d  'ref' identifier\n", $numRefAsIdent;
   printf "%8d  bad file name or line number\n", $numMessagesBadFileOrLineno;
   printf "%8d  source line contents not as expected\n", $numNonMatched;
   printf "%8d  I/O-related issues\n", $numIOskippedMessages;
}
if ($numModuleFiles or $numModuleMessages or $numSkippedFileMessages) {
   printf "  skipped files\n";
   printf "%8d  messages in %d module files\n", $numModuleMessages, $numModuleFiles;
   printf "%8d  messages in repeated or otherwise skipped files\n", $numSkippedFileMessages;
}
if ($numMessagesNotErrorFormat or $numIrrelevantMessages) {
   printf "  unrelated compiler output\n";
   printf "%8d  non-\"message\" lines\n", $numMessagesNotErrorFormat;
   printf "%8d  unrelated error/warning messages\n", $numIrrelevantMessages;
}
   printf "Total compiler output\n";
   printf "%8d  lines\n", $numMessagesTotal;
   print "\n";

   my $sumItemized = $numDef2proc + $numDef2iter + $numOldExtern + $numRet2yield +
      $numRefAsIdent + $numIterAsIdent + $numMessagesNotErrorFormat + $numNonMatched +
      $numIOskippedMessages + $numSkippedFileMessages +
      $numModuleMessages + $numIrrelevantMessages;
   if ($numMessagesTotal != $sumItemized) {
      print "Unexpected: the sum of the itemized counts is $sumItemized, does not match $numMessagesTotal\n";
      print "\n";
   }
}

sub printUnhandledMessages {
   if ($numSkippedFileMessages) {
      warning("$numSkippedFileMessages messages were not acted upon (not shown)"
              . " - a rerun is recommended\n");
   }
}

sub couldNotHandle {
   my ($message, $counterVar, $extramsg) = @_;
   if ($message !~ "^unimplemented") {
      $message = "could not handle ($message)";
   }
   error("$message\n  compiler message: $msgAll",
         $extramsg ? "\n  $extramsg" : "");
   $$counterVar++;
}


### perform file editing actions

sub initHandleActions {
   # nothing so far
}

# essentially, invokes
# advanceTo() then convertLine()
#
sub handleAction {
   my $lineToEdit = advanceTo($msgFile, $msgLineno);
   if ($lineToEdit) {
      convertLine($lineToEdit);
   } else {
      if ($msgAction eq "<repeatedfile>") {
         verb(3, "ignored message (in a skipped file): $msgAll");
         $numSkippedFileMessages++;
      } else {
         couldNotHandle("I/O-related issue // $msgAction", \$numIOskippedMessages);
      }
   }
}

sub finishHandleActions {
   finalizeCurFile();
}

# modify the source code line (comes in an argument) and print to TO_FILE
#
sub convertLine {
   my ($lineToEdit) = @_;
   my ($substitute, $success) = ("", 1);
   if ($msgAction eq "def2proc") {
      $substitute = "proc";
   } elsif ($msgAction eq "def2iter") {
      $substitute = "iter";
   } elsif ($msgAction eq "trimExtern") {
      $substitute = "extern";
   }
   if (!$substitute) {
      bug("unexpected action '$msgAction' in convertLine()");
      couldNotHandle("unexpected action '$msgAction'" .
                     " (billed to \"I/O-related issues\")", \$numIOskippedMessages);
      return;
   }
   # finally, handle the line
   $lineToEdit =~ s/^()def(\s)/$1$substitute$2/ or
   $lineToEdit =~ s/(\s)def(\s)/$1$substitute$2/ or
   $lineToEdit =~ s/^()_extern(\s)/$1$substitute$2/ or
   $lineToEdit =~ s/(\s)_extern(\s)/$1$substitute$2/ or
   $success = 0;
   if ($success) {
      ${$msgCounterVar}++;
   } else {
      couldNotHandle("source line contents not as expected",
                     \$numNonMatched, "source line: $lineToEdit");
   }
   print TO_FILE $lineToEdit;
}

# open the current source file for reading
# and source.converted for writing
#
sub startCurFile {
   my ($toFile) = @_;
   if ($curOpenFile) {
      bug("\$curOpenFile is non-empty in startCurFile() - will close it");
      finalizeCurFile();
   }
   if (!$toFile) {
      bug("\$toFile not defined in startCurFile()");
      return 0;
   }
   if ($PROCESSED_FILES{$toFile}) {
      bug("repeated file '$toFile' in startCurFile()");
      markAsIgnored($toFile, "repeated file");
      $msgAction = "<repeatedfile>";
      return 0;
   }
   $PROCESSED_FILES{$toFile}++;
   if (open(FROM_FILE, '<', $toFile)) {
      # good
   } else {
      error("could not open '$toFile': $!");
      markAsIgnored($toFile, "could not open");
      return 0;
   }
   $curModFile = "$toFile.converted";
   if (open(TO_FILE, '>', $curModFile)) {
      # good
   } else {
      error("could not open '$curModFile': $!");
      markAsIgnored($toFile, "could not open temp file for conversion");
      close(FROM_FILE);
      $curModFile = "";
      return 0;
   }
   $curOpenFile = $toFile;
   $curLine = 0;
   verb(1, "modifying '$curOpenFile' ...");
   $numFilesModified++;
   return 1;
}

# returns the line of interest
# after copying all previous lines from source to source.converted
# if error, reports it and returns false
#
sub advanceTo {
   my ($toFile, $toLine) = @_;
   if ($toFile ne $curOpenFile) {
      finalizeCurFile();
      if (startCurFile($toFile)) {
         # good
      } else {
         return "";
      }
   } elsif ($curLine >= $toLine) {
      # most likely repeated file, so mark it as ignored
      markAsIgnored($toFile, "repeated line");
      $msgAction = "<repeatedfile>";
      return "";
   }
   my $myLine;
   while ($curLine < $toLine) {
      $curLine++;
      $myLine = <FROM_FILE>;
      if (!$myLine) {
         # got eof
         error("encountered end of file at Line $curLine while seeking Line $toLine in '$toFile'");
         markAsIgnored($toFile, "unexpected end of file");
         finalizeCurFile();
         return "";
      }
      if ($curLine < $toLine) {
         # not subject to edit
         print TO_FILE $myLine;
      }
   }
   # looks good!
   return $myLine;
}

sub markAsIgnored {
   my ($toFile, $reason) = @_;
   verb(2, "file ignored ($reason): $toFile");
   $PROCESSED_FILES{$toFile}++;
   $curIgnSkippedFile = $toFile;
}

# copy the remaining from source to source.converted
# mv source source.back
# mv source.converted source
#
sub finalizeCurFile {
   if (!$curOpenFile) {
      return;
   }
   $curBakFile = "$curOpenFile.bak";

   # copy over whatever remains in curOpenFile
   while (<FROM_FILE>) { print TO_FILE $_; }

   # close the handles
   if (close(TO_FILE)) {
      if (close(FROM_FILE)) {
         # rename the from/to files
         if (rename($curOpenFile, $curBakFile)) {
            if (rename($curModFile, $curOpenFile)) {
               # success!
            } else {
               error("could not move '$curModFile' to '$curOpenFile'" .
                     " - mods should be in '$curModFile'," .
                     " original code in '$curBakFile'");
            }
         } else {
            error("could not move away '$curOpenFile' - left it unmodified");
            unlink($curModFile);
         }
      } else {
         error("could not close '$curOpenFile', so did not update it");
         unlink($curModFile);
      }
   } else {
      error("could not close '$curModFile', so did not update '$curOpenFile'");
      close(TO_FILE);
      unlink($curModFile);
   }

   if ($showdiffs) {
      print "----------- diffs on $curOpenFile -----------\n";
      system("diff $showdiffs $curBakFile $curOpenFile");
      print "-----------\n";
   }
   # indicate we are done with these
   $curOpenFile = "";
   $curBakFile = "";
   $curModFile = "";
   $curLine = -1;
}


### helpers

sub runFind {
   my ($findArg) = @_;
   verb(1, "running find on '$findArg' ...");
   if (open(FIND, "find '$findArg' -name '*.chpl' -not -type d | sort |")) {
      # good
   } else {
      error("could not run 'find' on '$findArg': $!");
      return ();
   }
   my @FOUND;
   while (<FIND>) {
      chop;
      push @FOUND, $_;
   }
   close(FIND);
   if (!@FOUND) {
      error("did not find any files in '$findArg'");
      $reportedDidNotFindError = 1;
      return ();
   }
   verb(1, "found", $#FOUND + 1, "files");
   return @FOUND;
}
      
sub verb {
   my ($level, @msg) = @_;
   if ($level > $verbose) {
      return;
   }
   if ("@msg" eq "\n") {
      print "\n";
   } else {
      print $msghead . "@msg\n";
   }
}

sub warning {
   $numWarnings++;
   print STDERR "WARNING: @_\n";
}

sub error {
   $numErrors++;
   print STDERR "ERROR: @_\n";
}

sub bug {
   error("bug: @_");
}

sub fatal {
   error(@_);
   exitIfErrors(1);
}

sub exitIfErrors {
   my ($aborting) = @_;
   if (!$numErrors) {
      return;
   }
   if ($aborting) {
      print STDERR $msghead . "aborting\n";
      exit(2);
   } else {
      print STDERR "\n" . $msghead . "DONE - got $numErrors error(s)\n";
      exit(1);
   }
}
