#!/usr/bin/env perl

use strict;               
use warnings;

# Print the usage text and exit successfully when the first argument asks for
# help. The here-doc interpolates, hence the escaped '$' and '\' in the
# example command line.
if (@ARGV && ($ARGV[0] eq '-h' || $ARGV[0] eq '--help')) {
   print <<END;

prediff-for-slurm-launcher is for the use with 'start_test' as the system-wide
prediff, when the launcher is slurm. This works for interactive and batch jobs.
E.g. (assuming bash):

  CHPL_SYSTEM_PREDIFF=\$CHPL_HOME/util/test/prediff-for-slurm-launcher \\
start_test ...

For batch jobs this assumes that slurm just outputs the job submission as:
Submitted batch job <x>

and that the program output is placed in <testname>.<x>.out

We remove the job submission text and replace it with the text from the output
file. 

For interactive jobs we just remove all of the salloc commands. Note that for
interactive jobs start_test needs to be run with -nostdinredirect and
CHPL_LAUNCHER_TIMEOUT needs to be set to slurm and CHPL_LAUNCHER_WALLTIME needs
to be set. For batch jobs CHPL_LAUNCHER_WALLTIME can be set, but it defaults to
an hour if not set 

END
   exit();
}

### the files to operate on

# The script is invoked as: <script> <executable name> <output file>.
# Fail with a clear message instead of a cascade of "uninitialized value"
# warnings when either argument is missing.
@ARGV >= 2
    or die "usage: $0 <executable name> <test output file>\n";

# outfile is $2 (the second command-line argument)
my $outfile = $ARGV[1];
# scratch file that receives the rewritten output before it replaces $outfile
my $modfile = "$outfile.tmp";
# Three-argument open so that the file name is never interpreted as a mode or
# a pipe. OUTFILE stays a bareword handle because the subs below read/close it.
open(OUTFILE, '<', $outfile)
    or die "could not open the test output file $outfile: $!";


# Read the next line of OUTFILE and match it against the given pattern.
# If there is no line (or it is false) or it does not match, nochange() is
# called, which closes OUTFILE and exits the script. Otherwise the text
# captured by the pattern's first group ($1) is returned.
sub check1line {
   my $regex = shift;
   my $firstLine = <OUTFILE>;
   nochange() unless $firstLine && $firstLine =~ /$regex/;
   return $1;
}

my $batch = $ENV{'CHPL_LAUNCHER_USE_SBATCH'};

# Handle for the rewritten output. It is opened inside each branch (not here)
# so that an early exit through nochange() never leaves a stray $modfile.
my $modFh;

# for batch jobs we basically remove the job submission, then we wait for the
# job to complete by checking squeue's completed jobs and then we take the
# job output file and put it as the output for the start_test run
if ($batch) {
    # remove the batch submission output if it existed; otherwise some other
    # error occurred and we just exit without changing the output
    my $batchNum = check1line('^Submitted batch job (.*)');
    my $execName = $ARGV[0];
    my $batchOutfile = "$execName.$batchNum.out";

    # while the job is not done running, sleep
    while (not jobDoneRunning($batchNum)) {
        sleep 1;
    }

    # open the batch output file first, so that a missing batch output does
    # not leave an empty temp file behind, then the temp modified file
    open(my $batchFh, '<', $batchOutfile)
        or die "could not open the test output file $batchOutfile: $!";
    open($modFh, '>', $modfile)
        or die "could not open the modified test output file $modfile: $!";

    # copy the output to the modified file for each line that doesn't start
    # with slurm, unless the job failed, in which case the slurm errors are
    # kept too. jobFailed() runs squeue, so ask at most once and remember the
    # answer instead of forking a process for every slurm line.
    my $failed;
    while (my $line = <$batchFh>) {
        if ($line =~ /^slurm/) {
            $failed = jobFailed($batchNum) unless defined $failed;
            next unless $failed;
        }
        print {$modFh} $line;
    }
    close($batchFh)
        or warn "could not close the batch output file $batchOutfile: $!";

    # remove the batch output without going through a shell; a leftover file
    # is not fatal for the test, so only warn
    unlink($batchOutfile)
        or warn "could not remove the batch output file $batchOutfile: $!";

# for interactive jobs, we just want to remove the salloc messages
# and for some reason carriage returns are put at the end of lines
} else {

    open($modFh, '>', $modfile)
        or die "could not open the modified test output file $modfile: $!";
    while (my $line = <OUTFILE>) {
        if ($line !~ /^salloc.*/) {
            $line =~ s/\r//g;
            print {$modFh} $line;
        }
    }
}


# replace the old file with the new file; $modfile lives next to $outfile, so
# a plain rename is enough and, unlike a shelled-out mv, its failure is seen
close($modFh) or die "could not close the modified test output file: $!";
close(OUTFILE) or die "could not close the test output file: $!";
rename($modfile, $outfile)
    or die "could not replace $outfile with $modfile: $!";

exit();

# Report whether a job shows up in squeue's listing for the given states.
# First argument is the job id, second is the comma-separated list of states
# handed to `squeue --states=`. The first whitespace-delimited token of every
# output line is compared to the job id, i.e. the job id is assumed to be in
# the first column of squeue's output. Returns 1 on a match, 0 otherwise.
sub jobStatus {
    my ($jobID, $states) = @_;

    my $listing = `squeue --states=$states`;
    for my $row (split /\n/, $listing) {
        # the pattern can match the empty string, so $1 may be ''
        next unless $row =~ /\s*(\S*).*/;
        return 1 if $1 eq $jobID;
    }
    return 0;
}

# True (1) when squeue lists the job in one of the failure states: cancelled
# by the user, non-zero exit code, or timeout. Takes the job id.
sub jobFailed {
   my ($jobID) = @_;
   return jobStatus($jobID, "CANCELLED,FAILED,TIMEOUT");
}

# True (1) when squeue lists the job as no longer running, whether it
# completed or ended in one of the failure states. Takes the job id.
sub jobDoneRunning {
   my ($jobID) = @_;
   return jobStatus($jobID, "COMPLETED,CANCELLED,FAILED,TIMEOUT");
}

# Bail out when the expected text was not found: close the input handle and
# end the script with exit status 0, leaving $outfile exactly as it was.
sub nochange {
  close OUTFILE;
  exit 0;
}
