#!/usr/bin/env perl
# /=====================================================================\ #
# |  maketests                                                          | #
# | Test Maintenance tool                                               | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
use strict;
use warnings;
use FindBin;
use lib "$FindBin::RealBin/../blib/lib";
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use LaTeXML;
use LaTeXML::Util::Pathname;
use LaTeXML::Util::Test;
use File::Copy;
use File::Which;
use File::Spec::Functions;
use Algorithm::Diff;
use Term::ANSIColor;

#======================================================================
# NOTE: Don't forget you can use
#  make test TEST_FILES=t/sometestglob.t
#======================================================================
# Banner used in usage and version messages.
my $identity = "maketests (LaTeXML version $LaTeXML::VERSION)";
# $doxml and $dopdf are tri-state: undefined means "decide from file timestamps",
# while --xml/--pdf set them to 1 and --noxml/--nopdf set them to 0.
# $docompare switches the tool into compare-only mode.
my ($doxml,     $dopdf, $docompare)   = ();
# Only $verbosity receives the initial 0; $help and $showversion start out undefined.
my ($verbosity, $help,  $showversion) = (0);
my $whitespace = 1;    # keep whitespace when parsing XML for comparison (see readXML)
GetOptions("xml!" => \$doxml,
  "pdf!"        => \$dopdf,
  "compare!"    => \$docompare,
  "quiet"       => sub { $verbosity--; },
  "verbose"     => sub { $verbosity++; },
  "VERSION"     => \$showversion,
  "help"        => \$help,
  "dvi"         => \$help,                  # Obsolete! Now just triggers the help text.
  "nodvi"       => \$help,                  # Obsolete as well; also triggers the help text.
  "whitespace!" => \$whitespace,
) or pod2usage(-message => $identity, -exitval => 1, -verbose => 0, -output => \*STDERR);
# Full manual on request; note that this exits with status 1, like the error paths.
pod2usage(-message => $identity, -exitval => 1, -verbose => 2, -output => \*STDOUT) if $help;
if ($showversion) { print STDERR "$identity\n"; exit(1); }
# At least one test case must be named on the command line.
pod2usage(-message => "$identity\nMissing test TeX file",
  -exitval => 1, -verbose => 0, -output => \*STDERR) unless @ARGV;

# Diff output and diagnostics may contain non-ASCII text.
binmode(STDOUT, ":encoding(UTF-8)");
binmode(STDERR, ":encoding(UTF-8)");
#======================================================================
# hmm... could be even more helpful
#  copy xml to a backup and compare ?
# or compare option
# What about if $ARG is a directory? Do all tests in that directory?

# Names of the test cases that passed/failed; filled in by the maketest_* subs in --compare mode.
my (@PASS, @FAIL);
foreach my $testcase (@ARGV) {
  # Accept the case with or without one of its known extensions.
  $testcase =~ s/(\.pdf|\.dvi|\.xml|\.tex|\.spec)$//;
  # A .spec file marks a daemon style test and takes priority over a .tex file.
  if    (-f "$testcase.spec") { maketest_daemon($testcase); }
  elsif (-f "$testcase.tex")  { maketest_latexml($testcase); }
  else                        { die "There seems to be no test case $testcase"; }
}

if ($docompare) {
  # Render a list of names as "N (a,b,c)", or as "0" when the list is empty.
  my $tally = sub {
    my (@names) = @_;
    return (@names ? scalar(@names) . ' (' . join(',', @names) . ')' : '0'); };
  print "Testing results:"
    . " Pass= " . $tally->(@PASS)
    . " Fail= " . $tally->(@FAIL) . "\n"; }

#======================================================================
# Process one LaTeXML style test case (NAME.tex with expected NAME.xml and NAME.pdf).
#   $source : path of the test case, with or without the .tex extension.
# In --compare mode the case is only compared (via compare_xml) and its name is
# recorded in @PASS or @FAIL.  Otherwise the xml and the pdf are each regenerated
# when explicitly requested (--xml / --pdf), or, when the corresponding option was
# not given at all, when the file is missing or its timestamp is older than the source's.
# Returns nothing.
sub maketest_latexml {
  my ($source) = @_;
  $source .= ".tex" unless $source =~ /\.tex$/;
  my ($dir, $name) = pathname_split($source);
  my $xml_file = pathname_concat($dir, "$name.xml");
  my $pdf_file = pathname_concat($dir, "$name.pdf");
  my $src_time = pathname_timestamp($source);
  my $xml_time = pathname_timestamp($xml_file);
  my $pdf_time = pathname_timestamp($pdf_file);

  if ($docompare) {
    # compare_xml returns the number of differing hunks; any difference is a failure.
    (compare_xml($dir, $name) ? push(@FAIL, $name) : push(@PASS, $name)); }
  else {
    # Possibly remake xml
    if (defined $doxml ? $doxml : !(-f $xml_file) || ($src_time > $xml_time)) {
      gen_xml($dir, $name); }
    elsif (!defined $doxml) {
      print STDERR "Test case $xml_file seems uptodate\n"; }
    # Possibly remake pdf; the freshness check must look at the pdf itself, not the xml.
    if (defined $dopdf ? $dopdf : !(-f $pdf_file) || ($src_time > $pdf_time)) {
      gen_pdf($dir, $name); }
    elsif (!defined $dopdf) {
      print STDERR "Test case $pdf_file seems uptodate\n"; }
  }
  return; }

# Process one daemon style test case (NAME.spec with expected NAME.xml and NAME.status).
#   $source : path of the test case, with or without the .spec extension.
# Runs latexmlc with the options read from the spec file plus a fixed set of testing
# options, writing NAME.test.xml and capturing stderr in NAME.test.status.
# In --compare mode both outputs are compared against the expected files, the
# temporary outputs are removed, and the name is recorded in @PASS or @FAIL.
# Otherwise the outputs replace the expected files when --xml was given or, if the
# option was not given at all, when NAME.xml is missing or older than the spec.
# Dies if latexmlc cannot be run successfully.  Returns nothing.
sub maketest_daemon {
  my ($source) = @_;
  $source .= ".spec" unless $source =~ /\.spec$/;
  my ($dir, $name) = pathname_split($source);
  my $cwd  = pathname_cwd();
  my $opts = read_options($source, "$dir/$name");
  # Take the spec's timestamp now: $source may be relative to the current
  # directory, and would no longer resolve after the chdir into $dir below.
  my $src_time = pathname_timestamp($source);
  push @$opts, (['destination', "$name.test.xml"],
    ['log',                "/dev/null"],
    ['timeout',            5],
    ['autoflush',          1],
    ['timestamp',          '0'],
    ['nodefaultresources', ''],
    ['xsltparameter',      'LATEXML_VERSION:TEST'],
    ['nocomments',         '']);

  my $invocation = catfile($FindBin::Bin, '..', 'blib', 'script', 'latexmlc') . ' ';
  my $timed      = undef;
  foreach my $opt (@$opts) {
    # Only the first timeout is passed on; the spec's options precede the defaults
    # in the list, so a timeout from the spec file takes precedence.
    if ($$opt[0] eq 'timeout') {
      if ($timed) { next; } else { $timed = 1; }
    }
    # Options with an empty value are passed as bare switches.
    $invocation .= "--" . $$opt[0] . (length($$opt[1]) ? ('="' . $$opt[1] . '" ') : (' '));
  }
  $invocation .= " 2>$name.test.status ";
  pathname_chdir($dir);
  if (system($invocation) != 0) {
    unlink "$name.test.xml"    if -e "$name.test.xml";
    unlink "$name.test.status" if -e "$name.test.status";
    die "Failed to execute '$invocation': $!"; }
  if ($docompare) {
    my $ndiffs = 0;
    print STDERR "\n" . ("=" x 40) . "\nCompare Expected $dir/$name to current output XML:\n";
    $ndiffs += compareLines(readXML("$name.xml"), readXML("$name.test.xml"));
    print STDERR "\n" . ("=" x 40) . "\nCompare Expected Status $dir/$name to current status:\n";
    $ndiffs += compareLines(readText("$name.status"), readText("$name.test.status"));
    unlink "$name.test.xml"    if -e "$name.test.xml";
    unlink "$name.test.status" if -e "$name.test.status";
    print STDERR "" . ("=" x 40) . "\n\n";
    ($ndiffs ? push(@FAIL, $name) : push(@PASS, $name)); }
  else {
    # Decide per test case; the file-scoped $doxml must stay untouched, otherwise
    # the decision made for this case would be forced onto all following ones.
    my $replace = $doxml;
    if (!defined $replace) {
      my $xml_time = pathname_timestamp("$name.xml");
      $replace = !(-f "$name.xml") || ($src_time > $xml_time); }
    if ($replace) {
      print STDERR "\n" . ("=" x 40) . "\nReplacing $dir/$name.xml and status.\n";
      move("$name.test.xml",    "$name.xml")    if -e "$name.test.xml";
      move("$name.test.status", "$name.status") if -e "$name.test.status"; } }
  pathname_chdir($cwd);
  return; }

#======================================================================

# Question!
#   How should the comparison be done?
#   * Incorporate a diff algorithm?  Probably yes
#   * What form of the XML should be diff'd ?
#     Primarily a question of whitespace,
#     but potentially also namespace, PI, comments ?
#     Forms:
#     - the literal straight output of latexml?
#     - a normalized form that puts one element per line?
# Whatever form, it is most useful if it is the same form
# as is used by "make test" itself!
# Or should it???
# The normalized form is most sensible for comparison,
# but the literal form is most expected(?) for test!!
# (else, we're ignoring whitespace changes in test)
# Compare the expected XML of a LaTeXML style test case against freshly generated output.
#   $dir, $name : directory and base name of the test case.
# Generates NAME.test.xml with the current latexml (via gen_xml), prints a diff-like
# report, removes the temporary file, and returns the number of differing hunks
# (0 when the outputs match).
sub compare_xml {
  my ($dir, $name) = @_;
  my ($expected, $actual)
    = map { pathname_make(dir => $dir, name => $name, type => $_) } qw(xml test.xml);
  my $rule = "=" x 40;

  # Generate the XML from the current latexml, into the temporary test file.
  gen_xml($dir, $name, 1);

  # Do the comparison, framed by separator lines on STDERR.
  print STDERR "\n" . $rule . "\nCompare Expected $dir/$name to current output XML:\n";
  my $ndiffs = compareLines(readXML($expected), readXML($actual));
  print STDERR "" . $rule . "\n\n";

  unlink $actual if -e $actual;
  return $ndiffs; }

# Print a diff-like report of two lists of lines and a colored verdict.
#   $oldlines, $newlines : array references holding the expected and the current lines.
# Each differing hunk is announced as Missing, Added or Changed, followed by the
# affected lines ("<" for old, ">" for new).  The report goes to STDOUT.
# Returns the number of differing hunks.
sub compareLines {
  my ($oldlines, $newlines) = @_;
  my $hunks   = Algorithm::Diff->new($oldlines, $newlines);
  my $changes = 0;
  while ($hunks->Next()) {
    next if $hunks->Same();
    $changes++;
    my @removed = $hunks->Items(1);    # lines only in the old sequence
    my @added   = $hunks->Items(2);    # lines only in the new sequence
    my $divider = '';
    if (!@added) {
      printf "Missing: %d,%dd%d\n", $hunks->Get(qw( Min1 Max1 Max2 )); }
    elsif (!@removed) {
      printf "Added: %da%d,%d\n", $hunks->Get(qw( Max1 Min2 Max2 )); }
    else {
      # Only a genuine change shows both sides, so only then a divider is needed.
      $divider = "---\n";
      printf "Changed: %d,%dc%d,%d\n", $hunks->Get(qw( Min1 Max1 Min2 Max2 )); }
    print map { "< $_\n" } @removed;
    print $divider;
    print map { "> $_\n" } @added;
  }
  my ($hue, $verdict) = ($changes ? ('red', "Failed!") : ('green', "OK!"));
  print color($hue) . $verdict . color('reset') . "\n";
  return $changes; }

# Parse an XML file and return it as a reference to a list of comparable lines.
#   $xmlfile : path of the XML file to read.
# Whether blank nodes are kept while parsing follows the --whitespace option.
sub readXML {
  my ($xmlfile) = @_;
  my $reader = XML::LibXML->new();
  $reader->keep_blanks($whitespace);    # Keep or trim whitespace.
  my $document = $reader->parse_file($xmlfile);
  return stringifyDom($document); }

# Read a text file and return a reference to the list of its non-blank lines.
#   $textfile : path of the file to read.
# Lines consisting only of whitespace are dropped.  Dies if the file cannot be opened.
sub readText {
  my ($textfile) = @_;
  open(my $IN, '<', $textfile) or die "Couldn't read textfile '$textfile': $!";
  # Slurp the whole file at once, with the record separator disabled only here.
  my $content = do { local $/ = undef; <$IN> };
  close($IN);
  my @nonblank = grep { /\S/ } split("\n", $content);
  return \@nonblank; }

# Turn a document into a reference to a list of non-blank lines suitable for diffing.
#   $dom : the document object; it must support toString.
# The document is serialized with formatting level 1 (level 2 was used at some
# point), reparsed without validation, and serialized again in canonical (C14N)
# form; the result is split into lines and whitespace-only lines are dropped.
sub stringifyDom {
  my ($dom) = @_;
  my $indented = $dom->toString(1);
  my $reparser = XML::LibXML->new();
  $reparser->validation(0);
  $reparser->keep_blanks(1);
  my $canonical = $reparser->parse_string($indented)->toStringC14N(0);
  my @lines     = grep { /\S/ } split("\n", $canonical);
  return \@lines; }

# Convert a test case's TeX source to XML with the current latexml.
#   $dir, $name     : directory and base name of the test case.
#   $for_comparison : if true, write NAME.test.xml instead of the expected NAME.xml.
# The conversion runs in a forked child process, so that it is sandboxed from this
# process; the parent merely waits for the child and warns if it failed.
# An already existing output file is first renamed to FILE.bak.
# Dies if the process cannot be forked.  Returns nothing.
sub gen_xml {
  my ($dir, $name, $for_comparison) = @_;
  # Ensure a sandbox for the LaTeXML conversion by forking and working in the child.
  my $pid = fork();
  # fork returns undef on failure; without this check the parent would carry on
  # as if it were the child and terminate the whole run at the 'exit' below.
  die "Couldn't fork to convert $dir/$name: $!" unless defined $pid;
  if ($pid) {
    waitpid($pid, 0);
    # The child reports failures by dying; make sure that does not pass unnoticed.
    warn "Conversion of $dir/$name failed (child wait status $?)\n" if $?;
    return; }
  # From here on, we are in the child process.
  my $source = pathname_make(dir => $dir, name => $name, type => 'tex');
  my $xml = pathname_make(dir => $dir, name => $name, type => $for_comparison ? 'test.xml' : 'xml');
  if (-f "$xml") {
    rename("$xml", "$xml.bak"); }
  #========================================
  # Generate the XML from the current latexml
  my $gen_xml_core_options = {%LaTeXML::Util::Test::CORE_OPTIONS_FOR_TESTS};
  $$gen_xml_core_options{verbosity} = $verbosity || 0;
  if (my $dom = convert_texfile_as_test(texpath => $source, name => $name, core_options => $gen_xml_core_options)) {
    my $dom_str = $dom->toString(1);
    open(my $test_fh, '>', $xml) or die "Couldn't open output file $xml: $!";
    binmode($test_fh, ":encoding(UTF-8)");
    print $test_fh $dom_str;
    # Buffered write errors only surface when the handle is closed.
    close($test_fh) or die "Couldn't write output file $xml: $!"; }
  else {
    die "Failed to convert $source: $!"; }
  exit; }

# Regenerate NAME.pdf for a test case by running TeX on NAME.tex.
#   $dir, $name : directory and base name of the test case.
# Uses pdflatex if the source mentions "documentclass", pdftex otherwise, and runs
# the program twice.  Works inside $dir (when given) and afterwards removes every
# file that the run created there, except for NAME.pdf itself.
# Dies if the directory or the source cannot be read, or if a TeX run fails.
sub gen_pdf {
  my ($dir, $name) = @_;
  my $origin = pathname_cwd();
  pathname_chdir($dir) if $dir;

  # Remember which files exist before running TeX.
  opendir(my $BEFORE, ".") or die "Cannot read directory $dir: $!";
  my %preexisting = map { ($_ => 1) } readdir($BEFORE);
  closedir($BEFORE);

  # Choose the program by scanning the source for a documentclass.
  open(my $TEX, '<', "$name.tex") or die "Cannot scanTeX file $name.tex: $!";
  my $program = ((grep { /documentclass/ } <$TEX>) ? 'pdflatex' : 'pdftex');
  close($TEX);

  # Let TeX find the files under LaTeXML's texmf directory.
  local $ENV{TEXINPUTS} = "$FindBin::RealBin/../lib/LaTeXML/texmf/::" . ($ENV{TEXINPUTS} || '');
  foreach my $pass (1, 2) {
    system($program, '-interaction=batchmode', $name) == 0 or die "Couldn't run $program $name: $!"; }

  # Now delete any new files except $name.pdf
  opendir(my $AFTER, ".") or die "Cannot read directory $dir: $!";
  my @leftovers = grep { ($_ ne "$name.pdf") && !$preexisting{$_} } readdir($AFTER);
  closedir($AFTER);
  foreach my $leftover (@leftovers) {
    unlink($leftover); }

  pathname_chdir($origin) if $dir;
  return; }

# Read the options of a daemon style test case from its spec file.
#   $optionfile : path of the file to read.
#   $testname   : name of the test, passed to do_fail if the file cannot be opened.
# Lines starting with '#' are skipped; every other line containing "key = value"
# contributes one [key, value] pair, with trailing whitespace removed from the value.
# Lines without an '=' are ignored.
# Returns a reference to the list of pairs, in file order.
sub read_options {
  my ($optionfile, $testname) = @_;
  my $opts = [];
  my $OPT;
  if (open($OPT, "<", $optionfile)) {
    while (my $line = <$OPT>) {
      next if $line =~ /^#/;
      chomp($line);
      # The key must not contain '=' itself, so that the line is split at the
      # FIRST '=' and values such as "[a=b]pkg" survive intact.
      if ($line =~ /([^\s=]+)\s*=\s*(.*)/) {
        my ($key, $value) = ($1, $2);
        # Only a missing value defaults to ''; a literal "0" is a valid value.
        $value = '' unless defined $value;
        $value =~ s/\s+$//;
        push @$opts, [$key, $value]; } }
    close $OPT; }
  else {
    do_fail($testname, "Could not open $optionfile"); }
  return $opts; }
#======================================================================
__END__

=pod

=head1 NAME

C<maketests [options] testcases...> - LaTeXML test maintenance tool

=head1 SYNOPSIS

Use

   make test

from the build directory to run all tests.

To compare the current latexml output for
I<set/case> against the expected output:

   maketests --compare set/case

To regenerate the xml for a specific case,
that is, to assert that the XML as it is currently
generated is what should now be expected for that case,
use

   maketests set/case

For help and description of the test framework:

   maketests --help

=head1 OVERVIEW

LaTeXML includes a set of test cases for validating
the compilation and installation, as well as for
verifying the impact of changes to the system.

Tests are conducted using the standard  L<Test::Builder> module,
by executing

   make test

from the top level build directory.  It reports
whether all tests are completed successfully,
or which tests failed to generate the expected XML,
typically showing the first mismatching line.

=head2 Test Suite Organization

Tests are collected in sets in the C<t> subdirectory of the LaTeXML build directory.
Each set tests related functionality and is represented by
its own subdirectory (eg. I<t/someset>), and a driver program (eg. I<44_someset.t>)
in the C<t> directory.
The driver runs all the test cases in the given set's directory.

Each test case, say I<case>, within a test set consists of 3 files:

   case.tex   The source TeX file
   case.xml   The expected XML output from latexml
   case.pdf   The result of running pdflatex; This file is
       only for understanding the effect of the TeX markup.

[The driver program and all three files for each testcase
should be under SVN control.]

=head2 Testing

The C<make test> command should be run whenever LaTeXML is modified
to verify that unexpected changes to the generated
XML have not been introduced.  Given the nature
of TeX and LaTeXML, such surprises can be frequent!

When failed tests are discovered, the command

  maketests --compare set/case

can be run to see the differences between the
expected output and the currently generated output.
If the new output is not correct, you must continue
refining LaTeXML until all tests pass.
However, if the new output is preferred for
whatever reason, the command

  maketests set/case

should be used to update the test.

Whenever new functionality is added to LaTeXML,
new test cases or even new sets of test cases
should be added.

See the documentation below for more details about
these forms of C<maketests>, and the documentation
for L<Test::Builder> for more
information about testing Perl modules.

=head1 TESTING OPERATIONS

=head2 Comparing XML Output

When a test fails, it is often unclear exactly
how the output has changed; C<make test> only
reports the first differing line of XML.
The command

   maketests --compare set/case ...

will generate the new output and compare it
to the expected output in a diff-like manner.
Multiple testcases can be given on the command line.

Note that to aid in comprehension, the XML is
reformatted to start and end each element on a new line.
In contrast, the testing in C<make test> compares the
literal XML without changing whitespace.
Thus, changes that only affect whitespace will
report a failed test from C<make test>,
but C<maketests --compare> will report the output
as identical!

=head2 Updating a Test Case

Occasionally, modifications to latexml or its binding files change the
expectations about what the generated XML should look like;
presumably the new version is better.  Or sometimes, you
wish to improve the test by making it more comprehensive.

In such cases, the test case(s) will need to be updated by
regenerating the expected xml, and the pdf if the tex changed.
Simply run

   maketests set/case ...

Multiple testcases can be given on the command line to be updated.

To force regeneration of the pdf, use:

   maketests --pdf set/case ...

Remember to commit any changed files.

=head2 Adding a Test Case

To add a new test case, I<newcase>, to an existing test set, I<set>,
create the appropriate TeX file C<t/set/newcase.tex> that
involves the markup to be tested.
Then run

   maketests set/newcase ...

to create the required I<set/newcase.xml> and I<set/newcase.pdf>
files. Multiple testcases can be added at once.

Remember to add all three files to the SVN repository.

=head2 Adding a Test Set

To create a new set of tests, say I<newset>, create
the directory I<t/newset> to contain the test cases
and a driver program, I<t/##_newset.t>, (choose a number
for I<##> that specifies when this test set will run in the sequence).
The driver should look like:

   use FindBin;
   use lib "$FindBin::Bin/lib";
   use TestLaTeXML;
   latexml_tests("t/newset");

Then add any desired testcases as described above.
Remember to add the directory, driver and any tests
to the SVN repository.

=cut
