#! /usr/bin/perl
# update-ispell-hash 0.4 -- creating dictionary hash from subdictionaries
#
# (c) 1999 Piotr Roszatycki <dexter@debian.org>
#
# Some subroutines taken from yada 0.6
# Copyright 1999 Charles Briscoe-Smith
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Changes:
#  * 0.4 - bugfix, "Can't open file" if file didn't exist
#  * 0.3 - renamed dictionary.info -> dictionary.desc
#        - gzip, bzip2 and sq support
#        - fixed error messages
#        - new options
#        - build process uses icombine
#  * 0.2 - security check for temporary file
#  * 0.1 - initial release
#
# Todo:
# * clean code?

use File::Temp ();

# Directories of the ispell installation: affix files are looked up in
# $ispelllibpath, hash and selection-list files in $ispellvarpath, and the
# *.desc descriptions with their dictionary sources in $ispellsharepath.
$ispelllibpath = "/usr/lib/ispell";
$ispellvarpath = "/var/lib/ispell";
$ispellsharepath = "/usr/share/ispell";

# Common prefix of every temporary file ("$tmppath.words", "$tmppath.dict").
# The files live in a freshly created private directory (mode 0700) instead
# of directly in world-writable /tmp, so another user can neither predict
# nor pre-create or symlink them.  tempdir() dies if the directory cannot
# be created, and CLEANUP removes it again when the program exits.
$tmppath = File::Temp::tempdir( "ispell-config.XXXXXXXX",
                                DIR => "/tmp", CLEANUP => 1 )
         . "/ispell-config.$$";

# Field names accepted in the first (source) paragraph of a .desc file.
%sourcefields = map { $_ => 1 } ("source", "affix", "hash");

# Field names accepted in each following (dictionary) paragraph.
%dictionaryfields = map { $_ => 1 } ("dictionary", "description", "default");

########################################################################
# Standard ways of printing error messages
########################################################################
# Report a non-fatal problem: write the arguments (joined with spaces) plus
# a newline to STDERR and count the problem in the global $errors.
sub gasp {
  my $message = "@_";
  print STDERR $message . "\n";
  $errors++;
}

# Report a fatal problem: write the arguments (joined with spaces) plus a
# newline to STDERR, remove the temporary files and exit with status 1.
sub choke {
  my $message = "@_";
  print STDERR $message . "\n";
  clean();
  exit(1);
}

########################################################################
# About info
########################################################################
# Print the one-line synopsis through choke(), which writes it to STDERR and
# terminates the program.  The synopsis lists every option the argument
# parser accepts, including -l.
sub about {
    choke("Usage:\tupdate-ispell-hash [-h] [-a] [-d] [-f <file>] [-x <file>] [-l <file>] [<dictionary>]\n");
}

# Print the full help text (synopsis plus one line per option) through
# choke(), which writes it to STDERR and terminates the program.  The text
# covers every option the argument parser accepts, including -l | --list.
sub usage {
    choke("Usage:\tupdate-ispell-hash [-h] [-a] [-d] [-f <file>] [-x <file>] [-l <file>] [<dictionary>]\n"
         ."\n"
         ."Options:\n"
         ."\t-h | --help           this help info\n"
         ."\t-a | --all            build from all dictionaries\n"
         ."\t-d | --default        build from default dictionaries\n"
         ."\t-f | --file <file>    build hash to file\n"
         ."\t-x | --affix <file>   use affix file\n"
         ."\t-l | --list <file>    keep the module selection in file\n"
         ."\t<dictionary>          dictionary to build\n");
}

########################################################################
# Execute an external program, and bomb out if errors occur
########################################################################
# Run an external command and wait for it.
#
# Arguments: the command, exactly as for exec() -- a single string is handed
# to the shell, a list is executed directly.
# Returns nothing on success.  Calls choke() (which terminates the program)
# when the process cannot be forked or waited for, when it exits with a
# non-zero exit code, or when it is killed by a signal.
sub run {
  my $pid = fork;
  choke("Cannot fork a child process") unless defined $pid;

  if ($pid == 0) {
    # Child: exec only returns when the command could not be started.
    exec @_ or print STDERR "Cannot execute $_[0]: $!\n";
    exit 1;
  }

  choke("Cannot wait for $_[0]: $!") unless waitpid($pid, 0) == $pid;

  # $? is the raw wait status: the low seven bits hold the terminating
  # signal, the high byte holds the exit code.
  if ($? & 127) {
    choke("*** error: $_[0] killed by signal " . ($? & 127) . "\n");
  } elsif ($? != 0) {
    choke("*** error: $_[0] exited with status " . ($? >> 8) . "\n");
  }
  return;
}

########################################################################
# Read a paragraph into %par
########################################################################
# Read the next paragraph of the description file from the DESC handle.
#
# A paragraph is a run of "Keyword: value" lines.  Lines starting with
# whitespace continue the previous value, a continuation consisting only of
# "." stands for an empty line, and lines starting with "#" are comments.
# A blank line ends the paragraph.
#
# Results are left in globals: %par holds the paragraph just read (keys are
# lower-cased keywords).  The first paragraph read is also copied to
# %source; every later one is stored as a hash reference in
# $dictionary{<its dictionary field>}.
#
# Returns 1 when a paragraph was read and 0 when the input is exhausted.
# Calls choke() (which terminates the program) on a malformed line or an
# unknown field name.  Note that $_ is clobbered.
sub getpara {
  return 0 if eof DESC;

  # Skip comments and blank lines in front of the paragraph.
  while (<DESC>) {
    next if m/^\s*\#/;
    s/\s+$//;
    last unless m/^$/;
  }
  %par=();
  # Only comments/blank lines were left, so there is no paragraph; without
  # this check trailing blank lines would yield a bogus empty paragraph.
  return 0 unless defined;

  while (defined) {
    m/^([-A-Za-z0-9]+)\s*\:\s*(.*)$/ or choke("Invalid line found");
    my $keyword=lc $1;
    my $contents=$2;
    # Collect continuation lines; the loop leaves the first line that is
    # not a continuation (or undef at end of file) in $_.
    while (<DESC>) {
      next if m/^\#/;
      s/\s+$//s;
      last unless s/^(\s)//;
      my $x=$1;
      # Only a leading space is dropped; any other whitespace is kept.
      s/^/$x/ if $x ne " ";
      s/^\.(\.*)$/$1/;
      $contents.="\n".$_;
    }
    $contents=~s/^\n//s;
    $par{$keyword}=$contents;
    last if not defined or m/^$/;
  }

  # "defined %hash" is a compile error on current perls; an empty %source
  # means the source paragraph has not been read yet.
  if( ! %source ) {
    foreach (keys %par) {
      choke("Unknown field `$_' in source paragraph")
        unless exists $sourcefields{$_};
    }
    %source = %par;
  } else {
    foreach (keys %par) {
      choke("Unknown field `$_' in dictionary paragraph")
        unless exists $dictionaryfields{$_};
    }
    # Keep a copy of the paragraph itself; assigning %par directly would
    # store the hash evaluated in scalar context.
    $dictionary{$par{dictionary}} = { %par } if defined $par{dictionary};
  }
  return 1;
}

########################################################################
# Clean temporary files
########################################################################
# Delete every file whose name starts with the temporary prefix $tmppath.
sub clean {
    my @leftovers = glob("$tmppath*");
    unlink @leftovers;
}

########################################################################
# Signal trap
########################################################################
# Signal handler: remove the temporary files, then leave with exit status 1.
sub trap {
    clean();
    exit(1);
}

########################################################################
# Main program
########################################################################

use Fcntl qw(O_WRONLY O_CREAT O_EXCL);

# Remove the temporary files when the program is interrupted.  SIGKILL can
# never be caught, so no handler is installed for it; SIGHUP is trapped
# instead.
$SIG{INT}  = \&trap;
$SIG{HUP}  = \&trap;
$SIG{TERM} = \&trap;

# Quote a string so that the shell treats it as one literal word.
my $shquote = sub {
    my( $word ) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
};

# Storage formats of a dictionary module, tried in this order: file name
# suffix and the shell filter producing the plain word list (%s is replaced
# by the quoted file name; undef means the file is read directly).
# NOTE(review): confirm that the installed unsq accepts a file name
# argument; the call is kept as it was.
my @formats = (
    [ "",        undef                   ],
    [ ".sq",     "unsq %s"               ],
    [ ".gz",     "gzip -cd %s"           ],
    [ ".sq.gz",  "gzip -cd %s | unsq"    ],
    [ ".bz2",    "bzip2 -cd %s"          ],
    [ ".sq.bz2", "bzip2 -cd %s | unsq"   ],
);

# Command line.  Looping on @ARGV itself keeps an argument such as "0" from
# ending the scan early.
while( @ARGV ) {
    my $arg = shift @ARGV;
    if( $arg eq "-h" || $arg eq "--help" ) {
        &usage();
    } elsif( $arg eq "-a" || $arg eq "--all" ) {
        $all = 1;
    } elsif( $arg eq "-d" || $arg eq "--default" ) {
        $default = 1;
    } elsif( $arg eq "-f" || $arg eq "--file" ) {
        $ispellhashfile = shift @ARGV;
    } elsif( $arg eq "-x" || $arg eq "--affix" ) {
        $ispellaffixfile = shift @ARGV;
    } elsif( $arg eq "-l" || $arg eq "--list" ) {
        $ispelllistfile = shift @ARGV;
    } elsif( $arg =~ /^-/ || $source ) {
        # Unknown option, or a second dictionary name.
        &about();
    } else {
        $source = $arg;
    }
}

# No dictionary named on the command line: offer the installed ones.
unless( $source ) {
    print "\nPlease choose dictionary:\n\n";
    my @choices;
    foreach my $descfile ( glob "$ispellsharepath/*.desc" ) {
        ( my $name = $descfile ) =~ s/.*\///;
        $name =~ s/\.desc$//;
        push @choices, $name;
        print "\t[", scalar( @choices ), "] $name\n";
    }
    choke("Can't find any $ispellsharepath/*.desc file") unless @choices;
    print "\nSelect the number of dictionary [1] ";
    $source = <STDIN>;
    $source = "" unless defined $source;
    chomp $source;
    if( $source =~ /^\d+$/ ) {
        choke("There is no dictionary number $source")
            if $source < 1 || $source > @choices;
        $source = $choices[$source-1];
    } elsif( $source eq "" ) {
        $source = $choices[0];
    }
    # Any other answer is taken as a dictionary name.
}

-d "$ispellsharepath/$source" or
    choke("Can't find $ispellsharepath/$source dictionary\n");
open( DESC, "<", "$ispellsharepath/$source.desc" ) or
    choke("Can't open $ispellsharepath/$source.desc file for reading");

# O_EXCL refuses to follow a symlink or reuse a file that somebody else
# planted under the temporary name.
sysopen( WORDS, "$tmppath.words", O_WRONLY | O_CREAT | O_EXCL, 0600 ) or
    choke("Can't open $tmppath.words file for writing");

# The first paragraph of the description is the source paragraph.
&getpara();

choke("Source: field not found\n") unless $source{source};
choke("Affix: field not found\n") unless $source{affix};
choke("Hash: field not found\n") unless $source{hash};

# Fill in defaults; names without a "/" are relative to the ispell
# directories, and the .hash / .list suffixes are added when missing.
$ispellaffixfile = $source{affix} unless $ispellaffixfile;
$ispellaffixfile = "$ispelllibpath/$ispellaffixfile" unless $ispellaffixfile =~ /\//;
$ispellhashfile = $source{hash} unless $ispellhashfile;
$ispellhashfile = "$ispellvarpath/$ispellhashfile" unless $ispellhashfile =~ /\//;
$ispellhashfile = "$ispellhashfile.hash" unless $ispellhashfile =~ /\.hash$/;
$ispelllistfile = "$source" unless $ispelllistfile;
$ispelllistfile = "$ispellvarpath/$ispelllistfile" unless $ispelllistfile =~ /\//;
$ispelllistfile = "$ispelllistfile.list" unless $ispelllistfile =~ /\.list$/;

# check if hash file is writable
if( ! -w $ispellhashfile ) {
    open( my $probe, ">", $ispellhashfile ) or
        choke("Can't write $ispellhashfile file");
    close $probe;
    unlink $ispellhashfile;
}

print "\nPlease choose dictionary modules for $source hash\n\n"
    unless( $all || $default );

# A saved selection list replaces the Default: fields of the description.
if( -f $ispelllistfile ) {
    open( my $saved, "<", $ispelllistfile ) or
        choke("Can't open $ispelllistfile for reading");
    while( my $name = <$saved> ) {
        chomp $name;
        $default{$name} = 1;
    }
    close $saved;
    $list = 1;
}

# One pass per dictionary paragraph: decide whether the module is wanted and
# append its words to the temporary word list.
while( &getpara() ) {
    my $name = $par{dictionary};
    $default{$name} = 1 if ! $list && $par{default};

    my $opt;
    if( $all ) {
        $opt = "y";
    } elsif( $default ) {
        $opt = "";
    } else {
        # Ask, showing the first description line; the capital letter in
        # the prompt marks what an empty answer selects.
        ( my $title = $par{description} ) =~ s/\n.*//s;
        printf "%s? [%s/%s/i/q] ", $title,
            $default{$name} ? ( "Y", "n" ) : ( "y", "N" );
        $opt = <STDIN>;
        $opt = "" unless defined $opt;
        chomp $opt;
    }

    if( lc( $opt ) eq "y" || ( $opt eq "" && $default{$name} ) ) {
        $list{$name} = 1;
        my $file = "$ispellsharepath/$source/$name";
        my( $path, $in );
        foreach my $format ( @formats ) {
            my( $suffix, $filter ) = @$format;
            next unless -f "$file$suffix";
            $path = "$file$suffix";
            if( defined $filter ) {
                open( $in, "-|", sprintf( $filter, $shquote->( $path ) ) ) or
                    choke("Can't open $path file for reading");
            } else {
                open( $in, "<", $path ) or
                    choke("Can't open $path file for reading");
            }
            last;
        }
        choke("Can't find $file dictionary") unless defined $path;
        while( my $line = <$in> ) {
            print WORDS $line or
                choke("Can't write $tmppath.words file");
        }
        close $in;
    } elsif( lc( $opt ) eq "i" ) {
        # Show the rest of the description, then ask again about the same
        # paragraph.
        ( my $details = $par{description} ) =~ s/^[^\n]*\n?//s;
        print $details, "\n\n";
        redo;
    } elsif( lc( $opt ) eq "q" ) {
        print "Abort.\n";
        &clean();
        exit 0;
    }
}

close DESC;
close WORDS or
    choke("Can't write $tmppath.words file");

# Remember an interactive selection for the next run.
if( ! $default && ! $all ) {
    open( my $out, ">", $ispelllistfile ) or
        choke("Can't open $ispelllistfile for writing");
    foreach my $name ( sort keys %list ) {
        print $out "$name\n";
    }
    close $out or
        choke("Can't write $ispelllistfile");
}

# Create the dictionary file ourselves, exclusively, before the shell
# redirects into it; this fails if the name already exists.
sysopen( my $dict, "$tmppath.dict", O_WRONLY | O_CREAT | O_EXCL, 0600 ) or
    choke("Can't create $tmppath.dict file: $!");
close $dict;

my $q_words = $shquote->( "$tmppath.words" );
my $q_dict  = $shquote->( "$tmppath.dict" );
my $q_affix = $shquote->( $ispellaffixfile );
my $q_hash  = $shquote->( $ispellhashfile );

# Drop carriage returns and lines starting with "/", sort and merge the
# words, then build the hash.
&run("cat $q_words | tr -d '\\r' | grep -v \"^/\" | ".
     "sort -u | icombine $q_affix > $q_dict");
&run("buildhash $q_dict $q_affix $q_hash 2>&1");
print "Done.\n";

&clean();

1;
