#!/usr/bin/env ruby
# -*- coding: euc-jp; mode: ruby; -*-
## bskk -- Bayesian estimation for SKK
## Copyright (C) 2004 Kenichi Kurihara <kenichi_kurihara@nifty.com>

## Author: Kenichi Kurihara <kenichi_kurihara@nifty.com>
## Maintainer: SKK Development Team <skk@ring.gr.jp>
## Keywords: japanese

## This file is part of Daredevil SKK.

## Daredevil SKK is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2, or
## (at your option) any later version.

## Daredevil SKK is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with Daredevil SKK, see the file COPYING.  If not, write to
## the Free Software Foundation Inc., 59 Temple Place - Suite 330,
## Boston, MA 02111-1307, USA.

### Commentary:
## For usage, see the Commentary section of skk-bayesian.el.


### Specifications:

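## To stop bskk, send it a TERM signal, whether or not it was started
## with the -s option.

## On a TERM signal bskk exits immediately.  It does not save.

## Emacs therefore has to make bskk save with COMMAND_SAVE before it
## exits.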

### TODO:
## /home/USER/bin/bskk:325: warning: assigned but unused variable - adr
## /home/USER/bin/bskk:355: warning: assigned but unused variable - i

## Code:
require 'fileutils'
if RUBY_VERSION.to_f < 1.9
  require 'jcode'
  $KCODE = "EUC"
else
  Encoding.default_external = "EUC-JP"
  Encoding.default_internal = "EUC-JP"
end

COMMAND_SORT = "#sort"
COMMAND_ADD  = "#add"
COMMAND_SAVE = "#save"

## Make sure that variable is an instance of at least one of types.
##
## variable : the object to check
## types    : the classes or modules that are acceptable
## return   : nil when the check passes
## raise    : TypeError when variable is an instance of none of them
def check_type( variable, *types )
  return if types.any?{ |type| variable.is_a?( type ) }
  # The block parameter used for the search is not visible here, so the
  # message names the whole list of acceptable types.
  raise TypeError.new( "need #{types.join( ' or ' )} but #{variable.class.to_s}" )
end

## A frequency table of words: one count per word plus the running total
## of all counts.
class Distribution
  public
  ## obj : nil for an empty distribution, or a String of "word#count/"
  ##       records such as "foo#1/bar#2/".  It must not contain a
  ##       newline.  A non-nil obj is validated with check_type.
  ## Records that are not of the form "word#count" are reported on
  ## STDERR and skipped.
  def initialize( obj )
    @word2count = Hash.new(0)
    @total_count = 0
    word_count_regexp = /^(.+)#(\d+)$/
    if obj != nil
      check_type( obj, String )
      line = obj
      line.split( "/" ).each{ |word_count|
        if word_count =~ word_count_regexp
          word = $1
          count = $2.to_i
          # A word listed twice keeps only its last count, so the earlier
          # one is taken back out to keep the total equal to the sum.
          @total_count += count - @word2count[word]
          @word2count[word] = count
        else
          STDERR.puts( ";; invalid line=#{line}." )
        end
      }
    end
    decrement
  end

  ## return : the table as "word#count/word#count/..." (the format that
  ##          initialize accepts), or "" when the table is empty
  def to_s
    decrement
    return @word2count.map{ |word, count| "#{word.to_s}##{count}/" }.join
  end

  ## word   : a String
  ## return : count of word divided by the total count, as a Float;
  ##          0.0 when nothing has been counted yet
  def get_prob( word )
    check_type( word, String )
    # Without this guard an empty table yields 0.0/0.0 = NaN, which
    # cannot be compared with other probabilities.
    return 0.0 if @total_count == 0
    return @word2count[word].to_f / @total_count.to_f
  end

  ## Count one more occurrence of word (a String).
  def increment( word )
    check_type( word, String )
    @word2count[word] += 1
    @total_count += 1
  end

  private
  ## While @total_count is a Bignum, divide every count by 10 (dropping
  ## the words that fall to 0) so that the total fits a Fixnum again.
  def decrement
    # = NEWS for Ruby 2.4.0
    # == Changes since the 2.3.0 release
    # === Core classes updates (outstanding ones only)
    # * Integer
    #   * Fixnum and Bignum are unified into Integer  [Feature #12005]
    #
    # From 2.4 on there is no Bignum/Fixnum distinction (and naming
    # ::Bignum is deprecated), so there is nothing to do.
    if 2.3 < RUBY_VERSION.to_f
      return
    end

    while @total_count.is_a?( Bignum )
      # Keep the default of 0 so that increment and get_prob still work
      # for the words that are dropped here.
      scaled = Hash.new(0)
      total = 0
      @word2count.each{ |word, count|
        new_count = (count / 10).to_i
        if new_count > 0
          scaled[word] = new_count
          total += new_count
        end
      }
      @word2count = scaled
      @total_count = total
    end
  end
end

## Raised by DB#initialize when a history file cannot be read or parsed.
class DBHistoryFileSyntaxException < Exception; end

## The conversion history.  @i2prefix2dst[i][c] is the Distribution of
## the words that were chosen while character c stood at index i of the
## prefix list handed to add_history.
class DB
  ## true while the DB holds changes that save has not written out yet
  attr_reader :dirty

  public
  ## io    : nil to start with an empty DB, or an IO to read a history
  ##         file from.  The file is made of "&index=N", "#prefix=C" and
  ##         "%word#count/..." lines, as written by save.
  ## raise : DBHistoryFileSyntaxException when reading or parsing fails
  def initialize( io=nil )
    check_type( io, IO, NilClass )
    @i2prefix2dst = Array.new
    @dirty = false
    # memo of the last result of weights()
    @weights_last_n = nil
    @weights_last_weights = nil
    if( io == nil )
      return
    end
    regexp_index = /^&index=(\d+)/
    regexp_prefix = /^#prefix=(.+)$/
    regexp_counts = /^%(.+)$/
    index = nil
    prefix_char = nil
    begin
      while not io.eof?
        # chomp, not chomp!: chomp! returns nil when there is nothing to
        # strip, which used to drop a last line that has no newline.
        line = io.readline.chomp
        if line =~ regexp_counts
          counts = $1
          if index == nil or prefix_char == nil
            STDERR.puts( ";; couldn't find index and prefix.  ignore #{line}." )
            next
          end
          if @i2prefix2dst.size <= index
            @i2prefix2dst.fill( nil, @i2prefix2dst.size..index )
          end
          @i2prefix2dst[index] ||= Hash.new
          @i2prefix2dst[index][prefix_char] = Distribution.new( counts )
        elsif line =~ regexp_index
          index = $1.to_i
        elsif line =~ regexp_prefix
          prefix_char = $1
        else
          STDERR.puts( ";; ignore invlid line=#{line}." )
          next
        end
      end
    rescue StandardError => e
      # Only ordinary errors are converted; signals and exit requests
      # pass through untouched.
      raise DBHistoryFileSyntaxException.new( e.message )
    end
  end

  ## Record that word_str was chosen in the context prefix_list.
  ## word_str    : the chosen word (String)
  ## prefix_list : Array of context Strings, e.g. ["c", "b", "a"];
  ##               element i is counted in the table of index i
  def add_history( word_str, prefix_list )
    check_type( word_str, String )
    check_type( prefix_list, Array )
    @dirty = true
    prefix_list.each_with_index{ |prefix_char, i|
      # Assigning past the end of the Array pads it with nil by itself.
      prefix2dst = ( @i2prefix2dst[i] ||= Hash.new )
      dst = ( prefix2dst[prefix_char] ||= Distribution.new( nil ) )
      dst.increment( word_str )
    }
  end

  ## Order entries by their estimated probability in the given context.
  ## entries     : Array of candidate Strings, e.g. ["x", "y", "z"]
  ## prefix_list : Array of context Strings, e.g. ["c", "b", "a"]
  ## return      : [sorted entries, their scores], highest score first,
  ##               e.g. [["y", "x", "z"], [0.4, 0.2, 0.1]]
  ## Entries with equal scores keep the order they were given in.
  def sort( entries, prefix_list )
    check_type( entries, Array )
    check_type( prefix_list, Array )
    scores = Array.new( entries.size, 0.0 )
    prefix_weights = weights( prefix_list.size )
    prefix_list.each_with_index{ |prefix, p_i|
      prefix2dst = @i2prefix2dst[p_i]
      next if prefix2dst == nil
      dst = prefix2dst[prefix]
      next if dst == nil
      entries.each_with_index{ |entry, e_i|
        prob = dst.get_prob( entry )
        # A NaN (0/0 from a distribution without counts) would make the
        # scores impossible to order, so it contributes nothing.
        next if prob.nan?
        scores[e_i] += prob * prefix_weights[p_i]
      }
    }
    ## Descending by score; the original position breaks ties, because
    ## Array#sort alone does not promise to keep equal elements in order.
    order = (0...entries.size).sort_by{ |e_i| [-scores[e_i], e_i] }
    return [order.map{ |e_i| entries[e_i] }, order.map{ |e_i| scores[e_i] }]
  end

  ## Write the history to path_to_file, after copying an existing file to
  ## path_to_bak.  Both steps run while path_to_file + ".lock" is locked.
  ## return : true when saved (or when there was nothing to save),
  ##          false when the lock could not be obtained
  def save( path_to_file, path_to_bak )
    debug_log( "save...    ;" + Time.now.to_s )
    if not @dirty
      debug_log( "save...done; not dirty; " + Time.now.to_s )
      return true
    end
    # The block forms close both files even when an error is raised.
    File.open( path_to_file + ".lock", "w" ){ |lock|
      if not try_lock( lock )
        debug_log( "save...failed; " + Time.now.to_s )
        return false
      end
      # The backup is taken under the lock so that it cannot capture a
      # file another saver is in the middle of writing.
      if File.exist?( path_to_file )
        FileUtils.copy( path_to_file, path_to_bak )
      end
      File.open( path_to_file, "w" ){ |f|
        marshal( f )
      }
    }
    debug_log( "save...done; " + Time.now.to_s )
    @dirty = false
    return true
  end

  private
  ## Write every table to io in the format that initialize reads.
  def marshal( io )
    check_type( io, IO )
    started = Time.now
    @i2prefix2dst.each_with_index{ |prefix2dst, index|
      next if prefix2dst == nil
      io.puts( "&index=#{index}" )
      prefix2dst.each{ |prefix, dst|
        io.puts( "#prefix=#{prefix}" )
        io.puts( "%" + dst.to_s )
      }
    }
    debug_log( "marchasl() took " + (Time.now-started).to_f.to_s + "(sec)." )
  end

  ## Weights of the per-prefix distributions.
  ## return : Array [w_1, w_2, .., w_n] with w_i = (n-i+1) / (n(n+1)/2),
  ##          so the weights sum to 1 and index 0 weighs the most
  ## p( word | p_1, .., p_n )
  ## = \sum_{i=1}^n p( word | p_i ) * w_i
  ## "p_n p_{n-1} .. p_1 word"
  ## The result for the most recent n is memoized.
  def weights( n )
    if n == @weights_last_n
      return @weights_last_weights
    end
    t = n * (n+1) / 2
    weights = Array.new( n ){ |i| (n-i).to_f / t.to_f }
    @weights_last_n = n
    @weights_last_weights = weights
    return weights
  end

end


## ensuring-bind tcp server
## A listening socket assembled by hand from Socket.  According to the
## original notes, TCPServer.open did not reliably fail to bind under
## ruby 1.8.1, so that bskk could be started twice on the same port.
## NOTE(review): those notes are only partly legible; confirm the
## details before relying on them.
class EBTCPServer
  require "socket"
  include Socket::Constants

  ## port    : TCP port to listen on; the socket is bound to 127.0.0.1
  ## backlog : queue length handed to listen
  ## raise   : whatever Socket raises when bind or listen fails
  def initialize( port, backlog=5 )
    @soc_waiting = Socket.new( Socket::AF_INET, Socket::SOCK_STREAM, 0 )
    begin
      @soc_waiting.setsockopt( SOL_SOCKET, SO_REUSEADDR, 1 )
      # Socket.sockaddr_in builds the address in the layout of the
      # running platform; a hand-packed struct is not portable.
      @soc_waiting.bind( Socket.sockaddr_in( port, "127.0.0.1" ) )
      @soc_waiting.listen( backlog )
    rescue StandardError
      # Do not leave the descriptor open when setup fails.
      @soc_waiting.close
      @soc_waiting = nil
      raise
    end
  end

  ## Wait for a client.
  ## return : the connected Socket instance
  ## raise  : Exception ("already closed") after close was called
  def accept
    if @soc_waiting
      # Socket#accept returns [socket, address]; only the socket is used.
      return @soc_waiting.accept.first
    else
      raise Exception.new( "already closed" )
    end
  end

  ## Stop listening.  Calling it again is a no-op.
  def close
    return if @soc_waiting == nil
    @soc_waiting.close
    @soc_waiting = nil
  end
end

## Print message on STDERR, but only in verbose mode (@verbose set).
def v_err_puts( message )
  return unless @verbose
  STDERR.puts( message )
end

## Append "<time>: <message>" to $HOME/tmp/bskk.log.
## Nothing happens unless $debug is set, HOME is defined and $HOME/tmp
## is an existing directory.
## message : String
## return  : nil
def debug_log( message )
  return unless $debug
  home = ENV["HOME"]
  # HOME can be missing from the environment; skip logging instead of
  # failing on nil.
  return if home == nil
  log_dir = home + "/tmp"
  return unless File.directory?( log_dir )
  # The block form closes the file even when the write raises.
  File.open( log_dir + "/bskk.log", "a" ){ |f|
    f.puts Time.now.to_s + ": " + message
  }
  return nil
end

## Try to flock file without blocking, retrying for about timeout
## seconds.
## file         : an open File
## lock_mode    : File::LOCK_EX or File::LOCK_SH
## timeout      : seconds to keep trying
## try_interval : seconds to sleep between two attempts
## return       : true when the lock was obtained, false otherwise
## At least one attempt is always made.
def try_lock( file, lock_mode=File::LOCK_EX, timeout=2, try_interval=0.1 )
  check_type( file, File )
  check_type( timeout, Numeric )
  check_type( try_interval, Numeric )
  # A zero interval would divide by zero below; try just once then.
  retries = 0
  if try_interval > 0
    retries = ( timeout.to_f/try_interval.to_f ).to_i
    retries = 0 if retries < 0
  end
  ( retries + 1 ).times{ |attempt|
    # Sleep only between attempts, never after the last one.
    sleep try_interval if attempt > 0
    # flock returns false when LOCK_NB is set and the lock is held
    # elsewhere, 0 on success.
    return true if file.flock( lock_mode | File::LOCK_NB )
  }
  return false
end

## Break line into an Array of its characters, leaving out the line
## terminator.
def split_line( line )
  return line.chomp.each_char.to_a
end

## Serve requests read from input until reading fails (for example at
## end of input) or another error occurs.
##
## db      : the DB that is queried and updated
## input   : IO the requests are read from, one line at a time
## out     : IO the replies are written to; they end up in an emacs
##           buffer
## history_file, history_file_bak : paths handed to db.save
##
## Per the original notes, emacs evaluates only the first line of a
## reply.  Nothing that needs evaluating may follow on later lines, and
## those lines have to be comments (they start with ";;").
def serve( db, input, out, history_file, history_file_bak )
  debug_log( "start" )
  begin
    # main loop
    while true
      line = input.readline.chomp
      debug_log( "Recieve: " + line )
      case line
      when COMMAND_SORT
        # arguments: "entry/entry/..." and then the preceding text
        entries = input.readline.chomp.split( "/" )
        prefix_list = split_line( input.readline ).reverse
        sorted_entries, probs = db.sort( entries, prefix_list )
        quoted = sorted_entries.map{ |entry| entry.inspect }
        out.puts "(" + quoted.join( " " ) + ")"
        # Built by concatenation on purpose: the original note reports a
        # problem with the interpolated form on ruby 1.6.8.
        out.puts ";;(\""+ probs.join( "\" \"" ) + "\")"
        out.puts ";;OK."
      when COMMAND_ADD
        # arguments: the chosen word and then the preceding text
        kakutei_word = input.readline.chomp
        prefix_list = split_line( input.readline ).reverse
        db.add_history( kakutei_word, prefix_list )
        out.puts "t"
        out.puts ";;OK."
      when COMMAND_SAVE
        if db.save( history_file, history_file_bak )
          out.puts "t"
          out.puts ";;Succeeded."
        else
          out.puts "nil"
          out.puts ";;Failed."
          debug_log( "failed in saving; " + Time.now.to_s )
        end
      else
        # Report the line and keep the session alive.  This branch used
        # to pass an undefined variable to puts; the NameError was
        # swallowed below and ended the session without any reply.
        out.puts ";;unknown command: #{line}"
      end
      out.flush
    end
  rescue => e
    # IOError, including the EOFError readline raises at end of input,
    # is a StandardError and is handled here as well.
    debug_log( "#{e.to_s}" + Time.now.to_s )
  end
  debug_log( "finish (server); " + Time.now.to_s )
end

# TCP port used in server mode unless -p names another one.
DEFAULT_PORT = 51178

## Read the history file at path into a DB while holding a shared lock.
## return : the DB, or nil when the lock could not be obtained
## raise  : DBHistoryFileSyntaxException when the file cannot be parsed
def read_history_db( path )
  # The block form closes the file on every way out.
  File.open( path ){ |f|
    if try_lock( f, File::LOCK_SH )
      DB.new( f )
    else
      nil
    end
  }
end

## Load the history and serve requests.
##
## history_file : path of the history file; history_file + ".BAK" is
##                used as its backup
## server       : true  -> listen on TCP port `port' of 127.0.0.1, fork,
##                         and serve every client in its own thread
##                false -> serve a single session on STDIN/STDOUT
## port         : TCP port, only used when server is true
##
## When the history file is invalid the backup is tried.  When neither
## can be used (or the lock cannot be obtained) an empty DB is used.
def mode_serve( history_file, server, port )
  check_type( history_file, String )
  check_type( server, TrueClass, FalseClass )
  history_file_bak = history_file + ".BAK"
  ## create DB
  db = nil
  if File.exist?( history_file ) # read history
    begin
      db = read_history_db( history_file )
    rescue DBHistoryFileSyntaxException => e
      STDERR.puts( e.message )
      STDERR.puts( "#{history_file} is invalid." )
      if File.exist?( history_file_bak )
        STDERR.puts( "use #{history_file_bak} instead of #{history_file}." )
        begin
          db = read_history_db( history_file_bak )
        rescue DBHistoryFileSyntaxException => e2
          STDERR.puts( e2.message )
          STDERR.puts( "#{history_file_bak} is also invalid." )
        end
      end
    end
  end
  if db == nil
    db = DB.new
  end
  ## handle SIGTERM: exit at once, without saving
  on_term = Proc.new{
    debug_log( "SIGTERM; " + Time.now.to_s )
    STDERR.puts "Signal SIGTERM"
    STDERR.puts "halt bskk"
    exit
  }
  if defined?( Signal ) # Signal is a ruby 1.7 feature
    Signal.trap( :TERM, on_term )
  else
    trap( :TERM, on_term )
  end
  ## main routine
  begin
    if server
      socket = nil
      begin
        socket = EBTCPServer.new( port )
      rescue => e
        STDERR.puts "Error: #{e.to_s}"
        exit(1)
      end
      # Report the port that was actually bound, which is not
      # necessarily the default one.
      v_err_puts( "bind to #{port.to_s}." )
      pid = fork
      if pid  ## parent
        socket.close
        exit
      else    ## child
        STDERR.puts( "fork to be a deamon. PID=#{Process.pid}." )
        # This loop only ends when the process does.
        while true
          s = socket.accept
          v_err_puts( "accept a new client." )
          Thread.start( s ){ |io|
            begin
              serve( db, io, io, history_file, history_file_bak )
            ensure
              # Close the connection however serve() ends.
              io.close
            end
            v_err_puts( "close an session." )
          }
          v_err_puts( "#{(Thread.list.size-1).to_s} session(s) remain(s)." )
        end
      end
    else
      serve( db, STDIN, STDOUT, history_file, history_file_bak )
    end
  rescue SignalException => e # signals other than TERM
    debug_log( "SIG #{e.to_s}; " + Time.now.to_s )
    STDERR.puts "Signal #{e.to_s}"
    STDERR.puts "use TERM signal to halt bskk."
    exit
  end
end

## Write the command line usage to STDOUT.
def print_help
  usage = [
    "usage:",
    "  bskk (options)",
    "",
    "options:",
    "  -f  history_file      use history_file as a history_file.",
    "  -s                    run as a server.",
    "  -p  port              use port (default=#{DEFAULT_PORT}).",
    "  -v                    verbose.",
    "  -d                    debug mode.",
    "  -h                    show a help and exit.",
  ]
  # puts writes every element on a line of its own.
  STDOUT.puts( usage )
end

## Parse ARGV and start serving.
##
##   -f file : history file (required)
##   -s      : run as a TCP server
##   -p port : port for -s, an integer from 1 to 65535
##   -v      : verbose messages (sets @verbose)
##   -d      : debug log (sets $debug)
##   -h      : show the help and exit
##
## A usage error prints a message and the help, then exits with
## status 1.
def main
  ## read arguments
  history_file = nil
  server = false
  @verbose = false
  port = DEFAULT_PORT
  i = 0
  while i < ARGV.size
    case ARGV[i]
    when "-f"
      # Without this check a trailing -f fails inside File.expand_path.
      if ARGV[i+1] == nil
        STDERR.puts( "-f needs a file name." )
        print_help
        exit( 1 )
      end
      history_file = File.expand_path( ARGV[i+1] )
      i += 1
    when "-s"
      server = true
    when "-h"
      print_help
      exit
    when "-p"
      # to_i never returns nil (it yields 0 for garbage and for a
      # missing argument), so the text itself has to be validated.
      port_arg = ARGV[i+1].to_s
      if port_arg =~ /\A\d+\z/ and (1..65535).include?( port_arg.to_i )
        port = port_arg.to_i
      else
        STDERR.puts( "-p needs an integer. #{ARGV[i+1]} is invalid." )
        print_help
        exit( 1 )
      end
      i += 1
    when "-v"
      @verbose = true
    when "-d"
      $debug = true
    else
      STDERR.puts( "an unknown option: #{ARGV[i]}" )
      print_help
      exit( 1 )
    end
    i += 1
  end
  if history_file == nil
    STDERR.puts( "specify a history file by '-f'" )
    print_help
    exit( 1 )
  end
  mode_serve( history_file, server, port )
end

# Run main only when this file is executed as a script, i.e. when the
# program name ($0) is this very file.
if $0 == __FILE__
  main
end
