(* \chaptertitle{PDFIo}{General Input and Output} *)
open Utility

(* We use 64-bit sized files as standard. *)
(*IF-OCAML*)
open LargeFile
(*ENDIF-OCAML*)

(* \section{Defining and creating input and output functions} *)

(*IF-OCAML*)
type pos = int64
let pos_succ = i64succ
let pos_pred = i64pred
let pos_max = i64max
let possub = i64sub
let posadd = i64add
let posofi x = i64ofi x
let postoi x = i64toi x
let postoi64 x = x
let posofi64 x = x
(*ENDIF-OCAML*)

(*i*)(*F#
type pos = int
let pos_succ = succ
let pos_pred = pred
let pos_max = max
let possub = ( - )
let posadd = ( + )
let posofi (x : int) = (x : pos)
let postoi (x : int) = (x : pos)
let postoi64 (x : pos) = (i64ofi x : int64)
let posofi64 (x : int64) = (i64toi x : pos)
F#*)(*i*)

let no_more = ~-1

(* \intf A general type for input functions. This allows paramaterization over
channels, strings, bigarrays etc. *)
type input =
  {pos_in : unit -> pos;
   seek_in : pos -> unit;
   input_char : unit -> char option;
   input_byte : unit -> int;
   in_channel_length : unit -> pos;
   set_offset : pos -> unit}

(* \intf A general type for output functions, allowing parameterisation as above. *)
type output =
  {pos_out : unit -> pos;
   seek_out : pos -> unit;
   output_char : char -> unit;
   output_byte : int -> unit;
   out_channel_length : unit -> pos}

(* \intf Create input functions from a channel. *)
let input_of_channel ch =
  let offset = ref (posofi 0) in
    {pos_in =
       (fun () -> possub (pos_in ch) !offset);
     seek_in =
       (fun x -> seek_in ch (posadd x !offset));
     input_char =
       (fun () ->
          try Some (input_char ch) with End_of_file -> dpr "3A"; None);
     input_byte =
       (fun () ->
          try input_byte ch with End_of_file -> dpr "3B"; no_more);
     in_channel_length =
       (fun () -> in_channel_length ch);
     set_offset =
       (fun o -> offset := o)}

(* \intf Create input functions from a [Utility.stream]. *)
let input_of_stream s =
  let input_int () =
    if s.pos > stream_size s.data - 1
      then
        begin
          s.pos <- s.pos + 1;
          no_more
        end
      else
        begin
          s.pos <- s.pos + 1;
          sget s.data (s.pos - 1)
        end
  in
    {pos_in =
       (fun () -> posofi s.pos);
     seek_in =
       (fun p ->
          s.pos <- postoi p);
     input_char =
       (fun () ->
         match input_int () with x when x = no_more -> None | s -> Some (char_of_int s));
     input_byte =
       input_int;
     in_channel_length =
       (fun () -> posofi (stream_size s.data));
     set_offset =
       (fun _ -> raise (Failure "set_offset: not implemented"))
    }

(* \intf Create input functions from a [Utility.bytestream]. *)
let input_of_bytestream b =
  input_of_stream {pos = 0; data = b}

let input_of_string s =
  input_of_bytestream (bytestream_of_string s)

(* \intf Output functions over channels *)
let output_of_channel ch =
  {pos_out = (fun () -> pos_out ch);
   seek_out = seek_out ch;
   output_char = (fun c -> output_byte ch (int_of_char c));
   output_byte = output_byte ch;
   out_channel_length = (fun () -> out_channel_length ch)}

(* \intf Output functions over streams. If data is written past the end of a stream,
we extend the stream to that point plus one-third of that (new) size. Note that
this has an implication upon mixing reading and writing: the stream will have
junk in the extended section and will be longer than that which has been
written. *)
let output_of_stream s =
  let highest_written = ref (posofi 0) in
    let output_int i =
      if s.pos > stream_size s.data - 1
        then
          let newstream = mkstream (s.pos * 2 - s.pos / 2) in
            for x = 0 to stream_size s.data - 1 do
              sset newstream x (sget s.data x)
            done;
            sset newstream s.pos i;
            highest_written := pos_max !highest_written (posofi s.pos);
            s.pos <- s.pos + 1;
            s.data <- newstream
        else
          begin
            highest_written := pos_max !highest_written (posofi s.pos);
            sset s.data s.pos i;
            s.pos <- s.pos + 1
          end
    in
        {pos_out =
           (fun () -> posofi s.pos);
         seek_out =
           (fun p -> s.pos <- postoi p);
         output_char =
           (fun c -> output_int (int_of_char c));
         output_byte =
           output_int;
         out_channel_length =
           (fun () -> pos_succ !highest_written)}

(* \section{Utility functions} *)

(* \intf Nudge forward one character. *)
let nudge i =
  ignore (i.input_byte ())

(* \intf Read one character behind the current position, and reposition ourselves on
that character. *)
let read_char_back i =
  let pos = i.pos_in () in
    i.seek_in (pos_pred pos);
    let chr = i.input_char () in
      i.seek_in (pos_pred pos);
      chr

(* \intf Go back one character in a file. *)
let rewind i =
  i.seek_in (pos_pred (i.pos_in ()))

let rewind2 i =
  i.seek_in (possub (i.pos_in ()) (posofi 2))

let rewind3 i =
  i.seek_in (possub (i.pos_in ()) (posofi 3))

(* \intf Read a character, leaving the position unchanged. *)
let peek_char i =
  let r = i.input_char () in
    rewind i; r

(* \intf Read a byte, leaving the position unchanged. *)
let peek_byte i =
  let r = i.input_byte () in
    rewind i; r

(* \intf Output a string. *)
let output_string o s =
  String.iter o.output_char s

(* \intf Make a bytestream of an input channel. *)
let bytestream_of_input_channel ch =
  let fi = input_of_channel ch in
    let size = postoi (fi.in_channel_length ()) in
      let s = mkstream size in
        for x = 1 to size do
          match fi.input_byte () with
          | b when b = no_more -> failwith "channel length inconsistent"
          | b -> sset s (x - 1) b
        done;
        s

(* \intf Save a bytestream to a channel. *)
let bytestream_to_output_channel ch data =
  for x = 1 to stream_size data do
    output_byte ch (sget data (x - 1))
  done

(* Like [Pervasives.read_line] *) 
let read_line i =
  (* Raise EndOfInput if at end *)
  begin match i.input_byte () with
  | x when x = no_more -> dpr "O"; raise End_of_file;
  | _ -> ()
  end;
  rewind i;
  (* Read characters whilst <> newline or until end of input *)
  let rec read_chars prev =
    match i.input_byte () with
    | x when x = no_more -> rev prev
    | x when char_of_int x = '\n' -> rev ('\n'::prev)
    | x -> read_chars (char_of_int x::prev)
  in
    implode (read_chars [])

(* \section{Reading MSB-first Bit streams} *)

(*\intf The type of bit (MSB first) streams. *)
type bitstream =
  {input : input; (* The input from which bits are taken. It is advanced a byte at a time *)
   mutable currbyte : int; (* Current byte value from input *)
   mutable bit : int; (* Mask for getting the next bit (128, 64,... 2, 1 or 0 = none left) *)
   mutable bitsread : int (* A count of the number of bits read since inception. Debug use only *)}

(* \intf Make a [bitstream] from an [input]. *) 
let bitstream_of_input i =
  {currbyte = 0;
   bit = 0;
   bitsread = 0;
   input = i}

(* For debug only.... *)
let input_in_bitstream b =
  b.input

(* \intf Get a single bit. *)
let rec getbit b =
  if b.bit = 0 then
    begin
      b.currbyte <-
        begin match b.input.input_byte () with
        | x when x = no_more -> dpr "P"; raise End_of_file
        | x -> x
        end;
      b.bit <- 128;
      getbit b
    end
  else
    let r = b.currbyte land b.bit > 0 in
      b.bitsread <- b.bitsread + 1;
      b.bit <- b.bit / 2;
      r

(* \intf Get a bit as an integer, set = 1, unset = 0 *)
let getbitint i =
  if getbit i then 1 else 0

(* \intf Align on a byte boundary. *)
let align b =
  if b.bit > 0 then b.bitsread <- (b.bitsread / 8 + 1)  * 8;
  b.bit <- 0

(* Get [n] (up to 32) bits from [b], returned as an [int32], taken highest bit
first. Getting 0 bits gets the value 0.\SPEED{Far too slow}. *)
let char_of_bool = function true -> '1' | false -> '0'

let getval_32 b n =
  if n < 0 then raise (Invalid_argument "Io.getval_32 - n < 0") else
    if n = 0 then 0l else
      let bits = manyunique (mkunit getbit b) n in
        Int32.of_string ("0b" ^ implode (map char_of_bool bits))

(* \section{Writing MSB-first bit streams} *)

(* The type: A current byte, the position in the byte (0 = nothing in it, 7 =
almost full), and the list (in reverse order) of full bytes so far *)
type bitstream_write =
  {mutable wcurrbyte : int;
   mutable wbit : int;
   mutable bytes : int list}

let make_write_bitstream () =
  {wcurrbyte = 0;
   wbit = 0;
   bytes = []}

let copy_write_bitstream b =
  let b' = make_write_bitstream () in
    b'.wcurrbyte <- b.wcurrbyte;
    b'.wbit <- b.wbit;
    b'.bytes <- b.bytes;
    b'

let print_bitstream b =
  Printf.printf "wcurrbyte = %i, wbit = %i, %i bytes output\n"
  b.wcurrbyte b.wbit (length b.bytes)

(* Put a single bit into bitstream [b]*)
let putbit b bit =
  assert (bit = 0 || bit = 1);
  match b.wbit with
  | 7 ->
      b.bytes <- (b.wcurrbyte lor bit) :: b.bytes;
      b.wbit <- 0;
      b.wcurrbyte <- 0
  | _ ->
      b.wbit <- b.wbit + 1;
      b.wcurrbyte <- b.wcurrbyte lor (bit lsl (8 - b.wbit))

let putbool b bit =
  putbit b ((function false -> 0 | true -> 1) bit)

(* Put a multi-bit value [n] of bits [bs] (given as an [int32]) into bitstream [b]. *)
let rec putval b bs n =
  if bs < 0 || bs > 32 then raise (Invalid_argument "putval");
  match bs with
  | 0 -> ()
  | _ ->
      let bit =
        if land32 n (i32ofi (1 lsl (bs - 1))) > 0l then 1 else 0
      in
        putbit b bit;
        putval b (bs - 1) n

(* Align on a byte boundary, writing zeroes. *)
let align_write b =
  if b.wbit > 0 then
    for x = 1 to 8 - b.wbit do
      putbit b 0
    done

(* Get the output out. *)
let bytestream_of_write_bitstream b =
  align_write b;
  bytestream_of_list (rev b.bytes)

(* Return a list of booleans, representing (in order) the bits *)
let bits_of_write_bitstream b =
  let numbits = length b.bytes * 8 + b.wbit
  and bytestream = bytestream_of_write_bitstream b
  and bits = ref [] in
    let bitstream = bitstream_of_input (input_of_bytestream bytestream) in
      for x = 1 to numbits do
        bits =| getbit bitstream
      done;
      rev !bits

(* Same, but from a list *)
let join_write_bitstreams ss =
  let c = make_write_bitstream () in
    iter
      (putbool c)
      (flatten (map bits_of_write_bitstream ss));
    c

(* Append b to a. Inputs unaltered. *)
let write_bitstream_append a b =
  join_write_bitstreams [a; b]

(* Same, but align at the join. *)
let write_bitstream_append_aligned a b =
  let c = copy_write_bitstream a in
    align_write c;
    write_bitstream_append c b

