(* \chaptertitle{PDFImage}{PDF Images} *)
open Utility

type pixel_layout =
  | BPP1
  | BPP8
  | BPP24
  | BPP48

type image =
  | JPEG of bytestream * float list option
  | JPEG2000 of bytestream * float list option
  | JBIG2 of bytestream * float list option
  | Raw of int * int * pixel_layout * bytestream

let string_of_layout = function
  | BPP1 -> "BPP1"
  | BPP8 -> "BPP8"
  | BPP24 -> "BPP24"
  | BPP48 -> "BPP48"

let string_of_image = function
  | JPEG _ -> "JPEG"
  | JPEG2000 _ -> "JPEG2000"
  | JBIG2 _ -> "JBIG2"
  | Raw (w, h, layout, data) ->
      "RAW: " ^ string_of_int w ^ " " ^ string_of_int h
      ^ " " ^ string_of_layout layout ^ " bytes of data = "
      ^ string_of_int (stream_size data)

let print_floats fs =
  for x = 1 to Array.length fs do
    print_float fs.(x - 1);
    print_string " "
  done;
  print_string "\n"

(* Decodes an image in-place, given an array of floats. Assumes that output of Decode fits into same number of bits as input of Decode. FIXME: When might this not be true? *)

(* Invert the bits in a bytestream *)
let invert_bits s =
  for x = 0 to stream_size s - 1 do
    sset s x (sget s x lxor 255)
  done

let decode fs bpc image =
  (*i Printf.printf "decode, %i floats, BPC %i\n" (Array.length fs) bpc; i*)
  match Array.length fs with
  | 0 -> ()
  | l when odd l -> raise (Pdf.PDFError "Bad /Decode")
  | l ->
      (* Check to see if it's the identity transform. If so, do nothing. *)
      let ident = ref true in
        for x = 0 to Array.length fs / 2 - 1 do
          if fs.(x * 2) <> 0. || fs.(x * 2 + 1) <> 1. then clear ident
        done;
        (*i Printf.printf "ident: %b\n" !ident; i*)
        if not !ident then
          let interpolate dmin dmax x =
            int_of_float (dmin +. (float x *. (dmax -. dmin) /. (2.0 ** float bpc -. 1.)))
          in
            let functions =
              Array.init (l / 2) (function i -> interpolate (fs.(i * 2)) (fs.(i * 2 + 1)))
            in
              match bpc with
              | 1 ->
                 (* For now, just recognise [1 0] *)
                 invert_bits image
              | 2 -> (*iflprint "DECODE: 2 bit\n"; i*)()
              | 4 -> (*iflprint "DECODE: 4 bit\n"; i*)()
              | 8 ->
                  (*i flprint "DECODE: 8 bit\n";
                  print_floats fs; i*)
                  for p = 0 to stream_size image - 1 do
                    sset image p ((functions.(p mod (l / 2))) (sget image p))
                  done
              | 16 -> (*i flprint "16 bit"; i*) ()
              | _ -> raise (Pdf.PDFError "Bad /Decode")

let decode_defaults pdf resources entry image =
  match entry with
  | None ->
      begin match Pdf.lookup_direct pdf "/ColorSpace" image with
      | None -> None
      | Some cspace ->
          match Pdfspace.read_colourspace pdf resources cspace with 
          | Pdfspace.DeviceGray | Pdfspace.CalGray (_, _, _) | Pdfspace.Separation (_, _, _) ->
              Some (Pdf.Array [Pdf.Real 0.; Pdf.Real 1.])
          | Pdfspace.DeviceRGB | Pdfspace.CalRGB (_, _, _, _) ->
              Some (Pdf.Array [Pdf.Real 0.; Pdf.Real 1.; Pdf.Real 0.; Pdf.Real 1.;Pdf.Real 0.; Pdf.Real 1.])
          | Pdfspace.DeviceCMYK ->
              Some (Pdf.Array [Pdf.Real 0.; Pdf.Real 1.; Pdf.Real 0.; Pdf.Real 1.;Pdf.Real 0.; Pdf.Real 1.; Pdf.Real 0.; Pdf.Real 1.])
          | Pdfspace.Lab (_, _, range) ->
              Some (Pdf.Array ([Pdf.Real 0.; Pdf.Real 1.] @ map (function n -> Pdf.Real n) (Array.to_list range)))
          | Pdfspace.ICCBased {Pdfspace.icc_range = range} ->
              (*i flprint "Making default from ICCBased colour space\n"; i*)
              Some (Pdf.Array (map (function n -> Pdf.Real n) (Array.to_list range)))
          | Pdfspace.Indexed (_, _) ->
              (*i flprint "Making default from Indexed colour space\n"; i*)
              (*i let bpc =
                match Pdf.lookup_direct_orelse pdf "/BitsPerComponent" "/BPC" image with
                | Some (Pdf.Integer n) -> n
                | _ -> 0
              in i*) (* Commented out because we don't use it yet... *)
                (* For now, just make identity. FIXME: How should decode /
                 indexed work - do it in the actual extraction routine? *)
                Some (Pdf.Array [Pdf.Real 0.; Pdf.Real 1.])
                (*i let n = 2.0 ** float bpc -. 1.0 in
                  Some (Pdf.Array [Pdf.Real 0.; Pdf.Real n]) i*)
          | Pdfspace.Pattern -> None
          | Pdfspace.DeviceN (arr, _, _, _) ->
              Some (Pdf.Array (flatten (many [Pdf.Real 0.; Pdf.Real 1.] (Array.length arr))))
      end
  | x ->
      match Pdf.lookup_direct pdf "/ColorSpace" image with
      | None -> x (* Because image masks don't have a colourspace *)
      | Some cspace ->
          match Pdfspace.read_colourspace pdf resources cspace with
          (* Again. A bodge. Need to sort out indexed decoding properly. *)
          | Pdfspace.Indexed (_, _) -> None
          | _ -> x

(* Decode until it is either plain or a type of decoding we can't deal with
natively. *) 
let rec decode_to_image pdf = function
  | Pdf.Stream {contents = d, s} as stream ->
      begin match Pdf.lookup_direct pdf "/Filter" d with
      | None
      | Some (Pdf.Array [])
      | Some (Pdf.Name ("/DCTDecode" | "/DCT" | "/JBIG2Decode" | "/JPXDecode"))
      | Some (Pdf.Array [Pdf.Name ("/DCTDecode" | "/DCT" | "/JBIG2Decode" | "/JPXDecode")]) -> ()
      | _ ->
          Pdfcodec.decode_pdfstream_onestage pdf stream;
          decode_to_image pdf stream 
      end
  | _ -> raise (Pdf.PDFError "decode_to_image: bad stream")

(* Basic CMYK to RGB conversion *)
let rgb_of_cmyk c m y k =
  let c = float c and m = float m and y = float y and k = float k in
  let r = 255. -. fmin 255. ((c /.  255.) *. (255. -. k) +. k) 
  and g = 255. -. fmin 255. ((m /.  255.) *. (255. -. k) +. k)
  and b = 255. -. fmin 255. ((y /.  255.) *. (255. -. k) +. k) in
    toint r, toint g,  toint b

let read_cmyk_8bpp_as_rgb24 width height data =
  let data' = mkstream (width * height * 3) in
    for p = 0 to width * height - 1 do
      let c = sget data (p * 4)
      and m = sget data (p * 4 + 1)
      and y = sget data (p * 4 + 2)
      and k = sget data (p * 4 + 3) in
        let r, g, b = rgb_of_cmyk c m y k in
          sset data' (p * 3) r;
          sset data' (p * 3 + 1) g;
          sset data' (p * 3 + 2) b
    done;
    data'

let read_gray_8bpp_as_rgb24 width height data =
  let data' = mkstream (width * height * 3) in
    for pout = 0 to width * height - 1 do
      sset data' (pout * 3) (sget data pout);
      sset data' (pout * 3 + 1) (sget data pout);
      sset data' (pout * 3 + 2) (sget data pout);
    done;
    data'

(* Input is 1bpp, rows padded to bytes. *)
let read_1bpp_as_rgb24 width height s =
  let s' = mkstream (width * height * 3)
  and s_bits = Pdfio.bitstream_of_input (Pdfio.input_of_bytestream s) in
    let pout = ref 0 in
      for row = 0 to height - 1 do
        let bits_to_do = ref width in
          while !bits_to_do > 0 do
            let bit = if Pdfio.getbit s_bits then 255 else 0 in
              sset s' !pout bit;
              sset s' (!pout + 1) bit;
              sset s' (!pout + 2) bit;
              decr bits_to_do;
              pout += 3
          done;
          Pdfio.align s_bits 
      done;
      s'

(* 4bpp, rows padded to bytes. *)
let read_4bpp_gray_as_rgb24 width height s =
  let s' = mkstream (width * height * 3)
  and s_bits = Pdfio.bitstream_of_input (Pdfio.input_of_bytestream s) in
    let pout = ref 0 in
      for row = 0 to height - 1 do
        let pix_to_do = ref width in
          while !pix_to_do > 0 do
            let a = if Pdfio.getbit s_bits then 1 else 0 in
            let b = if Pdfio.getbit s_bits then 1 else 0 in
            let c = if Pdfio.getbit s_bits then 1 else 0 in
            let d = if Pdfio.getbit s_bits then 1 else 0 in
              let col = (a * 8 + b * 4 + c * 2 + d) * (16 + 1) in
                sset s' !pout col;
                sset s' (!pout + 1) col;
                sset s' (!pout + 2) col;
                decr pix_to_do;
                pout += 3
          done;
          Pdfio.align s_bits 
      done;
      s'

let read_8bpp_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    for x = 0 to width * height - 1 do
      match Hashtbl.find table (sget s x) with
      | [r; g; b] ->
          sset s' (x * 3) r;
          sset s' (x * 3 + 1) g;
          sset s' (x * 3 + 2) b
      | _ -> raise (Pdf.PDFError "read_8bpp_indexed_as_rgb24")
    done;
    s'

let read_8bpp_cmyk_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    for x = 0 to width * height - 1 do
      match Hashtbl.find table (sget s x) with
      | [c; m; y; k] ->
          let r, g, b = rgb_of_cmyk c m y k in
            sset s' (x * 3) r;
            sset s' (x * 3 + 1) g;
            sset s' (x * 3 + 2) b
      | _ -> raise (Pdf.PDFError "read_8bpp_indexed_as_rgb24")
    done;
    s'

let read_4bpp_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    let posin = ref 0
    and posout = ref 0 in
      for row = 0 to height - 1 do
        for byte = 0 to (width + 1) / 2 - 1 do
          let p1 = sget s !posin lsr 4
          and p2 = sget s !posin land 15 in
            begin match Hashtbl.find table p1 with
            | [r1; g1; b1] ->
                sset s' !posout r1; incr posout;
                sset s' !posout g1; incr posout;
                sset s' !posout b1; incr posout;
            | _ -> raise (Pdf.PDFError "read_4bpp_indexed_as_rgb24")
            end;
            begin
              if not (odd width && byte = (width + 1) / 2 - 1) then
              match Hashtbl.find table p2 with
              | [r2; g2; b2] ->
                   sset s' !posout r2; incr posout;
                   sset s' !posout g2; incr posout;
                   sset s' !posout b2; incr posout;
              | _ -> raise (Pdf.PDFError "read_4bpp_indexed_as_rgb24")
            end;
            incr posin
        done
      done;
      s'

let read_4bpp_cmyk_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    let posin = ref 0
    and posout = ref 0 in
      for row = 0 to height - 1 do
        for byte = 0 to (width + 1) / 2 - 1 do
          let p1 = sget s !posin lsr 4
          and p2 = sget s !posin land 15 in
            begin match Hashtbl.find table p1 with
            | [c; m; y; k] ->
                let r1, g1, b1 = rgb_of_cmyk c m y k in
                  sset s' !posout r1; incr posout;
                  sset s' !posout g1; incr posout;
                  sset s' !posout b1; incr posout;
            | _ -> raise (Pdf.PDFError "read_4bpp_cmyk_indexed_as_rgb24")
            end;
            begin
              if not (odd width && byte = (width + 1) / 2 - 1) then
              match Hashtbl.find table p2 with
              | [c; m; y; k] ->
                  let r2, g2, b2 = rgb_of_cmyk c m y k in
                    sset s' !posout r2; incr posout;
                    sset s' !posout g2; incr posout;
                    sset s' !posout b2; incr posout;
              | _ -> raise (Pdf.PDFError "read_4bpp_cmyk_indexed_as_rgb24")
            end;
            incr posin
        done
      done;
      s'

(* Separation, CMYK alternate, tint transform function. *)
let read_separation_cmyk_as_rgb24 f width height s = 
  let s' = mkstream (width * height * 3) in
    for p = 0 to width * height - 1 do
      let v = sget s p in
        match Pdffun.eval_function f [float v /. 255.] with
        | [c; y; m; k] ->
            let c = toint (c *. 255.)
            and m = toint (m *. 255.)
            and y = toint (y *. 255.)
            and k = toint (k *. 255.) in
              let r, g, b = rgb_of_cmyk c m y k in
                sset s' (p * 3) r;
                sset s' (p * 3 + 1) g;
                sset s' (p * 3 + 2) b;
        | _ ->
            raise (Pdf.PDFError "Bad tint transform function")
    done;
    s'

(* FIXME: Need to add this. Before or after decode? This module needs re-writing
 * somewhat! *)
let convert_lab_to_rgb stream = ()

let rec read_raw_image size colspace bpc pdf resources width height dict data =
  match size, colspace, bpc with
  | size, (Pdfspace.DeviceRGB | Pdfspace.CalRGB _), Some (Pdf.Integer 8)
      when size >= width * height * 3 ->
        Raw (width, height, BPP24, data)
  | size, Pdfspace.DeviceCMYK, Some (Pdf.Integer 8)
      when size >= width * height * 4 ->
        Raw (width, height, BPP24, read_cmyk_8bpp_as_rgb24 width height data)
  | size, (Pdfspace.DeviceGray | Pdfspace.CalGray _), Some (Pdf.Integer 8)
      when size >= width * height ->
        Raw (width, height, BPP24, read_gray_8bpp_as_rgb24 width height data)
  | size, _, Some (Pdf.Integer 1)
      when size >= width * height / 8 ->
        Raw (width, height, BPP24, read_1bpp_as_rgb24 width height data)
  | size, Pdfspace.DeviceGray, Some (Pdf.Integer 4)
      when size >= width * height / 2 ->
        Raw (width, height, BPP24, read_4bpp_gray_as_rgb24 width height data)
  | size, Pdfspace.Indexed ((Pdfspace.DeviceRGB | Pdfspace.CalRGB _), table), Some (Pdf.Integer 8)
  | size,
    Pdfspace.Indexed
      ((Pdfspace.DeviceN (_, (Pdfspace.DeviceRGB | Pdfspace.CalRGB _ |
      Pdfspace.ICCBased {Pdfspace.icc_alternate = (Pdfspace.DeviceRGB |
      Pdfspace.CalRGB _)}), _, _) |
       Pdfspace.ICCBased {Pdfspace.icc_alternate = (Pdfspace.DeviceRGB |
       Pdfspace.CalRGB _)}) , table),
    Some (Pdf.Integer 8)
      when size >= width * height ->
        Raw (width, height, BPP24, read_8bpp_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Indexed (Pdfspace.DeviceCMYK, table), Some (Pdf.Integer 8)
      when size >= width * height ->
        Raw (width, height, BPP24, read_8bpp_cmyk_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Indexed ((Pdfspace.DeviceRGB | Pdfspace.CalRGB _), table), Some (Pdf.Integer 4)
  | size, Pdfspace.Indexed (Pdfspace.ICCBased {Pdfspace.icc_alternate = (Pdfspace.DeviceRGB | Pdfspace.CalRGB _)}, table), Some (Pdf.Integer 4)
      when size >= width * height / 2 ->
        Raw (width, height, BPP24, read_4bpp_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Indexed ((Pdfspace.DeviceCMYK), table), Some (Pdf.Integer 4)
  | size, Pdfspace.Indexed (Pdfspace.ICCBased {Pdfspace.icc_alternate = (Pdfspace.DeviceCMYK)}, table), Some (Pdf.Integer 4)
      when size >= width * height / 2 ->
        Raw (width, height, BPP24, read_4bpp_cmyk_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Separation (_, Pdfspace.DeviceCMYK, fn), Some (Pdf.Integer 8)
      when size >= width * height ->
          Raw (width, height, BPP24, read_separation_cmyk_as_rgb24 fn width height data)
  | size, Pdfspace.ICCBased {Pdfspace.icc_alternate = cs}, _ ->
      read_raw_image size cs bpc pdf resources width height dict data
  | size, cs, bpc ->
     (*i Printf.printf "NO IMAGE:\n size:%i\n cspace\n%s\n bpc\n%s\n width %i\n height %i\n" size
     (Pdfspace.string_of_colourspace cs)
     (match bpc with None -> "NONE" | Some bpc -> Pdfwrite.string_of_pdf bpc)
     width
     height;
     flush stdout; i*)
     raise (Pdf.PDFError "No image\n")

let rec get_raw_image pdf resources width height dict data =
  try
  let size =
    stream_size data
  and colspace =
    (* If an image mask, it's /DeviceGray, effectively *)
    match Pdf.lookup_direct_orelse pdf "/ImageMask" "/IM" dict with
    | Some (Pdf.Boolean true) -> Pdfspace.DeviceGray
    | _ ->
      let colspace =
        Pdf.lookup_direct_orelse pdf "/ColorSpace" "/CS" dict
      in
        let space =
          match Pdf.lookup_direct pdf "/ColorSpace" resources, colspace with
          | Some (Pdf.Dictionary _ as d), Some (Pdf.Name c) ->
              begin match Pdf.lookup_direct pdf c d with
              | Some colspace -> colspace
              | _ -> (Pdf.Name c)
              end
          | _ ->
              match colspace with
              | Some c -> c
              | _ -> raise (Pdf.PDFError "PDf image: no colourspace")
        in
          Pdfspace.read_colourspace pdf resources space
  and bpc =
    match Pdf.lookup_direct_orelse pdf "/BitsPerComponent" "/BPC" dict with
    | Some bpc -> Some bpc
    | None ->
        match Pdf.lookup_direct pdf "/ImageMask" dict with
        | Some (Pdf.Boolean true) -> Some (Pdf.Integer 1)
        | _ -> None
  in
    (*i flprint ("IMAGE SPACE:\n" ^ Pdfspace.string_of_colourspace colspace ^
     * "\n"); i*)
    read_raw_image size colspace bpc pdf resources width height dict data
  with
    e ->
      (*i Printf.eprintf (Pdfwrite.string_of_pdf (Pdf.direct pdf dict)); i*)
      raise e 

(* Print some debug information about an image. *)
let print_image pdf resources img =
  (*i Printf.printf "-----------------------------------------------------\n";
  Printf.printf "Image Dictionary:\n%s\n" (Pdfwrite.string_of_pdf img); i*)
  let w = match Pdf.lookup_direct pdf "/Width" img with Some (Pdf.Integer n) -> n | _ ->  0
  and h = match Pdf.lookup_direct pdf "/Height" img with Some (Pdf.Integer n)-> n | _ -> 0 in
    Printf.printf "Width is %i, height %i\n" w h;
  begin match Pdf.lookup_direct pdf "/ColorSpace" img with
  | Some cs -> Printf.printf "Colourspace is...%s\n" (Pdfspace.string_of_colourspace (Pdfspace.read_colourspace pdf resources cs))
  | None -> Printf.printf "No Colourspace\n"
  end;
  begin match Pdf.lookup_direct pdf "/BitsPerComponent" img with
  | Some (Pdf.Integer n) -> Printf.printf "%i Bits Per Component\n" n
  | _ -> Printf.printf "No /BitsPerComponent\n"
  end;
  begin match Pdf.lookup_direct pdf "/Decode" img with
  | Some decode -> Printf.printf "Decode Array: %s\n" (Pdfwrite.string_of_pdf decode)
  | None -> Printf.printf "No /Decode Array\n"
  end

let get_image_24bpp pdf resources stream =
  (*i flprint "\n";
  print_image pdf resources (Pdf.direct pdf stream);
  flprint "\n"; i*)
  let stream = Pdf.direct pdf stream in
  let streamdict, data =
    Pdf.getstream stream;
    match stream with
    | Pdf.Stream {contents = (s, Pdf.Got d)} ->
        s, d
    | _ -> raise (Assert_failure ("", 0, 0)) (*r [Pdf.getstream] would have failed *)
  in
    let width = 
      match (Pdf.lookup_direct_orelse pdf "/Width" "/W" streamdict) with
      | Some (Pdf.Integer x) -> x
      | _ -> raise (Pdfread.PDFSemanticError "Malformed /Image width")
    and height =
      match (Pdf.lookup_direct_orelse pdf "/Height" "/H" streamdict) with
      | Some (Pdf.Integer x) -> x
      | _ -> raise (Pdfread.PDFSemanticError "Malformed /Image height")
    and bpc =
      match Pdf.lookup_direct_orelse pdf "/BitsPerComponent" "/BPC" streamdict with
      | Some (Pdf.Integer n) -> n
      | _ -> 0
    in
      decode_to_image pdf stream;
      match stream with
      | Pdf.Stream {contents = (Pdf.Dictionary d) as dict, Pdf.Got s} ->
          let get_decode () =
            let decode_entry = Pdf.lookup_direct_orelse pdf "/Decode" "/D" dict in
              let decode_entry = decode_defaults pdf resources decode_entry dict in
                  match decode_entry with
                  | Some (Pdf.Array nums) ->
                      Some (map (function (Pdf.Real n) -> n | _ -> 0.) nums)
                  | _ -> None
          in
            begin match Pdf.lookup_direct_orelse pdf "/Filter" "/F" dict with
            | None | Some (Pdf.Array []) ->
                let raw = get_raw_image pdf resources width height dict s
                and decode_entry = Pdf.lookup_direct_orelse pdf "/Decode" "/D" dict in
                  (* Printf.printf "Decode entry before decode_defaults %s\n"
                  ((function None -> "None" | Some x -> (Pdfwrite.string_of_pdf
                  x)) decode_entry); i*)
                  let decode_entry = decode_defaults pdf resources decode_entry dict in
                  (*i Printf.printf "Decode entry after decode_defaults %s\n"
                  ((function None -> "None" | Some x -> (Pdfwrite.string_of_pdf
                  x)) decode_entry); i*)
                    let floats =
                      match decode_entry with
                      | Some (Pdf.Array elts) -> Array.of_list (map Pdf.getnum elts)
                      | None -> [||]
                      | _ -> raise (Pdf.PDFError "Bad /Decode")
                    in
                      begin match raw with
                      | Raw (_, _, _, data) -> if floats <> [||] then decode floats bpc data;
                      | _ -> ()
                      end;
                      raw
            | Some (Pdf.Name ("/DCTDecode" | "/DCT"))
            | Some (Pdf.Array [Pdf.Name ("/DCTDecode" | "/DCT")]) -> JPEG (s, get_decode ())
            | Some (Pdf.Name "/JBIG2Decode")
            | Some (Pdf.Array [Pdf.Name "/JBIG2Decode"]) -> JBIG2 (s, get_decode ())
            | Some (Pdf.Name "/JPXDecode")
            | Some (Pdf.Array [Pdf.Name "/JPXDecode"]) -> JPEG2000 (s, get_decode ())
            | _ -> raise (Pdf.PDFError "decode_to_image")
            end
      | _ -> raise (Assert_failure ("", 0, 0))

let get_image (pdf : Pdf.pdfdoc) (resources : Pdf.pdfobject) (stream : Pdf.pdfobject) =
  (raise (Pdf.PDFError "get_image not implemented") : image)

let get_image_raw_24bpp (pdf : Pdf.pdfdoc) (resources : Pdf.pdfobject) (stream : Pdf.pdfobject) =
  (raise (Pdf.PDFError "get_image not implemented") : image)

let get_image_raw (pdf : Pdf.pdfdoc) (resources : Pdf.pdfobject) (stream : Pdf.pdfobject) =
  (raise (Pdf.PDFError "get_image not implemented") : image)

