lz77/lib/finder.ml

75 lines
2.7 KiB
OCaml

open Stdlib
module H = Hashtbl
let min_match_len = 3
let max_match_len = 255
let max_candidates = 128
type t = { table : (int, int list) H.t; mutable last_indexed_pos : int }
let create () = { table = H.create 131071; last_indexed_pos = 0 }
let hash3 b0 b1 b2 = (b0 lsl 16) lor (b1 lsl 8) lor b2
let add_start_pos t key pos =
match H.find_opt t.table key with
| None -> H.add t.table key [pos]
| Some lst -> H.replace t.table key (pos :: lst)
let add_bytes t window _buf _ofs _len =
let total = Int64.to_int (Window.total_processed window) in
let start = max 1 (t.last_indexed_pos + 1) in
let last = total - 2 in
for p = start to last do
let d = total - p + 1 in
let b0 = Window.get_byte_at_distance window d in
let b1 = Window.get_byte_at_distance window (d - 1) in
let b2 = Window.get_byte_at_distance window (d - 2) in
let key = hash3 b0 b1 b2 in
add_start_pos t key p
done;
if last >= t.last_indexed_pos then t.last_indexed_pos <- max t.last_indexed_pos (total - 1)
let take_n n lst =
let rec aux i acc = function
| [] | _ when i <= 0 -> List.rev acc
| x::xs -> aux (i - 1) (x :: acc) xs
in aux n [] lst
let prune_positions current_total = function
| [] -> []
| lst ->
let lower = current_total - Window.window_size + 1 in
List.filter (fun pos -> pos >= lower) lst
let find_longest t window lookahead ofs la_len =
if la_len - ofs < min_match_len then None else
let b0 = Char.code (Bytes.get lookahead ofs) in
let b1 = Char.code (Bytes.get lookahead (ofs + 1)) in
let b2 = Char.code (Bytes.get lookahead (ofs + 2)) in
let key = hash3 b0 b1 b2 in
match H.find_opt t.table key with
| None -> None
| Some positions_raw ->
let current_total = Int64.to_int (Window.total_processed window) in
let positions = prune_positions current_total positions_raw in
let candidates = take_n max_candidates positions in
let best = ref None in
let window_sz = Window.window_size in
List.iter (fun pos ->
let distance = current_total - pos + 1 in
if distance > 0 && distance <= window_sz then begin
let max_len = min max_match_len (la_len - ofs) in
let available = Window.current_size window - distance + 1 in
if available >= min_match_len then
let final_max = min max_len available in
let matched = Window.match_length_at_distance window distance lookahead ofs final_max in
if matched >= min_match_len then
match !best with
| None -> best := Some (distance, matched)
| Some (d,l) -> if matched > l || (matched = l && distance < d) then best := Some (distance, matched)
end
) candidates;
best