(**************************************************************************)
(* *)
(* Copyright (C) Jean-Christophe Filliatre *)
(* *)
(* This software is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Library General Public *)
(* License version 2.1, with the special exception on linking *)
(* described in file LICENSE. *)
(* *)
(* This software is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
(* *)
(**************************************************************************)
(* Suffix trees *)
(* Following Ukkonen's algorithm as described in Gusfield's book "Algorithms
on Strings, Trees and Sequences" *)
module type Alphabet = sig
type t
val dummy : t
val equal : t -> t -> bool
val compare : t -> t -> int
val print : Format.formatter -> t -> unit
type s
val length : s -> int
val get : s -> int -> t
end
module type Branching = sig
type key
type 'a t
val create : unit -> 'a t
val add : 'a t -> key -> 'a -> unit
val find : 'a t -> key -> 'a
val iter : (key -> 'a -> unit) -> 'a t -> unit
end
let debug = false
module Make(A : Alphabet)(B : Branching with type key = A.t) = struct
type node = {
mutable label_start : int;
mutable label_end : int;
mutable father : node;
mutable node_type : node_type;
mutable suffix_link : node;
path_position : int
}
and node_type = Leaf of int | Branch of node B.t
type t = { tree_string : A.s; tree_root : node; }
open Format
let print fmt t =
let m = A.length t.tree_string in
let rec print_node n depth =
if depth > 0 then begin
for i = 1 to depth - 1 do printf "|"; done;
fprintf fmt "+";
for i = n.label_start to n.label_end do
A.print fmt (if i == m+1 then A.dummy else A.get t.tree_string (i-1))
done;
if debug then
fprintf fmt " \t\t\t(%d,%d)" n.label_start n.label_end;
fprintf fmt "@\n"
end;
match n.node_type with
| Leaf _ -> ()
| Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
in
print_node t.tree_root 0
(* a dummy node that will never appear in returned suffix trees *)
let rec dummy_node =
{ label_start = 0; label_end = 0; father = dummy_node;
node_type = Leaf (-1); suffix_link = dummy_node; path_position = 0 }
let create_node ?(suffix_link=dummy_node) ~father ls le nt pp =
{ label_start = ls; label_end = le; father = father;
node_type = nt; suffix_link = suffix_link; path_position = pp }
type rule = Rule1 | Rule2 | Rule3
type position = { mutable pos_node : node; mutable pos_in_edge : int }
type extension_rule_2 = New_son | Split
(* Ukkonen's algorithm *)
let create s =
let m = A.length s in
(* we do not build [s$]; instead we override [get] to cover [1..m+1] *)
let get i =
assert (1 <= i && i <= m+1);
if i <= m then A.get s (i-1) else A.dummy
in
(* the root tree and the first node *)
let root_sons = B.create () in
let rec root =
{ label_start = 0; label_end = 0; father = root;
node_type = Branch root_sons; suffix_link = root; path_position = 0 }
in
let tree = { tree_string = s; tree_root = root } in
let n = create_node ~suffix_link:root ~father:root 1 (m+1) (Leaf 1) 1 in
B.add root_sons (get 1) n;
(* label end: during construction label end of a leaf is [!e] *)
let e = ref 0 in
let label_end n = match n.node_type with
| Leaf _ -> !e
| Branch _ -> n.label_end
in
(* DEBUG *)
let print fmt t =
let m = A.length t.tree_string in
let rec print_node n depth =
let e = label_end n in
if depth > 0 then begin
for i = 1 to depth - 1 do printf "|"; done;
fprintf fmt "+";
for i = n.label_start to e do
A.print fmt
(if i == m+1 then A.dummy else A.get t.tree_string (i-1))
done;
if debug then
fprintf fmt " \t\t\t(%d,%d)" n.label_start e;
fprintf fmt "@\n"
end;
match n.node_type with
| Leaf _ -> ()
| Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
in
print_node t.tree_root 0
in
(* END DEBUG *)
let suffix_less = ref dummy_node in
let set_suffix_link n =
if !suffix_less != dummy_node then begin
!suffix_less.suffix_link <- n;
suffix_less := dummy_node
end
in
let pos = { pos_node = root; pos_in_edge = 0 } in
let label_length n = label_end n - n.label_start + 1 in
let last_char_in_edge () =
if debug then printf "last_char_in_edge = %b@."
(pos.pos_in_edge = label_length pos.pos_node - 1);
pos.pos_in_edge = label_length pos.pos_node - 1
in
(* select the right branch *)
let find_son n c = match n.node_type with
| Leaf _ -> dummy_node
| Branch b -> (try B.find b c with Not_found -> dummy_node)
in
(* tracing a string down the tree (updating [pos]) *)
let trace_single_edge (gs,ge) ~skip =
let cont_node = find_son pos.pos_node (get gs) in
if cont_node == dummy_node then begin
pos.pos_in_edge <- label_length pos.pos_node - 1;
0, true
end else begin
pos.pos_node <- cont_node;
let length = label_length pos.pos_node in
let str_len = ge - gs + 1 in
if skip then begin
if length <= str_len then begin
pos.pos_in_edge <- length - 1;
length, not (length < str_len)
end else begin
pos.pos_in_edge <- str_len - 1;
str_len, true
end
end else begin
let length = if str_len < length then str_len else length in
pos.pos_in_edge <- 1;
let rec loop chars_found =
if pos.pos_in_edge < length then begin
if not (A.equal
(get (pos.pos_node.label_start + pos.pos_in_edge))
(get (gs + pos.pos_in_edge)))
then begin
pos.pos_in_edge <- pos.pos_in_edge - 1;
chars_found, true
end else begin
pos.pos_in_edge <- pos.pos_in_edge + 1;
loop (succ chars_found)
end
end else begin
pos.pos_in_edge <- pos.pos_in_edge - 1;
chars_found, not (chars_found < str_len)
end
in
loop 1
end
end
in
let trace_string gamma ~skip =
let rec trace chars_found ((gs,ge) as g) =
pos.pos_in_edge <- 0;
let edge_chars_found, search_done = trace_single_edge g ~skip in
let chars_found = chars_found + edge_chars_found in
if not search_done then
trace chars_found (gs + edge_chars_found, ge)
else
chars_found
in
trace 0 gamma
in
(* following the suffix link *)
let follow_suffix_link () =
if pos.pos_node != root then begin
if pos.pos_node.suffix_link == dummy_node || not (last_char_in_edge ())
then begin
if pos.pos_node.father == root then
pos.pos_node <- root
else begin
let s = pos.pos_node.label_start in
let gamma = (s, s + pos.pos_in_edge) in
pos.pos_node <- pos.pos_node.father.suffix_link;
ignore (trace_string gamma ~skip:true)
end
end else begin
pos.pos_node <- pos.pos_node.suffix_link;
pos.pos_in_edge <- label_length pos.pos_node - 1
end
end
in
(* node creation (Rule 2) *)
let apply_extension_rule_2 ls le path_pos edge_pos = function
| New_son ->
if debug then printf "rule 2: new leaf (%d,%d)@." ls le;
let leaf =
create_node ls le ~father:pos.pos_node (Leaf path_pos) path_pos
in
begin match pos.pos_node.node_type with
| Branch b -> B.add b (get ls) leaf; leaf
| Leaf _ -> assert false
end
| Split ->
if debug then printf "rule 2: split (%d,%d)@." ls le;
let node = pos.pos_node in
let b = B.create () in
let intl =
create_node node.label_start (node.label_start + edge_pos)
~father:node.father (Branch b) node.path_position
in
node.label_start <- node.label_start + edge_pos + 1;
let leaf = create_node ~father:intl ls le (Leaf path_pos) path_pos in
B.add b (get node.label_start) node;
B.add b (get ls) leaf;
node.father <- intl;
begin match intl.father.node_type with
| Branch b -> B.add b (get intl.label_start) intl; intl
| Leaf _ -> assert false
end
in
(* Single Extension Algorithm (SEA) *)
let sea j0 i_1 after_rule_3 =
if debug then begin
printf "%a@." print tree;
printf "extension: %d phase+1: %d" j0 i_1;
if after_rule_3 then
printf " starting at (%d,%d | %d)\n" pos.pos_node.label_start
(label_end pos.pos_node) pos.pos_in_edge
else
printf " followed from (%d,%d | %d)\n" pos.pos_node.label_start
(label_end pos.pos_node) pos.pos_in_edge
end;
let j = ref j0 in
if not after_rule_3 then follow_suffix_link ();
let chars_found =
if pos.pos_node == root then begin
trace_string (!j,i_1) ~skip:false
end else begin
j := i_1;
if last_char_in_edge () then begin
let tmp = find_son pos.pos_node (get i_1) in
if tmp != dummy_node then begin
pos.pos_node <- tmp;
pos.pos_in_edge <- 0;
1
end else
0
end else
if
A.equal (get (pos.pos_node.label_start + pos.pos_in_edge + 1))
(get i_1)
then begin
pos.pos_in_edge <- pos.pos_in_edge + 1;
1
end else
0
end
in
if debug then printf "chars_found = %d@." chars_found;
if chars_found = i_1 - !j + 1 then begin
(* Rule 3 applies *)
if debug then printf "rule 3 (%d,%d)@." !j i_1;
set_suffix_link pos.pos_node.father;
Rule3
end else if last_char_in_edge () || pos.pos_node == root then
match pos.pos_node.node_type with
| Branch b ->
ignore
(apply_extension_rule_2 (!j + chars_found) i_1 j0 0 New_son);
set_suffix_link pos.pos_node;
Rule2
| Leaf _ ->
Rule1
else begin
let tmp =
apply_extension_rule_2 (!j+chars_found) i_1 j0 pos.pos_in_edge Split
in
if !suffix_less != dummy_node then !suffix_less.suffix_link <- tmp;
if label_length tmp = 1 && tmp.father == root then begin
tmp.suffix_link <- root;
suffix_less := dummy_node
end else
suffix_less := tmp;
pos.pos_node <- tmp;
Rule2
end
in
(* Ukkonen main loop *)
let extension = ref 2 in
let last_rule_is_3 = ref false in
for i = 2 to m do (* phase [i+1] *)
e := i+1;
let rec spa () =
if !extension <= i+1 then begin
let rule = sea !extension (i+1) !last_rule_is_3 in
last_rule_is_3 := (rule = Rule3);
if not !last_rule_is_3 then begin incr extension; spa () end
end
in
spa ()
done;
let rec set_leaf_end n = match n.node_type with
| Leaf _ -> n.label_end <- m+1
| Branch b -> B.iter (fun _ n -> set_leaf_end n) b
in
set_leaf_end root;
tree
let find t s =
let m = A.length t.tree_string in
let p = A.length s in
let get i =
assert (1 <= i && i <= m+1);
if i <= m then A.get t.tree_string (i-1) else A.dummy
in
let find_son n c = match n.node_type with
| Leaf _ -> raise Not_found
| Branch b -> B.find b c
in
let rec descend n j =
let rec descend_edge j k =
if j < p && k <= n.label_end && A.equal (get k) (A.get s j) then
descend_edge (succ j) (succ k)
else if j = p then
{ pos_node = n; pos_in_edge = k }
else if k > n.label_end then
descend (find_son n (A.get s j)) j
else
raise Not_found
in
descend_edge j n.label_start
in
let n0 = match t.tree_root.node_type with
| Branch b -> B.find b (if p = 0 then A.dummy else A.get s 0)
| Leaf _ -> assert false
in
descend n0 0
let substring t s =
if A.length s = 0 then
0
else
let p = find t s in
p.pos_node.path_position - 1
let leaves f p =
let rec iter n = match n.node_type with
| Leaf j -> f (j - 1)
| Branch b -> B.iter (fun _ n -> iter n) b
in
iter p.pos_node
end
(* some usual branching implementations *)
module Bmap(X : Map.OrderedType) : Branching with type key = X.t =
struct
type key = X.t
module M = Map.Make(X)
type 'a t = 'a M.t ref
let create () = ref M.empty
let add m k v = m := M.add k v !m
let find m k = M.find k !m
let iter f m = M.iter f !m
end
module Barray(A : sig val size : int end) : Branching with type key = int =
struct
type key = int
type 'a t = 'a option array
let create () = Array.create A.size None
let add m k v = m.(k) <- Some v
let find m k = match m.(k) with Some v -> v | None -> raise Not_found
let iter f m =
Array.iteri (fun k v -> match v with None -> () | Some v -> f k v) m
end
module Blist(X : sig type t val equal : t -> t -> bool end)
: Branching with type key = X.t =
struct
type key = X.t
type 'a t = (key * 'a) list ref
let create () = ref []
let add m k v =
let rec replace = function
| [] -> raise Not_found
| (a,_ as p) :: l -> if X.equal k a then (a,v) :: l else p :: replace l
in
m := try replace !m with Not_found -> (k,v) :: !m
let find m k =
let rec lookup = function
| [] -> raise Not_found
| (a,b) :: l -> if X.equal k a then b else lookup l
in
lookup !m
let iter f m = List.iter (fun (a,b) -> f a b) !m
end
module Bhash(A : sig val size : int end)(X : Hashtbl.HashedType)
: Branching with type key = X.t =
struct
type key = X.t
module H = Hashtbl.Make(X)
type 'a t = 'a H.t
let create () = H.create A.size
let add m k v = H.replace m k v
let find m k = H.find m k
let iter = H.iter
end
module CharString = struct
type t = char
let dummy = Char.chr 0
let equal x y = x == y
let compare x y = Char.code x - Char.code y
let hash c = Char.code c
let print fmt c = Format.fprintf fmt "%c" c
type s = string
let length = String.length
let get = String.unsafe_get
end
module IntArray = struct
type t = int
let dummy = -1
let equal x y = x == y
let compare = compare
let hash c = Char.code c
let print fmt c = Format.fprintf fmt "%d" c
type s = int array
let length = Array.length
let get = Array.unsafe_get
end
(* manually defunctorized code for type string *)
module Ukkonen = struct
(*module B = Bhash(struct let size = 17 end)(CharString)*)
module B = Bmap(CharString)
type node = {
mutable label_start : int;
mutable label_end : int;
mutable father : node;
mutable node_type : node_type;
mutable suffix_link : node;
path_position : int;
}
and node_type = Leaf of int | Branch of node B.t
type t = { tree_string : string; tree_root : node; }
open Format
let print fmt t =
let m = String.length t.tree_string in
let rec print_node n depth =
if depth > 0 then begin
for i = 1 to depth - 1 do printf "|"; done;
fprintf fmt "+";
for i = n.label_start to n.label_end do
fprintf fmt "%c"
(if i == m+1 then '$' else String.unsafe_get t.tree_string (i-1))
done;
if debug then
fprintf fmt " \t\t\t(%d,%d)" n.label_start n.label_end;
fprintf fmt "@\n"
end;
match n.node_type with
| Leaf _ -> ()
| Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
in
print_node t.tree_root 0
(* a dummy node that will never appear in returned suffix trees *)
let rec dummy_node =
{ label_start = 0; label_end = 0; father = dummy_node;
node_type = Leaf (-1); suffix_link = dummy_node; path_position = 0 }
let create_node ?(suffix_link=dummy_node) ~father ls le nt pp =
{ label_start = ls; label_end = le; father = father;
node_type = nt; suffix_link = suffix_link; path_position = pp }
type rule = Rule1 | Rule2 | Rule3
type position = { mutable pos_node : node; mutable pos_in_edge : int }
type extension_rule_2 = New_son | Split
(* Ukkonen's algorithm *)
let create s =
let m = String.length s in
let s0 = String.make (m+2) '\000' in
String.blit s 0 s0 1 m;
(* we do not build [s$]; instead we override [get] to cover [1..m+1] *)
let get i =
assert (1 <= i && i <= m+1);
String.unsafe_get s0 i
in
(* the root tree and the first node *)
let root_sons = B.create () in
let rec root =
{ label_start = 0; label_end = 0; father = root;
node_type = Branch root_sons; suffix_link = root; path_position = 0 }
in
let tree = { tree_string = s; tree_root = root } in
let n = create_node ~suffix_link:root ~father:root 1 (m+1) (Leaf 1) 1 in
B.add root_sons (get 1) n;
(* label end: during construction label end of a leaf is [!e] *)
let e = ref 0 in
let label_end n = match n.node_type with
| Leaf _ -> !e
| Branch _ -> n.label_end
in
(* DEBUG *)
let print fmt t =
let m = String.length t.tree_string in
let rec print_node n depth =
let e = label_end n in
if depth > 0 then begin
for i = 1 to depth - 1 do printf "|"; done;
fprintf fmt "+";
for i = n.label_start to e do
fprintf fmt "%c"
(if i == m+1 then '$' else String.unsafe_get t.tree_string (i-1))
done;
if debug then
fprintf fmt " \t\t\t(%d,%d)" n.label_start e;
fprintf fmt "@\n"
end;
match n.node_type with
| Leaf _ -> ()
| Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
in
print_node t.tree_root 0
in
(* END DEBUG *)
let suffix_less = ref dummy_node in
let set_suffix_link n =
if !suffix_less != dummy_node then begin
!suffix_less.suffix_link <- n;
suffix_less := dummy_node
end
in
let pos = { pos_node = root; pos_in_edge = 0 } in
let label_length n = label_end n - n.label_start + 1 in
let last_char_in_edge () =
if debug then printf "last_char_in_edge = %b@."
(pos.pos_in_edge = label_length pos.pos_node - 1);
pos.pos_in_edge = label_length pos.pos_node - 1
in
(* select the right branch *)
let find_son n c = match n.node_type with
| Leaf _ -> dummy_node
| Branch b -> (try B.find b c with Not_found -> dummy_node)
in
(* tracing a string down the tree (updating [pos]) *)
let trace_single_edge (gs,ge) ~skip =
let cont_node = find_son pos.pos_node s0.[gs] in
if cont_node == dummy_node then begin
pos.pos_in_edge <- label_length pos.pos_node - 1;
0, true
end else begin
pos.pos_node <- cont_node;
let length = label_length pos.pos_node in
let str_len = ge - gs + 1 in
if skip then begin
if length <= str_len then begin
pos.pos_in_edge <- length - 1;
length, not (length < str_len)
end else begin
pos.pos_in_edge <- str_len - 1;
str_len, true
end
end else begin
let length = if str_len < length then str_len else length in
pos.pos_in_edge <- 1;
let rec loop chars_found =
if pos.pos_in_edge < length then begin
if not (s0.[pos.pos_node.label_start + pos.pos_in_edge] ==
s0.[gs + pos.pos_in_edge])
then begin
pos.pos_in_edge <- pos.pos_in_edge - 1;
chars_found, true
end else begin
pos.pos_in_edge <- pos.pos_in_edge + 1;
loop (succ chars_found)
end
end else begin
pos.pos_in_edge <- pos.pos_in_edge - 1;
chars_found, not (chars_found < str_len)
end
in
loop 1
end
end
in
let trace_string gamma ~skip =
let rec trace chars_found ((gs,ge) as g) =
pos.pos_in_edge <- 0;
let edge_chars_found, search_done = trace_single_edge g ~skip in
let chars_found = chars_found + edge_chars_found in
if not search_done then
trace chars_found (gs + edge_chars_found, ge)
else
chars_found
in
trace 0 gamma
in
(* following the suffix link *)
let follow_suffix_link () =
if pos.pos_node != root then begin
if pos.pos_node.suffix_link == dummy_node || not (last_char_in_edge ())
then begin
if pos.pos_node.father == root then
pos.pos_node <- root
else begin
let s = pos.pos_node.label_start in
let gamma = (s, s + pos.pos_in_edge) in
pos.pos_node <- pos.pos_node.father.suffix_link;
ignore (trace_string gamma ~skip:true)
end
end else begin
pos.pos_node <- pos.pos_node.suffix_link;
pos.pos_in_edge <- label_length pos.pos_node - 1
end
end
in
(* node creation (Rule 2) *)
let apply_extension_rule_2 ls le path_pos edge_pos = function
| New_son ->
if debug then printf "rule 2: new leaf (%d,%d)@." ls le;
let leaf =
create_node ls le ~father:pos.pos_node (Leaf path_pos) path_pos
in
begin match pos.pos_node.node_type with
| Branch b -> B.add b s0.[ls] leaf; leaf
| Leaf _ -> assert false
end
| Split ->
if debug then printf "rule 2: split (%d,%d)@." ls le;
let node = pos.pos_node in
let b = B.create () in
let intl =
create_node node.label_start (node.label_start + edge_pos)
~father:node.father (Branch b) node.path_position
in
node.label_start <- node.label_start + edge_pos + 1;
let leaf = create_node ~father:intl ls le (Leaf path_pos) path_pos in
B.add b s0.[node.label_start] node;
B.add b s0.[ls] leaf;
node.father <- intl;
begin match intl.father.node_type with
| Branch b -> B.add b s0.[intl.label_start] intl; intl
| Leaf _ -> assert false
end
in
(* Single Extension Algorithm (SEA) *)
let sea j0 i_1 after_rule_3 =
if debug then begin
printf "%a@." print tree;
printf "extension: %d phase+1: %d" j0 i_1;
if after_rule_3 then
printf " starting at (%d,%d | %d)\n" pos.pos_node.label_start
(label_end pos.pos_node) pos.pos_in_edge
else
printf " followed from (%d,%d | %d)\n" pos.pos_node.label_start
(label_end pos.pos_node) pos.pos_in_edge
end;
let j = ref j0 in
if not after_rule_3 then follow_suffix_link ();
let chars_found =
if pos.pos_node == root then begin
trace_string (!j,i_1) ~skip:false
end else begin
j := i_1;
if last_char_in_edge () then begin
let tmp = find_son pos.pos_node s0.[i_1] in
if tmp != dummy_node then begin
pos.pos_node <- tmp;
pos.pos_in_edge <- 0;
1
end else
0
end else
if
s0.[pos.pos_node.label_start + pos.pos_in_edge + 1] ==
s0.[i_1]
then begin
pos.pos_in_edge <- pos.pos_in_edge + 1;
1
end else
0
end
in
if debug then printf "chars_found = %d@." chars_found;
if chars_found = i_1 - !j + 1 then begin
(* Rule 3 applies *)
if debug then printf "rule 3 (%d,%d)@." !j i_1;
set_suffix_link pos.pos_node.father;
Rule3
end else if last_char_in_edge () || pos.pos_node == root then
match pos.pos_node.node_type with
| Branch b ->
ignore
(apply_extension_rule_2 (!j + chars_found) i_1 j0 0 New_son);
set_suffix_link pos.pos_node;
Rule2
| Leaf _ ->
Rule1
else begin
let tmp =
apply_extension_rule_2 (!j+chars_found) i_1 j0 pos.pos_in_edge Split
in
if !suffix_less != dummy_node then !suffix_less.suffix_link <- tmp;
if label_length tmp = 1 && tmp.father == root then begin
tmp.suffix_link <- root;
suffix_less := dummy_node
end else
suffix_less := tmp;
pos.pos_node <- tmp;
Rule2
end
in
(* Ukkonen main loop *)
let extension = ref 2 in
let last_rule_is_3 = ref false in
for i = 2 to m do (* phase [i+1] *)
e := i+1;
let rec spa () =
if !extension <= i+1 then begin
let rule = sea !extension (i+1) !last_rule_is_3 in
last_rule_is_3 := (rule = Rule3);
if not !last_rule_is_3 then begin incr extension; spa () end
end
in
spa ()
done;
let rec set_leaf_end n = match n.node_type with
| Leaf _ -> n.label_end <- m+1
| Branch b -> B.iter (fun _ n -> set_leaf_end n) b
in
set_leaf_end root;
tree
let dummy_char = Char.chr 0
let find t s =
let m = String.length t.tree_string in
let p = String.length s in
let get i =
assert (1 <= i && i <= m+1);
if i <= m then String.unsafe_get t.tree_string (i-1) else dummy_char
in
let find_son n c = match n.node_type with
| Leaf _ -> raise Not_found
| Branch b -> B.find b c
in
let rec descend n j =
let rec descend_edge j k =
if j < p && k <= n.label_end && (get k) == (String.unsafe_get s j) then
descend_edge (succ j) (succ k)
else if j = p then
{ pos_node = n; pos_in_edge = k }
else if k > n.label_end then
descend (find_son n (String.unsafe_get s j)) j
else
raise Not_found
in
descend_edge j n.label_start
in
let n0 = match t.tree_root.node_type with
| Branch b ->
B.find b (if p = 0 then dummy_char else String.unsafe_get s 0)
| Leaf _ -> assert false
in
descend n0 0
let substring t s =
if String.length s = 0 then
0
else
let p = find t s in
p.pos_node.path_position - 1
let leaves f p =
let rec iter n = match n.node_type with
| Leaf j -> f (j - 1)
| Branch b -> B.iter (fun _ n -> iter n) b
in
iter p.pos_node
(* ne fonctionne que si [B = Bmap] car il faut que [iter] soit lexic. *)
let suffix_array t =
let m = String.length t.tree_string in
let rec iter n = match n.node_type with
| Leaf j -> printf "%s\n" (String.sub t.tree_string (j-1) (m-j+1))
| Branch b -> B.iter (fun _ n -> iter n) b
in
iter t.tree_root
end