(**************************************************************************)
(*                                                                        *)
(*  Copyright (C) Jean-Christophe Filliatre                               *)
(*                                                                        *)
(*  This software is free software; you can redistribute it and/or        *)
(*  modify it under the terms of the GNU Library General Public           *)
(*  License version 2.1, with the special exception on linking            *)
(*  described in file LICENSE.                                            *)
(*                                                                        *)
(*  This software is distributed in the hope that it will be useful,      *)
(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *)
(*                                                                        *)
(**************************************************************************)

(*s This module implements Ukkonen's algorithm for suffix trees.
    The module [Ukkonen] provides an implementation for Ocaml type [string].
    A generic implementation is also provided, as a functor parameterized by
    both the alphabet (the types for characters and strings) and the data
    structure used to branch in the suffix trees. *)

module Ukkonen : sig
  type t
  val create : string -> t
  val substring : t -> string -> int
  type position
  val find : t -> string -> position
  val leaves : (int -> unit) -> position -> unit
  val print : Format.formatter -> t -> unit
  val suffix_array : t -> unit
end


(*s Alphabet *)

module type Alphabet = sig

  (* characters *)

  type t 

  (* [dummy] is a character that is assumed not to appear in any string *)
  val dummy : t

  val equal : t -> t -> bool
  val compare : t -> t -> int

  val print :  Format.formatter -> t -> unit

  (* strings over this alphabet *)

  type s

  val length : s -> int

  val get : s -> int -> t

end

(*s Branching *)

module type Branching = sig

  type key
  type 'a t 

  val create : unit -> 'a t
  val add : 'a t -> key -> 'a -> unit
  val find : 'a t -> key -> 'a
  val iter : (key -> 'a -> unit) -> 'a t -> unit
end

(*s Generic Suffix Trees implementation *)

module Make(A : Alphabet)(B : Branching with type key = A.t) : sig

  (** the type of suffix trees *)
  type t

  (** [create s] build the suffix tree for string [s] *)
  val create : A.s -> t

  (** [print fmt t] displays the suffix tree [t] on the formatter [fmt] *)
  val print : Format.formatter -> t -> unit

  (** [substring t s] returns the position of substring [s] in the string
      denotated by suffix tree [t] if any; raises [Not_found]
      otherwise *)
  val substring : t -> A.s -> int

  (** position within a suffix tree *)
  type position

  (** [find t s] descends the suffix tree [t] according to string [s] and
      returns the corresponding position in [t] if any; raises [Not_found]
      otherwise *)
  val find : t -> A.s -> position

  (** [leaves f p] iterates function [f] overs all suffixes below 
      position [p]; a suffix is passed as the index of its first character *)
  val leaves : (int -> unit) -> position -> unit

end

(*s Some usual branching implementations *)

(* Maps from Ocaml's standard library *)
module Bmap(X : Map.OrderedType) 
  : Branching with type key = X.t

(* Arrays. To be used only when characters are integers. 
   The alphabet size must be provided *)
module Barray(A : sig val size : int end) 
  : Branching with type key = int

(* Association lists *)
module Blist(X : sig type t val equal : t -> t -> bool end) 
  : Branching with type key = X.t

(* Hash tables. 
   The provided value [A.size] is a hint to initialize hash tables length.*)
module Bhash(A : sig val size : int end)(X : Hashtbl.HashedType) 
  : Branching with type key = X.t

(*s The usual alphabet for types [char] and [string] *)

module CharString : Alphabet with type t = char and type s = string


This document was generated using caml2html