(**************************************************************************) (* *) (* Copyright (C) Jean-Christophe Filliatre *) (* *) (* This software is free software; you can redistribute it and/or *) (* modify it under the terms of the GNU Library General Public *) (* License version 2, with the special exception on linking *) (* described in file LICENSE. *) (* *) (* This software is distributed in the hope that it will be useful, *) (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) (* *) (**************************************************************************) (*p \usepackage{url} \def\currentmodule{Koda-Ruskey (\today)} *) (*S Intro. This program implements the Koda-Ruskey algorithm for generating all ideals of a given forest poset \char091{\sl Journal of Algorithms\/ \bf 15} (1993), 324--340\char093. It was inspired by two implementations of this algorithm by D.~E.~Knuth (available at \url{http://www-cs-staff.Stanford.EDU/~knuth/programs.html#Gray}). This implementation demonstrates the adequacy of a functional programming language for this particular algorithm. Indeed, it has a very nice and concise implementation using higher-order functions. (The algorithm itself, given in section~\ref{algo}, is only 9 lines long). *) (*s Trees and forests. In a given forest, the [n] nodes are numbered from 0 to [n-1] according to a postfix traversal. *) type tree = Node of int * forest and forest = tree list (*s From/to nested parentheses. There is a one-to-one mapping between forests and nested parentheses, where each pair of parentheses is the child of the immediatly enclosing pair of parentheses. *) let from_paren s = let rec parse stack i = parser | [< ''('; st >] -> (* we push a new forest on top of stack *) parse ([] :: stack) i st | [< '')'; st >] -> (* we make a node with the forest [f1] on top of stack *) (match stack with | f1 :: f2 :: s -> let t = Node (i, List.rev f1) in parse ((t :: f2) :: s) (i + 1) st | _ -> failwith "unbalanced parenthesis") | [< >] -> (* end of input: there must be exactly one forest on top of stack *) (match stack with | [f] -> List.rev f | _ -> failwith "unclosed parenthesis") in parse [[]] 0 (Stream.of_string s) open Printf let rec print_tree (Node (_, f)) = "(" ^ print_forest f ^ ")" and print_forest = function | [] -> "" | t :: f -> print_tree t ^ print_forest f (* \newpage *) (*s Nice rendering with dot. We append all the ideals in a single {\tt dot} file ["tree.dot"]. Bits are given as an array [bits] of zeros and ones, and nodes are displayed in white or grey accordingly. (Each ideal goes on a separate page; thus stepping from page to page gives a nice animation.) *) let cout = open_out "tree.dot" let to_dot f bits = fprintf cout "digraph g {\n"; let rec print_tree (Node (i, f)) = let color = if bits.(i) == 0 then "" else "[style=filled]" in fprintf cout "b%d %s;\n" i color; List.iter (fun (Node (j, _)) -> fprintf cout "b%d -> b%d;\n" i j) f; print_forest f and print_forest f = List.iter print_tree f in print_forest f; fprintf cout "}\n" (*s Size. *) let rec tree_size (Node (_,f)) = succ (forest_size f) and forest_size f = List.fold_left (fun s t -> s + tree_size t) 0 f (*s Command line options. *) let display = ref true let dot = ref false (*S Purely functional implementation. *) type state = int array * string type computation = state -> state let create f = (Array.create (forest_size f) 0, "") let update i v (t, d) = let t' = Array.copy t in t'.(i) <- v; (t', d) let get i (t, _) = t.(i) let msg m ((t, d) as s) = if !display then (t, d ^ m ^ "\n") else s let print (t, d) = (t, (Array.fold_left (fun d b -> d ^ string_of_int b) d t) ^ "\n") let (++) f g s = g (f s) let ideals0 f = let rec loop_f k f s = match f with | [] -> k s | t :: f -> loop_t (loop_f k f) t s and loop_t k (Node (i,f)) s = if get i s == 0 then (k ++ update i 1 ++ loop_f k f) s else (loop_f k f ++ update i 0 ++ k) s in let s = create f in let s = msg (sprintf "Bitstrings generated from \"%s\":" (print_forest f)) s in let s = loop_f print f s in let s = msg "... and now we generate them in reverse:" s in let s = loop_f print f s in if !display then print_string (snd s) (* \newpage *) (*S Koda-Ruskey algorithm. \label{algo} The function [ideals] generates all ideals of a given forest, starting from $00\cdots0$ (all zeros). Then it generates the patterns again, in reverse order. The code works as follows: [loop_f k f] generates all the ideals of a given forest [f] and [loop_t k t] all the ideals of a given tree [t]. In both case, the function [k] is to be called in between the computation steps. Both functions [loop_f] and [loop_t] can generate the patterns in both ways, depending on the state of the bits when called. The main call is on [loop_f], with an ``empty'' function [k]. If for instance [loop_f] is called on a forest $[t_1;t_2]$, it will call itself recursively on $[t_2]$ with a continuation [k] which calls [loop_t] on $t_1$. Therefore, all patterns will be generated for $t_1$ each time a progress is made in the generation of patterns for $t_2$. [loop_t] tests the bit of the node to determine in which way patterns have to be generated. If zero, we know that all bits below are zeros; thus we call [k], we set the bit to one and we call [loop_f] on the children with the same continuation [k]. If one, we know that the patterns have to be generated in reverse order; thus we do the converse: we call [loop_f] on the children, resulting with all children being set to zero, we set the node to zero and we call [k]. *) let ideals f = let n = forest_size f in let bits = Array.create n 0 in let dump () = for i = 0 to n - 1 do printf "%d" bits.(i) done; printf "\n" in let rec loop_f k = function | [] -> k () | t :: f -> loop_t (fun () -> loop_f k f) t and loop_t k (Node (i,f)) = if bits.(i) == 0 then begin k (); bits.(i) <- 1; loop_f k f end else begin loop_f k f; bits.(i) <- 0; k () end in let k0 () = if !display then dump (); if !dot then to_dot f bits in if !display then printf "Bitstrings generated from \"%s\":\n" (print_forest f); loop_f k0 f; if !display then printf "... and now we generate them in reverse:\n"; loop_f k0 f (* \newpage *) (*S A better implementation. The above code is nice but is actually building the same closures many times. Indeed, we build the same closure each time we traverse the same tree or the same forest. The following code factorizes this closure construction. Functions [loop_f] and [loop_t] now return a function, of type [unit->unit], which generates the patterns. The main call consists in calling [loop_f] to get a function [loop] and then to call this function (first to get the patterns and then a second time to get the patterns in reverse order). Note how the recursive calls to [loop_f] in [loop_t] have been factorized, out of the closure [fun () -> ...]. Therefore, the partial evaluation [(loop_t k t)] in [loop_f] is really performing computations. This code is roughly 33\% faster than the previous one. *) let ideals2 f = let n = forest_size f in let bits = Array.create n 0 in let dump () = for i = 0 to n - 1 do printf "%d" bits.(i) done; printf "\n" in let rec loop_f k = function | [] -> k | t :: f -> loop_t (loop_f k f) t and loop_t k (Node (i,f)) = let lf = loop_f k f in fun () -> if bits.(i) == 0 then begin k (); bits.(i) <- 1; lf () end else begin lf (); bits.(i) <- 0; k () end in let k0 () = if !display then dump (); if !dot then to_dot f bits in let loop = loop_f k0 f in if !display then printf "Bitstrings generated from \"%s\":\n" (print_forest f); loop (); if !display then printf "... and now we generate them in reverse:\n"; loop () (* \newpage *) (*S A defunctionalized implementation. The above code is nice but makes many calls to unknown functions, which incurs a high speed penalty on modern processors. Instead, we represent closures using an explicit datatype. Defunctionalization is originally due to Reynolds. It has been recently studied again by Danvy and employed by Cejtin, Jagannathan and Weeks in the MLton compiler. This approach seems roughly 25\% faster than the previous one, when compiled with \verb+ocamlopt+. *) type continuation = | Continue of int * continuation * continuation (* values for [i], [k] and [k'] *) | Init let ideals3 f = let n = forest_size f in let bits = Array.create n 0 in let dump () = for i = 0 to n - 1 do printf "%d" bits.(i) done; printf "\n" in let rec loop_f k = function | [] -> k | t :: f -> loop_t (loop_f k f) t and loop_t k (Node (i,f)) = Continue (i, k, loop_f k f) in let rec exec = function | Continue (i, k, k') -> if bits.(i) == 0 then begin exec k; bits.(i) <- 1; exec k' end else begin exec k'; bits.(i) <- 0; exec k end | Init -> if !display then dump (); if !dot then to_dot f bits in let loop = loop_f Init f in if !display then printf "Bitstrings generated from \"%s\":\n" (print_forest f); exec loop; if !display then printf "... and now we generate them in reverse:\n"; exec loop (* \newpage *) (*S An ``even more defunctionalized'' implementation. It is easy to see that the execution of [loop_f] above does nothing but create exactly one [Continue] node per node in the forest. So, the work performed during this phase essentially amounts to associating two nodes, namely the nodes associated with the continuations [k] and [k'], to every node. Then, why not represent this association using two integer arrays? This is what we do below. The arrays [ak] and [ak'] associate a continuation to every node. The special value $-1$ is used to represent the initial continuation. This approach seems roughly 3\% faster than the previous one, when compiled with \verb+ocamlopt -unsafe+. *) let ideals4 f = let n = forest_size f in let bits = Array.create n 0 in let ak = Array.create n (-1) and ak' = Array.create n (-1) in let dump () = for i = 0 to n - 1 do printf "%d" bits.(i) done; printf "\n" in let rec loop_f k = function | [] -> k | t :: f -> loop_t (loop_f k f) t and loop_t k (Node (i,f)) = ak.(i) <- k; ak'.(i) <- loop_f k f; i in let rec exec = function | (-1) -> if !display then dump (); if !dot then to_dot f bits | i -> if bits.(i) == 0 then begin exec ak.(i); bits.(i) <- 1; exec ak'.(i) end else begin exec ak'.(i); bits.(i) <- 0; exec ak.(i) end in let loop = loop_f (-1) f in if !display then printf "Bitstrings generated from \"%s\":\n" (print_forest f); exec loop; if !display then printf "... and now we generate them in reverse:\n"; exec loop (* \newpage *) (*S Count. The following functions [count_f] and [count_t] compute the number of patterns, respectively for forests and trees. We use 64-bits integers to avoid overflows. *) let rec count_f f = List.fold_left (fun n t -> Int64.mul n (count_t t)) Int64.one f and count_t (Node (_,f)) = Int64.succ (count_f f) (*S Main program. The forest is given on the command line. Options may also be passed to first print the number of patterns (\verb!-count!), to select a {\tt dot} rendering (\verb!-dot!), to suppress the printings on standard output (\verb!-silent!) and/or to select the desired implementation (\verb!-algo1! and \verb!-algo2! respectively). *) let forest = ref [] let algo0 = ref false let algo1 = ref false let algo2 = ref false let algo3 = ref false let algo4 = ref false let count = ref false let _ = Arg.parse [ "-dot", Arg.Set dot, "display patterns using dot"; "-silent", Arg.Clear display, "do not display patterns on stdout"; "-count", Arg.Set count, "print number of patterns"; "-algo0", Arg.Set algo0, "use algo 0"; "-algo1", Arg.Set algo1, "use algo 1"; "-algo2", Arg.Set algo2, "use algo 2"; "-algo3", Arg.Set algo3, "use algo 3"; "-algo4", Arg.Set algo4, "use algo 4" ] (fun s -> forest := from_paren s) "usage: koda-ruskey [options] \"nested parentheses\"" let _ = if !count then begin printf "nb patterns = %s\n" (Int64.format "%d" (count_f !forest)); flush stdout end; if !algo0 then ideals0 !forest; if !algo1 then ideals !forest; if !algo2 then ideals2 !forest; if !algo3 then ideals3 !forest; if !algo4 then ideals4 !forest; flush stdout; if !dot then begin if not !algo1 && not !algo2 && not !algo3 && not !algo4 then to_dot !forest (Array.create (forest_size !forest) 0); close_out cout; ignore (Sys.command "dot -Tps -o tree.ps tree.dot"); ignore (Sys.command "gv -media automatic tree.ps") end