OCaml Hackers

(* TODO *)

This is meant to be a reusable parser for files where indentation matters. It only cares about space-based indentation.

This implementation should probably be modified to support input other than files or in_channels.

It originates from this thread on caml-list, where more information is given:

http://caml.inria.fr/pub/ml-archives/caml-list/2009/06/628f99a8b518...

(*
   indent_parser.ml 
Author: Martin Jambon, June 2009
Released into the Public Domain. Use at your own risk.
*)

(** A non-blank input line as produced by [parse_lines]: the position of its
    first non-space character, paired with [(indent, text)], where [indent]
    is the number of leading spaces and [text] is the line with those
    leading spaces removed. *)
type indent_line = Lexing.position * (int * string)

(** Tree of lines grouped by indentation, as built by [block_body].
    - [`Line (pos, text)] is a single line, without its leading spaces.
    - [`Block (pos, children)] groups consecutive lines indented more deeply
      than the enclosing level; [pos] is the position of the first line of
      the block. *)
type indent_tree =
[ `Line of (Lexing.position * string)
| `Block of (Lexing.position * indent_tree list) ]


(** [split s] separates the leading spaces of [s] from the rest of the line.

    Returns [Some (i, rest)] where [i] is the index of the first character
    that is not a space and [rest] is the suffix of [s] starting there.
    Returns [None] when [s] is empty or made only of spaces. Only the space
    character counts as indentation; a tab is treated as ordinary content. *)
let split s =
  let len = String.length s in
  let rec first_non_space i =
    if i >= len then None
    else if s.[i] = ' ' then first_non_space (i + 1)
    else Some (i, String.sub s i (len - i))
  in
  first_non_space 0

(** [parse_lines fname ic] reads [ic] line by line until end of file and
    returns, in input order, one [indent_line] per line that contains at
    least one non-space character. Empty and space-only lines are skipped
    but still counted in line numbers.

    [fname] is only recorded in the returned positions. For each kept line,
    [pos_lnum] is its 1-based line number, [pos_bol] is the channel offset
    (from [pos_in]) at the start of the line, and [pos_cnum] is that offset
    plus the indentation width. *)
let parse_lines fname ic : indent_line list =
  (* End of input is turned into [None] so the loop below stays a plain
     tail-recursive function. *)
  let next_line () = try Some (input_line ic) with End_of_file -> None in
  let rec loop lnum acc =
    (* The offset must be sampled before the line is consumed. *)
    let bol = pos_in ic in
    match next_line () with
    | None -> List.rev acc
    | Some s ->
        let lnum = lnum + 1 in
        (match split s with
         | None -> loop lnum acc
         | Some ((indent, _) as content) ->
             let pos =
               {
                 Lexing.pos_fname = fname;
                 pos_lnum = lnum;
                 pos_bol = bol;
                 pos_cnum = bol + indent;
               }
             in
             loop lnum ((pos, content) :: acc))
  in
  loop 0 []

(** [parse_lines_from_file fname] opens [fname], runs [parse_lines] on it and
    closes the channel before returning the parsed lines.

    If anything raises after the file has been opened, the channel is closed
    with [close_in_noerr] and the exception is re-raised unchanged. *)
let parse_lines_from_file fname =
  let ic = open_in fname in
  let read_and_close () =
    let parsed = parse_lines fname ic in
    close_in ic;
    parsed
  in
  try read_and_close ()
  with exn ->
    close_in_noerr ic;
    raise exn

(** [error pos msg] always raises [Failure] with a compiler-style location
    header followed by [msg].

    The reported character range runs from column 0 to the column of [pos]
    (that is, [pos_cnum - pos_bol]), which covers the indentation of the
    offending line.

    @raise Failure always. *)
let error pos msg =
  let { Lexing.pos_fname = file; pos_lnum = line; pos_bol = bol;
        pos_cnum = cnum } =
    pos
  in
  failwith
    (Printf.sprintf "File %S, line %i, characters %i-%i:\n%s"
       file line 0 (cnum - bol) msg)

(** [block_body cur_indent sub_indent cur_block l] collects the items of the
    block whose lines are indented by exactly [cur_indent] spaces.

    - [sub_indent] is the indentation of the nested blocks already seen at
      this level, or [None] if there has been none yet.
    - [cur_block] accumulates the items of this block in reverse order.
    - [l] is the list of lines still to be consumed.

    A line at [cur_indent] becomes a [`Line]. A deeper line opens a
    [`Block], which is filled by a recursive call and positioned at that
    first deeper line. A shallower line ends the block.

    Returns the items of the block in input order, together with the lines
    that were not consumed.

    @raise Failure (through [error]) when a nested block is not indented by
    the same amount as an earlier nested block of the same parent. *)
let rec block_body cur_indent sub_indent cur_block l :
    indent_tree list * indent_line list =
  match l with
  | [] -> (List.rev cur_block, [])
  | (_, (depth, _)) :: _ when depth < cur_indent ->
      (* Dedent: hand the line back to the enclosing block. *)
      (List.rev cur_block, l)
  | (pos, (depth, text)) :: rest when depth = cur_indent ->
      block_body cur_indent sub_indent (`Line (pos, text) :: cur_block) rest
  | (pos, (depth, text)) :: rest ->
      (* Deeper line: all nested blocks of one parent must share one width. *)
      (match sub_indent with
       | Some expected when depth <> expected ->
           error pos "Inconsistent indentation"
       | Some _ | None -> ());
      let children, after =
        block_body depth None [ `Line (pos, text) ] rest
      in
      block_body cur_indent (Some depth)
        (`Block (pos, children) :: cur_block)
        after


(** [parse_indentation fname] reads the file [fname] and returns its lines
    as a list of [indent_tree] items, grouped by indentation. The top level
    is the block at indentation 0.

    Exceptions raised while reading the file, and the [Failure] raised by
    [block_body] on inconsistent indentation, are propagated. *)
let parse_indentation fname =
  let tree, leftover = block_body 0 None [] (parse_lines_from_file fname) in
  (* Nothing can be shallower than indentation 0, so every line must have
     been consumed. *)
  assert (leftover = []);
  tree


(** [test ()] writes a small Python-like sample to a temporary file, parses
    it with [parse_indentation], removes the file and returns the resulting
    tree.

    The sample is indented with two spaces per level so that the parser
    actually has nested blocks to build, including a continuation line that
    is indented more deeply than its statement.

    If parsing fails with [Failure], the message is printed on stderr. In
    every failure case the temporary file is removed (best effort) and the
    original exception is re-raised. *)
let test () =
  let fname = Filename.temp_file "test" ".ind" in
  (* Used on error paths only: a failing removal must not hide the
     exception that is being propagated. *)
  let remove_quietly () = try Sys.remove fname with Sys_error _ -> () in
  try
    let oc = open_out fname in
    (try
       output_string oc "
if True:
  x = 3
  y = (2 +
    4 + 5)
else:
  x = 5
  if False:
    x = 8
    z = 2
";
       close_out oc
     with e ->
       close_out_noerr oc;
       raise e);
    let result = parse_indentation fname in
    Sys.remove fname;
    result
  with e ->
    (match e with
     | Failure msg -> Printf.eprintf "%s\n%!" msg
     | _ -> ());
    remove_quietly ();
    raise e

Share 

Add a Comment

You need to be a member of OCaml Hackers to add comments!

Join this social network

About

Martin Jambon created this social network on Ning.

Create your own social network!

Badge

Loading…

© 2009   Created by Martin Jambon on Ning.   Create Your Own Social Network

Badges  |  Report an Issue  |  Privacy  |  Terms of Service