1(* DSL parser for configuration files with error reporting.
2Supports key-value pairs and sections.
3Uses a simple parser combinator approach with custom error handling.
4*)
5
6module Parser =
7struct
8(* Basic types for configuration data *)
9type value = string
10type key = string
11type section_name = string
12
13(* Represents a parsed configuration entry *)
14type config_entry =
15| KeyValue of key * value
16| Section of section_name * config_entry list
17
18(* Error type *)
19type error =
20| UnexpectedToken of string * string (* Expected, Got *)
21| UnexpectedEOF of string (* Expected *)
22| CustomError of string
23
24(* Result type for parsing operations *)
25type ('a, 'e) result = Ok of 'a | Error of 'e
26
27(* Parser type: takes input string, returns result *)
28type 'a parser = string -> ('a, error) result
29
30(* Helper to combine results *)
31let bind (p : 'a parser) (f : 'a -> 'b parser) : 'b parser =
32fun input ->
33match p input with
34| Error e -> Error e
35| Ok (result, remaining_input) -> f result remaining_input
36
37(* Helper to return a successful result *)
38let return_ (value : 'a) : 'a parser =
39fun input -> Ok (value, input)
40
41(* --- Primitive Parsers --- *)
42
43(* Parses a specific string literal *)
44let literal (s : string) : string parser =
45fun input ->
46if String.is_prefix s input then
47Ok (s, String.drop_prefix (String.length s) input)
48else
49Error (UnexpectedToken (s, if String.length input > 0 then String.sub input 0 10 else "EOF"))
50
51(* Parses a sequence of characters matching a predicate *)
52let take_while (pred : char -> bool) : string parser =
53fun input ->
54let len = String.length input in
55let rec go i =
56if i < len && pred input.[i] then go (i + 1)
57else i
58in
59let count = go 0 in
60if count > 0 then
61Ok (String.sub input 0 count, String.drop_prefix count input)
62else
63Error (UnexpectedToken ("non-empty string", if len > 0 then String.sub input 0 10 else "EOF"))
64
65(* Parses a character that is not in a given set *)
66let not_char (c : char) : char parser =
67fun input ->
68match input with
69| [] -> Error (UnexpectedEOF (Printf.sprintf "character not '%c'" c))
70| hd :: tl ->
71if hd <> c then Ok (hd, tl)
72else Error (UnexpectedToken (Printf.sprintf "character not '%c'" c, String.make 1 hd))
73
74(* Parses a specific character *)
75let char (c : char) : char parser =
76fun input ->
77match input with
78| [] -> Error (UnexpectedEOF (String.make 1 c))
79| hd :: tl ->
80if hd = c then Ok (hd, tl)
81else Error (UnexpectedToken (String.make 1 c, String.make 1 hd))
82
83(* --- Combinators --- *)
84
85(* Sequence: parse p1 then p2 *)
86let seq (p1 : 'a parser) (p2 : 'b parser) : ('a * 'b) parser =
87fun input ->
88match p1 input with
89| Error e -> Error e
90| Ok (res1, rem1) ->
91match p2 rem1 with
92| Error e -> Error e
93| Ok (res2, rem2) -> Ok ((res1, res2), rem2)
94
95(* Choice: try p1, if it fails, try p2 *)
96let choice (p1 : 'a parser) (p2 : 'a parser) : 'a parser =
97fun input ->
98match p1 input with
99| Ok res -> Ok res
100| Error _ -> p2 input
101
102(* Many: parse p zero or more times *)
103let many (p : 'a parser) : 'a list parser =
104fun input ->
105let rec loop acc current_input =
106match p current_input with
107| Ok (res, next_input) -> loop (res :: acc) next_input
108| Error _ -> Ok (List.rev acc, current_input)
109in
110loop [] input
111
112(* Many1: parse p one or more times *)
113let many1 (p : 'a parser) : 'a list parser =
114fun input ->
115match p input with
116| Error e -> Error e
117| Ok (res, rem) ->
118let (res_list, final_rem) = (many p) rem in
119Ok (res :: res_list, final_rem)
120
121(* Skip whitespace *)
122let skip_whitespace : unit parser =
123let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' in
124let _ = many (take_while is_whitespace) in
125fun input -> Ok ((), input) (* Always succeeds, consumes whitespace *)
126
127(* --- Grammar Parsers --- *)
128
129(* Parses a key: alphanumeric characters *)
130let parse_key : key parser =
131let is_key_char c = Char.is_alphanumeric c in
132let _ = skip_whitespace in
133bind (take_while is_key_char) (fun k -> return_ k)
134
135(* Parses a value: anything until newline or end of string *)
136let parse_value : value parser =
137let is_value_char c = c <> '\n' && c <> '\r' in
138let _ = skip_whitespace in
139bind (take_while is_value_char) (fun v -> return_ v)
140
141(* Parses a key-value pair: key = value *)
142let parse_key_value : config_entry parser =
143fun input ->
144match seq parse_key (literal "=") input with
145| Error e -> Error e
146| Ok (key, rem1) ->
147match parse_value rem1 with
148| Error e -> Error e
149| Ok (value, rem2) ->
150let _ = skip_whitespace in (* Consume trailing whitespace/newline *)
151Ok (KeyValue (key, value), rem2)
152
153(* Parses a section: [section_name] *)
154let parse_section_header : section_name parser =
155fun input ->
156match seq (char '[') (take_while (fun c -> c <> ']')) input with
157| Error e -> Error e
158| Ok (_, (name, rem1)) ->
159match char ']' rem1 with
160| Error e -> Error e
161| Ok (_, rem2) -> Ok (name, rem2)
162
163(* Parses the content of a section: key-value pairs or nested sections *)
164let rec parse_section_content : config_entry list parser =
165fun input ->
166let rec loop acc current_input =
167match skip_whitespace current_input with
168| Error e -> Error e (* Should not happen if skip_whitespace is correct *)
169| Ok (_, rem_ws) ->
170match parse_section_header rem_ws with
171| Ok (section_name, rem_sec) ->
172match parse_section_content rem_sec with
173| Error e -> Error e
174| Ok (entries, rem_final) ->
175loop (Section (section_name, entries) :: acc) rem_final
176| Error _ -> (* Not a section, try key-value pair *)
177match parse_key_value rem_ws with
178| Ok (kv_entry, rem_kv) -> loop (kv_entry :: acc) rem_kv
179| Error _ -> (* If it's not a section or KV, it might be end of section or error *)
180if String.length rem_ws = 0 || String.is_prefix "[" rem_ws then
181Ok (List.rev acc, current_input) (* End of section or EOF *)
182else
183Error (CustomError "Expected key-value pair or section header")
184in
185loop [] input
186
187(* Parses the entire configuration file *)
188let parse_config : config_entry list parser =
189fun input ->
190let rec loop acc current_input =
191match skip_whitespace current_input with
192| Error e -> Error e
193| Ok (_, rem_ws) ->
194match parse_section_header rem_ws with
195| Ok (section_name, rem_sec) ->
196match parse_section_content rem_sec with
197| Error e -> Error e
198| Ok (entries, rem_final) ->
199loop (Section (section_name, entries) :: acc) rem_final
200| Error _ -> (* Not a section, try key-value pair *)
201match parse_key_value rem_ws with
202| Ok (kv_entry, rem_kv) -> loop (kv_entry :: acc) rem_kv
203| Error _ -> (* If it's not a section or KV, it might be end of file or error *)
204if String.length rem_ws = 0 then
205Ok (List.rev acc, "") (* End of file *)
206else
207Error (CustomError "Expected key-value pair or section header")
208in
209loop [] input
210
211(* --- Public API --- *)
212let parse (config_string : string) : (config_entry list, error) result =
213parse_config config_string
214
215end
216
217(* Example usage:
218let config_str = "[Database]\n host = localhost\n port = 5432\n\n[Server]\n timeout = 30\n"
219match Parser.parse config_str with
220| Ok entries -> List.iter (fun e -> Printf.printf "%A\n" e) entries
221| Error err -> Printf.printf "Error: %A\n" err
222*)