ℹ️ Select 'Choose Exercise', or randomize 'Next Random Exercise' in selected language.

Choose Exercise:
Timer 00:00
WPM --
Score --
Acc --
Correct chars --

OCaml DSL Pattern: Configuration Parser with Error Combinators

OCaml

Goal -- WPM

Ready
Exercise Algorithm Area
1(* DSL parser for configuration files with error reporting.
2Supports key-value pairs and sections.
3Uses a simple parser combinator approach with custom error handling.
4*)
5
6module Parser =
7struct
8(* Basic types for configuration data *)
9type value = string
10type key = string
11type section_name = string
12
13(* Represents a parsed configuration entry *)
14type config_entry =
15| KeyValue of key * value
16| Section of section_name * config_entry list
17
18(* Error type *)
19type error =
20| UnexpectedToken of string * string (* Expected, Got *)
21| UnexpectedEOF of string (* Expected *)
22| CustomError of string
23
24(* Result type for parsing operations *)
25type ('a, 'e) result = Ok of 'a | Error of 'e
26
27(* Parser type: takes input string, returns result *)
28type 'a parser = string -> ('a, error) result
29
30(* Helper to combine results *)
31let bind (p : 'a parser) (f : 'a -> 'b parser) : 'b parser =
32fun input ->
33match p input with
34| Error e -> Error e
35| Ok (result, remaining_input) -> f result remaining_input
36
37(* Helper to return a successful result *)
38let return_ (value : 'a) : 'a parser =
39fun input -> Ok (value, input)
40
41(* --- Primitive Parsers --- *)
42
43(* Parses a specific string literal *)
44let literal (s : string) : string parser =
45fun input ->
46if String.is_prefix s input then
47Ok (s, String.drop_prefix (String.length s) input)
48else
49Error (UnexpectedToken (s, if String.length input > 0 then String.sub input 0 10 else "EOF"))
50
51(* Parses a sequence of characters matching a predicate *)
52let take_while (pred : char -> bool) : string parser =
53fun input ->
54let len = String.length input in
55let rec go i =
56if i < len && pred input.[i] then go (i + 1)
57else i
58in
59let count = go 0 in
60if count > 0 then
61Ok (String.sub input 0 count, String.drop_prefix count input)
62else
63Error (UnexpectedToken ("non-empty string", if len > 0 then String.sub input 0 10 else "EOF"))
64
65(* Parses a character that is not in a given set *)
66let not_char (c : char) : char parser =
67fun input ->
68match input with
69| [] -> Error (UnexpectedEOF (Printf.sprintf "character not '%c'" c))
70| hd :: tl ->
71if hd <> c then Ok (hd, tl)
72else Error (UnexpectedToken (Printf.sprintf "character not '%c'" c, String.make 1 hd))
73
74(* Parses a specific character *)
75let char (c : char) : char parser =
76fun input ->
77match input with
78| [] -> Error (UnexpectedEOF (String.make 1 c))
79| hd :: tl ->
80if hd = c then Ok (hd, tl)
81else Error (UnexpectedToken (String.make 1 c, String.make 1 hd))
82
83(* --- Combinators --- *)
84
85(* Sequence: parse p1 then p2 *)
86let seq (p1 : 'a parser) (p2 : 'b parser) : ('a * 'b) parser =
87fun input ->
88match p1 input with
89| Error e -> Error e
90| Ok (res1, rem1) ->
91match p2 rem1 with
92| Error e -> Error e
93| Ok (res2, rem2) -> Ok ((res1, res2), rem2)
94
95(* Choice: try p1, if it fails, try p2 *)
96let choice (p1 : 'a parser) (p2 : 'a parser) : 'a parser =
97fun input ->
98match p1 input with
99| Ok res -> Ok res
100| Error _ -> p2 input
101
102(* Many: parse p zero or more times *)
103let many (p : 'a parser) : 'a list parser =
104fun input ->
105let rec loop acc current_input =
106match p current_input with
107| Ok (res, next_input) -> loop (res :: acc) next_input
108| Error _ -> Ok (List.rev acc, current_input)
109in
110loop [] input
111
112(* Many1: parse p one or more times *)
113let many1 (p : 'a parser) : 'a list parser =
114fun input ->
115match p input with
116| Error e -> Error e
117| Ok (res, rem) ->
118let (res_list, final_rem) = (many p) rem in
119Ok (res :: res_list, final_rem)
120
121(* Skip whitespace *)
122let skip_whitespace : unit parser =
123let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' in
124let _ = many (take_while is_whitespace) in
125fun input -> Ok ((), input) (* Always succeeds, consumes whitespace *)
126
127(* --- Grammar Parsers --- *)
128
129(* Parses a key: alphanumeric characters *)
130let parse_key : key parser =
131let is_key_char c = Char.is_alphanumeric c in
132let _ = skip_whitespace in
133bind (take_while is_key_char) (fun k -> return_ k)
134
135(* Parses a value: anything until newline or end of string *)
136let parse_value : value parser =
137let is_value_char c = c <> '\n' && c <> '\r' in
138let _ = skip_whitespace in
139bind (take_while is_value_char) (fun v -> return_ v)
140
141(* Parses a key-value pair: key = value *)
142let parse_key_value : config_entry parser =
143fun input ->
144match seq parse_key (literal "=") input with
145| Error e -> Error e
146| Ok (key, rem1) ->
147match parse_value rem1 with
148| Error e -> Error e
149| Ok (value, rem2) ->
150let _ = skip_whitespace in (* Consume trailing whitespace/newline *)
151Ok (KeyValue (key, value), rem2)
152
153(* Parses a section: [section_name] *)
154let parse_section_header : section_name parser =
155fun input ->
156match seq (char '[') (take_while (fun c -> c <> ']')) input with
157| Error e -> Error e
158| Ok (_, (name, rem1)) ->
159match char ']' rem1 with
160| Error e -> Error e
161| Ok (_, rem2) -> Ok (name, rem2)
162
163(* Parses the content of a section: key-value pairs or nested sections *)
164let rec parse_section_content : config_entry list parser =
165fun input ->
166let rec loop acc current_input =
167match skip_whitespace current_input with
168| Error e -> Error e (* Should not happen if skip_whitespace is correct *)
169| Ok (_, rem_ws) ->
170match parse_section_header rem_ws with
171| Ok (section_name, rem_sec) ->
172match parse_section_content rem_sec with
173| Error e -> Error e
174| Ok (entries, rem_final) ->
175loop (Section (section_name, entries) :: acc) rem_final
176| Error _ -> (* Not a section, try key-value pair *)
177match parse_key_value rem_ws with
178| Ok (kv_entry, rem_kv) -> loop (kv_entry :: acc) rem_kv
179| Error _ -> (* If it's not a section or KV, it might be end of section or error *)
180if String.length rem_ws = 0 || String.is_prefix "[" rem_ws then
181Ok (List.rev acc, current_input) (* End of section or EOF *)
182else
183Error (CustomError "Expected key-value pair or section header")
184in
185loop [] input
186
187(* Parses the entire configuration file *)
188let parse_config : config_entry list parser =
189fun input ->
190let rec loop acc current_input =
191match skip_whitespace current_input with
192| Error e -> Error e
193| Ok (_, rem_ws) ->
194match parse_section_header rem_ws with
195| Ok (section_name, rem_sec) ->
196match parse_section_content rem_sec with
197| Error e -> Error e
198| Ok (entries, rem_final) ->
199loop (Section (section_name, entries) :: acc) rem_final
200| Error _ -> (* Not a section, try key-value pair *)
201match parse_key_value rem_ws with
202| Ok (kv_entry, rem_kv) -> loop (kv_entry :: acc) rem_kv
203| Error _ -> (* If it's not a section or KV, it might be end of file or error *)
204if String.length rem_ws = 0 then
205Ok (List.rev acc, "") (* End of file *)
206else
207Error (CustomError "Expected key-value pair or section header")
208in
209loop [] input
210
211(* --- Public API --- *)
212let parse (config_string : string) : (config_entry list, error) result =
213parse_config config_string
214
215end
216
217(* Example usage:
218let config_str = "[Database]\n host = localhost\n port = 5432\n\n[Server]\n timeout = 30\n"
219match Parser.parse config_str with
220| Ok entries -> List.iter (fun e -> Printf.printf "%A\n" e) entries
221| Error err -> Printf.printf "Error: %A\n" err
222*)
Algorithm description viewbox

OCaml DSL Pattern: Configuration Parser with Error Combinators

Algorithm description:

This OCaml code implements a parser for a simple configuration file DSL using a combinator-style approach. It defines parsers for basic elements like keys, values, and section headers, and combines them to parse the overall structure. Crucially, it incorporates error handling using a custom `result` type and specific `error` constructors, providing informative messages for syntax errors. This DSL pattern is widely used for creating flexible and readable configuration systems, scripting languages, and query interfaces, where clear error reporting is essential for user experience.

Algorithm explanation:

The parser is built using a set of primitive parsers (e.g., `literal`, `take_while`, `char`) and combinators (`seq`, `choice`, `many`, `many1`) that build more complex parsers. Each parser takes an input string and returns a `result` type: either `Ok (parsed_value, remaining_input)` or `Error error_details`. The `error` type includes specific messages like `UnexpectedToken` and `UnexpectedEOF`, aiding in debugging. The `parse_config` function orchestrates the parsing of sections and key-value pairs. The `skip_whitespace` parser is used throughout to ignore irrelevant characters. The time complexity is roughly O(N*M) in the worst case for naive combinators, where N is the input length and M is the number of parsers, but with optimizations and careful design, it approaches O(N). Space complexity is O(N) for intermediate string manipulations and the resulting AST. Edge cases include empty input, malformed keys/values, missing delimiters, and incorrect section nesting.

Pseudocode:

define result = Ok(value, remaining_input) | Error(error_details)
define error = UnexpectedToken(expected, got) | UnexpectedEOF(expected) | CustomError(message)
define parser = input_string -> result

// Primitive parsers
function literal(s): returns parser that matches string s
function take_while(predicate): returns parser that matches chars satisfying predicate
function char(c): returns parser that matches character c

// Combinators
function seq(p1, p2): returns parser that runs p1 then p2, returning (res1, res2)
function choice(p1, p2): returns parser that tries p1, then p2 if p1 fails
function many(p): returns parser that runs p zero or more times, returning list
function many1(p): returns parser that runs p one or more times, returning list

// Grammar parsers
function skip_whitespace(): consumes whitespace
function parse_key(): parses an alphanumeric key
function parse_value(): parses a value until newline
function parse_key_value(): parses "key = value"
function parse_section_header(): parses "[section_name]"
function parse_section_content(): parses list of entries within a section
function parse_config(): parses the entire configuration file

function parse(config_string):
  return parse_config(config_string)