ℹ️ Select 'Choose Exercise', or randomize 'Next Random Exercise' in selected language.

Choose Exercise:
Timer 00:00
WPM --
Score --
Acc --
Correct chars --

OCaml Functional Parser for Simple Arithmetic Expressions

OCaml

Goal -- WPM

Ready
Exercise Algorithm Area
1(* Parser for simple arithmetic expressions.
2Supports +, -, *, /, parentheses, and integers.
3Handles operator precedence and associativity.
4Uses a lexer to tokenize the input string.
5*)
6
7module Lexer =
8struct
9type token =
10| Int of int
11| Add
12| Sub
13| Mul
14| Div
15| LParen
16| RParen
17| Eof
18| Unknown of string
19
20let tokenize (input : string) : token list =
21let rec go pos acc =
22if pos >= String.length input then
23List.rev (Eof :: acc)
24else
25match input.[pos] with
26| ' ' | '\t' | '\n' | '\r' -> go (pos + 1) acc (* Skip whitespace *)
27| '+' -> go (pos + 1) (Add :: acc)
28| '-' -> go (pos + 1) (Sub :: acc)
29| '*' -> go (pos + 1) (Mul :: acc)
30| '/' -> go (pos + 1) (Div :: acc)
31| '(' -> go (pos + 1) (LParen :: acc)
32| ')' -> go (pos + 1) (RParen :: acc)
33| c when Char.is_digit c ->
34let start_pos = pos in
35let rec read_digits p =
36if p < String.length input && Char.is_digit input.[p] then
37read_digits (p + 1)
38else
39p
40in
41let end_pos = read_digits pos in
42let num_str = String.sub input start_pos (end_pos - start_pos) in
43go end_pos (Int (int_of_string num_str) :: acc)
44| c -> go (pos + 1) (Unknown (String.make 1 c) :: acc) (* Handle unknown characters *)
45in
46go 0 []
47end
48
49module Parser =
50struct
51type expr =
52| Num of int
53| Add of expr * expr
54| Sub of expr * expr
55| Mul of expr * expr
56| Div of expr * expr
57
58exception Parse_error of string
59
60let parse (tokens : Lexer.token list) : expr =
61let rec parse_expr tokens = parse_add_sub tokens
62
63and parse_add_sub tokens =
64let (left, tokens') = parse_mul_div tokens in
65parse_add_sub_cont left tokens'
66
67and parse_add_sub_cont left tokens =
68match tokens with
69| Lexer.Add :: tokens' ->
70let (right, tokens'') = parse_mul_div tokens' in
71parse_add_sub_cont (Add (left, right)) tokens''
72| Lexer.Sub :: tokens' ->
73let (right, tokens'') = parse_mul_div tokens' in
74parse_add_sub_cont (Sub (left, right)) tokens''
75| _ -> (left, tokens)
76
77and parse_mul_div tokens =
78let (left, tokens') = parse_atom tokens in
79parse_mul_div_cont left tokens'
80
81and parse_mul_div_cont left tokens =
82match tokens with
83| Lexer.Mul :: tokens' ->
84let (right, tokens'') = parse_atom tokens' in
85parse_mul_div_cont (Mul (left, right)) tokens''
86| Lexer.Div :: tokens' ->
87let (right, tokens'') = parse_atom tokens' in
88parse_mul_div_cont (Div (left, right)) tokens''
89| _ -> (left, tokens)
90
91and parse_atom tokens =
92match tokens with
93| Lexer.Int n :: tokens' -> (Num n, tokens')
94| Lexer.LParen :: tokens' ->
95let (expr, tokens'') = parse_expr tokens' in
96match tokens'' with
97| Lexer.RParen :: tokens''' -> (expr, tokens''')
98| _ -> raise (Parse_error "Mismatched parentheses")
99| Lexer.Eof :: _ -> raise (Parse_error "Unexpected end of input")
100| Lexer.Unknown s :: _ -> raise (Parse_error (Printf.sprintf "Unknown token: %s" s))
101| _ -> raise (Parse_error "Unexpected token")
102in
103let (result, remaining_tokens) = parse_expr tokens in
104match remaining_tokens with
105| [Lexer.Eof] -> result
106| _ -> raise (Parse_error "Unexpected tokens after expression")
107end
108
109let parse_expression (input : string) : Parser.expr =
110let tokens = Lexer.tokenize input in
111Parser.parse tokens
112
113(* Example usage:
114let expr_tree = parse_expression "(3 + 4) * 5"
115*)
Algorithm description viewbox

OCaml Functional Parser for Simple Arithmetic Expressions

Algorithm description:

This OCaml code implements a recursive descent parser for arithmetic expressions. It first tokenizes the input string using a simple lexer, then constructs an abstract syntax tree (AST) representing the expression. This is a fundamental technique used in compilers and interpreters to understand code structure. It's applicable in any scenario where you need to process structured text input, like configuration files or simple domain-specific languages.

Algorithm explanation:

The parser uses a top-down, recursive descent approach. The `parse_expr` function initiates the parsing process, delegating to `parse_add_sub` to handle addition and subtraction, which have lower precedence. `parse_add_sub` in turn calls `parse_mul_div` for multiplication and division, and `parse_mul_div` calls `parse_atom` for the most basic elements like numbers and parenthesized sub-expressions. This structure naturally enforces operator precedence. Associativity is handled by the recursive calls within `parse_add_sub_cont` and `parse_mul_div_cont`, which allow for sequences of the same-precedence operators. Error handling is included for malformed input, such as mismatched parentheses or unexpected tokens. The time complexity is O(N) where N is the number of tokens, as each token is processed a constant number of times. The space complexity is O(N) in the worst case due to the recursion depth and the AST construction.

Pseudocode:

function parse(tokens):
  result, remaining = parse_expression(tokens)
  if remaining contains only EOF:
    return result
  else:
    raise ParseError("Unexpected tokens")

function parse_expression(tokens):
  left, tokens' = parse_add_sub(tokens)
  return left, tokens'

function parse_add_sub(tokens):
  left, tokens' = parse_mul_div(tokens)
  return parse_add_sub_cont(left, tokens')

function parse_add_sub_cont(left, tokens):
  if tokens start with Add:
    right, tokens'' = parse_mul_div(tokens')
    return parse_add_sub_cont(Add(left, right), tokens'')
  else if tokens start with Sub:
    right, tokens'' = parse_mul_div(tokens')
    return parse_add_sub_cont(Sub(left, right), tokens'')
  else:
    return left, tokens

function parse_mul_div(tokens):
  left, tokens' = parse_atom(tokens)
  return parse_mul_div_cont(left, tokens')

function parse_mul_div_cont(left, tokens):
  if tokens start with Mul:
    right, tokens'' = parse_atom(tokens')
    return parse_mul_div_cont(Mul(left, right), tokens'')
  else if tokens start with Div:
    right, tokens'' = parse_atom(tokens')
    return parse_mul_div_cont(Div(left, right), tokens'')
  else:
    return left, tokens

function parse_atom(tokens):
  if tokens start with Int(n):
    return Num(n), tokens'
  else if tokens start with LParen:
    expr, tokens'' = parse_expression(tokens')
    if tokens'' start with RParen:
      return expr, tokens'''
    else:
      raise ParseError("Mismatched parentheses")
  else if tokens start with Eof:
    raise ParseError("Unexpected end of input")
  else:
    raise ParseError("Unexpected token")