1//! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression)
2//! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter.
3//! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees.
4
5#![cfg(feature = "alloc")]
6
7use nom::{
8  branch::alt,
9  bytes::complete::tag,
10  character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
11  combinator::{cut, map, map_res, opt},
12  error::{context, VerboseError},
13  multi::many0,
14  sequence::{delimited, preceded, terminated, tuple},
15  IResult, Parser,
16};
17
18/// We start by defining the types that define the shape of data that we want.
19/// In this case, we want something tree-like
20
21/// Starting from the most basic, we define some built-in functions that our lisp has
22#[derive(Debug, PartialEq, Clone, Copy)]
23pub enum BuiltIn {
24  Plus,
25  Minus,
26  Times,
27  Divide,
28  Equal,
29  Not,
30}
31
32/// We now wrap this type and a few other primitives into our Atom type.
33/// Remember from before that Atoms form one half of our language.
34
35#[derive(Debug, PartialEq, Clone)]
36pub enum Atom {
37  Num(i32),
38  Keyword(String),
39  Boolean(bool),
40  BuiltIn(BuiltIn),
41}
42
43/// The remaining half is Lists. We implement these as recursive Expressions.
44/// For a list of numbers, we have `'(1 2 3)`, which we'll parse to:
45/// ```
46/// Expr::Quote(vec![Expr::Constant(Atom::Num(1)),
47///                  Expr::Constant(Atom::Num(2)),
48///                  Expr::Constant(Atom::Num(3))])
49/// Quote takes an S-expression and prevents evaluation of it, making it a data
50/// structure that we can deal with programmatically. Thus any valid expression
51/// is also a valid data structure in Lisp itself.
52
53#[derive(Debug, PartialEq, Clone)]
54pub enum Expr {
55  Constant(Atom),
56  /// (func-name arg1 arg2)
57  Application(Box<Expr>, Vec<Expr>),
58  /// (if predicate do-this)
59  If(Box<Expr>, Box<Expr>),
60  /// (if predicate do-this otherwise-do-this)
61  IfElse(Box<Expr>, Box<Expr>, Box<Expr>),
62  /// '(3 (if (+ 3 3) 4 5) 7)
63  Quote(Vec<Expr>),
64}
65
66/// Continuing the trend of starting from the simplest piece and building up,
67/// we start by creating a parser for the built-in operator functions.
68fn parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
69  // one_of matches one of the characters we give it
70  let (i, t) = one_of("+-*/=")(i)?;
71
72  // because we are matching single character tokens, we can do the matching logic
73  // on the returned value
74  Ok((
75    i,
76    match t {
77      '+' => BuiltIn::Plus,
78      '-' => BuiltIn::Minus,
79      '*' => BuiltIn::Times,
80      '/' => BuiltIn::Divide,
81      '=' => BuiltIn::Equal,
82      _ => unreachable!(),
83    },
84  ))
85}
86
87fn parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
88  // alt gives us the result of first parser that succeeds, of the series of
89  // parsers we give it
90  alt((
91    parse_builtin_op,
92    // map lets us process the parsed output, in this case we know what we parsed,
93    // so we ignore the input and return the BuiltIn directly
94    map(tag("not"), |_| BuiltIn::Not),
95  ))(i)
96}
97
98/// Our boolean values are also constant, so we can do it the same way
99fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
100  alt((
101    map(tag("#t"), |_| Atom::Boolean(true)),
102    map(tag("#f"), |_| Atom::Boolean(false)),
103  ))(i)
104}
105
106/// The next easiest thing to parse are keywords.
107/// We introduce some error handling combinators: `context` for human readable errors
108/// and `cut` to prevent back-tracking.
109///
110/// Put plainly: `preceded(tag(":"), cut(alpha1))` means that once we see the `:`
111/// character, we have to see one or more alphabetic chararcters or the input is invalid.
112fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
113  map(
114    context("keyword", preceded(tag(":"), cut(alpha1))),
115    |sym_str: &str| Atom::Keyword(sym_str.to_string()),
116  )(i)
117}
118
119/// Next up is number parsing. We're keeping it simple here by accepting any number (> 1)
120/// of digits but ending the program if it doesn't fit into an i32.
121fn parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
122  alt((
123    map_res(digit1, |digit_str: &str| {
124      digit_str.parse::<i32>().map(Atom::Num)
125    }),
126    map(preceded(tag("-"), digit1), |digit_str: &str| {
127      Atom::Num(-1 * digit_str.parse::<i32>().unwrap())
128    }),
129  ))(i)
130}
131
132/// Now we take all these simple parsers and connect them.
133/// We can now parse half of our language!
134fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
135  alt((
136    parse_num,
137    parse_bool,
138    map(parse_builtin, Atom::BuiltIn),
139    parse_keyword,
140  ))(i)
141}
142
143/// We then add the Expr layer on top
144fn parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
145  map(parse_atom, |atom| Expr::Constant(atom))(i)
146}
147
148/// Before continuing, we need a helper function to parse lists.
149/// A list starts with `(` and ends with a matching `)`.
150/// By putting whitespace and newline parsing here, we can avoid having to worry about it
151/// in much of the rest of the parser.
152///
153/// Unlike the previous functions, this function doesn't take or consume input, instead it
154/// takes a parsing function and returns a new parsing function.
155fn s_exp<'a, O1, F>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>>
156where
157  F: Parser<&'a str, O1, VerboseError<&'a str>>,
158{
159  delimited(
160    char('('),
161    preceded(multispace0, inner),
162    context("closing paren", cut(preceded(multispace0, char(')')))),
163  )
164}
165
166/// We can now use our new combinator to define the rest of the `Expr`s.
167///
168/// Starting with function application, we can see how the parser mirrors our data
169/// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know
170/// that we need to parse an expression and then parse 0 or more expressions, all
171/// wrapped in an S-expression.
172///
173/// `tuple` is used to sequence parsers together, so we can translate this directly
174/// and then map over it to transform the output into an `Expr::Application`
175fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
176  let application_inner = map(tuple((parse_expr, many0(parse_expr))), |(head, tail)| {
177    Expr::Application(Box::new(head), tail)
178  });
179  // finally, we wrap it in an s-expression
180  s_exp(application_inner)(i)
181}
182
183/// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have
184/// defined `Expr::If` to have an `Option` for the else block), we parse both
185/// in a single function.
186///
187/// In fact, we define our parser as if `Expr::If` was defined with an Option in it,
188/// we have the `opt` combinator which fits very nicely here.
189fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
190  let if_inner = context(
191    "if expression",
192    map(
193      preceded(
194        // here to avoid ambiguity with other names starting with `if`, if we added
195        // variables to our language, we say that if must be terminated by at least
196        // one whitespace character
197        terminated(tag("if"), multispace1),
198        cut(tuple((parse_expr, parse_expr, opt(parse_expr)))),
199      ),
200      |(pred, true_branch, maybe_false_branch)| {
201        if let Some(false_branch) = maybe_false_branch {
202          Expr::IfElse(
203            Box::new(pred),
204            Box::new(true_branch),
205            Box::new(false_branch),
206          )
207        } else {
208          Expr::If(Box::new(pred), Box::new(true_branch))
209        }
210      },
211    ),
212  );
213  s_exp(if_inner)(i)
214}
215
216/// A quoted S-expression is list data structure.
217///
218/// This example doesn't have the symbol atom, but by adding variables and changing
219/// the definition of quote to not always be around an S-expression, we'd get them
220/// naturally.
221fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
222  // this should look very straight-forward after all we've done:
223  // we find the `'` (quote) character, use cut to say that we're unambiguously
224  // looking for an s-expression of 0 or more expressions, and then parse them
225  map(
226    context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr))))),
227    |exprs| Expr::Quote(exprs),
228  )(i)
229}
230
231/// We tie them all together again, making a top-level expression parser!
232
233fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
234  preceded(
235    multispace0,
236    alt((parse_constant, parse_application, parse_if, parse_quote)),
237  )(i)
238}
239
240/// And that's it!
241/// We can now parse our entire lisp language.
242///
243/// But in order to make it a little more interesting, we can hack together
244/// a little interpreter to take an Expr, which is really an
245/// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST),
246/// and give us something back
247
248/// To start we define a couple of helper functions
249fn get_num_from_expr(e: Expr) -> Option<i32> {
250  if let Expr::Constant(Atom::Num(n)) = e {
251    Some(n)
252  } else {
253    None
254  }
255}
256
257fn get_bool_from_expr(e: Expr) -> Option<bool> {
258  if let Expr::Constant(Atom::Boolean(b)) = e {
259    Some(b)
260  } else {
261    None
262  }
263}
264
265/// This function tries to reduce the AST.
266/// This has to return an Expression rather than an Atom because quoted s_expressions
267/// can't be reduced
268fn eval_expression(e: Expr) -> Option<Expr> {
269  match e {
270    // Constants and quoted s-expressions are our base-case
271    Expr::Constant(_) | Expr::Quote(_) => Some(e),
272    // we then recursively `eval_expression` in the context of our special forms
273    // and built-in operators
274    Expr::If(pred, true_branch) => {
275      let reduce_pred = eval_expression(*pred)?;
276      if get_bool_from_expr(reduce_pred)? {
277        eval_expression(*true_branch)
278      } else {
279        None
280      }
281    }
282    Expr::IfElse(pred, true_branch, false_branch) => {
283      let reduce_pred = eval_expression(*pred)?;
284      if get_bool_from_expr(reduce_pred)? {
285        eval_expression(*true_branch)
286      } else {
287        eval_expression(*false_branch)
288      }
289    }
290    Expr::Application(head, tail) => {
291      let reduced_head = eval_expression(*head)?;
292      let reduced_tail = tail
293        .into_iter()
294        .map(|expr| eval_expression(expr))
295        .collect::<Option<Vec<Expr>>>()?;
296      if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head {
297        Some(Expr::Constant(match bi {
298          BuiltIn::Plus => Atom::Num(
299            reduced_tail
300              .into_iter()
301              .map(get_num_from_expr)
302              .collect::<Option<Vec<i32>>>()?
303              .into_iter()
304              .sum(),
305          ),
306          BuiltIn::Times => Atom::Num(
307            reduced_tail
308              .into_iter()
309              .map(get_num_from_expr)
310              .collect::<Option<Vec<i32>>>()?
311              .into_iter()
312              .product(),
313          ),
314          BuiltIn::Equal => Atom::Boolean(
315            reduced_tail
316              .iter()
317              .zip(reduced_tail.iter().skip(1))
318              .all(|(a, b)| a == b),
319          ),
320          BuiltIn::Not => {
321            if reduced_tail.len() != 1 {
322              return None;
323            } else {
324              Atom::Boolean(!get_bool_from_expr(reduced_tail.first().cloned().unwrap())?)
325            }
326          }
327          BuiltIn::Minus => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
328            let fe = get_num_from_expr(first_elem)?;
329            reduced_tail
330              .into_iter()
331              .map(get_num_from_expr)
332              .collect::<Option<Vec<i32>>>()?
333              .into_iter()
334              .skip(1)
335              .fold(fe, |a, b| a - b)
336          } else {
337            Default::default()
338          }),
339          BuiltIn::Divide => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
340            let fe = get_num_from_expr(first_elem)?;
341            reduced_tail
342              .into_iter()
343              .map(get_num_from_expr)
344              .collect::<Option<Vec<i32>>>()?
345              .into_iter()
346              .skip(1)
347              .fold(fe, |a, b| a / b)
348          } else {
349            Default::default()
350          }),
351        }))
352      } else {
353        None
354      }
355    }
356  }
357}
358
359/// And we add one more top-level function to tie everything together, letting
360/// us call eval on a string directly
361fn eval_from_str(src: &str) -> Result<Expr, String> {
362  parse_expr(src)
363    .map_err(|e: nom::Err<VerboseError<&str>>| format!("{:#?}", e))
364    .and_then(|(_, exp)| eval_expression(exp).ok_or("Eval failed".to_string()))
365}
366
367fn main() {
368  let expression_1 = "((if (= (+ 3 (/ 9 3))
369         (* 2 3))
370     *
371     /)
372  456 123)";
373  println!(
374    "\"{}\"\nevaled gives us: {:?}",
375    expression_1,
376    eval_from_str(expression_1)
377  );
378}
379