1//! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression) 2//! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter. 3//! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees. 4 5#![cfg(feature = "alloc")] 6 7use nom::{ 8 branch::alt, 9 bytes::complete::tag, 10 character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of}, 11 combinator::{cut, map, map_res, opt}, 12 error::{context, VerboseError}, 13 multi::many0, 14 sequence::{delimited, preceded, terminated, tuple}, 15 IResult, Parser, 16}; 17 18/// We start by defining the types that define the shape of data that we want. 19/// In this case, we want something tree-like 20 21/// Starting from the most basic, we define some built-in functions that our lisp has 22#[derive(Debug, PartialEq, Clone, Copy)] 23pub enum BuiltIn { 24 Plus, 25 Minus, 26 Times, 27 Divide, 28 Equal, 29 Not, 30} 31 32/// We now wrap this type and a few other primitives into our Atom type. 33/// Remember from before that Atoms form one half of our language. 34 35#[derive(Debug, PartialEq, Clone)] 36pub enum Atom { 37 Num(i32), 38 Keyword(String), 39 Boolean(bool), 40 BuiltIn(BuiltIn), 41} 42 43/// The remaining half is Lists. We implement these as recursive Expressions. 44/// For a list of numbers, we have `'(1 2 3)`, which we'll parse to: 45/// ``` 46/// Expr::Quote(vec![Expr::Constant(Atom::Num(1)), 47/// Expr::Constant(Atom::Num(2)), 48/// Expr::Constant(Atom::Num(3))]) 49/// Quote takes an S-expression and prevents evaluation of it, making it a data 50/// structure that we can deal with programmatically. Thus any valid expression 51/// is also a valid data structure in Lisp itself. 52 53#[derive(Debug, PartialEq, Clone)] 54pub enum Expr { 55 Constant(Atom), 56 /// (func-name arg1 arg2) 57 Application(Box<Expr>, Vec<Expr>), 58 /// (if predicate do-this) 59 If(Box<Expr>, Box<Expr>), 60 /// (if predicate do-this otherwise-do-this) 61 IfElse(Box<Expr>, Box<Expr>, Box<Expr>), 62 /// '(3 (if (+ 3 3) 4 5) 7) 63 Quote(Vec<Expr>), 64} 65 66/// Continuing the trend of starting from the simplest piece and building up, 67/// we start by creating a parser for the built-in operator functions. 68fn parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> { 69 // one_of matches one of the characters we give it 70 let (i, t) = one_of("+-*/=")(i)?; 71 72 // because we are matching single character tokens, we can do the matching logic 73 // on the returned value 74 Ok(( 75 i, 76 match t { 77 '+' => BuiltIn::Plus, 78 '-' => BuiltIn::Minus, 79 '*' => BuiltIn::Times, 80 '/' => BuiltIn::Divide, 81 '=' => BuiltIn::Equal, 82 _ => unreachable!(), 83 }, 84 )) 85} 86 87fn parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> { 88 // alt gives us the result of first parser that succeeds, of the series of 89 // parsers we give it 90 alt(( 91 parse_builtin_op, 92 // map lets us process the parsed output, in this case we know what we parsed, 93 // so we ignore the input and return the BuiltIn directly 94 map(tag("not"), |_| BuiltIn::Not), 95 ))(i) 96} 97 98/// Our boolean values are also constant, so we can do it the same way 99fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { 100 alt(( 101 map(tag("#t"), |_| Atom::Boolean(true)), 102 map(tag("#f"), |_| Atom::Boolean(false)), 103 ))(i) 104} 105 106/// The next easiest thing to parse are keywords. 107/// We introduce some error handling combinators: `context` for human readable errors 108/// and `cut` to prevent back-tracking. 109/// 110/// Put plainly: `preceded(tag(":"), cut(alpha1))` means that once we see the `:` 111/// character, we have to see one or more alphabetic chararcters or the input is invalid. 112fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { 113 map( 114 context("keyword", preceded(tag(":"), cut(alpha1))), 115 |sym_str: &str| Atom::Keyword(sym_str.to_string()), 116 )(i) 117} 118 119/// Next up is number parsing. We're keeping it simple here by accepting any number (> 1) 120/// of digits but ending the program if it doesn't fit into an i32. 121fn parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { 122 alt(( 123 map_res(digit1, |digit_str: &str| { 124 digit_str.parse::<i32>().map(Atom::Num) 125 }), 126 map(preceded(tag("-"), digit1), |digit_str: &str| { 127 Atom::Num(-1 * digit_str.parse::<i32>().unwrap()) 128 }), 129 ))(i) 130} 131 132/// Now we take all these simple parsers and connect them. 133/// We can now parse half of our language! 134fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> { 135 alt(( 136 parse_num, 137 parse_bool, 138 map(parse_builtin, Atom::BuiltIn), 139 parse_keyword, 140 ))(i) 141} 142 143/// We then add the Expr layer on top 144fn parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { 145 map(parse_atom, |atom| Expr::Constant(atom))(i) 146} 147 148/// Before continuing, we need a helper function to parse lists. 149/// A list starts with `(` and ends with a matching `)`. 150/// By putting whitespace and newline parsing here, we can avoid having to worry about it 151/// in much of the rest of the parser. 152/// 153/// Unlike the previous functions, this function doesn't take or consume input, instead it 154/// takes a parsing function and returns a new parsing function. 155fn s_exp<'a, O1, F>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>> 156where 157 F: Parser<&'a str, O1, VerboseError<&'a str>>, 158{ 159 delimited( 160 char('('), 161 preceded(multispace0, inner), 162 context("closing paren", cut(preceded(multispace0, char(')')))), 163 ) 164} 165 166/// We can now use our new combinator to define the rest of the `Expr`s. 167/// 168/// Starting with function application, we can see how the parser mirrors our data 169/// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know 170/// that we need to parse an expression and then parse 0 or more expressions, all 171/// wrapped in an S-expression. 172/// 173/// `tuple` is used to sequence parsers together, so we can translate this directly 174/// and then map over it to transform the output into an `Expr::Application` 175fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { 176 let application_inner = map(tuple((parse_expr, many0(parse_expr))), |(head, tail)| { 177 Expr::Application(Box::new(head), tail) 178 }); 179 // finally, we wrap it in an s-expression 180 s_exp(application_inner)(i) 181} 182 183/// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have 184/// defined `Expr::If` to have an `Option` for the else block), we parse both 185/// in a single function. 186/// 187/// In fact, we define our parser as if `Expr::If` was defined with an Option in it, 188/// we have the `opt` combinator which fits very nicely here. 189fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { 190 let if_inner = context( 191 "if expression", 192 map( 193 preceded( 194 // here to avoid ambiguity with other names starting with `if`, if we added 195 // variables to our language, we say that if must be terminated by at least 196 // one whitespace character 197 terminated(tag("if"), multispace1), 198 cut(tuple((parse_expr, parse_expr, opt(parse_expr)))), 199 ), 200 |(pred, true_branch, maybe_false_branch)| { 201 if let Some(false_branch) = maybe_false_branch { 202 Expr::IfElse( 203 Box::new(pred), 204 Box::new(true_branch), 205 Box::new(false_branch), 206 ) 207 } else { 208 Expr::If(Box::new(pred), Box::new(true_branch)) 209 } 210 }, 211 ), 212 ); 213 s_exp(if_inner)(i) 214} 215 216/// A quoted S-expression is list data structure. 217/// 218/// This example doesn't have the symbol atom, but by adding variables and changing 219/// the definition of quote to not always be around an S-expression, we'd get them 220/// naturally. 221fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { 222 // this should look very straight-forward after all we've done: 223 // we find the `'` (quote) character, use cut to say that we're unambiguously 224 // looking for an s-expression of 0 or more expressions, and then parse them 225 map( 226 context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr))))), 227 |exprs| Expr::Quote(exprs), 228 )(i) 229} 230 231/// We tie them all together again, making a top-level expression parser! 232 233fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> { 234 preceded( 235 multispace0, 236 alt((parse_constant, parse_application, parse_if, parse_quote)), 237 )(i) 238} 239 240/// And that's it! 241/// We can now parse our entire lisp language. 242/// 243/// But in order to make it a little more interesting, we can hack together 244/// a little interpreter to take an Expr, which is really an 245/// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST), 246/// and give us something back 247 248/// To start we define a couple of helper functions 249fn get_num_from_expr(e: Expr) -> Option<i32> { 250 if let Expr::Constant(Atom::Num(n)) = e { 251 Some(n) 252 } else { 253 None 254 } 255} 256 257fn get_bool_from_expr(e: Expr) -> Option<bool> { 258 if let Expr::Constant(Atom::Boolean(b)) = e { 259 Some(b) 260 } else { 261 None 262 } 263} 264 265/// This function tries to reduce the AST. 266/// This has to return an Expression rather than an Atom because quoted s_expressions 267/// can't be reduced 268fn eval_expression(e: Expr) -> Option<Expr> { 269 match e { 270 // Constants and quoted s-expressions are our base-case 271 Expr::Constant(_) | Expr::Quote(_) => Some(e), 272 // we then recursively `eval_expression` in the context of our special forms 273 // and built-in operators 274 Expr::If(pred, true_branch) => { 275 let reduce_pred = eval_expression(*pred)?; 276 if get_bool_from_expr(reduce_pred)? { 277 eval_expression(*true_branch) 278 } else { 279 None 280 } 281 } 282 Expr::IfElse(pred, true_branch, false_branch) => { 283 let reduce_pred = eval_expression(*pred)?; 284 if get_bool_from_expr(reduce_pred)? { 285 eval_expression(*true_branch) 286 } else { 287 eval_expression(*false_branch) 288 } 289 } 290 Expr::Application(head, tail) => { 291 let reduced_head = eval_expression(*head)?; 292 let reduced_tail = tail 293 .into_iter() 294 .map(|expr| eval_expression(expr)) 295 .collect::<Option<Vec<Expr>>>()?; 296 if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head { 297 Some(Expr::Constant(match bi { 298 BuiltIn::Plus => Atom::Num( 299 reduced_tail 300 .into_iter() 301 .map(get_num_from_expr) 302 .collect::<Option<Vec<i32>>>()? 303 .into_iter() 304 .sum(), 305 ), 306 BuiltIn::Times => Atom::Num( 307 reduced_tail 308 .into_iter() 309 .map(get_num_from_expr) 310 .collect::<Option<Vec<i32>>>()? 311 .into_iter() 312 .product(), 313 ), 314 BuiltIn::Equal => Atom::Boolean( 315 reduced_tail 316 .iter() 317 .zip(reduced_tail.iter().skip(1)) 318 .all(|(a, b)| a == b), 319 ), 320 BuiltIn::Not => { 321 if reduced_tail.len() != 1 { 322 return None; 323 } else { 324 Atom::Boolean(!get_bool_from_expr(reduced_tail.first().cloned().unwrap())?) 325 } 326 } 327 BuiltIn::Minus => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() { 328 let fe = get_num_from_expr(first_elem)?; 329 reduced_tail 330 .into_iter() 331 .map(get_num_from_expr) 332 .collect::<Option<Vec<i32>>>()? 333 .into_iter() 334 .skip(1) 335 .fold(fe, |a, b| a - b) 336 } else { 337 Default::default() 338 }), 339 BuiltIn::Divide => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() { 340 let fe = get_num_from_expr(first_elem)?; 341 reduced_tail 342 .into_iter() 343 .map(get_num_from_expr) 344 .collect::<Option<Vec<i32>>>()? 345 .into_iter() 346 .skip(1) 347 .fold(fe, |a, b| a / b) 348 } else { 349 Default::default() 350 }), 351 })) 352 } else { 353 None 354 } 355 } 356 } 357} 358 359/// And we add one more top-level function to tie everything together, letting 360/// us call eval on a string directly 361fn eval_from_str(src: &str) -> Result<Expr, String> { 362 parse_expr(src) 363 .map_err(|e: nom::Err<VerboseError<&str>>| format!("{:#?}", e)) 364 .and_then(|(_, exp)| eval_expression(exp).ok_or("Eval failed".to_string())) 365} 366 367fn main() { 368 let expression_1 = "((if (= (+ 3 (/ 9 3)) 369 (* 2 3)) 370 * 371 /) 372 456 123)"; 373 println!( 374 "\"{}\"\nevaled gives us: {:?}", 375 expression_1, 376 eval_from_str(expression_1) 377 ); 378} 379