1// (C) Copyright 2016 Jethro G. Beekman 2// 3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6// option. This file may not be copied, modified, or distributed 7// except according to those terms. 8//! Evaluating C expressions from tokens. 9//! 10//! Numerical operators are supported. All numerical values are treated as 11//! `i64` or `f64`. Type casting is not supported. `i64` are converted to 12//! `f64` when used in conjunction with a `f64`. Right shifts are always 13//! arithmetic shifts. 14//! 15//! The `sizeof` operator is not supported. 16//! 17//! String concatenation is supported, but width prefixes are ignored; all 18//! strings are treated as narrow strings. 19//! 20//! Use the `IdentifierParser` to substitute identifiers found in expressions. 21 22use std::collections::HashMap; 23use std::num::Wrapping; 24use std::ops::{ 25 AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, RemAssign, ShlAssign, 26 ShrAssign, SubAssign, 27}; 28 29use crate::literal::{self, CChar}; 30use crate::token::{Kind as TokenKind, Token}; 31use crate::ToCexprResult; 32use nom::branch::alt; 33use nom::combinator::{complete, map, map_opt}; 34use nom::multi::{fold_many0, many0, separated_list0}; 35use nom::sequence::{delimited, pair, preceded}; 36use nom::*; 37 38/// Expression parser/evaluator that supports identifiers. 39#[derive(Debug)] 40pub struct IdentifierParser<'ident> { 41 identifiers: &'ident HashMap<Vec<u8>, EvalResult>, 42} 43#[derive(Copy, Clone)] 44struct PRef<'a>(&'a IdentifierParser<'a>); 45 46/// A shorthand for the type of cexpr expression evaluation results. 47pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>; 48 49/// The result of parsing a literal or evaluating an expression. 50#[derive(Debug, Clone, PartialEq)] 51#[allow(missing_docs)] 52pub enum EvalResult { 53 Int(Wrapping<i64>), 54 Float(f64), 55 Char(CChar), 56 Str(Vec<u8>), 57 Invalid, 58} 59 60macro_rules! result_opt ( 61 (fn $n:ident: $e:ident -> $t:ty) => ( 62 #[allow(dead_code)] 63 #[allow(clippy::wrong_self_convention)] 64 fn $n(self) -> Option<$t> { 65 if let EvalResult::$e(v) = self { 66 Some(v) 67 } else { 68 None 69 } 70 } 71 ); 72); 73 74impl EvalResult { 75 result_opt!(fn as_int: Int -> Wrapping<i64>); 76 result_opt!(fn as_float: Float -> f64); 77 result_opt!(fn as_char: Char -> CChar); 78 result_opt!(fn as_str: Str -> Vec<u8>); 79 80 #[allow(clippy::wrong_self_convention)] 81 fn as_numeric(self) -> Option<EvalResult> { 82 match self { 83 EvalResult::Int(_) | EvalResult::Float(_) => Some(self), 84 _ => None, 85 } 86 } 87} 88 89impl From<Vec<u8>> for EvalResult { 90 fn from(s: Vec<u8>) -> EvalResult { 91 EvalResult::Str(s) 92 } 93} 94 95// =========================================== 96// ============= Clang tokens ================ 97// =========================================== 98 99macro_rules! exact_token ( 100 ($k:ident, $c:expr) => ({ 101 move |input: &[Token]| { 102 if input.is_empty() { 103 let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::new($c.len()))); 104 res 105 } else { 106 if input[0].kind==TokenKind::$k && &input[0].raw[..]==$c { 107 Ok((&input[1..], &input[0].raw[..])) 108 } else { 109 Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k,$c)).into())) 110 } 111 } 112 } 113 }); 114); 115 116fn identifier_token(input: &[Token]) -> CResult<'_, &[u8]> { 117 if input.is_empty() { 118 let res: CResult<'_, &[u8]> = Err(nom::Err::Incomplete(Needed::new(1))); 119 res 120 } else { 121 if input[0].kind == TokenKind::Identifier { 122 Ok((&input[1..], &input[0].raw[..])) 123 } else { 124 Err(crate::nom::Err::Error((input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into())) 125 } 126 } 127} 128 129fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { 130 exact_token!(Punctuation, c.as_bytes()) 131} 132 133fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { 134 move |input| { 135 if input.is_empty() { 136 let min = c 137 .iter() 138 .map(|opt| opt.len()) 139 .min() 140 .expect("at least one option"); 141 Err(crate::nom::Err::Incomplete(Needed::new(min))) 142 } else if input[0].kind == TokenKind::Punctuation 143 && c.iter().any(|opt| opt.as_bytes() == &input[0].raw[..]) 144 { 145 Ok((&input[1..], &input[0].raw[..])) 146 } else { 147 Err(crate::nom::Err::Error( 148 ( 149 input, 150 crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c), 151 ) 152 .into(), 153 )) 154 } 155 } 156} 157 158// ================================================== 159// ============= Numeric expressions ================ 160// ================================================== 161 162impl<'a> AddAssign<&'a EvalResult> for EvalResult { 163 fn add_assign(&mut self, rhs: &'a EvalResult) { 164 use self::EvalResult::*; 165 *self = match (&*self, rhs) { 166 (&Int(a), &Int(b)) => Int(a + b), 167 (&Float(a), &Int(b)) => Float(a + (b.0 as f64)), 168 (&Int(a), &Float(b)) => Float(a.0 as f64 + b), 169 (&Float(a), &Float(b)) => Float(a + b), 170 _ => Invalid, 171 }; 172 } 173} 174impl<'a> BitAndAssign<&'a EvalResult> for EvalResult { 175 fn bitand_assign(&mut self, rhs: &'a EvalResult) { 176 use self::EvalResult::*; 177 *self = match (&*self, rhs) { 178 (&Int(a), &Int(b)) => Int(a & b), 179 _ => Invalid, 180 }; 181 } 182} 183impl<'a> BitOrAssign<&'a EvalResult> for EvalResult { 184 fn bitor_assign(&mut self, rhs: &'a EvalResult) { 185 use self::EvalResult::*; 186 *self = match (&*self, rhs) { 187 (&Int(a), &Int(b)) => Int(a | b), 188 _ => Invalid, 189 }; 190 } 191} 192impl<'a> BitXorAssign<&'a EvalResult> for EvalResult { 193 fn bitxor_assign(&mut self, rhs: &'a EvalResult) { 194 use self::EvalResult::*; 195 *self = match (&*self, rhs) { 196 (&Int(a), &Int(b)) => Int(a ^ b), 197 _ => Invalid, 198 }; 199 } 200} 201impl<'a> DivAssign<&'a EvalResult> for EvalResult { 202 fn div_assign(&mut self, rhs: &'a EvalResult) { 203 use self::EvalResult::*; 204 *self = match (&*self, rhs) { 205 (&Int(a), &Int(b)) => Int(a / b), 206 (&Float(a), &Int(b)) => Float(a / (b.0 as f64)), 207 (&Int(a), &Float(b)) => Float(a.0 as f64 / b), 208 (&Float(a), &Float(b)) => Float(a / b), 209 _ => Invalid, 210 }; 211 } 212} 213impl<'a> MulAssign<&'a EvalResult> for EvalResult { 214 fn mul_assign(&mut self, rhs: &'a EvalResult) { 215 use self::EvalResult::*; 216 *self = match (&*self, rhs) { 217 (&Int(a), &Int(b)) => Int(a * b), 218 (&Float(a), &Int(b)) => Float(a * (b.0 as f64)), 219 (&Int(a), &Float(b)) => Float(a.0 as f64 * b), 220 (&Float(a), &Float(b)) => Float(a * b), 221 _ => Invalid, 222 }; 223 } 224} 225impl<'a> RemAssign<&'a EvalResult> for EvalResult { 226 fn rem_assign(&mut self, rhs: &'a EvalResult) { 227 use self::EvalResult::*; 228 *self = match (&*self, rhs) { 229 (&Int(a), &Int(b)) => Int(a % b), 230 (&Float(a), &Int(b)) => Float(a % (b.0 as f64)), 231 (&Int(a), &Float(b)) => Float(a.0 as f64 % b), 232 (&Float(a), &Float(b)) => Float(a % b), 233 _ => Invalid, 234 }; 235 } 236} 237impl<'a> ShlAssign<&'a EvalResult> for EvalResult { 238 fn shl_assign(&mut self, rhs: &'a EvalResult) { 239 use self::EvalResult::*; 240 *self = match (&*self, rhs) { 241 (&Int(a), &Int(b)) => Int(a << (b.0 as usize)), 242 _ => Invalid, 243 }; 244 } 245} 246impl<'a> ShrAssign<&'a EvalResult> for EvalResult { 247 fn shr_assign(&mut self, rhs: &'a EvalResult) { 248 use self::EvalResult::*; 249 *self = match (&*self, rhs) { 250 (&Int(a), &Int(b)) => Int(a >> (b.0 as usize)), 251 _ => Invalid, 252 }; 253 } 254} 255impl<'a> SubAssign<&'a EvalResult> for EvalResult { 256 fn sub_assign(&mut self, rhs: &'a EvalResult) { 257 use self::EvalResult::*; 258 *self = match (&*self, rhs) { 259 (&Int(a), &Int(b)) => Int(a - b), 260 (&Float(a), &Int(b)) => Float(a - (b.0 as f64)), 261 (&Int(a), &Float(b)) => Float(a.0 as f64 - b), 262 (&Float(a), &Float(b)) => Float(a - b), 263 _ => Invalid, 264 }; 265 } 266} 267 268fn unary_op(input: (&[u8], EvalResult)) -> Option<EvalResult> { 269 use self::EvalResult::*; 270 assert_eq!(input.0.len(), 1); 271 match (input.0[0], input.1) { 272 (b'+', i) => Some(i), 273 (b'-', Int(i)) => Some(Int(Wrapping(i.0.wrapping_neg()))), // impl Neg for Wrapping not until rust 1.10... 274 (b'-', Float(i)) => Some(Float(-i)), 275 (b'-', _) => unreachable!("non-numeric unary op"), 276 (b'~', Int(i)) => Some(Int(!i)), 277 (b'~', Float(_)) => None, 278 (b'~', _) => unreachable!("non-numeric unary op"), 279 _ => unreachable!("invalid unary op"), 280 } 281} 282 283fn numeric<I: Clone, E: nom::error::ParseError<I>, F>( 284 f: F, 285) -> impl FnMut(I) -> nom::IResult<I, EvalResult, E> 286where 287 F: FnMut(I) -> nom::IResult<I, EvalResult, E>, 288{ 289 nom::combinator::map_opt(f, EvalResult::as_numeric) 290} 291 292impl<'a> PRef<'a> { 293 fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 294 alt(( 295 delimited(p("("), |i| self.numeric_expr(i), p(")")), 296 numeric(|i| self.literal(i)), 297 numeric(|i| self.identifier(i)), 298 map_opt( 299 pair(one_of_punctuation(&["+", "-", "~"][..]), |i| self.unary(i)), 300 unary_op, 301 ), 302 ))(input) 303 } 304 305 fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 306 let (input, acc) = self.unary(input)?; 307 fold_many0( 308 pair(complete(one_of_punctuation(&["*", "/", "%"][..])), |i| { 309 self.unary(i) 310 }), 311 move || acc.clone(), 312 |mut acc, (op, val): (&[u8], EvalResult)| { 313 match op[0] as char { 314 '*' => acc *= &val, 315 '/' => acc /= &val, 316 '%' => acc %= &val, 317 _ => unreachable!(), 318 }; 319 acc 320 }, 321 )(input) 322 } 323 324 fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 325 let (input, acc) = self.mul_div_rem(input)?; 326 fold_many0( 327 pair(complete(one_of_punctuation(&["+", "-"][..])), |i| { 328 self.mul_div_rem(i) 329 }), 330 move || acc.clone(), 331 |mut acc, (op, val): (&[u8], EvalResult)| { 332 match op[0] as char { 333 '+' => acc += &val, 334 '-' => acc -= &val, 335 _ => unreachable!(), 336 }; 337 acc 338 }, 339 )(input) 340 } 341 342 fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 343 let (input, acc) = self.add_sub(input)?; 344 numeric(fold_many0( 345 pair(complete(one_of_punctuation(&["<<", ">>"][..])), |i| { 346 self.add_sub(i) 347 }), 348 move || acc.clone(), 349 |mut acc, (op, val): (&[u8], EvalResult)| { 350 match op { 351 b"<<" => acc <<= &val, 352 b">>" => acc >>= &val, 353 _ => unreachable!(), 354 }; 355 acc 356 }, 357 ))(input) 358 } 359 360 fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 361 let (input, acc) = self.shl_shr(input)?; 362 numeric(fold_many0( 363 preceded(complete(p("&")), |i| self.shl_shr(i)), 364 move || acc.clone(), 365 |mut acc, val: EvalResult| { 366 acc &= &val; 367 acc 368 }, 369 ))(input) 370 } 371 372 fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 373 let (input, acc) = self.and(input)?; 374 numeric(fold_many0( 375 preceded(complete(p("^")), |i| self.and(i)), 376 move || acc.clone(), 377 |mut acc, val: EvalResult| { 378 acc ^= &val; 379 acc 380 }, 381 ))(input) 382 } 383 384 fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 385 let (input, acc) = self.xor(input)?; 386 numeric(fold_many0( 387 preceded(complete(p("|")), |i| self.xor(i)), 388 move || acc.clone(), 389 |mut acc, val: EvalResult| { 390 acc |= &val; 391 acc 392 }, 393 ))(input) 394 } 395 396 #[inline(always)] 397 fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 398 self.or(input) 399 } 400} 401 402// ======================================================= 403// ============= Literals and identifiers ================ 404// ======================================================= 405 406impl<'a> PRef<'a> { 407 fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 408 match input.split_first() { 409 None => Err(Err::Incomplete(Needed::new(1))), 410 Some(( 411 &Token { 412 kind: TokenKind::Identifier, 413 ref raw, 414 }, 415 rest, 416 )) => { 417 if let Some(r) = self.identifiers.get(&raw[..]) { 418 Ok((rest, r.clone())) 419 } else { 420 Err(Err::Error( 421 (input, crate::ErrorKind::UnknownIdentifier).into(), 422 )) 423 } 424 } 425 Some(_) => Err(Err::Error( 426 (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(), 427 )), 428 } 429 } 430 431 fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 432 match input.split_first() { 433 None => Err(Err::Incomplete(Needed::new(1))), 434 Some(( 435 &Token { 436 kind: TokenKind::Literal, 437 ref raw, 438 }, 439 rest, 440 )) => match literal::parse(raw) { 441 Ok((_, result)) => Ok((rest, result)), 442 _ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())), 443 }, 444 Some(_) => Err(Err::Error( 445 (input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(), 446 )), 447 } 448 } 449 450 fn string(self, input: &'_ [Token]) -> CResult<'_, Vec<u8>> { 451 alt(( 452 map_opt(|i| self.literal(i), EvalResult::as_str), 453 map_opt(|i| self.identifier(i), EvalResult::as_str), 454 ))(input) 455 .to_cexpr_result() 456 } 457 458 // "string1" "string2" etc... 459 fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 460 map( 461 pair(|i| self.string(i), many0(complete(|i| self.string(i)))), 462 |(first, v)| { 463 Vec::into_iter(v) 464 .fold(first, |mut s, elem| { 465 Vec::extend_from_slice(&mut s, Vec::<u8>::as_slice(&elem)); 466 s 467 }) 468 .into() 469 }, 470 )(input) 471 .to_cexpr_result() 472 } 473 474 fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { 475 alt(( 476 |i| self.numeric_expr(i), 477 delimited(p("("), |i| self.expr(i), p(")")), 478 |i| self.concat_str(i), 479 |i| self.literal(i), 480 |i| self.identifier(i), 481 ))(input) 482 .to_cexpr_result() 483 } 484 485 fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> { 486 pair(identifier_token, |i| self.expr(i))(input) 487 } 488} 489 490impl<'a> ::std::ops::Deref for PRef<'a> { 491 type Target = IdentifierParser<'a>; 492 fn deref(&self) -> &IdentifierParser<'a> { 493 self.0 494 } 495} 496 497impl<'ident> IdentifierParser<'ident> { 498 fn as_ref(&self) -> PRef<'_> { 499 PRef(self) 500 } 501 502 /// Create a new `IdentifierParser` with a set of known identifiers. When 503 /// a known identifier is encountered during parsing, it is substituted 504 /// for the value specified. 505 pub fn new(identifiers: &HashMap<Vec<u8>, EvalResult>) -> IdentifierParser<'_> { 506 IdentifierParser { identifiers } 507 } 508 509 /// Parse and evaluate an expression of a list of tokens. 510 /// 511 /// Returns an error if the input is not a valid expression or if the token 512 /// stream contains comments, keywords or unknown identifiers. 513 pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> { 514 self.as_ref().expr(input) 515 } 516 517 /// Parse and evaluate a macro definition from a list of tokens. 518 /// 519 /// Returns the identifier for the macro and its replacement evaluated as an 520 /// expression. The input should not include `#define`. 521 /// 522 /// Returns an error if the replacement is not a valid expression, if called 523 /// on most function-like macros, or if the token stream contains comments, 524 /// keywords or unknown identifiers. 525 /// 526 /// N.B. This is intended to fail on function-like macros, but if it the 527 /// macro takes a single argument, the argument name is defined as an 528 /// identifier, and the macro otherwise parses as an expression, it will 529 /// return a result even on function-like macros. 530 /// 531 /// ```c 532 /// // will evaluate into IDENTIFIER 533 /// #define DELETE(IDENTIFIER) 534 /// // will evaluate into IDENTIFIER-3 535 /// #define NEGATIVE_THREE(IDENTIFIER) -3 536 /// ``` 537 pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> { 538 crate::assert_full_parse(self.as_ref().macro_definition(input)) 539 } 540} 541 542/// Parse and evaluate an expression of a list of tokens. 543/// 544/// Returns an error if the input is not a valid expression or if the token 545/// stream contains comments, keywords or identifiers. 546pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> { 547 IdentifierParser::new(&HashMap::new()).expr(input) 548} 549 550/// Parse and evaluate a macro definition from a list of tokens. 551/// 552/// Returns the identifier for the macro and its replacement evaluated as an 553/// expression. The input should not include `#define`. 554/// 555/// Returns an error if the replacement is not a valid expression, if called 556/// on a function-like macro, or if the token stream contains comments, 557/// keywords or identifiers. 558pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> { 559 IdentifierParser::new(&HashMap::new()).macro_definition(input) 560} 561 562/// Parse a functional macro declaration from a list of tokens. 563/// 564/// Returns the identifier for the macro and the argument list (in order). The 565/// input should not include `#define`. The actual definition is not parsed and 566/// may be obtained from the unparsed data returned. 567/// 568/// Returns an error if the input is not a functional macro or if the token 569/// stream contains comments. 570/// 571/// # Example 572/// ``` 573/// use cexpr::expr::{IdentifierParser, EvalResult, fn_macro_declaration}; 574/// use cexpr::assert_full_parse; 575/// use cexpr::token::Kind::*; 576/// use cexpr::token::Token; 577/// 578/// // #define SUFFIX(arg) arg "suffix" 579/// let tokens = vec![ 580/// (Identifier, &b"SUFFIX"[..]).into(), 581/// (Punctuation, &b"("[..]).into(), 582/// (Identifier, &b"arg"[..]).into(), 583/// (Punctuation, &b")"[..]).into(), 584/// (Identifier, &b"arg"[..]).into(), 585/// (Literal, &br#""suffix""#[..]).into(), 586/// ]; 587/// 588/// // Try to parse the functional part 589/// let (expr, (ident, args)) = fn_macro_declaration(&tokens).unwrap(); 590/// assert_eq!(ident, b"SUFFIX"); 591/// 592/// // Create dummy arguments 593/// let idents = args.into_iter().map(|arg| 594/// (arg.to_owned(), EvalResult::Str(b"test".to_vec())) 595/// ).collect(); 596/// 597/// // Evaluate the macro 598/// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap(); 599/// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec())); 600/// ``` 601pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> { 602 pair( 603 identifier_token, 604 delimited( 605 p("("), 606 separated_list0(p(","), identifier_token), 607 p(")"), 608 ), 609 )(input) 610} 611