10f66f451Sopenharmony_ci/* expr.c - evaluate expression
20f66f451Sopenharmony_ci *
30f66f451Sopenharmony_ci * Copyright 2016 Google Inc.
40f66f451Sopenharmony_ci * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
50f66f451Sopenharmony_ci *
60f66f451Sopenharmony_ci * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
70f66f451Sopenharmony_ci *
80f66f451Sopenharmony_ci * The web standard is incomplete (precedence grouping missing), see:
90f66f451Sopenharmony_ci * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
100f66f451Sopenharmony_ci *
110f66f451Sopenharmony_ci * eval_expr() uses the recursive "Precedence Climbing" algorithm:
120f66f451Sopenharmony_ci *
130f66f451Sopenharmony_ci * Clarke, Keith. "The top-down parsing of expressions." University of London.
140f66f451Sopenharmony_ci * Queen Mary College. Department of Computer Science and Statistics, 1986.
150f66f451Sopenharmony_ci *
160f66f451Sopenharmony_ci * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
170f66f451Sopenharmony_ci *
180f66f451Sopenharmony_ci * Nice explanation and Python implementation:
190f66f451Sopenharmony_ci * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
200f66f451Sopenharmony_ci
210f66f451Sopenharmony_ciUSE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
220f66f451Sopenharmony_ci
230f66f451Sopenharmony_ciconfig EXPR
240f66f451Sopenharmony_ci  bool "expr"
250f66f451Sopenharmony_ci  default n
260f66f451Sopenharmony_ci  help
270f66f451Sopenharmony_ci    usage: expr ARG1 OPERATOR ARG2...
280f66f451Sopenharmony_ci
290f66f451Sopenharmony_ci    Evaluate expression and print result. For example, "expr 1 + 2".
300f66f451Sopenharmony_ci
310f66f451Sopenharmony_ci    The supported operators are (grouped from highest to lowest priority):
320f66f451Sopenharmony_ci
330f66f451Sopenharmony_ci      ( )    :    * / %    + -    != <= < >= > =    &    |
340f66f451Sopenharmony_ci
350f66f451Sopenharmony_ci    Each constant and operator must be a separate command line argument.
360f66f451Sopenharmony_ci    All operators are infix, meaning they expect a constant (or expression
370f66f451Sopenharmony_ci    that resolves to a constant) on each side of the operator. Operators of
380f66f451Sopenharmony_ci    the same priority (within each group above) are evaluated left to right.
390f66f451Sopenharmony_ci    Parentheses may be used (as separate arguments) to elevate the priority
400f66f451Sopenharmony_ci    of expressions.
410f66f451Sopenharmony_ci
420f66f451Sopenharmony_ci    Calling expr from a command shell requires a lot of \( or '*' escaping
430f66f451Sopenharmony_ci    to avoid interpreting shell control characters.
440f66f451Sopenharmony_ci
450f66f451Sopenharmony_ci    The & and | operators are logical (not bitwise) and may operate on
460f66f451Sopenharmony_ci    strings (a blank string is "false"). Comparison operators may also
470f66f451Sopenharmony_ci    operate on strings (alphabetical sort).
480f66f451Sopenharmony_ci
490f66f451Sopenharmony_ci    Constants may be strings or integers. Comparison, logical, and regex
500f66f451Sopenharmony_ci    operators may operate on strings (a blank string is "false"), other
510f66f451Sopenharmony_ci    operators require integers.
520f66f451Sopenharmony_ci*/
530f66f451Sopenharmony_ci
540f66f451Sopenharmony_ci// TODO: int overflow checking
550f66f451Sopenharmony_ci
560f66f451Sopenharmony_ci#define FOR_expr
570f66f451Sopenharmony_ci#include "toys.h"
580f66f451Sopenharmony_ci
590f66f451Sopenharmony_ciGLOBALS(
600f66f451Sopenharmony_ci  char **tok; // current token, not on the stack since recursive calls mutate it
610f66f451Sopenharmony_ci
620f66f451Sopenharmony_ci  char *refree;
630f66f451Sopenharmony_ci)
640f66f451Sopenharmony_ci
650f66f451Sopenharmony_ci// Scalar value.  If s != NULL, it's a string, otherwise it's an int.
660f66f451Sopenharmony_cistruct value {
670f66f451Sopenharmony_ci  char *s;
680f66f451Sopenharmony_ci  long long i;
690f66f451Sopenharmony_ci};
700f66f451Sopenharmony_ci
710f66f451Sopenharmony_ci// Get the value as a string.
720f66f451Sopenharmony_cichar *get_str(struct value *v)
730f66f451Sopenharmony_ci{
740f66f451Sopenharmony_ci  if (v->s) return v->s;
750f66f451Sopenharmony_ci  else return xmprintf("%lld", v->i);
760f66f451Sopenharmony_ci}
770f66f451Sopenharmony_ci
780f66f451Sopenharmony_ci// Get the value as an integer and return 1, or return 0 on error.
790f66f451Sopenharmony_ciint get_int(struct value *v, long long *ret)
800f66f451Sopenharmony_ci{
810f66f451Sopenharmony_ci  if (v->s) {
820f66f451Sopenharmony_ci    char *endp;
830f66f451Sopenharmony_ci
840f66f451Sopenharmony_ci    *ret = strtoll(v->s, &endp, 10);
850f66f451Sopenharmony_ci
860f66f451Sopenharmony_ci    if (*endp) return 0; // If endp points to NUL, all chars were converted
870f66f451Sopenharmony_ci  } else *ret = v->i;
880f66f451Sopenharmony_ci
890f66f451Sopenharmony_ci  return 1;
900f66f451Sopenharmony_ci}
910f66f451Sopenharmony_ci
920f66f451Sopenharmony_ci// Preserve the invariant that v.s is NULL when the value is an integer.
930f66f451Sopenharmony_civoid assign_int(struct value *v, long long i)
940f66f451Sopenharmony_ci{
950f66f451Sopenharmony_ci  v->i = i;
960f66f451Sopenharmony_ci  v->s = NULL;
970f66f451Sopenharmony_ci}
980f66f451Sopenharmony_ci
990f66f451Sopenharmony_ci// Check if v is 0 or the empty string.
1000f66f451Sopenharmony_cistatic int is_false(struct value *v)
1010f66f451Sopenharmony_ci{
1020f66f451Sopenharmony_ci  return get_int(v, &v->i) && !v->i;
1030f66f451Sopenharmony_ci}
1040f66f451Sopenharmony_ci
1050f66f451Sopenharmony_ci// 'ret' is filled with a string capture or int match position.
1060f66f451Sopenharmony_cistatic void re(char *target, char *pattern, struct value *ret)
1070f66f451Sopenharmony_ci{
1080f66f451Sopenharmony_ci  regex_t pat;
1090f66f451Sopenharmony_ci  regmatch_t m[2];
1100f66f451Sopenharmony_ci
1110f66f451Sopenharmony_ci  xregcomp(&pat, pattern, 0);
1120f66f451Sopenharmony_ci  // must match at pos 0
1130f66f451Sopenharmony_ci  if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
1140f66f451Sopenharmony_ci    // Return first parenthesized subexpression as string, or length of match
1150f66f451Sopenharmony_ci    if (pat.re_nsub>0) {
1160f66f451Sopenharmony_ci      ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so),
1170f66f451Sopenharmony_ci          target+m[1].rm_so);
1180f66f451Sopenharmony_ci      if (TT.refree) free(TT.refree);
1190f66f451Sopenharmony_ci      TT.refree = ret->s;
1200f66f451Sopenharmony_ci    } else assign_int(ret, m[0].rm_eo);
1210f66f451Sopenharmony_ci  } else {
1220f66f451Sopenharmony_ci    if (pat.re_nsub>0) ret->s = "";
1230f66f451Sopenharmony_ci    else assign_int(ret, 0);
1240f66f451Sopenharmony_ci  }
1250f66f451Sopenharmony_ci  regfree(&pat);
1260f66f451Sopenharmony_ci}
1270f66f451Sopenharmony_ci
1280f66f451Sopenharmony_ci// 4 different signatures of operators.  S = string, I = int, SI = string or
1290f66f451Sopenharmony_ci// int.
1300f66f451Sopenharmony_cienum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
1310f66f451Sopenharmony_ci
1320f66f451Sopenharmony_cienum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
1330f66f451Sopenharmony_ci
1340f66f451Sopenharmony_ci// operators grouped by precedence
1350f66f451Sopenharmony_cistatic struct op_def {
1360f66f451Sopenharmony_ci  char *tok;
1370f66f451Sopenharmony_ci  char prec, sig, op; // precedence, signature for type coercion, operator ID
1380f66f451Sopenharmony_ci} OPS[] = {
1390f66f451Sopenharmony_ci  // logical ops, precedence 1 and 2, signature SI_TO_SI
1400f66f451Sopenharmony_ci  {"|", 1, SI_TO_SI, OR  },
1410f66f451Sopenharmony_ci  {"&", 2, SI_TO_SI, AND },
1420f66f451Sopenharmony_ci  // comparison ops, precedence 3, signature SI_TO_I
1430f66f451Sopenharmony_ci  {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
1440f66f451Sopenharmony_ci  {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
1450f66f451Sopenharmony_ci  {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
1460f66f451Sopenharmony_ci  // arithmetic ops, precedence 4 and 5, signature I_TO_I
1470f66f451Sopenharmony_ci  {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
1480f66f451Sopenharmony_ci  {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
1490f66f451Sopenharmony_ci  // regex match, precedence 6, signature S_TO_SI
1500f66f451Sopenharmony_ci  {":", 6, S_TO_SI, RE },
1510f66f451Sopenharmony_ci  {NULL, 0, 0, 0}, // sentinel
1520f66f451Sopenharmony_ci};
1530f66f451Sopenharmony_ci
1540f66f451Sopenharmony_civoid eval_op(struct op_def *o, struct value *ret, struct value *rhs)
1550f66f451Sopenharmony_ci{
1560f66f451Sopenharmony_ci  long long a, b, x = 0; // x = a OP b for ints.
1570f66f451Sopenharmony_ci  char *s, *t; // string operands
1580f66f451Sopenharmony_ci  int cmp;
1590f66f451Sopenharmony_ci
1600f66f451Sopenharmony_ci  switch (o->sig) {
1610f66f451Sopenharmony_ci
1620f66f451Sopenharmony_ci  case SI_TO_SI:
1630f66f451Sopenharmony_ci    switch (o->op) {
1640f66f451Sopenharmony_ci    case OR:  if (is_false(ret)) *ret = *rhs; break;
1650f66f451Sopenharmony_ci    case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
1660f66f451Sopenharmony_ci    }
1670f66f451Sopenharmony_ci    break;
1680f66f451Sopenharmony_ci
1690f66f451Sopenharmony_ci  case SI_TO_I:
1700f66f451Sopenharmony_ci    if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
1710f66f451Sopenharmony_ci      cmp = a - b;
1720f66f451Sopenharmony_ci    } else { // otherwise compare both as strings
1730f66f451Sopenharmony_ci      cmp = strcmp(s = get_str(ret), t = get_str(rhs));
1740f66f451Sopenharmony_ci      if (ret->s != s) free(s);
1750f66f451Sopenharmony_ci      if (rhs->s != t) free(t);
1760f66f451Sopenharmony_ci    }
1770f66f451Sopenharmony_ci    switch (o->op) {
1780f66f451Sopenharmony_ci    case EQ:  x = cmp == 0; break;
1790f66f451Sopenharmony_ci    case NE:  x = cmp != 0; break;
1800f66f451Sopenharmony_ci    case GT:  x = cmp >  0; break;
1810f66f451Sopenharmony_ci    case GTE: x = cmp >= 0; break;
1820f66f451Sopenharmony_ci    case LT:  x = cmp <  0; break;
1830f66f451Sopenharmony_ci    case LTE: x = cmp <= 0; break;
1840f66f451Sopenharmony_ci    }
1850f66f451Sopenharmony_ci    assign_int(ret, x);
1860f66f451Sopenharmony_ci    break;
1870f66f451Sopenharmony_ci
1880f66f451Sopenharmony_ci  case I_TO_I:
1890f66f451Sopenharmony_ci    if (!get_int(ret, &a) || !get_int(rhs, &b))
1900f66f451Sopenharmony_ci      error_exit("non-integer argument");
1910f66f451Sopenharmony_ci    switch (o->op) {
1920f66f451Sopenharmony_ci    case ADD: x = a + b; break;
1930f66f451Sopenharmony_ci    case SUB: x = a - b; break;
1940f66f451Sopenharmony_ci    case MUL: x = a * b; break;
1950f66f451Sopenharmony_ci    case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
1960f66f451Sopenharmony_ci    case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
1970f66f451Sopenharmony_ci    }
1980f66f451Sopenharmony_ci    assign_int(ret, x);
1990f66f451Sopenharmony_ci    break;
2000f66f451Sopenharmony_ci
2010f66f451Sopenharmony_ci  case S_TO_SI: // op == RE
2020f66f451Sopenharmony_ci    s = get_str(ret);
2030f66f451Sopenharmony_ci    cmp = ret->s!=s; // ret overwritten by re so check now
2040f66f451Sopenharmony_ci    re(s, t = get_str(rhs), ret);
2050f66f451Sopenharmony_ci    if (cmp) free(s);
2060f66f451Sopenharmony_ci    if (rhs->s!=t) free(t);
2070f66f451Sopenharmony_ci    break;
2080f66f451Sopenharmony_ci  }
2090f66f451Sopenharmony_ci}
2100f66f451Sopenharmony_ci
2110f66f451Sopenharmony_ci// Evalute a compound expression using recursive "Precedence Climbing"
2120f66f451Sopenharmony_ci// algorithm, setting 'ret'.
2130f66f451Sopenharmony_cistatic void eval_expr(struct value *ret, int min_prec)
2140f66f451Sopenharmony_ci{
2150f66f451Sopenharmony_ci  if (!*TT.tok) error_exit("Unexpected end of input");
2160f66f451Sopenharmony_ci
2170f66f451Sopenharmony_ci  // Evaluate LHS atom, setting 'ret'.
2180f66f451Sopenharmony_ci  if (!strcmp(*TT.tok, "(")) { // parenthesized expression
2190f66f451Sopenharmony_ci    TT.tok++;  // consume (
2200f66f451Sopenharmony_ci
2210f66f451Sopenharmony_ci    eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
2220f66f451Sopenharmony_ci    if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
2230f66f451Sopenharmony_ci    if (!*TT.tok) error_exit("Expected )");
2240f66f451Sopenharmony_ci    if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
2250f66f451Sopenharmony_ci  } else ret->s = *TT.tok;  // simple literal, all values start as strings
2260f66f451Sopenharmony_ci  TT.tok++;
2270f66f451Sopenharmony_ci
2280f66f451Sopenharmony_ci  // Evaluate RHS and apply operator until precedence is too low.
2290f66f451Sopenharmony_ci  struct value rhs;
2300f66f451Sopenharmony_ci  while (*TT.tok) {
2310f66f451Sopenharmony_ci    struct op_def *o = OPS;
2320f66f451Sopenharmony_ci
2330f66f451Sopenharmony_ci    while (o->tok) { // Look up operator
2340f66f451Sopenharmony_ci      if (!strcmp(*TT.tok, o->tok)) break;
2350f66f451Sopenharmony_ci      o++;
2360f66f451Sopenharmony_ci    }
2370f66f451Sopenharmony_ci    if (!o->tok) break; // Not an operator (extra input will fail later)
2380f66f451Sopenharmony_ci    if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
2390f66f451Sopenharmony_ci    TT.tok++;
2400f66f451Sopenharmony_ci
2410f66f451Sopenharmony_ci    eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
2420f66f451Sopenharmony_ci    eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
2430f66f451Sopenharmony_ci  }
2440f66f451Sopenharmony_ci}
2450f66f451Sopenharmony_ci
2460f66f451Sopenharmony_civoid expr_main(void)
2470f66f451Sopenharmony_ci{
2480f66f451Sopenharmony_ci  struct value ret = {0};
2490f66f451Sopenharmony_ci
2500f66f451Sopenharmony_ci  toys.exitval = 2; // if exiting early, indicate error
2510f66f451Sopenharmony_ci  TT.tok = toys.optargs; // initialize global token
2520f66f451Sopenharmony_ci  eval_expr(&ret, 1);
2530f66f451Sopenharmony_ci  if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
2540f66f451Sopenharmony_ci
2550f66f451Sopenharmony_ci  if (ret.s) printf("%s\n", ret.s);
2560f66f451Sopenharmony_ci  else printf("%lld\n", ret.i);
2570f66f451Sopenharmony_ci
2580f66f451Sopenharmony_ci  toys.exitval = is_false(&ret);
2590f66f451Sopenharmony_ci
2600f66f451Sopenharmony_ci  if (TT.refree) free(TT.refree);
2610f66f451Sopenharmony_ci}
262