10f66f451Sopenharmony_ci/* tr.c - translate or delete characters
20f66f451Sopenharmony_ci *
30f66f451Sopenharmony_ci * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
40f66f451Sopenharmony_ci *
50f66f451Sopenharmony_ci * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
60f66f451Sopenharmony_ci
70f66f451Sopenharmony_ciUSE_TR(NEWTOY(tr, "^>2<1Ccsd[+cC]", TOYFLAG_USR|TOYFLAG_BIN))
80f66f451Sopenharmony_ci
90f66f451Sopenharmony_ciconfig TR
100f66f451Sopenharmony_ci  bool "tr"
110f66f451Sopenharmony_ci  default n
120f66f451Sopenharmony_ci  help
130f66f451Sopenharmony_ci    usage: tr [-cds] SET1 [SET2]
140f66f451Sopenharmony_ci
150f66f451Sopenharmony_ci    Translate, squeeze, or delete characters from stdin, writing to stdout
160f66f451Sopenharmony_ci
170f66f451Sopenharmony_ci    -c/-C  Take complement of SET1
    -d     Delete input characters found in SET1
190f66f451Sopenharmony_ci    -s     Squeeze multiple output characters of SET2 into one character
200f66f451Sopenharmony_ci*/
210f66f451Sopenharmony_ci
220f66f451Sopenharmony_ci#define FOR_tr
230f66f451Sopenharmony_ci#include "toys.h"
240f66f451Sopenharmony_ci
250f66f451Sopenharmony_ciGLOBALS(
260f66f451Sopenharmony_ci  short map[256]; //map of chars
270f66f451Sopenharmony_ci  int len1, len2;
280f66f451Sopenharmony_ci)
290f66f451Sopenharmony_ci
// Identifiers for the [:class:] names understood by find_class(). The order
// matches the name table there; class_invalid doubles as the entry count.
enum {
  class_alpha,
  class_alnum,
  class_digit,
  class_lower,
  class_upper,
  class_space,
  class_blank,
  class_punct,
  class_cntrl,
  class_xdigit,
  class_invalid
};
350f66f451Sopenharmony_ci
360f66f451Sopenharmony_cistatic void map_translation(char *set1 , char *set2)
370f66f451Sopenharmony_ci{
380f66f451Sopenharmony_ci  int i = TT.len1, k = 0;
390f66f451Sopenharmony_ci
400f66f451Sopenharmony_ci  if (toys.optflags & FLAG_d)
410f66f451Sopenharmony_ci    for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit
420f66f451Sopenharmony_ci
430f66f451Sopenharmony_ci  if (toys.optflags & FLAG_s) {
440f66f451Sopenharmony_ci    for (i = TT.len1, k = 0; i; i--, k++)
450f66f451Sopenharmony_ci      TT.map[set1[k]] = TT.map[set1[k]]|0x200;
460f66f451Sopenharmony_ci    for (i = TT.len2, k = 0; i; i--, k++)
470f66f451Sopenharmony_ci      TT.map[set2[k]] = TT.map[set2[k]]|0x200;
480f66f451Sopenharmony_ci  }
490f66f451Sopenharmony_ci  i = k = 0;
500f66f451Sopenharmony_ci  while (!(toys.optflags & FLAG_d) && set2 && TT.len1--) { //ignore set2 if -d present
510f66f451Sopenharmony_ci    TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]);
520f66f451Sopenharmony_ci    if (set2[k + 1]) k++;
530f66f451Sopenharmony_ci    i++;
540f66f451Sopenharmony_ci  }
550f66f451Sopenharmony_ci}
560f66f451Sopenharmony_ci
// Decode one backslash escape.
//
// esc_val: in, points at the character following the backslash; out, points
//          at the last character consumed, so the caller steps past the
//          escape with a single increment. For an unrecognized escape it is
//          moved back onto the backslash itself so the following character
//          is then processed as ordinary text.
//
// Returns the decoded byte (converted through char). Recognized forms are
// \xH and \xHH (hex), \O to \OOO (octal, digits 0-7), the single letter
// escapes n t e b a f v r and "\\". Anything else yields a literal backslash.
static int handle_escape_char(char **esc_val) //taken from printf
{
  char *ptr = *esc_val;
  int esc_length = 0;
  unsigned base = 0, num, result = 0;

  if (*ptr == 'x') {
    ptr++;
    esc_length++;
    base = 16;
  // Only 0-7 can start an octal escape; \8 and \9 are unrecognized.
  } else if (*ptr >= '0' && *ptr <= '7') base = 8;

  // esc_length caps the escape at 3 octal digits, or 'x' plus 2 hex digits.
  while (esc_length < 3 && base) {
    // ctype functions require a value representable as unsigned char.
    int c = tolower((unsigned char)*ptr);

    if (isdigit(c)) num = c - '0';
    else if (c >= 'a' && c <= 'f') num = c - 'a' + 10;
    else num = base; // not a digit in any base: forces the exit below

    if (num >= base) {
      // Invalid hex value eg. \xvd: no digit followed the 'x', so emit the
      // backslash and let the caller reprocess "xvd" as plain characters.
      if (base == 16 && esc_length == 1) {
        result = '\\';
        ptr--;
      }
      break;
    }
    esc_length++;
    result = (result * base) + num;
    ptr++;
  }

  if (base) {
    ptr--; // step back onto the last character that was consumed
    *esc_val = ptr;
    return (char)result;
  }

  switch (*ptr) {
    case 'n':  result = '\n'; break;
    case 't':  result = '\t'; break;
    case 'e':  result = (char)27; break;
    case 'b':  result = '\b'; break;
    case 'a':  result = '\a'; break;
    case 'f':  result = '\f'; break;
    case 'v':  result = '\v'; break;
    case 'r':  result = '\r'; break;
    case '\\': result = '\\'; break;
    default :
      result = '\\';
      ptr--; // Leave pointer on the backslash, caller increments after return.
      break;
  }
  *esc_val = ptr;
  return (char)result;
}
1100f66f451Sopenharmony_ci
1110f66f451Sopenharmony_cistatic int find_class(char *class_name)
1120f66f451Sopenharmony_ci{
1130f66f451Sopenharmony_ci  int i;
1140f66f451Sopenharmony_ci  static char *class[] = {
1150f66f451Sopenharmony_ci    "[:alpha:]","[:alnum:]","[:digit:]",
1160f66f451Sopenharmony_ci    "[:lower:]","[:upper:]","[:space:]",
1170f66f451Sopenharmony_ci    "[:blank:]","[:punct:]","[:cntrl:]",
1180f66f451Sopenharmony_ci    "[:xdigit:]","NULL"
1190f66f451Sopenharmony_ci  };
1200f66f451Sopenharmony_ci
1210f66f451Sopenharmony_ci  for (i = 0; i != class_invalid; i++) {
1220f66f451Sopenharmony_ci    if (!memcmp(class_name, class[i], (class_name[0] == 'x')?10:9)) break;
1230f66f451Sopenharmony_ci  }
1240f66f451Sopenharmony_ci  return i;
1250f66f451Sopenharmony_ci}
1260f66f451Sopenharmony_ci
1270f66f451Sopenharmony_cistatic char *expand_set(char *arg, int *len)
1280f66f451Sopenharmony_ci{
1290f66f451Sopenharmony_ci  int i = 0, j, k, size = 256;
1300f66f451Sopenharmony_ci  char *set = xzalloc(size*sizeof(char));
1310f66f451Sopenharmony_ci
1320f66f451Sopenharmony_ci  while (*arg) {
1330f66f451Sopenharmony_ci
1340f66f451Sopenharmony_ci    if (i >= size) {
1350f66f451Sopenharmony_ci      size += 256;
1360f66f451Sopenharmony_ci      set = xrealloc(set, size);
1370f66f451Sopenharmony_ci    }
1380f66f451Sopenharmony_ci    if (*arg == '\\') {
1390f66f451Sopenharmony_ci      arg++;
1400f66f451Sopenharmony_ci      set[i++] = (int)handle_escape_char(&arg);
1410f66f451Sopenharmony_ci      arg++;
1420f66f451Sopenharmony_ci      continue;
1430f66f451Sopenharmony_ci    }
1440f66f451Sopenharmony_ci    if (arg[1] == '-') {
1450f66f451Sopenharmony_ci      if (arg[2] == '\0') goto save;
1460f66f451Sopenharmony_ci      j = arg[0];
1470f66f451Sopenharmony_ci      k = arg[2];
1480f66f451Sopenharmony_ci      if (j > k) perror_exit("reverse colating order");
1490f66f451Sopenharmony_ci      while (j <= k) set[i++] = j++;
1500f66f451Sopenharmony_ci      arg += 3;
1510f66f451Sopenharmony_ci      continue;
1520f66f451Sopenharmony_ci    }
1530f66f451Sopenharmony_ci    if (arg[0] == '[' && arg[1] == ':') {
1540f66f451Sopenharmony_ci
1550f66f451Sopenharmony_ci      if ((j = find_class(arg)) == class_invalid) goto save;
1560f66f451Sopenharmony_ci
1570f66f451Sopenharmony_ci      if ((j == class_alpha) || (j == class_upper) || (j == class_alnum)) {
1580f66f451Sopenharmony_ci      for (k = 'A'; k <= 'Z'; k++) set[i++] = k;
1590f66f451Sopenharmony_ci      }
1600f66f451Sopenharmony_ci      if ((j == class_alpha) || (j == class_lower) || (j == class_alnum)) {
1610f66f451Sopenharmony_ci        for (k = 'a'; k <= 'z'; k++) set[i++] = k;
1620f66f451Sopenharmony_ci      }
1630f66f451Sopenharmony_ci      if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit)) {
1640f66f451Sopenharmony_ci        for (k = '0'; k <= '9'; k++) set[i++] = k;
1650f66f451Sopenharmony_ci      }
1660f66f451Sopenharmony_ci      if (j == class_space || j == class_blank) {
1670f66f451Sopenharmony_ci        set[i++] = '\t';
1680f66f451Sopenharmony_ci        if (j == class_space) {
1690f66f451Sopenharmony_ci          set[i++] = '\n';
1700f66f451Sopenharmony_ci          set[i++] = '\f';
1710f66f451Sopenharmony_ci          set[i++] = '\r';
1720f66f451Sopenharmony_ci          set[i++] = '\v';
1730f66f451Sopenharmony_ci        }
1740f66f451Sopenharmony_ci        set[i++] = ' ';
1750f66f451Sopenharmony_ci      }
1760f66f451Sopenharmony_ci      if (j == class_punct) {
1770f66f451Sopenharmony_ci        for (k = 0; k <= 255; k++)
1780f66f451Sopenharmony_ci          if (ispunct(k)) set[i++] = k;
1790f66f451Sopenharmony_ci      }
1800f66f451Sopenharmony_ci      if (j == class_cntrl) {
1810f66f451Sopenharmony_ci        for (k = 0; k <= 255; k++)
1820f66f451Sopenharmony_ci          if (iscntrl(k)) set[i++] = k;
1830f66f451Sopenharmony_ci      }
1840f66f451Sopenharmony_ci      if (j == class_xdigit) {
1850f66f451Sopenharmony_ci        for (k = 'A'; k <= 'F'; k++) {
1860f66f451Sopenharmony_ci          set[i + 6] = k | 0x20;
1870f66f451Sopenharmony_ci          set[i++] = k;
1880f66f451Sopenharmony_ci        }
1890f66f451Sopenharmony_ci        i += 6;
1900f66f451Sopenharmony_ci        arg += 10;
1910f66f451Sopenharmony_ci        continue;
1920f66f451Sopenharmony_ci      }
1930f66f451Sopenharmony_ci
1940f66f451Sopenharmony_ci      arg += 9; //never here for class_xdigit.
1950f66f451Sopenharmony_ci      continue;
1960f66f451Sopenharmony_ci    }
1970f66f451Sopenharmony_ci    if (arg[0] == '[' && arg[1] == '=') { //[=char=] only
1980f66f451Sopenharmony_ci      arg += 2;
1990f66f451Sopenharmony_ci      if (*arg) set[i++] = *arg;
2000f66f451Sopenharmony_ci      if (!arg[1] || arg[1] != '=' || arg[2] != ']')
2010f66f451Sopenharmony_ci        error_exit("bad equiv class");
2020f66f451Sopenharmony_ci      continue;
2030f66f451Sopenharmony_ci    }
2040f66f451Sopenharmony_cisave:
2050f66f451Sopenharmony_ci    set[i++] = *arg++;
2060f66f451Sopenharmony_ci  }
2070f66f451Sopenharmony_ci  *len = i;
2080f66f451Sopenharmony_ci  return set;
2090f66f451Sopenharmony_ci}
2100f66f451Sopenharmony_ci
2110f66f451Sopenharmony_cistatic void print_map(char *set1, char *set2)
2120f66f451Sopenharmony_ci{
2130f66f451Sopenharmony_ci  int n, src, dst, prev = -1;
2140f66f451Sopenharmony_ci
2150f66f451Sopenharmony_ci  while ((n = read(0, toybuf, sizeof(toybuf)))) {
2160f66f451Sopenharmony_ci    if (!FLAG(d) && !FLAG(s)) {
2170f66f451Sopenharmony_ci      for (dst = 0; dst < n; dst++) toybuf[dst] = TT.map[toybuf[dst]];
2180f66f451Sopenharmony_ci    } else {
2190f66f451Sopenharmony_ci      for (src = dst = 0; src < n; src++) {
2200f66f451Sopenharmony_ci        int ch = TT.map[toybuf[src]];
2210f66f451Sopenharmony_ci
2220f66f451Sopenharmony_ci        if (FLAG(d) && (ch & 0x100)) continue;
2230f66f451Sopenharmony_ci        if (FLAG(s) && ((ch & 0x200) && prev == ch)) continue;
2240f66f451Sopenharmony_ci        toybuf[dst++] = prev = ch;
2250f66f451Sopenharmony_ci      }
2260f66f451Sopenharmony_ci    }
2270f66f451Sopenharmony_ci    xwrite(1, toybuf, dst);
2280f66f451Sopenharmony_ci  }
2290f66f451Sopenharmony_ci}
2300f66f451Sopenharmony_ci
2310f66f451Sopenharmony_cistatic void do_complement(char **set)
2320f66f451Sopenharmony_ci{
2330f66f451Sopenharmony_ci  int i, j;
2340f66f451Sopenharmony_ci  char *comp = xmalloc(256);
2350f66f451Sopenharmony_ci
2360f66f451Sopenharmony_ci  for (i = 0, j = 0;i < 256; i++) {
2370f66f451Sopenharmony_ci    if (memchr(*set, i, TT.len1)) continue;
2380f66f451Sopenharmony_ci    else comp[j++] = (char)i;
2390f66f451Sopenharmony_ci  }
2400f66f451Sopenharmony_ci  free(*set);
2410f66f451Sopenharmony_ci  TT.len1 = j;
2420f66f451Sopenharmony_ci  *set = comp;
2430f66f451Sopenharmony_ci}
2440f66f451Sopenharmony_ci
2450f66f451Sopenharmony_civoid tr_main(void)
2460f66f451Sopenharmony_ci{
2470f66f451Sopenharmony_ci  char *set1, *set2 = NULL;
2480f66f451Sopenharmony_ci  int i;
2490f66f451Sopenharmony_ci
2500f66f451Sopenharmony_ci  for (i = 0; i < 256; i++) TT.map[i] = i; //init map
2510f66f451Sopenharmony_ci
2520f66f451Sopenharmony_ci  set1 = expand_set(toys.optargs[0], &TT.len1);
2530f66f451Sopenharmony_ci  if (toys.optflags & FLAG_c) do_complement(&set1);
2540f66f451Sopenharmony_ci  if (toys.optargs[1]) {
2550f66f451Sopenharmony_ci    if (toys.optargs[1][0] == '\0') error_exit("set2 can't be empty string");
2560f66f451Sopenharmony_ci    set2 = expand_set(toys.optargs[1], &TT.len2);
2570f66f451Sopenharmony_ci  }
2580f66f451Sopenharmony_ci  map_translation(set1, set2);
2590f66f451Sopenharmony_ci
2600f66f451Sopenharmony_ci  print_map(set1, set2);
2610f66f451Sopenharmony_ci  free(set1);
2620f66f451Sopenharmony_ci  free(set2);
2630f66f451Sopenharmony_ci}
264