10f66f451Sopenharmony_ci/* tr.c - translate or delete characters 20f66f451Sopenharmony_ci * 30f66f451Sopenharmony_ci * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com> 40f66f451Sopenharmony_ci * 50f66f451Sopenharmony_ci * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html 60f66f451Sopenharmony_ci 70f66f451Sopenharmony_ciUSE_TR(NEWTOY(tr, "^>2<1Ccsd[+cC]", TOYFLAG_USR|TOYFLAG_BIN)) 80f66f451Sopenharmony_ci 90f66f451Sopenharmony_ciconfig TR 100f66f451Sopenharmony_ci bool "tr" 110f66f451Sopenharmony_ci default n 120f66f451Sopenharmony_ci help 130f66f451Sopenharmony_ci usage: tr [-cds] SET1 [SET2] 140f66f451Sopenharmony_ci 150f66f451Sopenharmony_ci Translate, squeeze, or delete characters from stdin, writing to stdout 160f66f451Sopenharmony_ci 170f66f451Sopenharmony_ci -c/-C Take complement of SET1 180f66f451Sopenharmony_ci -d Delete input characters coded SET1 190f66f451Sopenharmony_ci -s Squeeze multiple output characters of SET2 into one character 200f66f451Sopenharmony_ci*/ 210f66f451Sopenharmony_ci 220f66f451Sopenharmony_ci#define FOR_tr 230f66f451Sopenharmony_ci#include "toys.h" 240f66f451Sopenharmony_ci 250f66f451Sopenharmony_ciGLOBALS( 260f66f451Sopenharmony_ci short map[256]; //map of chars 270f66f451Sopenharmony_ci int len1, len2; 280f66f451Sopenharmony_ci) 290f66f451Sopenharmony_ci 300f66f451Sopenharmony_cienum { 310f66f451Sopenharmony_ci class_alpha, class_alnum, class_digit, 320f66f451Sopenharmony_ci class_lower,class_upper,class_space,class_blank, 330f66f451Sopenharmony_ci class_punct,class_cntrl,class_xdigit,class_invalid 340f66f451Sopenharmony_ci}; 350f66f451Sopenharmony_ci 360f66f451Sopenharmony_cistatic void map_translation(char *set1 , char *set2) 370f66f451Sopenharmony_ci{ 380f66f451Sopenharmony_ci int i = TT.len1, k = 0; 390f66f451Sopenharmony_ci 400f66f451Sopenharmony_ci if (toys.optflags & FLAG_d) 410f66f451Sopenharmony_ci for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit 420f66f451Sopenharmony_ci 430f66f451Sopenharmony_ci if (toys.optflags & FLAG_s) { 440f66f451Sopenharmony_ci for (i = TT.len1, k = 0; i; i--, k++) 450f66f451Sopenharmony_ci TT.map[set1[k]] = TT.map[set1[k]]|0x200; 460f66f451Sopenharmony_ci for (i = TT.len2, k = 0; i; i--, k++) 470f66f451Sopenharmony_ci TT.map[set2[k]] = TT.map[set2[k]]|0x200; 480f66f451Sopenharmony_ci } 490f66f451Sopenharmony_ci i = k = 0; 500f66f451Sopenharmony_ci while (!(toys.optflags & FLAG_d) && set2 && TT.len1--) { //ignore set2 if -d present 510f66f451Sopenharmony_ci TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]); 520f66f451Sopenharmony_ci if (set2[k + 1]) k++; 530f66f451Sopenharmony_ci i++; 540f66f451Sopenharmony_ci } 550f66f451Sopenharmony_ci} 560f66f451Sopenharmony_ci 570f66f451Sopenharmony_cistatic int handle_escape_char(char **esc_val) //taken from printf 580f66f451Sopenharmony_ci{ 590f66f451Sopenharmony_ci char *ptr = *esc_val; 600f66f451Sopenharmony_ci int esc_length = 0; 610f66f451Sopenharmony_ci unsigned base = 0, num = 0, result = 0, count = 0; 620f66f451Sopenharmony_ci 630f66f451Sopenharmony_ci if (*ptr == 'x') { 640f66f451Sopenharmony_ci ptr++; 650f66f451Sopenharmony_ci esc_length++; 660f66f451Sopenharmony_ci base = 16; 670f66f451Sopenharmony_ci } else if (isdigit(*ptr)) base = 8; 680f66f451Sopenharmony_ci 690f66f451Sopenharmony_ci while (esc_length < 3 && base) { 700f66f451Sopenharmony_ci num = tolower(*ptr) - '0'; 710f66f451Sopenharmony_ci if (num > 10) num += ('0' - 'a' + 10); 720f66f451Sopenharmony_ci if (num >= base) { 730f66f451Sopenharmony_ci if (base == 16) { 740f66f451Sopenharmony_ci esc_length--; 750f66f451Sopenharmony_ci if (!esc_length) {// Invalid hex value eg. /xvd, print as it is /xvd 760f66f451Sopenharmony_ci result = '\\'; 770f66f451Sopenharmony_ci ptr--; 780f66f451Sopenharmony_ci } 790f66f451Sopenharmony_ci } 800f66f451Sopenharmony_ci break; 810f66f451Sopenharmony_ci } 820f66f451Sopenharmony_ci esc_length++; 830f66f451Sopenharmony_ci count = result = (count * base) + num; 840f66f451Sopenharmony_ci ptr++; 850f66f451Sopenharmony_ci } 860f66f451Sopenharmony_ci if (base) { 870f66f451Sopenharmony_ci ptr--; 880f66f451Sopenharmony_ci *esc_val = ptr; 890f66f451Sopenharmony_ci return (char)result; 900f66f451Sopenharmony_ci } else { 910f66f451Sopenharmony_ci switch (*ptr) { 920f66f451Sopenharmony_ci case 'n': result = '\n'; break; 930f66f451Sopenharmony_ci case 't': result = '\t'; break; 940f66f451Sopenharmony_ci case 'e': result = (char)27; break; 950f66f451Sopenharmony_ci case 'b': result = '\b'; break; 960f66f451Sopenharmony_ci case 'a': result = '\a'; break; 970f66f451Sopenharmony_ci case 'f': result = '\f'; break; 980f66f451Sopenharmony_ci case 'v': result = '\v'; break; 990f66f451Sopenharmony_ci case 'r': result = '\r'; break; 1000f66f451Sopenharmony_ci case '\\': result = '\\'; break; 1010f66f451Sopenharmony_ci default : 1020f66f451Sopenharmony_ci result = '\\'; 1030f66f451Sopenharmony_ci ptr--; // Let pointer pointing to / we will increment after returning. 1040f66f451Sopenharmony_ci break; 1050f66f451Sopenharmony_ci } 1060f66f451Sopenharmony_ci } 1070f66f451Sopenharmony_ci *esc_val = ptr; 1080f66f451Sopenharmony_ci return (char)result; 1090f66f451Sopenharmony_ci} 1100f66f451Sopenharmony_ci 1110f66f451Sopenharmony_cistatic int find_class(char *class_name) 1120f66f451Sopenharmony_ci{ 1130f66f451Sopenharmony_ci int i; 1140f66f451Sopenharmony_ci static char *class[] = { 1150f66f451Sopenharmony_ci "[:alpha:]","[:alnum:]","[:digit:]", 1160f66f451Sopenharmony_ci "[:lower:]","[:upper:]","[:space:]", 1170f66f451Sopenharmony_ci "[:blank:]","[:punct:]","[:cntrl:]", 1180f66f451Sopenharmony_ci "[:xdigit:]","NULL" 1190f66f451Sopenharmony_ci }; 1200f66f451Sopenharmony_ci 1210f66f451Sopenharmony_ci for (i = 0; i != class_invalid; i++) { 1220f66f451Sopenharmony_ci if (!memcmp(class_name, class[i], (class_name[0] == 'x')?10:9)) break; 1230f66f451Sopenharmony_ci } 1240f66f451Sopenharmony_ci return i; 1250f66f451Sopenharmony_ci} 1260f66f451Sopenharmony_ci 1270f66f451Sopenharmony_cistatic char *expand_set(char *arg, int *len) 1280f66f451Sopenharmony_ci{ 1290f66f451Sopenharmony_ci int i = 0, j, k, size = 256; 1300f66f451Sopenharmony_ci char *set = xzalloc(size*sizeof(char)); 1310f66f451Sopenharmony_ci 1320f66f451Sopenharmony_ci while (*arg) { 1330f66f451Sopenharmony_ci 1340f66f451Sopenharmony_ci if (i >= size) { 1350f66f451Sopenharmony_ci size += 256; 1360f66f451Sopenharmony_ci set = xrealloc(set, size); 1370f66f451Sopenharmony_ci } 1380f66f451Sopenharmony_ci if (*arg == '\\') { 1390f66f451Sopenharmony_ci arg++; 1400f66f451Sopenharmony_ci set[i++] = (int)handle_escape_char(&arg); 1410f66f451Sopenharmony_ci arg++; 1420f66f451Sopenharmony_ci continue; 1430f66f451Sopenharmony_ci } 1440f66f451Sopenharmony_ci if (arg[1] == '-') { 1450f66f451Sopenharmony_ci if (arg[2] == '\0') goto save; 1460f66f451Sopenharmony_ci j = arg[0]; 1470f66f451Sopenharmony_ci k = arg[2]; 1480f66f451Sopenharmony_ci if (j > k) perror_exit("reverse colating order"); 1490f66f451Sopenharmony_ci while (j <= k) set[i++] = j++; 1500f66f451Sopenharmony_ci arg += 3; 1510f66f451Sopenharmony_ci continue; 1520f66f451Sopenharmony_ci } 1530f66f451Sopenharmony_ci if (arg[0] == '[' && arg[1] == ':') { 1540f66f451Sopenharmony_ci 1550f66f451Sopenharmony_ci if ((j = find_class(arg)) == class_invalid) goto save; 1560f66f451Sopenharmony_ci 1570f66f451Sopenharmony_ci if ((j == class_alpha) || (j == class_upper) || (j == class_alnum)) { 1580f66f451Sopenharmony_ci for (k = 'A'; k <= 'Z'; k++) set[i++] = k; 1590f66f451Sopenharmony_ci } 1600f66f451Sopenharmony_ci if ((j == class_alpha) || (j == class_lower) || (j == class_alnum)) { 1610f66f451Sopenharmony_ci for (k = 'a'; k <= 'z'; k++) set[i++] = k; 1620f66f451Sopenharmony_ci } 1630f66f451Sopenharmony_ci if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit)) { 1640f66f451Sopenharmony_ci for (k = '0'; k <= '9'; k++) set[i++] = k; 1650f66f451Sopenharmony_ci } 1660f66f451Sopenharmony_ci if (j == class_space || j == class_blank) { 1670f66f451Sopenharmony_ci set[i++] = '\t'; 1680f66f451Sopenharmony_ci if (j == class_space) { 1690f66f451Sopenharmony_ci set[i++] = '\n'; 1700f66f451Sopenharmony_ci set[i++] = '\f'; 1710f66f451Sopenharmony_ci set[i++] = '\r'; 1720f66f451Sopenharmony_ci set[i++] = '\v'; 1730f66f451Sopenharmony_ci } 1740f66f451Sopenharmony_ci set[i++] = ' '; 1750f66f451Sopenharmony_ci } 1760f66f451Sopenharmony_ci if (j == class_punct) { 1770f66f451Sopenharmony_ci for (k = 0; k <= 255; k++) 1780f66f451Sopenharmony_ci if (ispunct(k)) set[i++] = k; 1790f66f451Sopenharmony_ci } 1800f66f451Sopenharmony_ci if (j == class_cntrl) { 1810f66f451Sopenharmony_ci for (k = 0; k <= 255; k++) 1820f66f451Sopenharmony_ci if (iscntrl(k)) set[i++] = k; 1830f66f451Sopenharmony_ci } 1840f66f451Sopenharmony_ci if (j == class_xdigit) { 1850f66f451Sopenharmony_ci for (k = 'A'; k <= 'F'; k++) { 1860f66f451Sopenharmony_ci set[i + 6] = k | 0x20; 1870f66f451Sopenharmony_ci set[i++] = k; 1880f66f451Sopenharmony_ci } 1890f66f451Sopenharmony_ci i += 6; 1900f66f451Sopenharmony_ci arg += 10; 1910f66f451Sopenharmony_ci continue; 1920f66f451Sopenharmony_ci } 1930f66f451Sopenharmony_ci 1940f66f451Sopenharmony_ci arg += 9; //never here for class_xdigit. 1950f66f451Sopenharmony_ci continue; 1960f66f451Sopenharmony_ci } 1970f66f451Sopenharmony_ci if (arg[0] == '[' && arg[1] == '=') { //[=char=] only 1980f66f451Sopenharmony_ci arg += 2; 1990f66f451Sopenharmony_ci if (*arg) set[i++] = *arg; 2000f66f451Sopenharmony_ci if (!arg[1] || arg[1] != '=' || arg[2] != ']') 2010f66f451Sopenharmony_ci error_exit("bad equiv class"); 2020f66f451Sopenharmony_ci continue; 2030f66f451Sopenharmony_ci } 2040f66f451Sopenharmony_cisave: 2050f66f451Sopenharmony_ci set[i++] = *arg++; 2060f66f451Sopenharmony_ci } 2070f66f451Sopenharmony_ci *len = i; 2080f66f451Sopenharmony_ci return set; 2090f66f451Sopenharmony_ci} 2100f66f451Sopenharmony_ci 2110f66f451Sopenharmony_cistatic void print_map(char *set1, char *set2) 2120f66f451Sopenharmony_ci{ 2130f66f451Sopenharmony_ci int n, src, dst, prev = -1; 2140f66f451Sopenharmony_ci 2150f66f451Sopenharmony_ci while ((n = read(0, toybuf, sizeof(toybuf)))) { 2160f66f451Sopenharmony_ci if (!FLAG(d) && !FLAG(s)) { 2170f66f451Sopenharmony_ci for (dst = 0; dst < n; dst++) toybuf[dst] = TT.map[toybuf[dst]]; 2180f66f451Sopenharmony_ci } else { 2190f66f451Sopenharmony_ci for (src = dst = 0; src < n; src++) { 2200f66f451Sopenharmony_ci int ch = TT.map[toybuf[src]]; 2210f66f451Sopenharmony_ci 2220f66f451Sopenharmony_ci if (FLAG(d) && (ch & 0x100)) continue; 2230f66f451Sopenharmony_ci if (FLAG(s) && ((ch & 0x200) && prev == ch)) continue; 2240f66f451Sopenharmony_ci toybuf[dst++] = prev = ch; 2250f66f451Sopenharmony_ci } 2260f66f451Sopenharmony_ci } 2270f66f451Sopenharmony_ci xwrite(1, toybuf, dst); 2280f66f451Sopenharmony_ci } 2290f66f451Sopenharmony_ci} 2300f66f451Sopenharmony_ci 2310f66f451Sopenharmony_cistatic void do_complement(char **set) 2320f66f451Sopenharmony_ci{ 2330f66f451Sopenharmony_ci int i, j; 2340f66f451Sopenharmony_ci char *comp = xmalloc(256); 2350f66f451Sopenharmony_ci 2360f66f451Sopenharmony_ci for (i = 0, j = 0;i < 256; i++) { 2370f66f451Sopenharmony_ci if (memchr(*set, i, TT.len1)) continue; 2380f66f451Sopenharmony_ci else comp[j++] = (char)i; 2390f66f451Sopenharmony_ci } 2400f66f451Sopenharmony_ci free(*set); 2410f66f451Sopenharmony_ci TT.len1 = j; 2420f66f451Sopenharmony_ci *set = comp; 2430f66f451Sopenharmony_ci} 2440f66f451Sopenharmony_ci 2450f66f451Sopenharmony_civoid tr_main(void) 2460f66f451Sopenharmony_ci{ 2470f66f451Sopenharmony_ci char *set1, *set2 = NULL; 2480f66f451Sopenharmony_ci int i; 2490f66f451Sopenharmony_ci 2500f66f451Sopenharmony_ci for (i = 0; i < 256; i++) TT.map[i] = i; //init map 2510f66f451Sopenharmony_ci 2520f66f451Sopenharmony_ci set1 = expand_set(toys.optargs[0], &TT.len1); 2530f66f451Sopenharmony_ci if (toys.optflags & FLAG_c) do_complement(&set1); 2540f66f451Sopenharmony_ci if (toys.optargs[1]) { 2550f66f451Sopenharmony_ci if (toys.optargs[1][0] == '\0') error_exit("set2 can't be empty string"); 2560f66f451Sopenharmony_ci set2 = expand_set(toys.optargs[1], &TT.len2); 2570f66f451Sopenharmony_ci } 2580f66f451Sopenharmony_ci map_translation(set1, set2); 2590f66f451Sopenharmony_ci 2600f66f451Sopenharmony_ci print_map(set1, set2); 2610f66f451Sopenharmony_ci free(set1); 2620f66f451Sopenharmony_ci free(set2); 2630f66f451Sopenharmony_ci} 264