10f66f451Sopenharmony_ci/* wc.c - Word count 20f66f451Sopenharmony_ci * 30f66f451Sopenharmony_ci * Copyright 2011 Rob Landley <rob@landley.net> 40f66f451Sopenharmony_ci * 50f66f451Sopenharmony_ci * See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html 60f66f451Sopenharmony_ci 70f66f451Sopenharmony_ciUSE_WC(NEWTOY(wc, "mcwl", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE)) 80f66f451Sopenharmony_ci 90f66f451Sopenharmony_ciconfig WC 100f66f451Sopenharmony_ci bool "wc" 110f66f451Sopenharmony_ci default y 120f66f451Sopenharmony_ci help 130f66f451Sopenharmony_ci usage: wc -lwcm [FILE...] 140f66f451Sopenharmony_ci 150f66f451Sopenharmony_ci Count lines, words, and characters in input. 160f66f451Sopenharmony_ci 170f66f451Sopenharmony_ci -l Show lines 180f66f451Sopenharmony_ci -w Show words 190f66f451Sopenharmony_ci -c Show bytes 200f66f451Sopenharmony_ci -m Show characters 210f66f451Sopenharmony_ci 220f66f451Sopenharmony_ci By default outputs lines, words, bytes, and filename for each 230f66f451Sopenharmony_ci argument (or from stdin if none). Displays only either bytes 240f66f451Sopenharmony_ci or characters. 250f66f451Sopenharmony_ci*/ 260f66f451Sopenharmony_ci 270f66f451Sopenharmony_ci#define FOR_wc 280f66f451Sopenharmony_ci#include "toys.h" 290f66f451Sopenharmony_ci 300f66f451Sopenharmony_ciGLOBALS( 310f66f451Sopenharmony_ci unsigned long totals[4]; 320f66f451Sopenharmony_ci) 330f66f451Sopenharmony_ci 340f66f451Sopenharmony_cistatic void show_lengths(unsigned long *lengths, char *name) 350f66f451Sopenharmony_ci{ 360f66f451Sopenharmony_ci int i, space = 0, first = 1; 370f66f451Sopenharmony_ci 380f66f451Sopenharmony_ci // POSIX says there should never be leading spaces, but accepts that 390f66f451Sopenharmony_ci // traditional implementations use 7 spaces, unless only one file (or 400f66f451Sopenharmony_ci // just stdin) is being counted, when there should be no leading spaces, 410f66f451Sopenharmony_ci // *except* for the case where we're going to output multiple numbers. 420f66f451Sopenharmony_ci // And, yes, folks have test scripts that rely on all this nonsense :-( 430f66f451Sopenharmony_ci // Note: sufficiently modern versions of coreutils wc will use the smallest 440f66f451Sopenharmony_ci // column width necessary to have all columns be equal width rather than 0. 450f66f451Sopenharmony_ci if (!(!toys.optc && !(toys.optflags & (toys.optflags-1))) && toys.optc!=1) 460f66f451Sopenharmony_ci space = 7; 470f66f451Sopenharmony_ci 480f66f451Sopenharmony_ci for (i = 0; i<4; i++) { 490f66f451Sopenharmony_ci if (toys.optflags&(1<<i)) { 500f66f451Sopenharmony_ci printf(" %*ld"+first, space, lengths[i]); 510f66f451Sopenharmony_ci first = 0; 520f66f451Sopenharmony_ci } 530f66f451Sopenharmony_ci TT.totals[i] += lengths[i]; 540f66f451Sopenharmony_ci } 550f66f451Sopenharmony_ci if (*toys.optargs) printf(" %s", name); 560f66f451Sopenharmony_ci xputc('\n'); 570f66f451Sopenharmony_ci} 580f66f451Sopenharmony_ci 590f66f451Sopenharmony_cistatic void do_wc(int fd, char *name) 600f66f451Sopenharmony_ci{ 610f66f451Sopenharmony_ci int len = 0, clen = 1, space = 0; 620f66f451Sopenharmony_ci unsigned long word = 0, lengths[] = {0,0,0,0}; 630f66f451Sopenharmony_ci 640f66f451Sopenharmony_ci // Speed up common case: wc -c normalfile is file length. 650f66f451Sopenharmony_ci if (toys.optflags == FLAG_c) { 660f66f451Sopenharmony_ci struct stat st; 670f66f451Sopenharmony_ci 680f66f451Sopenharmony_ci // On Linux, files in /proc often report their size as 0. 690f66f451Sopenharmony_ci if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size) { 700f66f451Sopenharmony_ci lengths[2] = st.st_size; 710f66f451Sopenharmony_ci goto show; 720f66f451Sopenharmony_ci } 730f66f451Sopenharmony_ci } 740f66f451Sopenharmony_ci 750f66f451Sopenharmony_ci for (;;) { 760f66f451Sopenharmony_ci int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len); 770f66f451Sopenharmony_ci unsigned wchar; 780f66f451Sopenharmony_ci 790f66f451Sopenharmony_ci if (len2<0) perror_msg_raw(name); 800f66f451Sopenharmony_ci else len += len2; 810f66f451Sopenharmony_ci if (len2<1) done++; 820f66f451Sopenharmony_ci 830f66f451Sopenharmony_ci for (pos = 0; pos<len; pos++) { 840f66f451Sopenharmony_ci if (toybuf[pos]=='\n') lengths[0]++; 850f66f451Sopenharmony_ci lengths[2]++; 860f66f451Sopenharmony_ci if (FLAG(m)) { 870f66f451Sopenharmony_ci // If we've consumed next wide char 880f66f451Sopenharmony_ci if (--clen<1) { 890f66f451Sopenharmony_ci // next wide size, don't count invalid, fetch more data if necessary 900f66f451Sopenharmony_ci clen = utf8towc(&wchar, toybuf+pos, len-pos); 910f66f451Sopenharmony_ci if (clen == -1) continue; 920f66f451Sopenharmony_ci if (clen == -2 && !done) break; 930f66f451Sopenharmony_ci 940f66f451Sopenharmony_ci lengths[3]++; 950f66f451Sopenharmony_ci space = iswspace(wchar); 960f66f451Sopenharmony_ci } 970f66f451Sopenharmony_ci } else space = isspace(toybuf[pos]); 980f66f451Sopenharmony_ci 990f66f451Sopenharmony_ci if (space) word=0; 1000f66f451Sopenharmony_ci else { 1010f66f451Sopenharmony_ci if (!word) lengths[1]++; 1020f66f451Sopenharmony_ci word=1; 1030f66f451Sopenharmony_ci } 1040f66f451Sopenharmony_ci } 1050f66f451Sopenharmony_ci if (done) break; 1060f66f451Sopenharmony_ci if (pos != len) memmove(toybuf, toybuf+pos, len-pos); 1070f66f451Sopenharmony_ci len -= pos; 1080f66f451Sopenharmony_ci } 1090f66f451Sopenharmony_ci 1100f66f451Sopenharmony_cishow: 1110f66f451Sopenharmony_ci show_lengths(lengths, name); 1120f66f451Sopenharmony_ci} 1130f66f451Sopenharmony_ci 1140f66f451Sopenharmony_civoid wc_main(void) 1150f66f451Sopenharmony_ci{ 1160f66f451Sopenharmony_ci if (!toys.optflags) toys.optflags = FLAG_l|FLAG_w|FLAG_c; 1170f66f451Sopenharmony_ci loopfiles(toys.optargs, do_wc); 1180f66f451Sopenharmony_ci if (toys.optc>1) show_lengths(TT.totals, "total"); 1190f66f451Sopenharmony_ci} 120