10f66f451Sopenharmony_ci/* wc.c - Word count
20f66f451Sopenharmony_ci *
30f66f451Sopenharmony_ci * Copyright 2011 Rob Landley <rob@landley.net>
40f66f451Sopenharmony_ci *
50f66f451Sopenharmony_ci * See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html
60f66f451Sopenharmony_ci
70f66f451Sopenharmony_ciUSE_WC(NEWTOY(wc, "mcwl", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
80f66f451Sopenharmony_ci
90f66f451Sopenharmony_ciconfig WC
100f66f451Sopenharmony_ci  bool "wc"
110f66f451Sopenharmony_ci  default y
120f66f451Sopenharmony_ci  help
130f66f451Sopenharmony_ci    usage: wc -lwcm [FILE...]
140f66f451Sopenharmony_ci
150f66f451Sopenharmony_ci    Count lines, words, and characters in input.
160f66f451Sopenharmony_ci
170f66f451Sopenharmony_ci    -l	Show lines
180f66f451Sopenharmony_ci    -w	Show words
190f66f451Sopenharmony_ci    -c	Show bytes
200f66f451Sopenharmony_ci    -m	Show characters
210f66f451Sopenharmony_ci
220f66f451Sopenharmony_ci    By default outputs lines, words, bytes, and filename for each
230f66f451Sopenharmony_ci    argument (or from stdin if none). Displays only either bytes
240f66f451Sopenharmony_ci    or characters.
250f66f451Sopenharmony_ci*/
260f66f451Sopenharmony_ci
270f66f451Sopenharmony_ci#define FOR_wc
280f66f451Sopenharmony_ci#include "toys.h"
290f66f451Sopenharmony_ci
GLOBALS(
  // Running sums across every row passed to show_lengths(), in the same slot
  // order do_wc() fills: [0] newlines, [1] words, [2] bytes, [3] characters.
  unsigned long totals[4];
)
330f66f451Sopenharmony_ci
340f66f451Sopenharmony_cistatic void show_lengths(unsigned long *lengths, char *name)
350f66f451Sopenharmony_ci{
360f66f451Sopenharmony_ci  int i, space = 0, first = 1;
370f66f451Sopenharmony_ci
380f66f451Sopenharmony_ci  // POSIX says there should never be leading spaces, but accepts that
390f66f451Sopenharmony_ci  // traditional implementations use 7 spaces, unless only one file (or
400f66f451Sopenharmony_ci  // just stdin) is being counted, when there should be no leading spaces,
410f66f451Sopenharmony_ci  // *except* for the case where we're going to output multiple numbers.
420f66f451Sopenharmony_ci  // And, yes, folks have test scripts that rely on all this nonsense :-(
430f66f451Sopenharmony_ci  // Note: sufficiently modern versions of coreutils wc will use the smallest
440f66f451Sopenharmony_ci  // column width necessary to have all columns be equal width rather than 0.
450f66f451Sopenharmony_ci  if (!(!toys.optc && !(toys.optflags & (toys.optflags-1))) && toys.optc!=1)
460f66f451Sopenharmony_ci    space = 7;
470f66f451Sopenharmony_ci
480f66f451Sopenharmony_ci  for (i = 0; i<4; i++) {
490f66f451Sopenharmony_ci    if (toys.optflags&(1<<i)) {
500f66f451Sopenharmony_ci      printf(" %*ld"+first, space, lengths[i]);
510f66f451Sopenharmony_ci      first = 0;
520f66f451Sopenharmony_ci    }
530f66f451Sopenharmony_ci    TT.totals[i] += lengths[i];
540f66f451Sopenharmony_ci  }
550f66f451Sopenharmony_ci  if (*toys.optargs) printf(" %s", name);
560f66f451Sopenharmony_ci  xputc('\n');
570f66f451Sopenharmony_ci}
580f66f451Sopenharmony_ci
590f66f451Sopenharmony_cistatic void do_wc(int fd, char *name)
600f66f451Sopenharmony_ci{
610f66f451Sopenharmony_ci  int len = 0, clen = 1, space = 0;
620f66f451Sopenharmony_ci  unsigned long word = 0, lengths[] = {0,0,0,0};
630f66f451Sopenharmony_ci
640f66f451Sopenharmony_ci  // Speed up common case: wc -c normalfile is file length.
650f66f451Sopenharmony_ci  if (toys.optflags == FLAG_c) {
660f66f451Sopenharmony_ci    struct stat st;
670f66f451Sopenharmony_ci
680f66f451Sopenharmony_ci    // On Linux, files in /proc often report their size as 0.
690f66f451Sopenharmony_ci    if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size) {
700f66f451Sopenharmony_ci      lengths[2] = st.st_size;
710f66f451Sopenharmony_ci      goto show;
720f66f451Sopenharmony_ci    }
730f66f451Sopenharmony_ci  }
740f66f451Sopenharmony_ci
750f66f451Sopenharmony_ci  for (;;) {
760f66f451Sopenharmony_ci    int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len);
770f66f451Sopenharmony_ci    unsigned wchar;
780f66f451Sopenharmony_ci
790f66f451Sopenharmony_ci    if (len2<0) perror_msg_raw(name);
800f66f451Sopenharmony_ci    else len += len2;
810f66f451Sopenharmony_ci    if (len2<1) done++;
820f66f451Sopenharmony_ci
830f66f451Sopenharmony_ci    for (pos = 0; pos<len; pos++) {
840f66f451Sopenharmony_ci      if (toybuf[pos]=='\n') lengths[0]++;
850f66f451Sopenharmony_ci      lengths[2]++;
860f66f451Sopenharmony_ci      if (FLAG(m)) {
870f66f451Sopenharmony_ci        // If we've consumed next wide char
880f66f451Sopenharmony_ci        if (--clen<1) {
890f66f451Sopenharmony_ci          // next wide size, don't count invalid, fetch more data if necessary
900f66f451Sopenharmony_ci          clen = utf8towc(&wchar, toybuf+pos, len-pos);
910f66f451Sopenharmony_ci          if (clen == -1) continue;
920f66f451Sopenharmony_ci          if (clen == -2 && !done) break;
930f66f451Sopenharmony_ci
940f66f451Sopenharmony_ci          lengths[3]++;
950f66f451Sopenharmony_ci          space = iswspace(wchar);
960f66f451Sopenharmony_ci        }
970f66f451Sopenharmony_ci      } else space = isspace(toybuf[pos]);
980f66f451Sopenharmony_ci
990f66f451Sopenharmony_ci      if (space) word=0;
1000f66f451Sopenharmony_ci      else {
1010f66f451Sopenharmony_ci        if (!word) lengths[1]++;
1020f66f451Sopenharmony_ci        word=1;
1030f66f451Sopenharmony_ci      }
1040f66f451Sopenharmony_ci    }
1050f66f451Sopenharmony_ci    if (done) break;
1060f66f451Sopenharmony_ci    if (pos != len) memmove(toybuf, toybuf+pos, len-pos);
1070f66f451Sopenharmony_ci    len -= pos;
1080f66f451Sopenharmony_ci  }
1090f66f451Sopenharmony_ci
1100f66f451Sopenharmony_cishow:
1110f66f451Sopenharmony_ci  show_lengths(lengths, name);
1120f66f451Sopenharmony_ci}
1130f66f451Sopenharmony_ci
1140f66f451Sopenharmony_civoid wc_main(void)
1150f66f451Sopenharmony_ci{
1160f66f451Sopenharmony_ci  if (!toys.optflags) toys.optflags = FLAG_l|FLAG_w|FLAG_c;
1170f66f451Sopenharmony_ci  loopfiles(toys.optargs, do_wc);
1180f66f451Sopenharmony_ci  if (toys.optc>1) show_lengths(TT.totals, "total");
1190f66f451Sopenharmony_ci}
120