xref: /third_party/toybox/toys/posix/uniq.c (revision 0f66f451)
/* uniq.c - report or filter out repeated lines in a file
 *
 * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
 *
 * See http://opengroup.org/onlinepubs/9699919799/utilities/uniq.html

USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_USR|TOYFLAG_BIN))

config UNIQ
  bool "uniq"
  default y
  help
    usage: uniq [-cduiz] [-w MAXCHARS] [-f FIELDS] [-s CHAR] [INFILE [OUTFILE]]

    Report or filter out repeated lines in a file

    -c	Show counts before each line
    -d	Show only lines that are repeated
    -u	Show only lines that are unique
    -i	Ignore case when comparing lines
    -z	Lines end with \0 not \n
    -w	Compare maximum X chars per line
    -f	Ignore first X fields
    -s	Ignore first X chars
*/
260f66f451Sopenharmony_ci
270f66f451Sopenharmony_ci#define FOR_uniq
280f66f451Sopenharmony_ci#include "toys.h"
290f66f451Sopenharmony_ci
300f66f451Sopenharmony_ciGLOBALS(
310f66f451Sopenharmony_ci  long w, s, f;
320f66f451Sopenharmony_ci
330f66f451Sopenharmony_ci  long repeats;
340f66f451Sopenharmony_ci)
350f66f451Sopenharmony_ci
360f66f451Sopenharmony_cistatic char *skip(char *str)
370f66f451Sopenharmony_ci{
380f66f451Sopenharmony_ci  long nchars = TT.s, nfields = TT.f;
390f66f451Sopenharmony_ci
400f66f451Sopenharmony_ci  // Skip fields first
410f66f451Sopenharmony_ci  while (nfields--) {
420f66f451Sopenharmony_ci    while (*str && isspace(*str)) str++;
430f66f451Sopenharmony_ci    while (*str && !isspace(*str)) str++;
440f66f451Sopenharmony_ci  }
450f66f451Sopenharmony_ci  // Skip chars
460f66f451Sopenharmony_ci  while (*str && nchars--) str++;
470f66f451Sopenharmony_ci
480f66f451Sopenharmony_ci  return str;
490f66f451Sopenharmony_ci}
500f66f451Sopenharmony_ci
510f66f451Sopenharmony_cistatic void print_line(FILE *f, char *line)
520f66f451Sopenharmony_ci{
530f66f451Sopenharmony_ci  if (TT.repeats ? FLAG(u) : FLAG(d)) return;
540f66f451Sopenharmony_ci  if (FLAG(c)) fprintf(f, "%7lu ", TT.repeats + 1);
550f66f451Sopenharmony_ci  fputs(line, f);
560f66f451Sopenharmony_ci  if (FLAG(z)) fputc(0, f);
570f66f451Sopenharmony_ci}
580f66f451Sopenharmony_ci
590f66f451Sopenharmony_civoid uniq_main(void)
600f66f451Sopenharmony_ci{
610f66f451Sopenharmony_ci  FILE *infile = stdin, *outfile = stdout;
620f66f451Sopenharmony_ci  char *thisline = 0, *prevline = 0, *tmpline, eol = '\n';
630f66f451Sopenharmony_ci  size_t thissize, prevsize = 0, tmpsize;
640f66f451Sopenharmony_ci
650f66f451Sopenharmony_ci  if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
660f66f451Sopenharmony_ci  if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");
670f66f451Sopenharmony_ci
680f66f451Sopenharmony_ci  if (FLAG(z)) eol = 0;
690f66f451Sopenharmony_ci
700f66f451Sopenharmony_ci  // If first line can't be read
710f66f451Sopenharmony_ci  if (getdelim(&prevline, &prevsize, eol, infile) < 0) return;
720f66f451Sopenharmony_ci
730f66f451Sopenharmony_ci  while (getdelim(&thisline, &thissize, eol, infile) > 0) {
740f66f451Sopenharmony_ci    int diff;
750f66f451Sopenharmony_ci    char *t1, *t2;
760f66f451Sopenharmony_ci
770f66f451Sopenharmony_ci    // If requested get the chosen fields + character offsets.
780f66f451Sopenharmony_ci    if (TT.f || TT.s) {
790f66f451Sopenharmony_ci      t1 = skip(thisline);
800f66f451Sopenharmony_ci      t2 = skip(prevline);
810f66f451Sopenharmony_ci    } else {
820f66f451Sopenharmony_ci      t1 = thisline;
830f66f451Sopenharmony_ci      t2 = prevline;
840f66f451Sopenharmony_ci    }
850f66f451Sopenharmony_ci
860f66f451Sopenharmony_ci    if (!TT.w)
870f66f451Sopenharmony_ci      diff = !FLAG(i) ? strcmp(t1, t2) : strcasecmp(t1, t2);
880f66f451Sopenharmony_ci    else diff = !FLAG(i) ? strncmp(t1, t2, TT.w) : strncasecmp(t1, t2, TT.w);
890f66f451Sopenharmony_ci
900f66f451Sopenharmony_ci    if (!diff) TT.repeats++;
910f66f451Sopenharmony_ci    else {
920f66f451Sopenharmony_ci      print_line(outfile, prevline);
930f66f451Sopenharmony_ci
940f66f451Sopenharmony_ci      TT.repeats = 0;
950f66f451Sopenharmony_ci
960f66f451Sopenharmony_ci      tmpline = prevline;
970f66f451Sopenharmony_ci      prevline = thisline;
980f66f451Sopenharmony_ci      thisline = tmpline;
990f66f451Sopenharmony_ci
1000f66f451Sopenharmony_ci      tmpsize = prevsize;
1010f66f451Sopenharmony_ci      prevsize = thissize;
1020f66f451Sopenharmony_ci      thissize = tmpsize;
1030f66f451Sopenharmony_ci    }
1040f66f451Sopenharmony_ci  }
1050f66f451Sopenharmony_ci
1060f66f451Sopenharmony_ci  print_line(outfile, prevline);
1070f66f451Sopenharmony_ci
1080f66f451Sopenharmony_ci  if (CFG_TOYBOX_FREE) {
1090f66f451Sopenharmony_ci    if (outfile != stdout) fclose(outfile);
1100f66f451Sopenharmony_ci    if (infile != stdin) fclose(infile);
1110f66f451Sopenharmony_ci    free(prevline);
1120f66f451Sopenharmony_ci    free(thisline);
1130f66f451Sopenharmony_ci  }
1140f66f451Sopenharmony_ci}
115