10f66f451Sopenharmony_ci/* uniq.c - report or filter out repeated lines in a file 20f66f451Sopenharmony_ci * 30f66f451Sopenharmony_ci * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org> 40f66f451Sopenharmony_ci * 50f66f451Sopenharmony_ci * See http://opengroup.org/onlinepubs/9699919799/utilities/uniq.html 60f66f451Sopenharmony_ci 70f66f451Sopenharmony_ciUSE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_USR|TOYFLAG_BIN)) 80f66f451Sopenharmony_ci 90f66f451Sopenharmony_ciconfig UNIQ 100f66f451Sopenharmony_ci bool "uniq" 110f66f451Sopenharmony_ci default y 120f66f451Sopenharmony_ci help 130f66f451Sopenharmony_ci usage: uniq [-cduiz] [-w MAXCHARS] [-f FIELDS] [-s CHAR] [INFILE [OUTFILE]] 140f66f451Sopenharmony_ci 150f66f451Sopenharmony_ci Report or filter out repeated lines in a file 160f66f451Sopenharmony_ci 170f66f451Sopenharmony_ci -c Show counts before each line 180f66f451Sopenharmony_ci -d Show only lines that are repeated 190f66f451Sopenharmony_ci -u Show only lines that are unique 200f66f451Sopenharmony_ci -i Ignore case when comparing lines 210f66f451Sopenharmony_ci -z Lines end with \0 not \n 220f66f451Sopenharmony_ci -w Compare maximum X chars per line 230f66f451Sopenharmony_ci -f Ignore first X fields 240f66f451Sopenharmony_ci -s Ignore first X chars 250f66f451Sopenharmony_ci*/ 260f66f451Sopenharmony_ci 270f66f451Sopenharmony_ci#define FOR_uniq 280f66f451Sopenharmony_ci#include "toys.h" 290f66f451Sopenharmony_ci 300f66f451Sopenharmony_ciGLOBALS( 310f66f451Sopenharmony_ci long w, s, f; 320f66f451Sopenharmony_ci 330f66f451Sopenharmony_ci long repeats; 340f66f451Sopenharmony_ci) 350f66f451Sopenharmony_ci 360f66f451Sopenharmony_cistatic char *skip(char *str) 370f66f451Sopenharmony_ci{ 380f66f451Sopenharmony_ci long nchars = TT.s, nfields = TT.f; 390f66f451Sopenharmony_ci 400f66f451Sopenharmony_ci // Skip fields first 410f66f451Sopenharmony_ci while (nfields--) { 420f66f451Sopenharmony_ci while (*str && isspace(*str)) str++; 430f66f451Sopenharmony_ci while (*str && !isspace(*str)) str++; 440f66f451Sopenharmony_ci } 450f66f451Sopenharmony_ci // Skip chars 460f66f451Sopenharmony_ci while (*str && nchars--) str++; 470f66f451Sopenharmony_ci 480f66f451Sopenharmony_ci return str; 490f66f451Sopenharmony_ci} 500f66f451Sopenharmony_ci 510f66f451Sopenharmony_cistatic void print_line(FILE *f, char *line) 520f66f451Sopenharmony_ci{ 530f66f451Sopenharmony_ci if (TT.repeats ? FLAG(u) : FLAG(d)) return; 540f66f451Sopenharmony_ci if (FLAG(c)) fprintf(f, "%7lu ", TT.repeats + 1); 550f66f451Sopenharmony_ci fputs(line, f); 560f66f451Sopenharmony_ci if (FLAG(z)) fputc(0, f); 570f66f451Sopenharmony_ci} 580f66f451Sopenharmony_ci 590f66f451Sopenharmony_civoid uniq_main(void) 600f66f451Sopenharmony_ci{ 610f66f451Sopenharmony_ci FILE *infile = stdin, *outfile = stdout; 620f66f451Sopenharmony_ci char *thisline = 0, *prevline = 0, *tmpline, eol = '\n'; 630f66f451Sopenharmony_ci size_t thissize, prevsize = 0, tmpsize; 640f66f451Sopenharmony_ci 650f66f451Sopenharmony_ci if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r"); 660f66f451Sopenharmony_ci if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w"); 670f66f451Sopenharmony_ci 680f66f451Sopenharmony_ci if (FLAG(z)) eol = 0; 690f66f451Sopenharmony_ci 700f66f451Sopenharmony_ci // If first line can't be read 710f66f451Sopenharmony_ci if (getdelim(&prevline, &prevsize, eol, infile) < 0) return; 720f66f451Sopenharmony_ci 730f66f451Sopenharmony_ci while (getdelim(&thisline, &thissize, eol, infile) > 0) { 740f66f451Sopenharmony_ci int diff; 750f66f451Sopenharmony_ci char *t1, *t2; 760f66f451Sopenharmony_ci 770f66f451Sopenharmony_ci // If requested get the chosen fields + character offsets. 780f66f451Sopenharmony_ci if (TT.f || TT.s) { 790f66f451Sopenharmony_ci t1 = skip(thisline); 800f66f451Sopenharmony_ci t2 = skip(prevline); 810f66f451Sopenharmony_ci } else { 820f66f451Sopenharmony_ci t1 = thisline; 830f66f451Sopenharmony_ci t2 = prevline; 840f66f451Sopenharmony_ci } 850f66f451Sopenharmony_ci 860f66f451Sopenharmony_ci if (!TT.w) 870f66f451Sopenharmony_ci diff = !FLAG(i) ? strcmp(t1, t2) : strcasecmp(t1, t2); 880f66f451Sopenharmony_ci else diff = !FLAG(i) ? strncmp(t1, t2, TT.w) : strncasecmp(t1, t2, TT.w); 890f66f451Sopenharmony_ci 900f66f451Sopenharmony_ci if (!diff) TT.repeats++; 910f66f451Sopenharmony_ci else { 920f66f451Sopenharmony_ci print_line(outfile, prevline); 930f66f451Sopenharmony_ci 940f66f451Sopenharmony_ci TT.repeats = 0; 950f66f451Sopenharmony_ci 960f66f451Sopenharmony_ci tmpline = prevline; 970f66f451Sopenharmony_ci prevline = thisline; 980f66f451Sopenharmony_ci thisline = tmpline; 990f66f451Sopenharmony_ci 1000f66f451Sopenharmony_ci tmpsize = prevsize; 1010f66f451Sopenharmony_ci prevsize = thissize; 1020f66f451Sopenharmony_ci thissize = tmpsize; 1030f66f451Sopenharmony_ci } 1040f66f451Sopenharmony_ci } 1050f66f451Sopenharmony_ci 1060f66f451Sopenharmony_ci print_line(outfile, prevline); 1070f66f451Sopenharmony_ci 1080f66f451Sopenharmony_ci if (CFG_TOYBOX_FREE) { 1090f66f451Sopenharmony_ci if (outfile != stdout) fclose(outfile); 1100f66f451Sopenharmony_ci if (infile != stdin) fclose(infile); 1110f66f451Sopenharmony_ci free(prevline); 1120f66f451Sopenharmony_ci free(thisline); 1130f66f451Sopenharmony_ci } 1140f66f451Sopenharmony_ci} 115