xref: /third_party/toybox/toys/posix/sort.c (revision 0f66f451)
1/* sort.c - put input lines into order
2 *
3 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
4 *
5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
6 *
7 * Deviations from POSIX: Lots.
8 * We invented -x
9
10USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
11
12config SORT
13  bool "sort"
14  default y
15  help
16    usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
17
18    Sort all lines of text from input files (or stdin) to stdout.
19
20    -r	Reverse
21    -u	Unique lines only
22    -n	Numeric order (instead of alphabetical)
23    -b	Ignore leading blanks (or trailing blanks in second part of key)
24    -c	Check whether input is sorted
25    -d	Dictionary order (use alphanumeric and whitespace chars only)
26    -f	Force uppercase (case insensitive sort)
27    -i	Ignore nonprinting characters
28    -M	Month sort (jan, feb, etc)
29    -x	Hexadecimal numerical sort
30    -s	Skip fallback sort (only sort with keys)
31    -z	Zero (null) terminated lines
32    -k	Sort by "key" (see below)
33    -t	Use a key separator other than whitespace
34    -o	Output to FILE instead of stdout
35    -V	Version numbers (name-1.234-rc6.5b.tgz)
36
37    Sorting by key looks at a subset of the words on each line. -k2 uses the
38    second word to the end of the line, -k2,2 looks at only the second word,
39    -k2,4 looks from the start of the second to the end of the fourth word.
40    -k2.4,5 starts from the fourth character of the second word, to the end
41    of the fifth word. Specifying multiple keys uses the later keys as tie
42    breakers, in order. A type specifier appended to a sort key (such as -2,2n)
43    applies only to sorting that key.
44
45config SORT_FLOAT
46  bool
47  default y
48  depends on TOYBOX_FLOAT
49  help
50    usage: sort [-g]
51
52    -g	General numeric sort (double precision with nan and inf)
53*/
54
55#define FOR_sort
56#include "toys.h"
57
58GLOBALS(
59  char *t;
60  struct arg_list *k;
61  char *o, *T, S;
62
63  void *key_list;
64  int linecount;
65  char **lines, *name;
66)
67
68// The sort types are n, g, and M.
69// u, c, s, and z apply to top level only, not to keys.
70// b at top level implies bb.
71// The remaining options can be applied to search keys.
72
73#define FLAG_bb (1<<31)  // Ignore trailing blanks
74
75struct sort_key
76{
77  struct sort_key *next_key;  // linked list
78  unsigned range[4];          // start word, start char, end word, end char
79  int flags;
80};
81
82// Copy of the part of this string corresponding to a key/flags.
83
84static char *get_key_data(char *str, struct sort_key *key, int flags)
85{
86  int start = 0, end, len, i, j;
87
88  // Special case whole string, so we don't have to make a copy
89
90  if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
91    && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
92
93  // Find start of key on first pass, end on second pass
94
95  len = strlen(str);
96  for (j=0; j<2; j++) {
97    if (!key->range[2*j]) end=len;
98
99    // Loop through fields
100    else {
101      end = 0;
102      for (i = 1; i < key->range[2*j]+j; i++) {
103
104        // Skip leading blanks
105        if (str[end] && !TT.t) while (isspace(str[end])) end++;
106
107        // Skip body of key
108        for (; str[end]; end++) {
109          if (TT.t) {
110            if (str[end]==*TT.t) {
111              end++;
112              break;
113            }
114          } else if (isspace(str[end])) break;
115        }
116      }
117    }
118    if (!j) start=end;
119  }
120
121  // Key with explicit separator starts after the separator
122  if (TT.t && str[start]==*TT.t) start++;
123
124  // Strip leading and trailing whitespace if necessary
125  if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
126    while (isspace(str[start])) start++;
127  if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
128
129  // Handle offsets on start and end
130  if (key->range[3]) {
131    end += key->range[3]-1;
132    if (end>len) end=len;
133  }
134  if (key->range[1]) {
135    start += key->range[1]-1;
136    if (start>len) start=len;
137  }
138
139  // Make the copy
140  if (end<start) end = start;
141  str = xstrndup(str+start, end-start);
142
143  // Handle -d
144  if (flags&FLAG_d) {
145    for (start = end = 0; str[end]; end++)
146      if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
147    str[start] = 0;
148  }
149
150  // Handle -i
151  if (flags&FLAG_i) {
152    for (start = end = 0; str[end]; end++)
153      if (isprint(str[end])) str[start++] = str[end];
154    str[start] = 0;
155  }
156
157  return str;
158}
159
160// append a sort_key to key_list.
161
162static struct sort_key *add_key(void)
163{
164  void **stupid_compiler = &TT.key_list;
165  struct sort_key **pkey = (struct sort_key **)stupid_compiler;
166
167  while (*pkey) pkey = &((*pkey)->next_key);
168  return *pkey = xzalloc(sizeof(struct sort_key));
169}
170
171// Perform actual comparison
172static int compare_values(int flags, char *x, char *y)
173{
174  if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
175    char *xx,*yy;
176    double dx = strtod(x,&xx), dy = strtod(y,&yy);
177    int xinf, yinf;
178
179    // not numbers < NaN < -infinity < numbers < +infinity
180
181    if (x==xx) return y==yy ? 0 : -1;
182    if (y==yy) return 1;
183
184    // Check for isnan
185    if (dx!=dx) return (dy!=dy) ? 0 : -1;
186    if (dy!=dy) return 1;
187
188    // Check for infinity.  (Could underflow, but avoids needing libm.)
189    xinf = (1.0/dx == 0.0);
190    yinf = (1.0/dy == 0.0);
191    if (xinf) {
192      if(dx<0) return (yinf && dy<0) ? 0 : -1;
193      return (yinf && dy>0) ? 0 : 1;
194    }
195    if (yinf) return dy<0 ? 1 : -1;
196
197    return dx>dy ? 1 : (dx<dy ? -1 : 0);
198  } else if (flags & FLAG_M) {
199    struct tm thyme;
200    int dx;
201    char *xx,*yy;
202
203    xx = strptime(x,"%b",&thyme);
204    dx = thyme.tm_mon;
205    yy = strptime(y,"%b",&thyme);
206    if (!xx) return !yy ? 0 : -1;
207    else if (!yy) return 1;
208    else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
209
210  } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
211  else if (flags & FLAG_V) {
212    while (*x && *y) {
213      while (*x && *x == *y) x++, y++;
214      if (isdigit(*x) && isdigit(*y)) {
215        long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
216
217        if (xx<yy) return -1;
218        if (xx>yy) return 1;
219      } else {
220        char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
221
222        // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
223        if (xx != yy) {
224          if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
225          else return 1;
226        }
227      }
228    }
229    return *x ? !!*y : -1;
230  } else if (flags & FLAG_n) {
231    // Full floating point version of -n
232    if (CFG_SORT_FLOAT) {
233      double dx = atof(x), dy = atof(y);
234
235      return dx>dy ? 1 : (dx<dy ? -1 : 0);
236    // Integer version of -n for tiny systems
237    } else return atoi(x)-atoi(y);
238
239  // Ascii sort
240  } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
241}
242
243// Callback from qsort(): Iterate through key_list and perform comparisons.
244static int compare_keys(const void *xarg, const void *yarg)
245{
246  int flags = toys.optflags, retval = 0;
247  char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
248  struct sort_key *key;
249
250  for (key=(struct sort_key *)TT.key_list; !retval && key; key = key->next_key){
251    flags = key->flags ? key->flags : toys.optflags;
252
253    // Chop out and modify key chunks, handling -dfib
254
255    x = get_key_data(xx, key, flags);
256    y = get_key_data(yy, key, flags);
257
258    retval = compare_values(flags, x, y);
259
260    // Free the copies get_key_data() made.
261
262    if (x != xx) free(x);
263    if (y != yy) free(y);
264
265    if (retval) break;
266  }
267
268  // Perform fallback sort if necessary (always case insensitive, no -f,
269  // the point is to get a stable order even for -f sorts)
270  if (!retval && !FLAG(s)) {
271    flags = toys.optflags;
272    retval = strcmp(xx, yy);
273  }
274
275  return retval * ((flags&FLAG_r) ? -1 : 1);
276}
277
278// Read each line from file, appending to a big array.
279static void sort_lines(char **pline, long len)
280{
281  char *line;
282
283  if (!pline) return;
284  line = *pline;
285  if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0;
286  *pline = 0;
287
288  // handle -c here so we don't allocate more memory than necessary.
289  if (FLAG(c)) {
290    int j = FLAG(u) ? -1 : 0;
291
292    if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
293      error_exit("%s: Check line %d\n", TT.name, TT.linecount);
294    free(TT.lines);
295    TT.lines = (void *)line;
296  } else {
297    if (!(TT.linecount&63))
298      TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
299    TT.lines[TT.linecount] = line;
300  }
301  TT.linecount++;
302}
303
304// Callback from loopfiles to handle input files.
305static void sort_read(int fd, char *name)
306{
307  TT.name = name;
308  do_lines(fd, FLAG(z) ? '\0' : '\n', sort_lines);
309}
310
311void sort_main(void)
312{
313  int idx, fd = 1;
314
315  // Parse -k sort keys.
316  if (TT.k) {
317    struct arg_list *arg;
318
319    for (arg = TT.k; arg; arg = arg->next) {
320      struct sort_key *key = add_key();
321      char *temp;
322      int flag;
323
324      idx = 0;
325      temp = arg->arg;
326      while (*temp) {
327        // Start of range
328        key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
329        if (*temp=='.')
330          key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
331
332        // Handle flags appended to a key type.
333        for (;*temp;temp++) {
334          char *temp2, *optlist;
335
336          // Note that a second comma becomes an "Unknown key" error.
337
338          if (*temp==',' && !idx++) {
339            temp++;
340            break;
341          }
342
343          // Which flag is this?
344
345          optlist = toys.which->options;
346          temp2 = strchr(optlist, *temp);
347          flag = (1<<(optlist-temp2+strlen(optlist)-1));
348
349          // Was it a flag that can apply to a key?
350
351          if (!temp2 || flag>FLAG_x
352            || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
353          {
354            toys.exitval = 2;
355            error_exit("Unknown key option.");
356          }
357          // b after , means strip _trailing_ space, not leading.
358          if (idx && flag==FLAG_b) flag = FLAG_bb;
359          key->flags |= flag;
360        }
361      }
362    }
363  }
364
365  // global b flag strips both leading and trailing spaces
366  if (FLAG(b)) toys.optflags |= FLAG_bb;
367
368  // If no keys, perform alphabetic sort over the whole line.
369  if (!TT.key_list) add_key()->range[0] = 1;
370
371  // Open input files and read data, populating TT.lines[TT.linecount]
372  loopfiles(toys.optargs, sort_read);
373
374  // The compare (-c) logic was handled in sort_read(),
375  // so if we got here, we're done.
376  if (FLAG(c)) goto exit_now;
377
378  // Perform the actual sort
379  qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
380
381  // handle unique (-u)
382  if (FLAG(u)) {
383    int jdx;
384
385    for (jdx=0, idx=1; idx<TT.linecount; idx++) {
386      if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
387        free(TT.lines[idx]);
388      else TT.lines[++jdx] = TT.lines[idx];
389    }
390    if (TT.linecount) TT.linecount = jdx+1;
391  }
392
393  // Open output file if necessary. We can't do this until we've finished
394  // reading in case the output file is one of the input files.
395  if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
396
397  // Output result
398  for (idx = 0; idx<TT.linecount; idx++) {
399    char *s = TT.lines[idx];
400    unsigned i = strlen(s);
401
402    if (!FLAG(z)) s[i] = '\n';
403    xwrite(fd, s, i+1);
404    if (CFG_TOYBOX_FREE) free(s);
405  }
406
407exit_now:
408  if (CFG_TOYBOX_FREE) {
409    if (fd != 1) close(fd);
410    free(TT.lines);
411  }
412}
413