1/* sort.c - put input lines into order 2 * 3 * Copyright 2004, 2008 Rob Landley <rob@landley.net> 4 * 5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html 6 * 7 * Deviations from POSIX: Lots. 8 * We invented -x 9 10USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2))) 11 12config SORT 13 bool "sort" 14 default y 15 help 16 usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE] 17 18 Sort all lines of text from input files (or stdin) to stdout. 19 20 -r Reverse 21 -u Unique lines only 22 -n Numeric order (instead of alphabetical) 23 -b Ignore leading blanks (or trailing blanks in second part of key) 24 -c Check whether input is sorted 25 -d Dictionary order (use alphanumeric and whitespace chars only) 26 -f Force uppercase (case insensitive sort) 27 -i Ignore nonprinting characters 28 -M Month sort (jan, feb, etc) 29 -x Hexadecimal numerical sort 30 -s Skip fallback sort (only sort with keys) 31 -z Zero (null) terminated lines 32 -k Sort by "key" (see below) 33 -t Use a key separator other than whitespace 34 -o Output to FILE instead of stdout 35 -V Version numbers (name-1.234-rc6.5b.tgz) 36 37 Sorting by key looks at a subset of the words on each line. -k2 uses the 38 second word to the end of the line, -k2,2 looks at only the second word, 39 -k2,4 looks from the start of the second to the end of the fourth word. 40 -k2.4,5 starts from the fourth character of the second word, to the end 41 of the fifth word. Specifying multiple keys uses the later keys as tie 42 breakers, in order. A type specifier appended to a sort key (such as -2,2n) 43 applies only to sorting that key. 44 45config SORT_FLOAT 46 bool 47 default y 48 depends on TOYBOX_FLOAT 49 help 50 usage: sort [-g] 51 52 -g General numeric sort (double precision with nan and inf) 53*/ 54 55#define FOR_sort 56#include "toys.h" 57 58GLOBALS( 59 char *t; 60 struct arg_list *k; 61 char *o, *T, S; 62 63 void *key_list; 64 int linecount; 65 char **lines, *name; 66) 67 68// The sort types are n, g, and M. 69// u, c, s, and z apply to top level only, not to keys. 70// b at top level implies bb. 71// The remaining options can be applied to search keys. 72 73#define FLAG_bb (1<<31) // Ignore trailing blanks 74 75struct sort_key 76{ 77 struct sort_key *next_key; // linked list 78 unsigned range[4]; // start word, start char, end word, end char 79 int flags; 80}; 81 82// Copy of the part of this string corresponding to a key/flags. 83 84static char *get_key_data(char *str, struct sort_key *key, int flags) 85{ 86 int start = 0, end, len, i, j; 87 88 // Special case whole string, so we don't have to make a copy 89 90 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] 91 && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str; 92 93 // Find start of key on first pass, end on second pass 94 95 len = strlen(str); 96 for (j=0; j<2; j++) { 97 if (!key->range[2*j]) end=len; 98 99 // Loop through fields 100 else { 101 end = 0; 102 for (i = 1; i < key->range[2*j]+j; i++) { 103 104 // Skip leading blanks 105 if (str[end] && !TT.t) while (isspace(str[end])) end++; 106 107 // Skip body of key 108 for (; str[end]; end++) { 109 if (TT.t) { 110 if (str[end]==*TT.t) { 111 end++; 112 break; 113 } 114 } else if (isspace(str[end])) break; 115 } 116 } 117 } 118 if (!j) start=end; 119 } 120 121 // Key with explicit separator starts after the separator 122 if (TT.t && str[start]==*TT.t) start++; 123 124 // Strip leading and trailing whitespace if necessary 125 if ((flags&FLAG_b) || (!TT.t && !key->range[3])) 126 while (isspace(str[start])) start++; 127 if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--; 128 129 // Handle offsets on start and end 130 if (key->range[3]) { 131 end += key->range[3]-1; 132 if (end>len) end=len; 133 } 134 if (key->range[1]) { 135 start += key->range[1]-1; 136 if (start>len) start=len; 137 } 138 139 // Make the copy 140 if (end<start) end = start; 141 str = xstrndup(str+start, end-start); 142 143 // Handle -d 144 if (flags&FLAG_d) { 145 for (start = end = 0; str[end]; end++) 146 if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end]; 147 str[start] = 0; 148 } 149 150 // Handle -i 151 if (flags&FLAG_i) { 152 for (start = end = 0; str[end]; end++) 153 if (isprint(str[end])) str[start++] = str[end]; 154 str[start] = 0; 155 } 156 157 return str; 158} 159 160// append a sort_key to key_list. 161 162static struct sort_key *add_key(void) 163{ 164 void **stupid_compiler = &TT.key_list; 165 struct sort_key **pkey = (struct sort_key **)stupid_compiler; 166 167 while (*pkey) pkey = &((*pkey)->next_key); 168 return *pkey = xzalloc(sizeof(struct sort_key)); 169} 170 171// Perform actual comparison 172static int compare_values(int flags, char *x, char *y) 173{ 174 if (CFG_SORT_FLOAT && (flags & FLAG_g)) { 175 char *xx,*yy; 176 double dx = strtod(x,&xx), dy = strtod(y,&yy); 177 int xinf, yinf; 178 179 // not numbers < NaN < -infinity < numbers < +infinity 180 181 if (x==xx) return y==yy ? 0 : -1; 182 if (y==yy) return 1; 183 184 // Check for isnan 185 if (dx!=dx) return (dy!=dy) ? 0 : -1; 186 if (dy!=dy) return 1; 187 188 // Check for infinity. (Could underflow, but avoids needing libm.) 189 xinf = (1.0/dx == 0.0); 190 yinf = (1.0/dy == 0.0); 191 if (xinf) { 192 if(dx<0) return (yinf && dy<0) ? 0 : -1; 193 return (yinf && dy>0) ? 0 : 1; 194 } 195 if (yinf) return dy<0 ? 1 : -1; 196 197 return dx>dy ? 1 : (dx<dy ? -1 : 0); 198 } else if (flags & FLAG_M) { 199 struct tm thyme; 200 int dx; 201 char *xx,*yy; 202 203 xx = strptime(x,"%b",&thyme); 204 dx = thyme.tm_mon; 205 yy = strptime(y,"%b",&thyme); 206 if (!xx) return !yy ? 0 : -1; 207 else if (!yy) return 1; 208 else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon; 209 210 } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16); 211 else if (flags & FLAG_V) { 212 while (*x && *y) { 213 while (*x && *x == *y) x++, y++; 214 if (isdigit(*x) && isdigit(*y)) { 215 long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10); 216 217 if (xx<yy) return -1; 218 if (xx>yy) return 1; 219 } else { 220 char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1]; 221 222 // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9) 223 if (xx != yy) { 224 if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1; 225 else return 1; 226 } 227 } 228 } 229 return *x ? !!*y : -1; 230 } else if (flags & FLAG_n) { 231 // Full floating point version of -n 232 if (CFG_SORT_FLOAT) { 233 double dx = atof(x), dy = atof(y); 234 235 return dx>dy ? 1 : (dx<dy ? -1 : 0); 236 // Integer version of -n for tiny systems 237 } else return atoi(x)-atoi(y); 238 239 // Ascii sort 240 } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y); 241} 242 243// Callback from qsort(): Iterate through key_list and perform comparisons. 244static int compare_keys(const void *xarg, const void *yarg) 245{ 246 int flags = toys.optflags, retval = 0; 247 char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg; 248 struct sort_key *key; 249 250 for (key=(struct sort_key *)TT.key_list; !retval && key; key = key->next_key){ 251 flags = key->flags ? key->flags : toys.optflags; 252 253 // Chop out and modify key chunks, handling -dfib 254 255 x = get_key_data(xx, key, flags); 256 y = get_key_data(yy, key, flags); 257 258 retval = compare_values(flags, x, y); 259 260 // Free the copies get_key_data() made. 261 262 if (x != xx) free(x); 263 if (y != yy) free(y); 264 265 if (retval) break; 266 } 267 268 // Perform fallback sort if necessary (always case insensitive, no -f, 269 // the point is to get a stable order even for -f sorts) 270 if (!retval && !FLAG(s)) { 271 flags = toys.optflags; 272 retval = strcmp(xx, yy); 273 } 274 275 return retval * ((flags&FLAG_r) ? -1 : 1); 276} 277 278// Read each line from file, appending to a big array. 279static void sort_lines(char **pline, long len) 280{ 281 char *line; 282 283 if (!pline) return; 284 line = *pline; 285 if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0; 286 *pline = 0; 287 288 // handle -c here so we don't allocate more memory than necessary. 289 if (FLAG(c)) { 290 int j = FLAG(u) ? -1 : 0; 291 292 if (TT.lines && compare_keys((void *)&TT.lines, &line)>j) 293 error_exit("%s: Check line %d\n", TT.name, TT.linecount); 294 free(TT.lines); 295 TT.lines = (void *)line; 296 } else { 297 if (!(TT.linecount&63)) 298 TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64)); 299 TT.lines[TT.linecount] = line; 300 } 301 TT.linecount++; 302} 303 304// Callback from loopfiles to handle input files. 305static void sort_read(int fd, char *name) 306{ 307 TT.name = name; 308 do_lines(fd, FLAG(z) ? '\0' : '\n', sort_lines); 309} 310 311void sort_main(void) 312{ 313 int idx, fd = 1; 314 315 // Parse -k sort keys. 316 if (TT.k) { 317 struct arg_list *arg; 318 319 for (arg = TT.k; arg; arg = arg->next) { 320 struct sort_key *key = add_key(); 321 char *temp; 322 int flag; 323 324 idx = 0; 325 temp = arg->arg; 326 while (*temp) { 327 // Start of range 328 key->range[2*idx] = (unsigned)strtol(temp, &temp, 10); 329 if (*temp=='.') 330 key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10); 331 332 // Handle flags appended to a key type. 333 for (;*temp;temp++) { 334 char *temp2, *optlist; 335 336 // Note that a second comma becomes an "Unknown key" error. 337 338 if (*temp==',' && !idx++) { 339 temp++; 340 break; 341 } 342 343 // Which flag is this? 344 345 optlist = toys.which->options; 346 temp2 = strchr(optlist, *temp); 347 flag = (1<<(optlist-temp2+strlen(optlist)-1)); 348 349 // Was it a flag that can apply to a key? 350 351 if (!temp2 || flag>FLAG_x 352 || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) 353 { 354 toys.exitval = 2; 355 error_exit("Unknown key option."); 356 } 357 // b after , means strip _trailing_ space, not leading. 358 if (idx && flag==FLAG_b) flag = FLAG_bb; 359 key->flags |= flag; 360 } 361 } 362 } 363 } 364 365 // global b flag strips both leading and trailing spaces 366 if (FLAG(b)) toys.optflags |= FLAG_bb; 367 368 // If no keys, perform alphabetic sort over the whole line. 369 if (!TT.key_list) add_key()->range[0] = 1; 370 371 // Open input files and read data, populating TT.lines[TT.linecount] 372 loopfiles(toys.optargs, sort_read); 373 374 // The compare (-c) logic was handled in sort_read(), 375 // so if we got here, we're done. 376 if (FLAG(c)) goto exit_now; 377 378 // Perform the actual sort 379 qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys); 380 381 // handle unique (-u) 382 if (FLAG(u)) { 383 int jdx; 384 385 for (jdx=0, idx=1; idx<TT.linecount; idx++) { 386 if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) 387 free(TT.lines[idx]); 388 else TT.lines[++jdx] = TT.lines[idx]; 389 } 390 if (TT.linecount) TT.linecount = jdx+1; 391 } 392 393 // Open output file if necessary. We can't do this until we've finished 394 // reading in case the output file is one of the input files. 395 if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666); 396 397 // Output result 398 for (idx = 0; idx<TT.linecount; idx++) { 399 char *s = TT.lines[idx]; 400 unsigned i = strlen(s); 401 402 if (!FLAG(z)) s[i] = '\n'; 403 xwrite(fd, s, i+1); 404 if (CFG_TOYBOX_FREE) free(s); 405 } 406 407exit_now: 408 if (CFG_TOYBOX_FREE) { 409 if (fd != 1) close(fd); 410 free(TT.lines); 411 } 412} 413