10f66f451Sopenharmony_ci/* split.c - split a file into smaller files 20f66f451Sopenharmony_ci * 30f66f451Sopenharmony_ci * Copyright 2013 Rob Landley <rob@landley.net> 40f66f451Sopenharmony_ci * 50f66f451Sopenharmony_ci * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/split.html 60f66f451Sopenharmony_ci * 70f66f451Sopenharmony_ci * Standard does not cover: 80f66f451Sopenharmony_ci * - should splitting an empty file produce an empty outfile? (Went with "no".) 90f66f451Sopenharmony_ci * - permissions on output file 100f66f451Sopenharmony_ci 110f66f451Sopenharmony_ciUSE_SPLIT(NEWTOY(split, ">2a#<1=2>9b#<1l#<1n#<1[!bl][!bn][!ln]", TOYFLAG_USR|TOYFLAG_BIN)) 120f66f451Sopenharmony_ci 130f66f451Sopenharmony_ciconfig SPLIT 140f66f451Sopenharmony_ci bool "split" 150f66f451Sopenharmony_ci default y 160f66f451Sopenharmony_ci help 170f66f451Sopenharmony_ci usage: split [-a SUFFIX_LEN] [-b BYTES] [-l LINES] [-n PARTS] [INPUT [OUTPUT]] 180f66f451Sopenharmony_ci 190f66f451Sopenharmony_ci Copy INPUT (or stdin) data to a series of OUTPUT (or "x") files with 200f66f451Sopenharmony_ci alphabetically increasing suffix (aa, ab, ac... az, ba, bb...). 210f66f451Sopenharmony_ci 220f66f451Sopenharmony_ci -a Suffix length (default 2) 230f66f451Sopenharmony_ci -b BYTES/file (10, 10k, 10m, 10g...) 240f66f451Sopenharmony_ci -l LINES/file (default 1000) 250f66f451Sopenharmony_ci -n PARTS many equal length files 260f66f451Sopenharmony_ci*/ 270f66f451Sopenharmony_ci 280f66f451Sopenharmony_ci#define FOR_split 290f66f451Sopenharmony_ci#include "toys.h" 300f66f451Sopenharmony_ci 310f66f451Sopenharmony_ciGLOBALS( 320f66f451Sopenharmony_ci long n, l, b, a; 330f66f451Sopenharmony_ci 340f66f451Sopenharmony_ci char *outfile; 350f66f451Sopenharmony_ci) 360f66f451Sopenharmony_ci 370f66f451Sopenharmony_cistatic void do_split(int infd, char *in) 380f66f451Sopenharmony_ci{ 390f66f451Sopenharmony_ci unsigned long bytesleft, linesleft, filenum, len, pos; 400f66f451Sopenharmony_ci int outfd = -1; 410f66f451Sopenharmony_ci struct stat st; 420f66f451Sopenharmony_ci 430f66f451Sopenharmony_ci // posix doesn't cover permissions on output file, so copy input (or 0777) 440f66f451Sopenharmony_ci st.st_mode = 0777; 450f66f451Sopenharmony_ci st.st_size = 0; 460f66f451Sopenharmony_ci fstat(infd, &st); 470f66f451Sopenharmony_ci 480f66f451Sopenharmony_ci if (TT.n && (TT.b = st.st_size/TT.n)<1) return error_msg("%s: no size", in); 490f66f451Sopenharmony_ci len = pos = filenum = bytesleft = linesleft = 0; 500f66f451Sopenharmony_ci for (;;) { 510f66f451Sopenharmony_ci int i, j; 520f66f451Sopenharmony_ci 530f66f451Sopenharmony_ci // Refill toybuf? 540f66f451Sopenharmony_ci if (len == pos) { 550f66f451Sopenharmony_ci if (!(len = xread(infd, toybuf, sizeof(toybuf)))) break; 560f66f451Sopenharmony_ci pos = 0; 570f66f451Sopenharmony_ci } 580f66f451Sopenharmony_ci 590f66f451Sopenharmony_ci // Start new output file? 600f66f451Sopenharmony_ci if ((TT.b && !bytesleft) || (TT.l && !linesleft)) { 610f66f451Sopenharmony_ci char *s = TT.outfile + strlen(TT.outfile); 620f66f451Sopenharmony_ci 630f66f451Sopenharmony_ci j = filenum++; 640f66f451Sopenharmony_ci for (i = 0; i<TT.a; i++) { 650f66f451Sopenharmony_ci *(--s) = 'a'+(j%26); 660f66f451Sopenharmony_ci j /= 26; 670f66f451Sopenharmony_ci } 680f66f451Sopenharmony_ci if (j) error_exit("bad suffix"); 690f66f451Sopenharmony_ci bytesleft = TT.b + ((filenum == TT.n) ? st.st_size%TT.n : 0); 700f66f451Sopenharmony_ci linesleft = TT.l; 710f66f451Sopenharmony_ci xclose(outfd); 720f66f451Sopenharmony_ci outfd = xcreate(TT.outfile, O_RDWR|O_CREAT|O_TRUNC, st.st_mode & 0777); 730f66f451Sopenharmony_ci } 740f66f451Sopenharmony_ci 750f66f451Sopenharmony_ci // Write next chunk of output. 760f66f451Sopenharmony_ci if (TT.l) { 770f66f451Sopenharmony_ci for (i = pos; i < len; ) { 780f66f451Sopenharmony_ci if (toybuf[i++] == '\n' && !--linesleft) break; 790f66f451Sopenharmony_ci if (!--bytesleft) break; 800f66f451Sopenharmony_ci } 810f66f451Sopenharmony_ci j = i - pos; 820f66f451Sopenharmony_ci } else { 830f66f451Sopenharmony_ci j = len - pos; 840f66f451Sopenharmony_ci if (j > bytesleft) j = bytesleft; 850f66f451Sopenharmony_ci bytesleft -= j; 860f66f451Sopenharmony_ci } 870f66f451Sopenharmony_ci xwrite(outfd, toybuf+pos, j); 880f66f451Sopenharmony_ci pos += j; 890f66f451Sopenharmony_ci } 900f66f451Sopenharmony_ci 910f66f451Sopenharmony_ci if (CFG_TOYBOX_FREE) { 920f66f451Sopenharmony_ci xclose(outfd); 930f66f451Sopenharmony_ci if (infd) close(infd); 940f66f451Sopenharmony_ci free(TT.outfile); 950f66f451Sopenharmony_ci } 960f66f451Sopenharmony_ci xexit(); 970f66f451Sopenharmony_ci} 980f66f451Sopenharmony_ci 990f66f451Sopenharmony_civoid split_main(void) 1000f66f451Sopenharmony_ci{ 1010f66f451Sopenharmony_ci if (!TT.b && !TT.l && !TT.n) TT.l = 1000; 1020f66f451Sopenharmony_ci 1030f66f451Sopenharmony_ci // Allocate template for output filenames 1040f66f451Sopenharmony_ci TT.outfile = xmprintf("%s%*c", (toys.optc == 2) ? toys.optargs[1] : "x", 1050f66f451Sopenharmony_ci (int)TT.a, ' '); 1060f66f451Sopenharmony_ci 1070f66f451Sopenharmony_ci // We only ever use one input, but this handles '-' or no input for us. 1080f66f451Sopenharmony_ci loopfiles(toys.optargs, do_split); 1090f66f451Sopenharmony_ci} 110