xref: /third_party/toybox/toys/posix/tar.c (revision 0f66f451)
1/* tar.c - create/extract archives
2 *
3 * Copyright 2014 Ashwini Kumar <ak.ashwini81@gmail.com>
4 *
5 * For the command, see
6 *   http://pubs.opengroup.org/onlinepubs/007908799/xcu/tar.html
7 * For the modern file format, see
8 *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
9 *   https://en.wikipedia.org/wiki/Tar_(computing)#File_format
10 *   https://www.gnu.org/software/tar/manual/html_node/Tar-Internals.html
11 *
12 * For writing to external program
13 * http://www.gnu.org/software/tar/manual/html_node/Writing-to-an-External-Program.html
14 *
15 * Toybox will never implement the "pax" command as a matter of policy.
16 *
17 * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
18 *
19
20USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
21
22config TAR
23  bool "tar"
24  default y
25  help
26    usage: tar [-cxt] [-fvohmjkOS] [-XTCf NAME] [FILES]
27
28    Create, extract, or list files in a .tar (or compressed t?z) file.
29
30    Options:
31    c  Create                x  Extract               t  Test (list)
32    f  tar FILE (default -)  C  Change to DIR first   v  Verbose display
33    o  Ignore owner          h  Follow symlinks       m  Ignore mtime
34    J  xz compression        j  bzip2 compression     z  gzip compression
35    O  Extract to stdout     X  exclude names in FILE T  include names in FILE
36
37    --exclude        FILENAME to exclude    --full-time   Show seconds with -tv
38    --mode MODE      Adjust modes           --mtime TIME  Override timestamps
39    --owner NAME     Set file owner to NAME --group NAME  Set file group to NAME
40    --sparse         Record sparse files
41    --restrict       All archive contents must extract under one subdirectory
42    --numeric-owner  Save/use/display uid and gid, not user/group name
43    --no-recursion   Don't store directory contents
44*/
45
46#define FOR_tar
47#include "toys.h"
48
49GLOBALS(
50  char *f, *C;
51  struct arg_list *T, *X;
52  char *to_command, *owner, *group, *mtime, *mode;
53  struct arg_list *exclude;
54
55  struct double_list *incl, *excl, *seen;
56  struct string_list *dirs;
57  char *cwd;
58  int fd, ouid, ggid, hlc, warn, adev, aino, sparselen;
59  long long *sparse;
60  time_t mtt;
61
62  // hardlinks seen so far (hlc many)
63  struct {
64    char *arg;
65    ino_t ino;
66    dev_t dev;
67  } *hlx;
68
69  // Parsed information about a tar header.
70  struct tar_header {
71    char *name, *link_target, *uname, *gname;
72    long long size, ssize;
73    uid_t uid;
74    gid_t gid;
75    mode_t mode;
76    time_t mtime;
77    dev_t device;
78  } hdr;
79)
80
// Raw layout of one 512 byte archive header block. Every field is a plain
// char array, so there is no padding between members.
struct tar_hdr {
  char name[100];
  char mode[8];
  char uid[8];
  char gid[8];
  char size[12];
  char mtime[12];
  char chksum[8];
  char type;
  char link[100];
  char magic[8];
  char uname[32];
  char gname[32];
  char major[8];
  char minor[8];
  char prefix[155];
  char padd[12];
};
86
// Convert from int to octal (or base-256).
//
// Writes val into the len byte field at str. If val fits in len-1 octal
// digits it is stored as zero padded, NUL terminated octal. Otherwise the
// field is filled with the big-endian binary value (8 bits per byte) and the
// first byte is replaced by 0x80 to mark the binary encoding.
static void itoo(char *str, int len, unsigned long long val)
{
  // Do we need binary encoding?
  if (!(val>>(3*(len-1)))) sprintf(str, "%0*llo", len-1, val);
  else {
    // Fill backwards from the least significant byte.
    for (str += len; len--; val >>= 8) *--str = val;
    *str = 128;
  }
}
#define ITOO(x, y) itoo(x, sizeof(x), y)
98
// Convert octal (or base-256) to int.
//
// str points at a header field of len bytes. A first byte with the high bit
// set means the remaining len-1 bytes hold a big-endian binary value;
// otherwise optional leading spaces are followed by octal digits. Any other
// character than NUL or space after the digits is a fatal "bad header".
static unsigned long long otoi(char *str, unsigned len)
{
  unsigned long long val = 0;

  // When tar value too big or octal, use binary encoding with high bit set.
  // Bytes are read as unsigned so values >= 0x80 aren't sign extended.
  if (128&*str) while (--len) val = (val<<8)+(unsigned char)*++str;
  else {
    // Leading spaces count against the field width too, otherwise the digit
    // loop below could run past the end of the field.
    while (len && *str == ' ') str++, len--;
    while (len && *str>='0' && *str<='7') val = val*8+*str++-'0', len--;
    if (len && *str && *str != ' ') error_exit("bad header");
  }

  return val;
}
#define OTOI(x) otoi(x, sizeof(x))
115
116static void write_longname(char *name, char type)
117{
118  struct tar_hdr tmp;
119  int sz = strlen(name) +1;
120
121  memset(&tmp, 0, sizeof(tmp));
122  strcpy(tmp.name, "././@LongLink");
123  ITOO(tmp.uid, 0);
124  ITOO(tmp.gid, 0);
125  ITOO(tmp.size, sz);
126  ITOO(tmp.mtime, 0);
127  tmp.type = type;
128  strcpy(tmp.magic, "ustar  ");
129
130  // Historical nonsense to match other implementations. Never used.
131  ITOO(tmp.mode, 0644);
132  strcpy(tmp.uname, "root");
133  strcpy(tmp.gname, "root");
134
135  // Calculate checksum. Since 512*255 = 0377000 in octal, this can never
136  // use more than 6 digits. The last byte is ' ' for historical reasons.
137  itoo(tmp.chksum, sizeof(tmp.chksum)-1, tar_cksum(&tmp));
138  tmp.chksum[7] = ' ';
139
140  // write header and name, padded with NUL to block size
141  xwrite(TT.fd, &tmp, 512);
142  xwrite(TT.fd, name, sz);
143  if (sz%512) xwrite(TT.fd, toybuf, 512-(sz%512));
144}
145
146static struct double_list *filter(struct double_list *lst, char *name)
147{
148  struct double_list *end = lst;
149
150  if (lst)
151    // constant is FNM_LEADING_DIR
152    do if (!fnmatch(lst->data, name, 1<<3)) return lst;
153    while (end != (lst = lst->next));
154
155  return 0;
156}
157
158static void skippy(long long len)
159{
160  if (lskip(TT.fd, len)) perror_exit("EOF");
161}
162
163// allocate and read data from TT.fd
164static void alloread(void *buf, int len)
165{
166  // actually void **, but automatic typecasting doesn't work with void ** :(
167  void **b = buf;
168
169  free(*b);
170  *b = xmalloc(len+1);
171  xreadall(TT.fd, *b, len);
172  b[len] = 0;
173}
174
175// callback from dirtree to create archive
176static int add_to_tar(struct dirtree *node)
177{
178  struct stat *st = &(node->st);
179  struct tar_hdr hdr;
180  struct passwd *pw = pw;
181  struct group *gr = gr;
182  int i, fd =-1;
183  char *name, *lnk, *hname;
184
185  if (!dirtree_notdotdot(node)) return 0;
186  if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
187    error_msg("'%s' file is the archive; not dumped", node->name);
188    return 0;
189  }
190
191  i = 1;
192  name = dirtree_path(node, &i);
193
194  // exclusion defaults to --no-anchored and --wildcards-match-slash
195  for (lnk = name; *lnk;) {
196    if (filter(TT.excl, lnk)) goto done;
197    while (*lnk && *lnk!='/') lnk++;
198    while (*lnk=='/') lnk++;
199  }
200
201  // Consume the 1 extra byte alocated in dirtree_path()
202  if (S_ISDIR(st->st_mode) && name[i-1] != '/') strcat(name, "/");
203
204  // remove leading / and any .. entries from saved name
205  for (hname = name; *hname == '/'; hname++);
206  for (lnk = hname;;) {
207    if (!(lnk = strstr(lnk, ".."))) break;
208    if (lnk == hname || lnk[-1] == '/') {
209      if (!lnk[2]) goto done;
210      if (lnk[2]=='/') lnk = hname = lnk+3;
211    } else lnk+= 2;
212  }
213  if (!*hname) goto done;
214
215  if (TT.warn && hname != name) {
216    fprintf(stderr, "removing leading '%.*s' from member names\n",
217           (int)(hname-name), name);
218    TT.warn = 0;
219  }
220
221  if (TT.owner) st->st_uid = TT.ouid;
222  if (TT.group) st->st_gid = TT.ggid;
223  if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
224  if (TT.mtime) st->st_mtime = TT.mtt;
225
226  memset(&hdr, 0, sizeof(hdr));
227  strncpy(hdr.name, hname, sizeof(hdr.name));
228  ITOO(hdr.mode, st->st_mode &07777);
229  ITOO(hdr.uid, st->st_uid);
230  ITOO(hdr.gid, st->st_gid);
231  ITOO(hdr.size, 0); //set size later
232  ITOO(hdr.mtime, st->st_mtime);
233  strcpy(hdr.magic, "ustar  ");
234
235  // Hard link or symlink? i=0 neither, i=1 hardlink, i=2 symlink
236
237  // Are there hardlinks to a non-directory entry?
238  if (st->st_nlink>1 && !S_ISDIR(st->st_mode)) {
239    // Have we seen this dev&ino before?
240    for (i = 0; i<TT.hlc; i++) {
241      if (st->st_ino == TT.hlx[i].ino && st->st_dev == TT.hlx[i].dev)
242        break;
243    }
244    if (i != TT.hlc) {
245      lnk = TT.hlx[i].arg;
246      i = 1;
247    } else {
248      // first time we've seen it. Store as normal file, but remember it.
249      if (!(TT.hlc&255)) TT.hlx = xrealloc(TT.hlx, TT.hlc+256);
250      TT.hlx[TT.hlc].arg = xstrdup(hname);
251      TT.hlx[TT.hlc].ino = st->st_ino;
252      TT.hlx[TT.hlc].dev = st->st_dev;
253      TT.hlc++;
254      i = 0;
255    }
256  } else i = 0;
257
258  // !i because hardlink to a symlink is a thing.
259  if (!i && S_ISLNK(st->st_mode)) {
260    i = 2;
261    lnk = xreadlink(name);
262  }
263
264  // Handle file types
265  if (i) {
266    hdr.type = '0'+i;
267    if (i==2 && !(lnk = xreadlink(name))) {
268      perror_msg("readlink");
269      goto done;
270    }
271    if (strlen(lnk) > sizeof(hdr.link)) write_longname(lnk, 'K');
272    strncpy(hdr.link, lnk, sizeof(hdr.link));
273    if (i) free(lnk);
274  } else if (S_ISREG(st->st_mode)) {
275    hdr.type = '0';
276    ITOO(hdr.size, st->st_size);
277  } else if (S_ISDIR(st->st_mode)) hdr.type = '5';
278  else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
279  else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
280    hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
281    ITOO(hdr.major, dev_major(st->st_rdev));
282    ITOO(hdr.minor, dev_minor(st->st_rdev));
283  } else {
284    error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
285    goto done;
286  }
287
288  if (strlen(hname) > sizeof(hdr.name)) write_longname(hname, 'L');
289
290  if (!FLAG(numeric_owner)) {
291    if (TT.owner || (pw = bufgetpwuid(st->st_uid)))
292      strncpy(hdr.uname, TT.owner ? TT.owner : pw->pw_name, sizeof(hdr.uname));
293    if (TT.group || (gr = bufgetgrgid(st->st_gid)))
294      strncpy(hdr.gname, TT.group ? TT.group : gr->gr_name, sizeof(hdr.gname));
295  }
296
297  TT.sparselen = 0;
298  if (hdr.type == '0') {
299    // Before we write the header, make sure we can read the file
300    if ((fd = open(name, O_RDONLY)) < 0) {
301      perror_msg("can't open '%s'", name);
302
303      return 0;
304    }
305    if (FLAG(S)) {
306      long long lo, ld = 0, len = 0;
307
308      // Enumerate the extents
309      while ((lo = lseek(fd, ld, SEEK_HOLE)) != -1) {
310        if (!(TT.sparselen&511))
311          TT.sparse = xrealloc(TT.sparse, (TT.sparselen+514)*sizeof(long long));
312        if (ld != lo) {
313          TT.sparse[TT.sparselen++] = ld;
314          len += TT.sparse[TT.sparselen++] = lo-ld;
315        }
316        if (lo == st->st_size || (ld = lseek(fd, lo, SEEK_DATA)) < lo) break;
317      }
318
319      // If there were extents, change type to S record
320      if (TT.sparselen>2) {
321        TT.sparse[TT.sparselen++] = st->st_size;
322        TT.sparse[TT.sparselen++] = 0;
323        hdr.type = 'S';
324        lnk = (char *)&hdr;
325        for (i = 0; i<TT.sparselen && i<8; i++)
326          itoo(lnk+386+12*i, 12, TT.sparse[i]);
327
328        // Record if there's overflow records, change length to sparse length,
329        // record apparent length
330        if (TT.sparselen>8) lnk[482] = 1;
331        itoo(lnk+483, 12, st->st_size);
332        ITOO(hdr.size, len);
333      } else TT.sparselen = 0;
334      lseek(fd, 0, SEEK_SET);
335    }
336  }
337
338  itoo(hdr.chksum, sizeof(hdr.chksum)-1, tar_cksum(&hdr));
339  hdr.chksum[7] = ' ';
340
341  if (FLAG(v)) dprintf(TT.fd ? 2 : 1, "%s\n", hname);
342
343  // Write header and data to archive
344  xwrite(TT.fd, &hdr, 512);
345  if (TT.sparselen>8) {
346    char buf[512];
347
348    // write extent overflow blocks
349    for (i=8;;i++) {
350      int j = (i-8)%42;
351
352      if (!j || i==TT.sparselen) {
353        if (i!=8) {
354          if (i!=TT.sparselen) buf[504] = 1;
355          xwrite(TT.fd, buf, 512);
356        }
357        if (i==TT.sparselen) break;
358        memset(buf, 0, sizeof(buf));
359      }
360      itoo(buf+12*j, 12, TT.sparse[i]);
361    }
362  }
363  TT.sparselen >>= 1;
364  if (hdr.type == '0' || hdr.type == 'S') {
365    if (hdr.type == '0') xsendfile_pad(fd, TT.fd, st->st_size);
366    else for (i = 0; i<TT.sparselen; i++) {
367      if (TT.sparse[i*2] != lseek(fd, TT.sparse[i*2], SEEK_SET))
368        perror_msg("%s: seek %lld", name, TT.sparse[i*2]);
369      xsendfile_pad(fd, TT.fd, TT.sparse[i*2+1]);
370    }
371    if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
372    close(fd);
373  }
374done:
375  free(name);
376
377  return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!FLAG(no_recursion);
378}
379
380static void wsettime(char *s, long long sec)
381{
382  struct timespec times[2] = {{sec, 0},{sec, 0}};
383
384  if (utimensat(AT_FDCWD, s, times, AT_SYMLINK_NOFOLLOW))
385    perror_msg("settime %lld %s", sec, s);
386}
387
388// Do pending directory utimes(), NULL to flush all.
389static int dirflush(char *name)
390{
391  char *s = 0, *ss;
392
393  // Barf if name not in TT.cwd
394  if (name) {
395    ss = s = xabspath(name, -1);
396    if (TT.cwd[1] && (!strstart(&ss, TT.cwd) || *ss!='/')) {
397      error_msg("'%s' not under '%s'", name, TT.cwd);
398      free(s);
399
400      return 1;
401    }
402
403    if (FLAG(restrict)) {
404      free(TT.cwd);
405      TT.cwd = strdup(s);
406      toys.optflags ^= FLAG_restrict;
407    }
408  }
409
410  // Set deferred utimes() for directories this file isn't under.
411  // (Files must be depth-first ordered in tarball for this to matter.)
412  while (TT.dirs) {
413
414    // If next file is under (or equal to) this dir, keep waiting
415    if (name && strstart(&ss, ss = s) && (!*ss || *ss=='/')) break;
416
417    wsettime(TT.dirs->str+sizeof(long long), *(long long *)TT.dirs->str);
418    free(llist_pop(&TT.dirs));
419  }
420  free(s);
421
422  // name was under TT.cwd
423  return 0;
424}
425
426// write data to file
427static void sendfile_sparse(int fd)
428{
429  long long len, used = 0, sent;
430  int i = 0, j;
431
432  do {
433    if (TT.sparselen) {
434      // Seek past holes or fill output with zeroes.
435      if (-1 == lseek(fd, len = TT.sparse[i*2], SEEK_SET)) {
436        sent = 0;
437        while (len) {
438          // first/last 512 bytes used, rest left zeroes
439          j = (len>3072) ? 3072 : len;
440          if (j != writeall(fd, toybuf+512, j)) goto error;
441          len -= j;
442        }
443      } else {
444        sent = len;
445        if (!(len = TT.sparse[i*2+1]) && ftruncate(fd, sent+len))
446          perror_msg("ftruncate");
447      }
448      if (len+used>TT.hdr.size) error_exit("sparse overflow");
449    } else len = TT.hdr.size;
450
451    len -= sendfile_len(TT.fd, fd, len, &sent);
452    used += sent;
453    if (len) {
454error:
455      if (fd!=1) perror_msg(0);
456      skippy(TT.hdr.size-used);
457
458      break;
459    }
460  } while (++i<TT.sparselen);
461
462  close(fd);
463}
464
465static void extract_to_disk(void)
466{
467  char *name = TT.hdr.name;
468  int ala = TT.hdr.mode;
469
470  if (dirflush(name)) {
471    if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size);
472
473    return;
474  }
475
476  // create path before file if necessary
477  if (strrchr(name, '/') && mkpath(name) && errno !=EEXIST)
478      return perror_msg(":%s: can't mkdir", name);
479
480  // remove old file, if exists
481  if (!FLAG(k) && !S_ISDIR(ala) && unlink(name) && errno!=ENOENT)
482    return perror_msg("can't remove: %s", name);
483
484  if (S_ISREG(ala)) {
485    // hardlink?
486    if (TT.hdr.link_target) {
487      if (link(TT.hdr.link_target, name))
488        return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
489    // write contents
490    } else {
491      int fd = xcreate(name, O_WRONLY|O_CREAT|(FLAG(overwrite)?O_TRUNC:O_EXCL),
492        WARN_ONLY|(ala & 07777));
493      if (fd != -1) sendfile_sparse(fd);
494      else skippy(TT.hdr.size);
495    }
496  } else if (S_ISDIR(ala)) {
497    if ((mkdir(name, 0700) == -1) && errno != EEXIST)
498      return perror_msg("%s: can't create", TT.hdr.name);
499  } else if (S_ISLNK(ala)) {
500    if (symlink(TT.hdr.link_target, TT.hdr.name))
501      return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
502  } else if (mknod(name, ala, TT.hdr.device))
503    return perror_msg("can't create '%s'", name);
504
505  // Set ownership
506  if (!FLAG(o) && !geteuid()) {
507    int u = TT.hdr.uid, g = TT.hdr.gid;
508
509    if (TT.owner) TT.hdr.uid = TT.ouid;
510    else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
511      struct passwd *pw = getpwnam(TT.hdr.uname);
512      if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
513    }
514
515    if (TT.group) TT.hdr.gid = TT.ggid;
516    else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
517      struct group *gr = getgrnam(TT.hdr.gname);
518      if (gr) TT.hdr.gid = gr->gr_gid;
519    }
520
521    if (lchown(name, u, g)) perror_msg("chown %d:%d '%s'", u, g, name);;
522  }
523
524  if (!S_ISLNK(ala)) chmod(TT.hdr.name, FLAG(p) ? ala : ala&0777);
525
526  // Apply mtime.
527  if (!FLAG(m)) {
528    if (S_ISDIR(ala)) {
529      struct string_list *sl;
530
531      // Writing files into a directory changes directory timestamps, so
532      // defer mtime updates until contents written.
533
534      sl = xmalloc(sizeof(struct string_list)+sizeof(long long)+strlen(name)+1);
535      *(long long *)sl->str = TT.hdr.mtime;
536      strcpy(sl->str+sizeof(long long), name);
537      sl->next = TT.dirs;
538      TT.dirs = sl;
539    } else wsettime(TT.hdr.name, TT.hdr.mtime);
540  }
541}
542
543static void unpack_tar(char *first)
544{
545  struct double_list *walk, *delete;
546  struct tar_hdr tar;
547  int i, and = 0;
548  unsigned maj, min;
549  char *s;
550
551  for (;;) {
552    if (first) {
553      memcpy(&tar, first, i = 512);
554      first = 0;
555    } else {
556      // align to next block and read it
557      if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
558      i = readall(TT.fd, &tar, 512);
559    }
560
561    if (i && i!=512) error_exit("short header");
562
563    // Two consecutive empty headers ends tar even if there's more data
564    if (!i || !*tar.name) {
565      if (!i || and++) return;
566      TT.hdr.size = 0;
567      continue;
568    }
569    // ensure null temination even of pathological packets
570    tar.padd[0] = and = 0;
571
572    // Is this a valid TAR header?
573    if (!is_tar_header(&tar)) error_exit("bad header");
574    TT.hdr.size = OTOI(tar.size);
575
576    // If this header isn't writing something to the filesystem
577    if ((tar.type<'0' || tar.type>'7') && tar.type!='S'
578        && (*tar.magic && tar.type))
579    {
580      // Long name extension header?
581      if (tar.type == 'K') alloread(&TT.hdr.link_target, TT.hdr.size);
582      else if (tar.type == 'L') alloread(&TT.hdr.name, TT.hdr.size);
583      else if (tar.type == 'x') {
584        char *p, *buf = 0;
585        int i, len, n;
586
587        // Posix extended record "LEN NAME=VALUE\n" format
588        alloread(&buf, TT.hdr.size);
589        for (p = buf; (p-buf)<TT.hdr.size; p += len) {
590          i = sscanf(p, "%u path=%n", &len, &n);
591          if (i<1 || len<4 || len>TT.hdr.size) {
592            error_msg("bad header");
593            break;
594          }
595          p[len-1] = 0;
596          if (i == 2) {
597            TT.hdr.name = xstrdup(p+n);
598            break;
599          }
600        }
601        free(buf);
602
603      // Ignore everything else.
604      } else skippy(TT.hdr.size);
605
606      continue;
607    }
608
609    // Handle sparse file type
610    if (tar.type == 'S') {
611      char sparse[512];
612      int max = 8;
613
614      // Load 4 pairs of offset/len from S block, plus 21 pairs from each
615      // continuation block, list says where to seek/write sparse file contents
616      TT.sparselen = 0;
617      s = 386+(char *)&tar;
618      *sparse = i = 0;
619
620      for (;;) {
621        if (!(TT.sparselen&511))
622          TT.sparse = xrealloc(TT.sparse, (TT.sparselen+512)*sizeof(long long));
623
624        // If out of data in block check continue flag, stop or load next block
625        if (++i>max || !*s) {
626          if (!(*sparse ? sparse[504] : ((char *)&tar)[482])) break;
627          xreadall(TT.fd, s = sparse, 512);
628          max = 41;
629          i = 0;
630        }
631        // Load next entry
632        TT.sparse[TT.sparselen++] = otoi(s, 12);
633        s += 12;
634      }
635
636      // Odd number of entries (from corrupted tar) would be dropped here
637      TT.sparselen /= 2;
638      if (TT.sparselen)
639        TT.hdr.ssize = TT.sparse[2*TT.sparselen-1]+TT.sparse[2*TT.sparselen-2];
640    } else {
641      TT.sparselen = 0;
642      TT.hdr.ssize = TT.hdr.size;
643    }
644
645    // At this point, we have something to output. Convert metadata.
646    TT.hdr.mode = OTOI(tar.mode)&0xfff;
647    if (tar.type == 'S' || !tar.type) TT.hdr.mode |= 0x8000;
648    else TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
649    TT.hdr.uid = OTOI(tar.uid);
650    TT.hdr.gid = OTOI(tar.gid);
651    TT.hdr.mtime = OTOI(tar.mtime);
652    maj = OTOI(tar.major);
653    min = OTOI(tar.minor);
654    TT.hdr.device = dev_makedev(maj, min);
655    TT.hdr.uname = xstrndup(TT.owner ? TT.owner : tar.uname, sizeof(tar.uname));
656    TT.hdr.gname = xstrndup(TT.group ? TT.group : tar.gname, sizeof(tar.gname));
657
658    if (TT.owner) TT.hdr.uid = TT.ouid;
659    else if (!FLAG(numeric_owner)) {
660      struct passwd *pw = getpwnam(TT.hdr.uname);
661      if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
662    }
663
664    if (TT.group) TT.hdr.gid = TT.ggid;
665    else if (!FLAG(numeric_owner)) {
666      struct group *gr = getgrnam(TT.hdr.gname);
667      if (gr) TT.hdr.gid = gr->gr_gid;
668    }
669
670    if (!TT.hdr.link_target && *tar.link)
671      TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
672    if (!TT.hdr.name) {
673      // Glue prefix and name fields together with / if necessary
674      i = (tar.type=='S') ? 0 : strnlen(tar.prefix, sizeof(tar.prefix));
675      TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
676        (i && tar.prefix[i-1] != '/') ? "/" : "",
677        (int)sizeof(tar.name), tar.name);
678    }
679
680    // Old broken tar recorded dir as "file with trailing slash"
681    if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
682      *s = 0;
683      TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
684    }
685
686    // Non-regular files don't have contents stored in archive.
687    if ((TT.hdr.link_target && *TT.hdr.link_target)
688      || (tar.type && !S_ISREG(TT.hdr.mode)))
689        TT.hdr.size = 0;
690
691    // Files are seen even if excluded, so check them here.
692    // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
693
694    if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
695      if (!TT.seen) TT.seen = delete;
696
697      // Move seen entry to end of list.
698      if (TT.incl == delete) TT.incl = TT.incl->next;
699      else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
700        if (walk == delete) {
701          dlist_pop(&walk);
702          dlist_add_nomalloc(&TT.incl, delete);
703        }
704      }
705    }
706
707    // Skip excluded files
708    if (filter(TT.excl, TT.hdr.name) || (TT.incl && !delete))
709      skippy(TT.hdr.size);
710    else if (FLAG(t)) {
711      if (FLAG(v)) {
712        struct tm *lc = localtime(TT.mtime ? &TT.mtt : &TT.hdr.mtime);
713        char perm[12], gname[12];
714
715        mode_to_string(TT.hdr.mode, perm);
716        printf("%s", perm);
717        sprintf(perm, "%u", TT.hdr.uid);
718        sprintf(gname, "%u", TT.hdr.gid);
719        printf(" %s/%s ", *TT.hdr.uname ? TT.hdr.uname : perm,
720          *TT.hdr.gname ? TT.hdr.gname : gname);
721        if (tar.type=='3' || tar.type=='4') printf("%u,%u", maj, min);
722        else printf("%9lld", TT.hdr.ssize);
723        sprintf(perm, ":%02d", lc->tm_sec);
724        printf("  %d-%02d-%02d %02d:%02d%s ", 1900+lc->tm_year, 1+lc->tm_mon,
725          lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : "");
726      }
727      printf("%s", TT.hdr.name);
728      if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
729      xputc('\n');
730      skippy(TT.hdr.size);
731    } else {
732      if (FLAG(v)) printf("%s\n", TT.hdr.name);
733      if (FLAG(O)) sendfile_sparse(1);
734      else if (FLAG(to_command)) {
735        if (S_ISREG(TT.hdr.mode)) {
736          int fd, pid;
737
738          xsetenv("TAR_FILETYPE", "f");
739          xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0);
740          xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0);
741          xsetenv("TAR_FILENAME", TT.hdr.name);
742          xsetenv("TAR_UNAME", TT.hdr.uname);
743          xsetenv("TAR_GNAME", TT.hdr.gname);
744          xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0);
745          xsetenv(xmprintf("TAR_UID=%o", TT.hdr.uid), 0);
746          xsetenv(xmprintf("TAR_GID=%o", TT.hdr.gid), 0);
747
748          pid = xpopen((char *[]){"sh", "-c", TT.to_command, NULL}, &fd, 0);
749          // todo: short write exits tar here, other skips data.
750          sendfile_sparse(fd);
751          fd = xpclose_both(pid, 0);
752          if (fd) error_msg("%d: Child returned %d", pid, fd);
753        }
754      } else extract_to_disk();
755    }
756
757    free(TT.hdr.name);
758    free(TT.hdr.link_target);
759    free(TT.hdr.uname);
760    free(TT.hdr.gname);
761    TT.hdr.name = TT.hdr.link_target = 0;
762  }
763}
764
// Append a private copy of pline to the dlist at list, cut off before one
// trailing newline and any slashes that end the name.
static void trim2list(void *list, char *pline)
{
  char *copy = xstrdup(pline), *end = copy+strlen(copy);

  dlist_add(list, copy);

  if (end != copy && end[-1] == '\n') end--;
  while (end != copy && end[-1] == '/') end--;
  *end = 0;
}
776
777// do_lines callback, selects TT.incl or TT.excl based on call order
778static void do_XT(char **pline, long len)
779{
780  if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline);
781}
782
783void tar_main(void)
784{
785  char *s, **args = toys.optargs,
786    *archiver = FLAG(z) ? "gzip" : (FLAG(J) ? "xz" : "bzip2");
787  int len = 0;
788
789  // Needed when extracting to command
790  signal(SIGPIPE, SIG_IGN);
791
792  // Get possible early errors out of the way
793  if (!geteuid()) toys.optflags |= FLAG_p;
794  if (TT.owner) TT.ouid = xgetuid(TT.owner);
795  if (TT.group) TT.ggid = xgetgid(TT.group);
796  if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
797
798  // Collect file list.
799  for (; TT.exclude; TT.exclude = TT.exclude->next)
800    trim2list(&TT.excl, TT.exclude->arg);
801  for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', do_XT);
802  for (args = toys.optargs; *args; args++) trim2list(&TT.incl, *args);
803  for (;TT.T; TT.T = TT.T->next) do_lines(xopenro(TT.T->arg), '\n', do_XT);
804
805  // If include file list empty, don't create empty archive
806  if (FLAG(c)) {
807    if (!TT.incl) error_exit("empty archive");
808    TT.fd = 1;
809  }
810
811  // nommu reentry for nonseekable input skips this, parent did it for us
812  if (toys.stacktop) {
813    if (TT.f && strcmp(TT.f, "-"))
814      TT.fd = xcreate(TT.f, TT.fd*(O_WRONLY|O_CREAT|O_TRUNC), 0666);
815    // Get destination directory
816    if (TT.C) xchdir(TT.C);
817  }
818
819  // Get destination directory
820  TT.cwd = xabspath(s = xgetcwd(), 1);
821  free(s);
822
823  // Remember archive inode so we don't overwrite it or add it to itself
824  {
825    struct stat st;
826
827    if (!fstat(TT.fd, &st)) {
828      TT.aino = st.st_ino;
829      TT.adev = st.st_dev;
830    }
831  }
832
833  // Are we reading?
834  if (FLAG(x)||FLAG(t)) {
835    char *hdr = 0;
836
837    // autodetect compression type when not specified
838    if (!(FLAG(j)||FLAG(z)||FLAG(J))) {
839      len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512);
840      if (len!=512 || !is_tar_header(hdr)) {
841        // detect gzip and bzip signatures
842        if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z;
843        else if (!memcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j;
844        else if (peek_be(hdr, 7) == 0xfd377a585a0000) toys.optflags |= FLAG_J;
845        else error_exit("Not tar");
846
847        // if we can seek back we don't need to loop and copy data
848        if (!lseek(TT.fd, -len, SEEK_CUR)) hdr = 0;
849      }
850    }
851
852    if (FLAG(j)||FLAG(z)||FLAG(J)) {
853      int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid;
854      struct string_list *zcat = find_in_path(getenv("PATH"),
855        FLAG(j) ? "bzcat" : FLAG(J) ? "xzcat" : "zcat");
856
857      // Toybox provides more decompressors than compressors, so try them first
858      xpopen_both(zcat ? (char *[]){zcat->str, 0} :
859        (char *[]){archiver, "-dc", 0}, pipefd);
860      if (CFG_TOYBOX_FREE) llist_traverse(zcat, free);
861
862      if (!hdr) {
863        // If we could seek, child gzip inherited fd and we read its output
864        close(TT.fd);
865        TT.fd = pipefd[1];
866
867      } else {
868
869        // If we autodetected type but then couldn't lseek to put the data back
870        // we have to loop reading data from TT.fd and pass it to gzip ourselves
871        // (starting with the block of data we read to autodetect).
872
873        // dirty trick: move gzip input pipe to stdin so child closes spare copy
874        dup2(pipefd[0], 0);
875        if (pipefd[0]) close(pipefd[0]);
876
877        // Fork a copy of ourselves to handle extraction (reads from zip output
878        // pipe, writes to stdout).
879        pipefd[0] = pipefd[1];
880        pipefd[1] = 1;
881        pid = xpopen_both(0, pipefd);
882        close(pipefd[1]);
883
884        // loop writing collated data to zip proc
885        xwrite(0, hdr, len);
886        for (;;) {
887          if ((i = read(TT.fd, toybuf, sizeof(toybuf)))<1) {
888            close(0);
889            xwaitpid(pid);
890            return;
891          }
892          xwrite(0, toybuf, i);
893        }
894      }
895    }
896
897    unpack_tar(hdr);
898    dirflush(0);
899
900    // Each time a TT.incl entry is seen it's moved to the end of the list,
901    // with TT.seen pointing to first seen list entry. Anything between
902    // TT.incl and TT.seen wasn't encountered in archive..
903    if (TT.seen != TT.incl) {
904      if (!TT.seen) TT.seen = TT.incl;
905      while (TT.incl != TT.seen) {
906        error_msg("'%s' not in archive", TT.incl->data);
907        TT.incl = TT.incl->next;
908      }
909    }
910
911  // are we writing? (Don't have to test flag here, one of 3 must be set)
912  } else {
913    struct double_list *dl = TT.incl;
914
915    // autodetect compression type based on -f name. (Use > to avoid.)
916    if (TT.f && !FLAG(j) && !FLAG(z)) {
917      char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"};
918      if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz"))
919        toys.optflags |= FLAG_z;
920      if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz"))
921        toys.optflags |= FLAG_J;
922      else for (len = 0; len<ARRAY_LEN(tbz); len++)
923        if (strend(TT.f, tbz[len])) toys.optflags |= FLAG_j;
924    }
925
926    if (FLAG(j)||FLAG(z)||FLAG(J)) {
927      int pipefd[2] = {-1, TT.fd};
928
929      xpopen_both((char *[]){archiver, "-f", 0}, pipefd);
930      close(TT.fd);
931      TT.fd = pipefd[0];
932    }
933    do {
934      TT.warn = 1;
935      dirtree_flagread(dl->data, FLAG(h)?DIRTREE_SYMFOLLOW:0, add_to_tar);
936    } while (TT.incl != (dl = dl->next));
937
938    writeall(TT.fd, toybuf, 1024);
939  }
940
941  if (CFG_TOYBOX_FREE) {
942    llist_traverse(TT.excl, llist_free_double);
943    llist_traverse(TT.incl, llist_free_double);
944    while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
945    free(TT.hlx);
946    free(TT.cwd);
947    close(TT.fd);
948  }
949}
950