xref: /third_party/curl/lib/file.c (revision 13498266)
1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25#include "curl_setup.h"
26
27#ifndef CURL_DISABLE_FILE
28
29#ifdef HAVE_NETINET_IN_H
30#include <netinet/in.h>
31#endif
32#ifdef HAVE_NETDB_H
33#include <netdb.h>
34#endif
35#ifdef HAVE_ARPA_INET_H
36#include <arpa/inet.h>
37#endif
38#ifdef HAVE_NET_IF_H
39#include <net/if.h>
40#endif
41#ifdef HAVE_SYS_IOCTL_H
42#include <sys/ioctl.h>
43#endif
44
45#ifdef HAVE_SYS_PARAM_H
46#include <sys/param.h>
47#endif
48
49#ifdef HAVE_FCNTL_H
50#include <fcntl.h>
51#endif
52
53#include "strtoofft.h"
54#include "urldata.h"
55#include <curl/curl.h>
56#include "progress.h"
57#include "sendf.h"
58#include "escape.h"
59#include "file.h"
60#include "speedcheck.h"
61#include "getinfo.h"
62#include "transfer.h"
63#include "url.h"
64#include "parsedate.h" /* for the week day and month names */
65#include "warnless.h"
66#include "curl_range.h"
67/* The last 3 #include files should be in this order */
68#include "curl_printf.h"
69#include "curl_memory.h"
70#include "memdebug.h"
71
/*
 * Pick the path conventions of the local file system. At most one of
 * DOS_FILESYSTEM and AMIGA_FILESYSTEM ends up defined; neither is defined
 * on platforms that match none of the tests below.
 */
#if defined(__amigaos4__) && \
  !(defined(_WIN32) || defined(MSDOS) || defined(__EMX__))
#define AMIGA_FILESYSTEM 1
#elif defined(_WIN32) || defined(MSDOS) || defined(__EMX__)
#define DOS_FILESYSTEM 1
#endif

/*
 * open_readonly(path, flags) wraps open(). When OPEN_NEEDS_ARG3 is defined
 * a third argument of zero is passed along as well.
 */
#ifndef OPEN_NEEDS_ARG3
#  define open_readonly(p,f) open((p),(f))
#else
#  define open_readonly(p,f) open((p),(f),(0))
#endif
83
84/*
85 * Forward declarations.
86 */
87
88static CURLcode file_do(struct Curl_easy *data, bool *done);
89static CURLcode file_done(struct Curl_easy *data,
90                          CURLcode status, bool premature);
91static CURLcode file_connect(struct Curl_easy *data, bool *done);
92static CURLcode file_disconnect(struct Curl_easy *data,
93                                struct connectdata *conn,
94                                bool dead_connection);
95static CURLcode file_setup_connection(struct Curl_easy *data,
96                                      struct connectdata *conn);
97
98/*
99 * FILE scheme handler.
100 */
101
102const struct Curl_handler Curl_handler_file = {
103  "FILE",                               /* scheme */
104  file_setup_connection,                /* setup_connection */
105  file_do,                              /* do_it */
106  file_done,                            /* done */
107  ZERO_NULL,                            /* do_more */
108  file_connect,                         /* connect_it */
109  ZERO_NULL,                            /* connecting */
110  ZERO_NULL,                            /* doing */
111  ZERO_NULL,                            /* proto_getsock */
112  ZERO_NULL,                            /* doing_getsock */
113  ZERO_NULL,                            /* domore_getsock */
114  ZERO_NULL,                            /* perform_getsock */
115  file_disconnect,                      /* disconnect */
116  ZERO_NULL,                            /* write_resp */
117  ZERO_NULL,                            /* connection_check */
118  ZERO_NULL,                            /* attach connection */
119  0,                                    /* defport */
120  CURLPROTO_FILE,                       /* protocol */
121  CURLPROTO_FILE,                       /* family */
122  PROTOPT_NONETWORK | PROTOPT_NOURLQUERY /* flags */
123};
124
125
126static CURLcode file_setup_connection(struct Curl_easy *data,
127                                      struct connectdata *conn)
128{
129  (void)conn;
130  /* allocate the FILE specific struct */
131  data->req.p.file = calloc(1, sizeof(struct FILEPROTO));
132  if(!data->req.p.file)
133    return CURLE_OUT_OF_MEMORY;
134
135  return CURLE_OK;
136}
137
138/*
139 * file_connect() gets called from Curl_protocol_connect() to allow us to
140 * do protocol-specific actions at connect-time.  We emulate a
141 * connect-then-transfer protocol and "connect" to the file here
142 */
143static CURLcode file_connect(struct Curl_easy *data, bool *done)
144{
145  char *real_path;
146  struct FILEPROTO *file = data->req.p.file;
147  int fd;
148#ifdef DOS_FILESYSTEM
149  size_t i;
150  char *actual_path;
151#endif
152  size_t real_path_len;
153  CURLcode result;
154
155  if(file->path) {
156    /* already connected.
157     * the handler->connect_it() is normally only called once, but
158     * FILE does a special check on setting up the connection which
159     * calls this explicitly. */
160    *done = TRUE;
161    return CURLE_OK;
162  }
163
164  result = Curl_urldecode(data->state.up.path, 0, &real_path,
165                          &real_path_len, REJECT_ZERO);
166  if(result)
167    return result;
168
169#ifdef DOS_FILESYSTEM
170  /* If the first character is a slash, and there's
171     something that looks like a drive at the beginning of
172     the path, skip the slash.  If we remove the initial
173     slash in all cases, paths without drive letters end up
174     relative to the current directory which isn't how
175     browsers work.
176
177     Some browsers accept | instead of : as the drive letter
178     separator, so we do too.
179
180     On other platforms, we need the slash to indicate an
181     absolute pathname.  On Windows, absolute paths start
182     with a drive letter.
183  */
184  actual_path = real_path;
185  if((actual_path[0] == '/') &&
186      actual_path[1] &&
187     (actual_path[2] == ':' || actual_path[2] == '|')) {
188    actual_path[2] = ':';
189    actual_path++;
190    real_path_len--;
191  }
192
193  /* change path separators from '/' to '\\' for DOS, Windows and OS/2 */
194  for(i = 0; i < real_path_len; ++i)
195    if(actual_path[i] == '/')
196      actual_path[i] = '\\';
197    else if(!actual_path[i]) { /* binary zero */
198      Curl_safefree(real_path);
199      return CURLE_URL_MALFORMAT;
200    }
201
202  fd = open_readonly(actual_path, O_RDONLY|O_BINARY);
203  file->path = actual_path;
204#else
205  if(memchr(real_path, 0, real_path_len)) {
206    /* binary zeroes indicate foul play */
207    Curl_safefree(real_path);
208    return CURLE_URL_MALFORMAT;
209  }
210
211  #ifdef AMIGA_FILESYSTEM
212  /*
213   * A leading slash in an AmigaDOS path denotes the parent
214   * directory, and hence we block this as it is relative.
215   * Absolute paths start with 'volumename:', so we check for
216   * this first. Failing that, we treat the path as a real unix
217   * path, but only if the application was compiled with -lunix.
218   */
219  fd = -1;
220  file->path = real_path;
221
222  if(real_path[0] == '/') {
223    extern int __unix_path_semantics;
224    if(strchr(real_path + 1, ':')) {
225      /* Amiga absolute path */
226      fd = open_readonly(real_path + 1, O_RDONLY);
227      file->path++;
228    }
229    else if(__unix_path_semantics) {
230      /* -lunix fallback */
231      fd = open_readonly(real_path, O_RDONLY);
232    }
233  }
234  #else
235  fd = open_readonly(real_path, O_RDONLY);
236  file->path = real_path;
237  #endif
238#endif
239  Curl_safefree(file->freepath);
240  file->freepath = real_path; /* free this when done */
241
242  file->fd = fd;
243  if(!data->state.upload && (fd == -1)) {
244    failf(data, "Couldn't open file %s", data->state.up.path);
245    file_done(data, CURLE_FILE_COULDNT_READ_FILE, FALSE);
246    return CURLE_FILE_COULDNT_READ_FILE;
247  }
248  *done = TRUE;
249
250  return CURLE_OK;
251}
252
253static CURLcode file_done(struct Curl_easy *data,
254                          CURLcode status, bool premature)
255{
256  struct FILEPROTO *file = data->req.p.file;
257  (void)status; /* not used */
258  (void)premature; /* not used */
259
260  if(file) {
261    Curl_safefree(file->freepath);
262    file->path = NULL;
263    if(file->fd != -1)
264      close(file->fd);
265    file->fd = -1;
266  }
267
268  return CURLE_OK;
269}
270
271static CURLcode file_disconnect(struct Curl_easy *data,
272                                struct connectdata *conn,
273                                bool dead_connection)
274{
275  (void)dead_connection; /* not used */
276  (void)conn;
277  return file_done(data, CURLE_OK, FALSE);
278}
279
/* DIRSEP is the directory separator character of the local file system */
#ifndef DOS_FILESYSTEM
#define DIRSEP '/'
#else
#define DIRSEP '\\'
#endif
285
286static CURLcode file_upload(struct Curl_easy *data)
287{
288  struct FILEPROTO *file = data->req.p.file;
289  const char *dir = strchr(file->path, DIRSEP);
290  int fd;
291  int mode;
292  CURLcode result = CURLE_OK;
293  char buffer[8*1024], *uphere_save;
294  curl_off_t bytecount = 0;
295  struct_stat file_stat;
296  const char *sendbuf;
297
298  /*
299   * Since FILE: doesn't do the full init, we need to provide some extra
300   * assignments here.
301   */
302
303  if(!dir)
304    return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
305
306  if(!dir[1])
307    return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
308
309#ifdef O_BINARY
310#define MODE_DEFAULT O_WRONLY|O_CREAT|O_BINARY
311#else
312#define MODE_DEFAULT O_WRONLY|O_CREAT
313#endif
314
315  if(data->state.resume_from)
316    mode = MODE_DEFAULT|O_APPEND;
317  else
318    mode = MODE_DEFAULT|O_TRUNC;
319
320  fd = open(file->path, mode, data->set.new_file_perms);
321  if(fd < 0) {
322    failf(data, "Can't open %s for writing", file->path);
323    return CURLE_WRITE_ERROR;
324  }
325
326  if(-1 != data->state.infilesize)
327    /* known size of data to "upload" */
328    Curl_pgrsSetUploadSize(data, data->state.infilesize);
329
330  /* treat the negative resume offset value as the case of "-" */
331  if(data->state.resume_from < 0) {
332    if(fstat(fd, &file_stat)) {
333      close(fd);
334      failf(data, "Can't get the size of %s", file->path);
335      return CURLE_WRITE_ERROR;
336    }
337    data->state.resume_from = (curl_off_t)file_stat.st_size;
338  }
339
340  /* Yikes! Curl_fillreadbuffer uses data->req.upload_fromhere to READ
341   * client data to! Please, someone fix... */
342  uphere_save = data->req.upload_fromhere;
343  while(!result) {
344    size_t nread;
345    ssize_t nwrite;
346    size_t readcount;
347    data->req.upload_fromhere = buffer;
348    result = Curl_fillreadbuffer(data, sizeof(buffer), &readcount);
349    if(result)
350      break;
351
352    if(!readcount)
353      break;
354
355    nread = readcount;
356
357    /* skip bytes before resume point */
358    if(data->state.resume_from) {
359      if((curl_off_t)nread <= data->state.resume_from) {
360        data->state.resume_from -= nread;
361        nread = 0;
362        sendbuf = buffer;
363      }
364      else {
365        sendbuf = buffer + data->state.resume_from;
366        nread -= (size_t)data->state.resume_from;
367        data->state.resume_from = 0;
368      }
369    }
370    else
371      sendbuf = buffer;
372
373    /* write the data to the target */
374    nwrite = write(fd, sendbuf, nread);
375    if((size_t)nwrite != nread) {
376      result = CURLE_SEND_ERROR;
377      break;
378    }
379
380    bytecount += nread;
381
382    Curl_pgrsSetUploadCounter(data, bytecount);
383
384    if(Curl_pgrsUpdate(data))
385      result = CURLE_ABORTED_BY_CALLBACK;
386    else
387      result = Curl_speedcheck(data, Curl_now());
388  }
389  if(!result && Curl_pgrsUpdate(data))
390    result = CURLE_ABORTED_BY_CALLBACK;
391
392  close(fd);
393  data->req.upload_fromhere = uphere_save;
394
395  return result;
396}
397
398/*
399 * file_do() is the protocol-specific function for the do-phase, separated
400 * from the connect-phase above. Other protocols merely setup the transfer in
401 * the do-phase, to have it done in the main transfer loop but since some
402 * platforms we support don't allow select()ing etc on file handles (as
403 * opposed to sockets) we instead perform the whole do-operation in this
404 * function.
405 */
406static CURLcode file_do(struct Curl_easy *data, bool *done)
407{
408  /* This implementation ignores the host name in conformance with
409     RFC 1738. Only local files (reachable via the standard file system)
410     are supported. This means that files on remotely mounted directories
411     (via NFS, Samba, NT sharing) can be accessed through a file:// URL
412  */
413  CURLcode result = CURLE_OK;
414  struct_stat statbuf; /* struct_stat instead of struct stat just to allow the
415                          Windows version to have a different struct without
416                          having to redefine the simple word 'stat' */
417  curl_off_t expected_size = -1;
418  bool size_known;
419  bool fstated = FALSE;
420  int fd;
421  struct FILEPROTO *file;
422
423  *done = TRUE; /* unconditionally */
424
425  if(data->state.upload)
426    return file_upload(data);
427
428  file = data->req.p.file;
429
430  /* get the fd from the connection phase */
431  fd = file->fd;
432
433  /* VMS: This only works reliable for STREAMLF files */
434  if(-1 != fstat(fd, &statbuf)) {
435    if(!S_ISDIR(statbuf.st_mode))
436      expected_size = statbuf.st_size;
437    /* and store the modification time */
438    data->info.filetime = statbuf.st_mtime;
439    fstated = TRUE;
440  }
441
442  if(fstated && !data->state.range && data->set.timecondition) {
443    if(!Curl_meets_timecondition(data, data->info.filetime)) {
444      *done = TRUE;
445      return CURLE_OK;
446    }
447  }
448
449  if(fstated) {
450    time_t filetime;
451    struct tm buffer;
452    const struct tm *tm = &buffer;
453    char header[80];
454    int headerlen;
455    char accept_ranges[24]= { "Accept-ranges: bytes\r\n" };
456    if(expected_size >= 0) {
457      headerlen = msnprintf(header, sizeof(header),
458                "Content-Length: %" CURL_FORMAT_CURL_OFF_T "\r\n",
459                expected_size);
460      result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
461      if(result)
462        return result;
463
464      result = Curl_client_write(data, CLIENTWRITE_HEADER,
465                                 accept_ranges, strlen(accept_ranges));
466      if(result != CURLE_OK)
467        return result;
468    }
469
470    filetime = (time_t)statbuf.st_mtime;
471    result = Curl_gmtime(filetime, &buffer);
472    if(result)
473      return result;
474
475    /* format: "Tue, 15 Nov 1994 12:45:26 GMT" */
476    headerlen = msnprintf(header, sizeof(header),
477              "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\r\n%s",
478              Curl_wkday[tm->tm_wday?tm->tm_wday-1:6],
479              tm->tm_mday,
480              Curl_month[tm->tm_mon],
481              tm->tm_year + 1900,
482              tm->tm_hour,
483              tm->tm_min,
484              tm->tm_sec,
485              data->req.no_body ? "": "\r\n");
486    result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
487    if(result)
488      return result;
489    /* set the file size to make it available post transfer */
490    Curl_pgrsSetDownloadSize(data, expected_size);
491    if(data->req.no_body)
492      return result;
493  }
494
495  /* Check whether file range has been specified */
496  result = Curl_range(data);
497  if(result)
498    return result;
499
500  /* Adjust the start offset in case we want to get the N last bytes
501   * of the stream if the filesize could be determined */
502  if(data->state.resume_from < 0) {
503    if(!fstated) {
504      failf(data, "Can't get the size of file.");
505      return CURLE_READ_ERROR;
506    }
507    data->state.resume_from += (curl_off_t)statbuf.st_size;
508  }
509
510  if(data->state.resume_from > 0) {
511    /* We check explicitly if we have a start offset, because
512     * expected_size may be -1 if we don't know how large the file is,
513     * in which case we should not adjust it. */
514    if(data->state.resume_from <= expected_size)
515      expected_size -= data->state.resume_from;
516    else {
517      failf(data, "failed to resume file:// transfer");
518      return CURLE_BAD_DOWNLOAD_RESUME;
519    }
520  }
521
522  /* A high water mark has been specified so we obey... */
523  if(data->req.maxdownload > 0)
524    expected_size = data->req.maxdownload;
525
526  if(!fstated || (expected_size <= 0))
527    size_known = FALSE;
528  else
529    size_known = TRUE;
530
531  /* The following is a shortcut implementation of file reading
532     this is both more efficient than the former call to download() and
533     it avoids problems with select() and recv() on file descriptors
534     in Winsock */
535  if(size_known)
536    Curl_pgrsSetDownloadSize(data, expected_size);
537
538  if(data->state.resume_from) {
539    if(data->state.resume_from !=
540       lseek(fd, data->state.resume_from, SEEK_SET))
541      return CURLE_BAD_DOWNLOAD_RESUME;
542  }
543
544  Curl_pgrsTime(data, TIMER_STARTTRANSFER);
545
546  while(!result) {
547    char tmpbuf[8*1024];
548    ssize_t nread;
549    /* Don't fill a whole buffer if we want less than all data */
550    size_t bytestoread;
551
552    if(size_known) {
553      bytestoread = (expected_size < (curl_off_t)(sizeof(tmpbuf)-1)) ?
554        curlx_sotouz(expected_size) : (sizeof(tmpbuf)-1);
555    }
556    else
557      bytestoread = sizeof(tmpbuf)-1;
558
559    nread = read(fd, tmpbuf, bytestoread);
560
561    if(nread > 0)
562      tmpbuf[nread] = 0;
563
564    if(nread <= 0 || (size_known && (expected_size == 0)))
565      break;
566
567    if(size_known)
568      expected_size -= nread;
569
570    result = Curl_client_write(data, CLIENTWRITE_BODY, tmpbuf, nread);
571    if(result)
572      return result;
573
574    if(Curl_pgrsUpdate(data))
575      result = CURLE_ABORTED_BY_CALLBACK;
576    else
577      result = Curl_speedcheck(data, Curl_now());
578  }
579  if(Curl_pgrsUpdate(data))
580    result = CURLE_ABORTED_BY_CALLBACK;
581
582  return result;
583}
584
585#endif
586