1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #ifndef CURL_DISABLE_FILE
28 
29 #ifdef HAVE_NETINET_IN_H
30 #include <netinet/in.h>
31 #endif
32 #ifdef HAVE_NETDB_H
33 #include <netdb.h>
34 #endif
35 #ifdef HAVE_ARPA_INET_H
36 #include <arpa/inet.h>
37 #endif
38 #ifdef HAVE_NET_IF_H
39 #include <net/if.h>
40 #endif
41 #ifdef HAVE_SYS_IOCTL_H
42 #include <sys/ioctl.h>
43 #endif
44 
45 #ifdef HAVE_SYS_PARAM_H
46 #include <sys/param.h>
47 #endif
48 
49 #ifdef HAVE_FCNTL_H
50 #include <fcntl.h>
51 #endif
52 
53 #include "strtoofft.h"
54 #include "urldata.h"
55 #include <curl/curl.h>
56 #include "progress.h"
57 #include "sendf.h"
58 #include "escape.h"
59 #include "file.h"
60 #include "speedcheck.h"
61 #include "getinfo.h"
62 #include "transfer.h"
63 #include "url.h"
64 #include "parsedate.h" /* for the week day and month names */
65 #include "warnless.h"
66 #include "curl_range.h"
67 /* The last 3 #include files should be in this order */
68 #include "curl_printf.h"
69 #include "curl_memory.h"
70 #include "memdebug.h"
71 
/* Pick the path conventions used by file_connect() and file_upload():
   DOS-style paths on Windows, MS-DOS and OS/2 (EMX) builds, AmigaDOS-style
   paths on AmigaOS 4 builds, and plain paths everywhere else. */
#if defined(_WIN32) || defined(MSDOS) || defined(__EMX__)
#  define DOS_FILESYSTEM 1
#elif defined(__amigaos4__)
#  define AMIGA_FILESYSTEM 1
#endif

/* open_readonly(p, f) opens path 'p' with the flags 'f'. When
   OPEN_NEEDS_ARG3 is defined, a zero third argument is passed to open(). */
#ifndef OPEN_NEEDS_ARG3
#  define open_readonly(p,f) open((p),(f))
#else
#  define open_readonly(p,f) open((p),(f),(0))
#endif
83 
84 /*
85  * Forward declarations.
86  */
87 
88 static CURLcode file_do(struct Curl_easy *data, bool *done);
89 static CURLcode file_done(struct Curl_easy *data,
90                           CURLcode status, bool premature);
91 static CURLcode file_connect(struct Curl_easy *data, bool *done);
92 static CURLcode file_disconnect(struct Curl_easy *data,
93                                 struct connectdata *conn,
94                                 bool dead_connection);
95 static CURLcode file_setup_connection(struct Curl_easy *data,
96                                       struct connectdata *conn);
97 
98 /*
99  * FILE scheme handler.
100  */
101 
102 const struct Curl_handler Curl_handler_file = {
103   "FILE",                               /* scheme */
104   file_setup_connection,                /* setup_connection */
105   file_do,                              /* do_it */
106   file_done,                            /* done */
107   ZERO_NULL,                            /* do_more */
108   file_connect,                         /* connect_it */
109   ZERO_NULL,                            /* connecting */
110   ZERO_NULL,                            /* doing */
111   ZERO_NULL,                            /* proto_getsock */
112   ZERO_NULL,                            /* doing_getsock */
113   ZERO_NULL,                            /* domore_getsock */
114   ZERO_NULL,                            /* perform_getsock */
115   file_disconnect,                      /* disconnect */
116   ZERO_NULL,                            /* write_resp */
117   ZERO_NULL,                            /* connection_check */
118   ZERO_NULL,                            /* attach connection */
119   0,                                    /* defport */
120   CURLPROTO_FILE,                       /* protocol */
121   CURLPROTO_FILE,                       /* family */
122   PROTOPT_NONETWORK | PROTOPT_NOURLQUERY /* flags */
123 };
124 
125 
file_setup_connection(struct Curl_easy *data, struct connectdata *conn)126 static CURLcode file_setup_connection(struct Curl_easy *data,
127                                       struct connectdata *conn)
128 {
129   (void)conn;
130   /* allocate the FILE specific struct */
131   data->req.p.file = calloc(1, sizeof(struct FILEPROTO));
132   if(!data->req.p.file)
133     return CURLE_OUT_OF_MEMORY;
134 
135   return CURLE_OK;
136 }
137 
138 /*
139  * file_connect() gets called from Curl_protocol_connect() to allow us to
140  * do protocol-specific actions at connect-time.  We emulate a
141  * connect-then-transfer protocol and "connect" to the file here
142  */
file_connect(struct Curl_easy *data, bool *done)143 static CURLcode file_connect(struct Curl_easy *data, bool *done)
144 {
145   char *real_path;
146   struct FILEPROTO *file = data->req.p.file;
147   int fd;
148 #ifdef DOS_FILESYSTEM
149   size_t i;
150   char *actual_path;
151 #endif
152   size_t real_path_len;
153   CURLcode result;
154 
155   if(file->path) {
156     /* already connected.
157      * the handler->connect_it() is normally only called once, but
158      * FILE does a special check on setting up the connection which
159      * calls this explicitly. */
160     *done = TRUE;
161     return CURLE_OK;
162   }
163 
164   result = Curl_urldecode(data->state.up.path, 0, &real_path,
165                           &real_path_len, REJECT_ZERO);
166   if(result)
167     return result;
168 
169 #ifdef DOS_FILESYSTEM
170   /* If the first character is a slash, and there's
171      something that looks like a drive at the beginning of
172      the path, skip the slash.  If we remove the initial
173      slash in all cases, paths without drive letters end up
174      relative to the current directory which isn't how
175      browsers work.
176 
177      Some browsers accept | instead of : as the drive letter
178      separator, so we do too.
179 
180      On other platforms, we need the slash to indicate an
181      absolute pathname.  On Windows, absolute paths start
182      with a drive letter.
183   */
184   actual_path = real_path;
185   if((actual_path[0] == '/') &&
186       actual_path[1] &&
187      (actual_path[2] == ':' || actual_path[2] == '|')) {
188     actual_path[2] = ':';
189     actual_path++;
190     real_path_len--;
191   }
192 
193   /* change path separators from '/' to '\\' for DOS, Windows and OS/2 */
194   for(i = 0; i < real_path_len; ++i)
195     if(actual_path[i] == '/')
196       actual_path[i] = '\\';
197     else if(!actual_path[i]) { /* binary zero */
198       Curl_safefree(real_path);
199       return CURLE_URL_MALFORMAT;
200     }
201 
202   fd = open_readonly(actual_path, O_RDONLY|O_BINARY);
203   file->path = actual_path;
204 #else
205   if(memchr(real_path, 0, real_path_len)) {
206     /* binary zeroes indicate foul play */
207     Curl_safefree(real_path);
208     return CURLE_URL_MALFORMAT;
209   }
210 
211   #ifdef AMIGA_FILESYSTEM
212   /*
213    * A leading slash in an AmigaDOS path denotes the parent
214    * directory, and hence we block this as it is relative.
215    * Absolute paths start with 'volumename:', so we check for
216    * this first. Failing that, we treat the path as a real unix
217    * path, but only if the application was compiled with -lunix.
218    */
219   fd = -1;
220   file->path = real_path;
221 
222   if(real_path[0] == '/') {
223     extern int __unix_path_semantics;
224     if(strchr(real_path + 1, ':')) {
225       /* Amiga absolute path */
226       fd = open_readonly(real_path + 1, O_RDONLY);
227       file->path++;
228     }
229     else if(__unix_path_semantics) {
230       /* -lunix fallback */
231       fd = open_readonly(real_path, O_RDONLY);
232     }
233   }
234   #else
235   fd = open_readonly(real_path, O_RDONLY);
236   file->path = real_path;
237   #endif
238 #endif
239   Curl_safefree(file->freepath);
240   file->freepath = real_path; /* free this when done */
241 
242   file->fd = fd;
243   if(!data->state.upload && (fd == -1)) {
244     failf(data, "Couldn't open file %s", data->state.up.path);
245     file_done(data, CURLE_FILE_COULDNT_READ_FILE, FALSE);
246     return CURLE_FILE_COULDNT_READ_FILE;
247   }
248   *done = TRUE;
249 
250   return CURLE_OK;
251 }
252 
/*
 * file_done() releases the per-transfer FILE state: the path buffer is
 * freed, the path pointer is cleared and the descriptor is closed unless it
 * is already -1. A missing FILE struct is tolerated.
 *
 * 'status' and 'premature' are ignored. Always returns CURLE_OK.
 */
static CURLcode file_done(struct Curl_easy *data,
                          CURLcode status, bool premature)
{
  struct FILEPROTO *fp = data->req.p.file;
  (void)status; /* not used */
  (void)premature; /* not used */

  if(!fp)
    return CURLE_OK;

  Curl_safefree(fp->freepath);
  fp->path = NULL; /* pointed into the buffer that was just freed */

  if(fp->fd != -1) {
    close(fp->fd);
    fp->fd = -1;
  }

  return CURLE_OK;
}
270 
file_disconnect(struct Curl_easy *data, struct connectdata *conn, bool dead_connection)271 static CURLcode file_disconnect(struct Curl_easy *data,
272                                 struct connectdata *conn,
273                                 bool dead_connection)
274 {
275   (void)dead_connection; /* not used */
276   (void)conn;
277   return file_done(data, CURLE_OK, FALSE);
278 }
279 
280 #ifdef DOS_FILESYSTEM
281 #define DIRSEP '\\'
282 #else
283 #define DIRSEP '/'
284 #endif
285 
file_upload(struct Curl_easy *data)286 static CURLcode file_upload(struct Curl_easy *data)
287 {
288   struct FILEPROTO *file = data->req.p.file;
289   const char *dir = strchr(file->path, DIRSEP);
290   int fd;
291   int mode;
292   CURLcode result = CURLE_OK;
293   char buffer[8*1024], *uphere_save;
294   curl_off_t bytecount = 0;
295   struct_stat file_stat;
296   const char *sendbuf;
297 
298   /*
299    * Since FILE: doesn't do the full init, we need to provide some extra
300    * assignments here.
301    */
302 
303   if(!dir)
304     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
305 
306   if(!dir[1])
307     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
308 
309 #ifdef O_BINARY
310 #define MODE_DEFAULT O_WRONLY|O_CREAT|O_BINARY
311 #else
312 #define MODE_DEFAULT O_WRONLY|O_CREAT
313 #endif
314 
315   if(data->state.resume_from)
316     mode = MODE_DEFAULT|O_APPEND;
317   else
318     mode = MODE_DEFAULT|O_TRUNC;
319 
320   fd = open(file->path, mode, data->set.new_file_perms);
321   if(fd < 0) {
322     failf(data, "Can't open %s for writing", file->path);
323     return CURLE_WRITE_ERROR;
324   }
325 
326   if(-1 != data->state.infilesize)
327     /* known size of data to "upload" */
328     Curl_pgrsSetUploadSize(data, data->state.infilesize);
329 
330   /* treat the negative resume offset value as the case of "-" */
331   if(data->state.resume_from < 0) {
332     if(fstat(fd, &file_stat)) {
333       close(fd);
334       failf(data, "Can't get the size of %s", file->path);
335       return CURLE_WRITE_ERROR;
336     }
337     data->state.resume_from = (curl_off_t)file_stat.st_size;
338   }
339 
340   /* Yikes! Curl_fillreadbuffer uses data->req.upload_fromhere to READ
341    * client data to! Please, someone fix... */
342   uphere_save = data->req.upload_fromhere;
343   while(!result) {
344     size_t nread;
345     ssize_t nwrite;
346     size_t readcount;
347     data->req.upload_fromhere = buffer;
348     result = Curl_fillreadbuffer(data, sizeof(buffer), &readcount);
349     if(result)
350       break;
351 
352     if(!readcount)
353       break;
354 
355     nread = readcount;
356 
357     /* skip bytes before resume point */
358     if(data->state.resume_from) {
359       if((curl_off_t)nread <= data->state.resume_from) {
360         data->state.resume_from -= nread;
361         nread = 0;
362         sendbuf = buffer;
363       }
364       else {
365         sendbuf = buffer + data->state.resume_from;
366         nread -= (size_t)data->state.resume_from;
367         data->state.resume_from = 0;
368       }
369     }
370     else
371       sendbuf = buffer;
372 
373     /* write the data to the target */
374     nwrite = write(fd, sendbuf, nread);
375     if((size_t)nwrite != nread) {
376       result = CURLE_SEND_ERROR;
377       break;
378     }
379 
380     bytecount += nread;
381 
382     Curl_pgrsSetUploadCounter(data, bytecount);
383 
384     if(Curl_pgrsUpdate(data))
385       result = CURLE_ABORTED_BY_CALLBACK;
386     else
387       result = Curl_speedcheck(data, Curl_now());
388   }
389   if(!result && Curl_pgrsUpdate(data))
390     result = CURLE_ABORTED_BY_CALLBACK;
391 
392   close(fd);
393   data->req.upload_fromhere = uphere_save;
394 
395   return result;
396 }
397 
398 /*
399  * file_do() is the protocol-specific function for the do-phase, separated
400  * from the connect-phase above. Other protocols merely setup the transfer in
401  * the do-phase, to have it done in the main transfer loop but since some
402  * platforms we support don't allow select()ing etc on file handles (as
403  * opposed to sockets) we instead perform the whole do-operation in this
404  * function.
405  */
file_do(struct Curl_easy *data, bool *done)406 static CURLcode file_do(struct Curl_easy *data, bool *done)
407 {
408   /* This implementation ignores the host name in conformance with
409      RFC 1738. Only local files (reachable via the standard file system)
410      are supported. This means that files on remotely mounted directories
411      (via NFS, Samba, NT sharing) can be accessed through a file:// URL
412   */
413   CURLcode result = CURLE_OK;
414   struct_stat statbuf; /* struct_stat instead of struct stat just to allow the
415                           Windows version to have a different struct without
416                           having to redefine the simple word 'stat' */
417   curl_off_t expected_size = -1;
418   bool size_known;
419   bool fstated = FALSE;
420   int fd;
421   struct FILEPROTO *file;
422 
423   *done = TRUE; /* unconditionally */
424 
425   if(data->state.upload)
426     return file_upload(data);
427 
428   file = data->req.p.file;
429 
430   /* get the fd from the connection phase */
431   fd = file->fd;
432 
433   /* VMS: This only works reliable for STREAMLF files */
434   if(-1 != fstat(fd, &statbuf)) {
435     if(!S_ISDIR(statbuf.st_mode))
436       expected_size = statbuf.st_size;
437     /* and store the modification time */
438     data->info.filetime = statbuf.st_mtime;
439     fstated = TRUE;
440   }
441 
442   if(fstated && !data->state.range && data->set.timecondition) {
443     if(!Curl_meets_timecondition(data, data->info.filetime)) {
444       *done = TRUE;
445       return CURLE_OK;
446     }
447   }
448 
449   if(fstated) {
450     time_t filetime;
451     struct tm buffer;
452     const struct tm *tm = &buffer;
453     char header[80];
454     int headerlen;
455     char accept_ranges[24]= { "Accept-ranges: bytes\r\n" };
456     if(expected_size >= 0) {
457       headerlen = msnprintf(header, sizeof(header),
458                 "Content-Length: %" CURL_FORMAT_CURL_OFF_T "\r\n",
459                 expected_size);
460       result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
461       if(result)
462         return result;
463 
464       result = Curl_client_write(data, CLIENTWRITE_HEADER,
465                                  accept_ranges, strlen(accept_ranges));
466       if(result != CURLE_OK)
467         return result;
468     }
469 
470     filetime = (time_t)statbuf.st_mtime;
471     result = Curl_gmtime(filetime, &buffer);
472     if(result)
473       return result;
474 
475     /* format: "Tue, 15 Nov 1994 12:45:26 GMT" */
476     headerlen = msnprintf(header, sizeof(header),
477               "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\r\n%s",
478               Curl_wkday[tm->tm_wday?tm->tm_wday-1:6],
479               tm->tm_mday,
480               Curl_month[tm->tm_mon],
481               tm->tm_year + 1900,
482               tm->tm_hour,
483               tm->tm_min,
484               tm->tm_sec,
485               data->req.no_body ? "": "\r\n");
486     result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
487     if(result)
488       return result;
489     /* set the file size to make it available post transfer */
490     Curl_pgrsSetDownloadSize(data, expected_size);
491     if(data->req.no_body)
492       return result;
493   }
494 
495   /* Check whether file range has been specified */
496   result = Curl_range(data);
497   if(result)
498     return result;
499 
500   /* Adjust the start offset in case we want to get the N last bytes
501    * of the stream if the filesize could be determined */
502   if(data->state.resume_from < 0) {
503     if(!fstated) {
504       failf(data, "Can't get the size of file.");
505       return CURLE_READ_ERROR;
506     }
507     data->state.resume_from += (curl_off_t)statbuf.st_size;
508   }
509 
510   if(data->state.resume_from > 0) {
511     /* We check explicitly if we have a start offset, because
512      * expected_size may be -1 if we don't know how large the file is,
513      * in which case we should not adjust it. */
514     if(data->state.resume_from <= expected_size)
515       expected_size -= data->state.resume_from;
516     else {
517       failf(data, "failed to resume file:// transfer");
518       return CURLE_BAD_DOWNLOAD_RESUME;
519     }
520   }
521 
522   /* A high water mark has been specified so we obey... */
523   if(data->req.maxdownload > 0)
524     expected_size = data->req.maxdownload;
525 
526   if(!fstated || (expected_size <= 0))
527     size_known = FALSE;
528   else
529     size_known = TRUE;
530 
531   /* The following is a shortcut implementation of file reading
532      this is both more efficient than the former call to download() and
533      it avoids problems with select() and recv() on file descriptors
534      in Winsock */
535   if(size_known)
536     Curl_pgrsSetDownloadSize(data, expected_size);
537 
538   if(data->state.resume_from) {
539     if(data->state.resume_from !=
540        lseek(fd, data->state.resume_from, SEEK_SET))
541       return CURLE_BAD_DOWNLOAD_RESUME;
542   }
543 
544   Curl_pgrsTime(data, TIMER_STARTTRANSFER);
545 
546   while(!result) {
547     char tmpbuf[8*1024];
548     ssize_t nread;
549     /* Don't fill a whole buffer if we want less than all data */
550     size_t bytestoread;
551 
552     if(size_known) {
553       bytestoread = (expected_size < (curl_off_t)(sizeof(tmpbuf)-1)) ?
554         curlx_sotouz(expected_size) : (sizeof(tmpbuf)-1);
555     }
556     else
557       bytestoread = sizeof(tmpbuf)-1;
558 
559     nread = read(fd, tmpbuf, bytestoread);
560 
561     if(nread > 0)
562       tmpbuf[nread] = 0;
563 
564     if(nread <= 0 || (size_known && (expected_size == 0)))
565       break;
566 
567     if(size_known)
568       expected_size -= nread;
569 
570     result = Curl_client_write(data, CLIENTWRITE_BODY, tmpbuf, nread);
571     if(result)
572       return result;
573 
574     if(Curl_pgrsUpdate(data))
575       result = CURLE_ABORTED_BY_CALLBACK;
576     else
577       result = Curl_speedcheck(data, Curl_now());
578   }
579   if(Curl_pgrsUpdate(data))
580     result = CURLE_ABORTED_BY_CALLBACK;
581 
582   return result;
583 }
584 
585 #endif
586