xref: /third_party/curl/lib/http_chunks.c (revision 13498266)
1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25#include "curl_setup.h"
26
27#ifndef CURL_DISABLE_HTTP
28
29#include "urldata.h" /* it includes http_chunks.h */
30#include "sendf.h"   /* for the client write stuff */
31#include "dynbuf.h"
32#include "content_encoding.h"
33#include "http.h"
34#include "strtoofft.h"
35#include "warnless.h"
36
37/* The last #include files should be: */
38#include "curl_memory.h"
39#include "memdebug.h"
40
41/*
42 * Chunk format (simplified):
43 *
44 * <HEX SIZE>[ chunk extension ] CRLF
45 * <DATA> CRLF
46 *
47 * Highlights from RFC2616 section 3.6 say:
48
49   The chunked encoding modifies the body of a message in order to
50   transfer it as a series of chunks, each with its own size indicator,
51   followed by an OPTIONAL trailer containing entity-header fields. This
52   allows dynamically produced content to be transferred along with the
53   information necessary for the recipient to verify that it has
54   received the full message.
55
56       Chunked-Body   = *chunk
57                        last-chunk
58                        trailer
59                        CRLF
60
61       chunk          = chunk-size [ chunk-extension ] CRLF
62                        chunk-data CRLF
63       chunk-size     = 1*HEX
64       last-chunk     = 1*("0") [ chunk-extension ] CRLF
65
66       chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
67       chunk-ext-name = token
68       chunk-ext-val  = token | quoted-string
69       chunk-data     = chunk-size(OCTET)
70       trailer        = *(entity-header CRLF)
71
72   The chunk-size field is a string of hex digits indicating the size of
73   the chunk. The chunked encoding is ended by any chunk whose size is
74   zero, followed by the trailer, which is terminated by an empty line.
75
76 */
77
78void Curl_httpchunk_init(struct Curl_easy *data, struct Curl_chunker *ch,
79                         bool ignore_body)
80{
81  (void)data;
82  ch->hexindex = 0;      /* start at 0 */
83  ch->state = CHUNK_HEX; /* we get hex first! */
84  ch->last_code = CHUNKE_OK;
85  Curl_dyn_init(&ch->trailer, DYN_H1_TRAILER);
86  ch->ignore_body = ignore_body;
87}
88
89void Curl_httpchunk_reset(struct Curl_easy *data, struct Curl_chunker *ch,
90                          bool ignore_body)
91{
92  (void)data;
93  ch->hexindex = 0;      /* start at 0 */
94  ch->state = CHUNK_HEX; /* we get hex first! */
95  ch->last_code = CHUNKE_OK;
96  Curl_dyn_reset(&ch->trailer);
97  ch->ignore_body = ignore_body;
98}
99
100void Curl_httpchunk_free(struct Curl_easy *data, struct Curl_chunker *ch)
101{
102  (void)data;
103  Curl_dyn_free(&ch->trailer);
104}
105
106bool Curl_httpchunk_is_done(struct Curl_easy *data, struct Curl_chunker *ch)
107{
108  (void)data;
109  return ch->state == CHUNK_DONE;
110}
111
112static CURLcode httpchunk_readwrite(struct Curl_easy *data,
113                                    struct Curl_chunker *ch,
114                                    struct Curl_cwriter *cw_next,
115                                    const char *buf, size_t blen,
116                                    size_t *pconsumed)
117{
118  CURLcode result = CURLE_OK;
119  size_t piece;
120
121  *pconsumed = 0; /* nothing's written yet */
122  /* first check terminal states that will not progress anywhere */
123  if(ch->state == CHUNK_DONE)
124    return CURLE_OK;
125  if(ch->state == CHUNK_FAILED)
126    return CURLE_RECV_ERROR;
127
128  /* the original data is written to the client, but we go on with the
129     chunk read process, to properly calculate the content length */
130  if(data->set.http_te_skip && !ch->ignore_body) {
131    if(cw_next)
132      result = Curl_cwriter_write(data, cw_next, CLIENTWRITE_BODY, buf, blen);
133    else
134      result = Curl_client_write(data, CLIENTWRITE_BODY, (char *)buf, blen);
135    if(result) {
136      ch->state = CHUNK_FAILED;
137      ch->last_code = CHUNKE_PASSTHRU_ERROR;
138      return result;
139    }
140  }
141
142  while(blen) {
143    switch(ch->state) {
144    case CHUNK_HEX:
145      if(ISXDIGIT(*buf)) {
146        if(ch->hexindex >= CHUNK_MAXNUM_LEN) {
147          failf(data, "chunk hex-length longer than %d", CHUNK_MAXNUM_LEN);
148          ch->state = CHUNK_FAILED;
149          ch->last_code = CHUNKE_TOO_LONG_HEX; /* longer than we support */
150          return CURLE_RECV_ERROR;
151        }
152        ch->hexbuffer[ch->hexindex++] = *buf;
153        buf++;
154        blen--;
155      }
156      else {
157        char *endptr;
158        if(0 == ch->hexindex) {
159          /* This is illegal data, we received junk where we expected
160             a hexadecimal digit. */
161          failf(data, "chunk hex-length char not a hex digit: 0x%x", *buf);
162          ch->state = CHUNK_FAILED;
163          ch->last_code = CHUNKE_ILLEGAL_HEX;
164          return CURLE_RECV_ERROR;
165        }
166
167        /* blen and buf are unmodified */
168        ch->hexbuffer[ch->hexindex] = 0;
169        if(curlx_strtoofft(ch->hexbuffer, &endptr, 16, &ch->datasize)) {
170          failf(data, "chunk hex-length not valid: '%s'", ch->hexbuffer);
171          ch->state = CHUNK_FAILED;
172          ch->last_code = CHUNKE_ILLEGAL_HEX;
173          return CURLE_RECV_ERROR;
174        }
175        ch->state = CHUNK_LF; /* now wait for the CRLF */
176      }
177      break;
178
179    case CHUNK_LF:
180      /* waiting for the LF after a chunk size */
181      if(*buf == 0x0a) {
182        /* we're now expecting data to come, unless size was zero! */
183        if(0 == ch->datasize) {
184          ch->state = CHUNK_TRAILER; /* now check for trailers */
185        }
186        else
187          ch->state = CHUNK_DATA;
188      }
189
190      buf++;
191      blen--;
192      break;
193
194    case CHUNK_DATA:
195      /* We expect 'datasize' of data. We have 'blen' right now, it can be
196         more or less than 'datasize'. Get the smallest piece.
197      */
198      piece = blen;
199      if(ch->datasize < (curl_off_t)blen)
200        piece = curlx_sotouz(ch->datasize);
201
202      /* Write the data portion available */
203      if(!data->set.http_te_skip && !ch->ignore_body) {
204        if(cw_next)
205          result = Curl_cwriter_write(data, cw_next, CLIENTWRITE_BODY,
206                                      buf, piece);
207        else
208          result = Curl_client_write(data, CLIENTWRITE_BODY,
209                                    (char *)buf, piece);
210        if(result) {
211          ch->state = CHUNK_FAILED;
212          ch->last_code = CHUNKE_PASSTHRU_ERROR;
213          return result;
214        }
215      }
216
217      *pconsumed += piece;
218      ch->datasize -= piece; /* decrease amount left to expect */
219      buf += piece;    /* move read pointer forward */
220      blen -= piece;   /* decrease space left in this round */
221
222      if(0 == ch->datasize)
223        /* end of data this round, we now expect a trailing CRLF */
224        ch->state = CHUNK_POSTLF;
225      break;
226
227    case CHUNK_POSTLF:
228      if(*buf == 0x0a) {
229        /* The last one before we go back to hex state and start all over. */
230        Curl_httpchunk_reset(data, ch, ch->ignore_body);
231      }
232      else if(*buf != 0x0d) {
233        ch->state = CHUNK_FAILED;
234        ch->last_code = CHUNKE_BAD_CHUNK;
235        return CURLE_RECV_ERROR;
236      }
237      buf++;
238      blen--;
239      break;
240
241    case CHUNK_TRAILER:
242      if((*buf == 0x0d) || (*buf == 0x0a)) {
243        char *tr = Curl_dyn_ptr(&ch->trailer);
244        /* this is the end of a trailer, but if the trailer was zero bytes
245           there was no trailer and we move on */
246
247        if(tr) {
248          size_t trlen;
249          result = Curl_dyn_addn(&ch->trailer, (char *)STRCONST("\x0d\x0a"));
250          if(result) {
251            ch->state = CHUNK_FAILED;
252            ch->last_code = CHUNKE_OUT_OF_MEMORY;
253            return result;
254          }
255          tr = Curl_dyn_ptr(&ch->trailer);
256          trlen = Curl_dyn_len(&ch->trailer);
257          if(!data->set.http_te_skip) {
258            if(cw_next)
259              result = Curl_cwriter_write(data, cw_next,
260                                          CLIENTWRITE_HEADER|
261                                          CLIENTWRITE_TRAILER,
262                                          tr, trlen);
263            else
264              result = Curl_client_write(data,
265                                         CLIENTWRITE_HEADER|
266                                         CLIENTWRITE_TRAILER,
267                                         tr, trlen);
268            if(result) {
269              ch->state = CHUNK_FAILED;
270              ch->last_code = CHUNKE_PASSTHRU_ERROR;
271              return result;
272            }
273          }
274          Curl_dyn_reset(&ch->trailer);
275          ch->state = CHUNK_TRAILER_CR;
276          if(*buf == 0x0a)
277            /* already on the LF */
278            break;
279        }
280        else {
281          /* no trailer, we're on the final CRLF pair */
282          ch->state = CHUNK_TRAILER_POSTCR;
283          break; /* don't advance the pointer */
284        }
285      }
286      else {
287        result = Curl_dyn_addn(&ch->trailer, buf, 1);
288        if(result) {
289          ch->state = CHUNK_FAILED;
290          ch->last_code = CHUNKE_OUT_OF_MEMORY;
291          return result;
292        }
293      }
294      buf++;
295      blen--;
296      break;
297
298    case CHUNK_TRAILER_CR:
299      if(*buf == 0x0a) {
300        ch->state = CHUNK_TRAILER_POSTCR;
301        buf++;
302        blen--;
303      }
304      else {
305        ch->state = CHUNK_FAILED;
306        ch->last_code = CHUNKE_BAD_CHUNK;
307        return CURLE_RECV_ERROR;
308      }
309      break;
310
311    case CHUNK_TRAILER_POSTCR:
312      /* We enter this state when a CR should arrive so we expect to
313         have to first pass a CR before we wait for LF */
314      if((*buf != 0x0d) && (*buf != 0x0a)) {
315        /* not a CR then it must be another header in the trailer */
316        ch->state = CHUNK_TRAILER;
317        break;
318      }
319      if(*buf == 0x0d) {
320        /* skip if CR */
321        buf++;
322        blen--;
323      }
324      /* now wait for the final LF */
325      ch->state = CHUNK_STOP;
326      break;
327
328    case CHUNK_STOP:
329      if(*buf == 0x0a) {
330        blen--;
331        /* Record the length of any data left in the end of the buffer
332           even if there's no more chunks to read */
333        ch->datasize = blen;
334        ch->state = CHUNK_DONE;
335        return CURLE_OK;
336      }
337      else {
338        ch->state = CHUNK_FAILED;
339        ch->last_code = CHUNKE_BAD_CHUNK;
340        return CURLE_RECV_ERROR;
341      }
342    case CHUNK_DONE:
343      return CURLE_OK;
344
345    case CHUNK_FAILED:
346      return CURLE_RECV_ERROR;
347    }
348
349  }
350  return CURLE_OK;
351}
352
353static const char *Curl_chunked_strerror(CHUNKcode code)
354{
355  switch(code) {
356  default:
357    return "OK";
358  case CHUNKE_TOO_LONG_HEX:
359    return "Too long hexadecimal number";
360  case CHUNKE_ILLEGAL_HEX:
361    return "Illegal or missing hexadecimal sequence";
362  case CHUNKE_BAD_CHUNK:
363    return "Malformed encoding found";
364  case CHUNKE_PASSTHRU_ERROR:
365    return "Error writing data to client";
366  case CHUNKE_BAD_ENCODING:
367    return "Bad content-encoding found";
368  case CHUNKE_OUT_OF_MEMORY:
369    return "Out of memory";
370  }
371}
372
373CURLcode Curl_httpchunk_read(struct Curl_easy *data,
374                             struct Curl_chunker *ch,
375                             char *buf, size_t blen,
376                             size_t *pconsumed)
377{
378  return httpchunk_readwrite(data, ch, NULL, buf, blen, pconsumed);
379}
380
381struct chunked_writer {
382  struct Curl_cwriter super;
383  struct Curl_chunker ch;
384};
385
386static CURLcode cw_chunked_init(struct Curl_easy *data,
387                                struct Curl_cwriter *writer)
388{
389  struct chunked_writer *ctx = (struct chunked_writer *)writer;
390
391  data->req.chunk = TRUE;      /* chunks coming our way. */
392  Curl_httpchunk_init(data, &ctx->ch, FALSE);
393  return CURLE_OK;
394}
395
396static void cw_chunked_close(struct Curl_easy *data,
397                             struct Curl_cwriter *writer)
398{
399  struct chunked_writer *ctx = (struct chunked_writer *)writer;
400  Curl_httpchunk_free(data, &ctx->ch);
401}
402
403static CURLcode cw_chunked_write(struct Curl_easy *data,
404                                 struct Curl_cwriter *writer, int type,
405                                 const char *buf, size_t blen)
406{
407  struct chunked_writer *ctx = (struct chunked_writer *)writer;
408  CURLcode result;
409  size_t consumed;
410
411  if(!(type & CLIENTWRITE_BODY))
412    return Curl_cwriter_write(data, writer->next, type, buf, blen);
413
414  consumed = 0;
415  result = httpchunk_readwrite(data, &ctx->ch, writer->next, buf, blen,
416                               &consumed);
417
418  if(result) {
419    if(CHUNKE_PASSTHRU_ERROR == ctx->ch.last_code) {
420      failf(data, "Failed reading the chunked-encoded stream");
421    }
422    else {
423      failf(data, "%s in chunked-encoding",
424            Curl_chunked_strerror(ctx->ch.last_code));
425    }
426    return result;
427  }
428
429  blen -= consumed;
430  if(CHUNK_DONE == ctx->ch.state) {
431    /* chunks read successfully, download is complete */
432    data->req.download_done = TRUE;
433    if(blen) {
434      infof(data, "Leftovers after chunking: %zu bytes", blen);
435    }
436  }
437  else if((type & CLIENTWRITE_EOS) && !data->req.no_body) {
438    failf(data, "transfer closed with outstanding read data remaining");
439    return CURLE_PARTIAL_FILE;
440  }
441
442  return CURLE_OK;
443}
444
445/* HTTP chunked Transfer-Encoding decoder */
446const struct Curl_cwtype Curl_httpchunk_unencoder = {
447  "chunked",
448  NULL,
449  cw_chunked_init,
450  cw_chunked_write,
451  cw_chunked_close,
452  sizeof(struct chunked_writer)
453};
454
455#endif /* CURL_DISABLE_HTTP */
456