http_chunks.c (20650B)
1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 * SPDX-License-Identifier: curl 22 * 23 ***************************************************************************/ 24 25 #include "curl_setup.h" 26 27 #ifndef CURL_DISABLE_HTTP 28 29 #include "urldata.h" /* it includes http_chunks.h */ 30 #include "curl_printf.h" 31 #include "curl_trc.h" 32 #include "sendf.h" /* for the client write stuff */ 33 #include "curlx/dynbuf.h" 34 #include "content_encoding.h" 35 #include "http.h" 36 #include "multiif.h" 37 #include "curlx/strparse.h" 38 #include "curlx/warnless.h" 39 40 /* The last #include files should be: */ 41 #include "curl_memory.h" 42 #include "memdebug.h" 43 44 /* 45 * Chunk format (simplified): 46 * 47 * <HEX SIZE>[ chunk extension ] CRLF 48 * <DATA> CRLF 49 * 50 * Highlights from RFC2616 section 3.6 say: 51 52 The chunked encoding modifies the body of a message in order to 53 transfer it as a series of chunks, each with its own size indicator, 54 followed by an OPTIONAL trailer containing entity-header fields. This 55 allows dynamically produced content to be transferred along with the 56 information necessary for the recipient to verify that it has 57 received the full message. 58 59 Chunked-Body = *chunk 60 last-chunk 61 trailer 62 CRLF 63 64 chunk = chunk-size [ chunk-extension ] CRLF 65 chunk-data CRLF 66 chunk-size = 1*HEX 67 last-chunk = 1*("0") [ chunk-extension ] CRLF 68 69 chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) 70 chunk-ext-name = token 71 chunk-ext-val = token | quoted-string 72 chunk-data = chunk-size(OCTET) 73 trailer = *(entity-header CRLF) 74 75 The chunk-size field is a string of hex digits indicating the size of 76 the chunk. The chunked encoding is ended by any chunk whose size is 77 zero, followed by the trailer, which is terminated by an empty line. 78 79 */ 80 81 void Curl_httpchunk_init(struct Curl_easy *data, struct Curl_chunker *ch, 82 bool ignore_body) 83 { 84 (void)data; 85 ch->hexindex = 0; /* start at 0 */ 86 ch->state = CHUNK_HEX; /* we get hex first! */ 87 ch->last_code = CHUNKE_OK; 88 curlx_dyn_init(&ch->trailer, DYN_H1_TRAILER); 89 ch->ignore_body = ignore_body; 90 } 91 92 void Curl_httpchunk_reset(struct Curl_easy *data, struct Curl_chunker *ch, 93 bool ignore_body) 94 { 95 (void)data; 96 ch->hexindex = 0; /* start at 0 */ 97 ch->state = CHUNK_HEX; /* we get hex first! */ 98 ch->last_code = CHUNKE_OK; 99 curlx_dyn_reset(&ch->trailer); 100 ch->ignore_body = ignore_body; 101 } 102 103 void Curl_httpchunk_free(struct Curl_easy *data, struct Curl_chunker *ch) 104 { 105 (void)data; 106 curlx_dyn_free(&ch->trailer); 107 } 108 109 bool Curl_httpchunk_is_done(struct Curl_easy *data, struct Curl_chunker *ch) 110 { 111 (void)data; 112 return ch->state == CHUNK_DONE; 113 } 114 115 static CURLcode httpchunk_readwrite(struct Curl_easy *data, 116 struct Curl_chunker *ch, 117 struct Curl_cwriter *cw_next, 118 const char *buf, size_t blen, 119 size_t *pconsumed) 120 { 121 CURLcode result = CURLE_OK; 122 size_t piece; 123 124 *pconsumed = 0; /* nothing's written yet */ 125 /* first check terminal states that will not progress anywhere */ 126 if(ch->state == CHUNK_DONE) 127 return CURLE_OK; 128 if(ch->state == CHUNK_FAILED) 129 return CURLE_RECV_ERROR; 130 131 /* the original data is written to the client, but we go on with the 132 chunk read process, to properly calculate the content length */ 133 if(data->set.http_te_skip && !ch->ignore_body) { 134 if(cw_next) 135 result = Curl_cwriter_write(data, cw_next, CLIENTWRITE_BODY, buf, blen); 136 else 137 result = Curl_client_write(data, CLIENTWRITE_BODY, buf, blen); 138 if(result) { 139 ch->state = CHUNK_FAILED; 140 ch->last_code = CHUNKE_PASSTHRU_ERROR; 141 return result; 142 } 143 } 144 145 while(blen) { 146 switch(ch->state) { 147 case CHUNK_HEX: 148 if(ISXDIGIT(*buf)) { 149 if(ch->hexindex >= CHUNK_MAXNUM_LEN) { 150 failf(data, "chunk hex-length longer than %d", CHUNK_MAXNUM_LEN); 151 ch->state = CHUNK_FAILED; 152 ch->last_code = CHUNKE_TOO_LONG_HEX; /* longer than we support */ 153 return CURLE_RECV_ERROR; 154 } 155 ch->hexbuffer[ch->hexindex++] = *buf; 156 buf++; 157 blen--; 158 (*pconsumed)++; 159 } 160 else { 161 const char *p; 162 if(0 == ch->hexindex) { 163 /* This is illegal data, we received junk where we expected 164 a hexadecimal digit. */ 165 failf(data, "chunk hex-length char not a hex digit: 0x%x", *buf); 166 ch->state = CHUNK_FAILED; 167 ch->last_code = CHUNKE_ILLEGAL_HEX; 168 return CURLE_RECV_ERROR; 169 } 170 /* blen and buf are unmodified */ 171 ch->hexbuffer[ch->hexindex] = 0; 172 p = &ch->hexbuffer[0]; 173 if(curlx_str_hex(&p, &ch->datasize, CURL_OFF_T_MAX)) { 174 failf(data, "invalid chunk size: '%s'", ch->hexbuffer); 175 ch->state = CHUNK_FAILED; 176 ch->last_code = CHUNKE_ILLEGAL_HEX; 177 return CURLE_RECV_ERROR; 178 } 179 ch->state = CHUNK_LF; /* now wait for the CRLF */ 180 } 181 break; 182 183 case CHUNK_LF: 184 /* waiting for the LF after a chunk size */ 185 if(*buf == 0x0a) { 186 /* we are now expecting data to come, unless size was zero! */ 187 if(0 == ch->datasize) { 188 ch->state = CHUNK_TRAILER; /* now check for trailers */ 189 } 190 else { 191 ch->state = CHUNK_DATA; 192 CURL_TRC_WRITE(data, "http_chunked, chunk start of %" 193 FMT_OFF_T " bytes", ch->datasize); 194 } 195 } 196 197 buf++; 198 blen--; 199 (*pconsumed)++; 200 break; 201 202 case CHUNK_DATA: 203 /* We expect 'datasize' of data. We have 'blen' right now, it can be 204 more or less than 'datasize'. Get the smallest piece. 205 */ 206 piece = blen; 207 if(ch->datasize < (curl_off_t)blen) 208 piece = curlx_sotouz(ch->datasize); 209 210 /* Write the data portion available */ 211 if(!data->set.http_te_skip && !ch->ignore_body) { 212 if(cw_next) 213 result = Curl_cwriter_write(data, cw_next, CLIENTWRITE_BODY, 214 buf, piece); 215 else 216 result = Curl_client_write(data, CLIENTWRITE_BODY, buf, piece); 217 if(result) { 218 ch->state = CHUNK_FAILED; 219 ch->last_code = CHUNKE_PASSTHRU_ERROR; 220 return result; 221 } 222 } 223 224 *pconsumed += piece; 225 ch->datasize -= piece; /* decrease amount left to expect */ 226 buf += piece; /* move read pointer forward */ 227 blen -= piece; /* decrease space left in this round */ 228 CURL_TRC_WRITE(data, "http_chunked, write %zu body bytes, %" 229 FMT_OFF_T " bytes in chunk remain", 230 piece, ch->datasize); 231 232 if(0 == ch->datasize) 233 /* end of data this round, we now expect a trailing CRLF */ 234 ch->state = CHUNK_POSTLF; 235 break; 236 237 case CHUNK_POSTLF: 238 if(*buf == 0x0a) { 239 /* The last one before we go back to hex state and start all over. */ 240 Curl_httpchunk_reset(data, ch, ch->ignore_body); 241 } 242 else if(*buf != 0x0d) { 243 ch->state = CHUNK_FAILED; 244 ch->last_code = CHUNKE_BAD_CHUNK; 245 return CURLE_RECV_ERROR; 246 } 247 buf++; 248 blen--; 249 (*pconsumed)++; 250 break; 251 252 case CHUNK_TRAILER: 253 if((*buf == 0x0d) || (*buf == 0x0a)) { 254 char *tr = curlx_dyn_ptr(&ch->trailer); 255 /* this is the end of a trailer, but if the trailer was zero bytes 256 there was no trailer and we move on */ 257 258 if(tr) { 259 result = curlx_dyn_addn(&ch->trailer, STRCONST("\x0d\x0a")); 260 if(result) { 261 ch->state = CHUNK_FAILED; 262 ch->last_code = CHUNKE_OUT_OF_MEMORY; 263 return result; 264 } 265 tr = curlx_dyn_ptr(&ch->trailer); 266 if(!data->set.http_te_skip) { 267 size_t trlen = curlx_dyn_len(&ch->trailer); 268 if(cw_next) 269 result = Curl_cwriter_write(data, cw_next, 270 CLIENTWRITE_HEADER| 271 CLIENTWRITE_TRAILER, 272 tr, trlen); 273 else 274 result = Curl_client_write(data, 275 CLIENTWRITE_HEADER| 276 CLIENTWRITE_TRAILER, 277 tr, trlen); 278 if(result) { 279 ch->state = CHUNK_FAILED; 280 ch->last_code = CHUNKE_PASSTHRU_ERROR; 281 return result; 282 } 283 } 284 curlx_dyn_reset(&ch->trailer); 285 ch->state = CHUNK_TRAILER_CR; 286 if(*buf == 0x0a) 287 /* already on the LF */ 288 break; 289 } 290 else { 291 /* no trailer, we are on the final CRLF pair */ 292 ch->state = CHUNK_TRAILER_POSTCR; 293 break; /* do not advance the pointer */ 294 } 295 } 296 else { 297 result = curlx_dyn_addn(&ch->trailer, buf, 1); 298 if(result) { 299 ch->state = CHUNK_FAILED; 300 ch->last_code = CHUNKE_OUT_OF_MEMORY; 301 return result; 302 } 303 } 304 buf++; 305 blen--; 306 (*pconsumed)++; 307 break; 308 309 case CHUNK_TRAILER_CR: 310 if(*buf == 0x0a) { 311 ch->state = CHUNK_TRAILER_POSTCR; 312 buf++; 313 blen--; 314 (*pconsumed)++; 315 } 316 else { 317 ch->state = CHUNK_FAILED; 318 ch->last_code = CHUNKE_BAD_CHUNK; 319 return CURLE_RECV_ERROR; 320 } 321 break; 322 323 case CHUNK_TRAILER_POSTCR: 324 /* We enter this state when a CR should arrive so we expect to 325 have to first pass a CR before we wait for LF */ 326 if((*buf != 0x0d) && (*buf != 0x0a)) { 327 /* not a CR then it must be another header in the trailer */ 328 ch->state = CHUNK_TRAILER; 329 break; 330 } 331 if(*buf == 0x0d) { 332 /* skip if CR */ 333 buf++; 334 blen--; 335 (*pconsumed)++; 336 } 337 /* now wait for the final LF */ 338 ch->state = CHUNK_STOP; 339 break; 340 341 case CHUNK_STOP: 342 if(*buf == 0x0a) { 343 blen--; 344 (*pconsumed)++; 345 /* Record the length of any data left in the end of the buffer 346 even if there is no more chunks to read */ 347 ch->datasize = blen; 348 ch->state = CHUNK_DONE; 349 CURL_TRC_WRITE(data, "http_chunk, response complete"); 350 return CURLE_OK; 351 } 352 else { 353 ch->state = CHUNK_FAILED; 354 ch->last_code = CHUNKE_BAD_CHUNK; 355 CURL_TRC_WRITE(data, "http_chunk error, expected 0x0a, seeing 0x%ux", 356 (unsigned int)*buf); 357 return CURLE_RECV_ERROR; 358 } 359 case CHUNK_DONE: 360 return CURLE_OK; 361 362 case CHUNK_FAILED: 363 return CURLE_RECV_ERROR; 364 } 365 366 } 367 return CURLE_OK; 368 } 369 370 static const char *Curl_chunked_strerror(CHUNKcode code) 371 { 372 switch(code) { 373 default: 374 return "OK"; 375 case CHUNKE_TOO_LONG_HEX: 376 return "Too long hexadecimal number"; 377 case CHUNKE_ILLEGAL_HEX: 378 return "Illegal or missing hexadecimal sequence"; 379 case CHUNKE_BAD_CHUNK: 380 return "Malformed encoding found"; 381 case CHUNKE_PASSTHRU_ERROR: 382 return "Error writing data to client"; 383 case CHUNKE_BAD_ENCODING: 384 return "Bad content-encoding found"; 385 case CHUNKE_OUT_OF_MEMORY: 386 return "Out of memory"; 387 } 388 } 389 390 CURLcode Curl_httpchunk_read(struct Curl_easy *data, 391 struct Curl_chunker *ch, 392 char *buf, size_t blen, 393 size_t *pconsumed) 394 { 395 return httpchunk_readwrite(data, ch, NULL, buf, blen, pconsumed); 396 } 397 398 struct chunked_writer { 399 struct Curl_cwriter super; 400 struct Curl_chunker ch; 401 }; 402 403 static CURLcode cw_chunked_init(struct Curl_easy *data, 404 struct Curl_cwriter *writer) 405 { 406 struct chunked_writer *ctx = writer->ctx; 407 408 data->req.chunk = TRUE; /* chunks coming our way. */ 409 Curl_httpchunk_init(data, &ctx->ch, FALSE); 410 return CURLE_OK; 411 } 412 413 static void cw_chunked_close(struct Curl_easy *data, 414 struct Curl_cwriter *writer) 415 { 416 struct chunked_writer *ctx = writer->ctx; 417 Curl_httpchunk_free(data, &ctx->ch); 418 } 419 420 static CURLcode cw_chunked_write(struct Curl_easy *data, 421 struct Curl_cwriter *writer, int type, 422 const char *buf, size_t blen) 423 { 424 struct chunked_writer *ctx = writer->ctx; 425 CURLcode result; 426 size_t consumed; 427 428 if(!(type & CLIENTWRITE_BODY)) 429 return Curl_cwriter_write(data, writer->next, type, buf, blen); 430 431 consumed = 0; 432 result = httpchunk_readwrite(data, &ctx->ch, writer->next, buf, blen, 433 &consumed); 434 435 if(result) { 436 if(CHUNKE_PASSTHRU_ERROR == ctx->ch.last_code) { 437 failf(data, "Failed reading the chunked-encoded stream"); 438 } 439 else { 440 failf(data, "%s in chunked-encoding", 441 Curl_chunked_strerror(ctx->ch.last_code)); 442 } 443 return result; 444 } 445 446 blen -= consumed; 447 if(CHUNK_DONE == ctx->ch.state) { 448 /* chunks read successfully, download is complete */ 449 data->req.download_done = TRUE; 450 if(blen) { 451 infof(data, "Leftovers after chunking: %zu bytes", blen); 452 } 453 } 454 else if((type & CLIENTWRITE_EOS) && !data->req.no_body) { 455 failf(data, "transfer closed with outstanding read data remaining"); 456 return CURLE_PARTIAL_FILE; 457 } 458 459 return CURLE_OK; 460 } 461 462 /* HTTP chunked Transfer-Encoding decoder */ 463 const struct Curl_cwtype Curl_httpchunk_unencoder = { 464 "chunked", 465 NULL, 466 cw_chunked_init, 467 cw_chunked_write, 468 cw_chunked_close, 469 sizeof(struct chunked_writer) 470 }; 471 472 /* max length of an HTTP chunk that we want to generate */ 473 #define CURL_CHUNKED_MINLEN (1024) 474 #define CURL_CHUNKED_MAXLEN (64 * 1024) 475 476 struct chunked_reader { 477 struct Curl_creader super; 478 struct bufq chunkbuf; 479 BIT(read_eos); /* we read an EOS from the next reader */ 480 BIT(eos); /* we have returned an EOS */ 481 }; 482 483 static CURLcode cr_chunked_init(struct Curl_easy *data, 484 struct Curl_creader *reader) 485 { 486 struct chunked_reader *ctx = reader->ctx; 487 (void)data; 488 Curl_bufq_init2(&ctx->chunkbuf, CURL_CHUNKED_MAXLEN, 2, BUFQ_OPT_SOFT_LIMIT); 489 return CURLE_OK; 490 } 491 492 static void cr_chunked_close(struct Curl_easy *data, 493 struct Curl_creader *reader) 494 { 495 struct chunked_reader *ctx = reader->ctx; 496 (void)data; 497 Curl_bufq_free(&ctx->chunkbuf); 498 } 499 500 static CURLcode add_last_chunk(struct Curl_easy *data, 501 struct Curl_creader *reader) 502 { 503 struct chunked_reader *ctx = reader->ctx; 504 struct curl_slist *trailers = NULL, *tr; 505 CURLcode result; 506 size_t n; 507 int rc; 508 509 if(!data->set.trailer_callback) { 510 CURL_TRC_READ(data, "http_chunk, added last, empty chunk"); 511 return Curl_bufq_cwrite(&ctx->chunkbuf, STRCONST("0\r\n\r\n"), &n); 512 } 513 514 result = Curl_bufq_cwrite(&ctx->chunkbuf, STRCONST("0\r\n"), &n); 515 if(result) 516 goto out; 517 518 Curl_set_in_callback(data, TRUE); 519 rc = data->set.trailer_callback(&trailers, data->set.trailer_data); 520 Curl_set_in_callback(data, FALSE); 521 522 if(rc != CURL_TRAILERFUNC_OK) { 523 failf(data, "operation aborted by trailing headers callback"); 524 result = CURLE_ABORTED_BY_CALLBACK; 525 goto out; 526 } 527 528 for(tr = trailers; tr; tr = tr->next) { 529 /* only add correctly formatted trailers */ 530 char *ptr = strchr(tr->data, ':'); 531 if(!ptr || *(ptr + 1) != ' ') { 532 infof(data, "Malformatted trailing header, skipping trailer"); 533 continue; 534 } 535 536 result = Curl_bufq_cwrite(&ctx->chunkbuf, tr->data, 537 strlen(tr->data), &n); 538 if(!result) 539 result = Curl_bufq_cwrite(&ctx->chunkbuf, STRCONST("\r\n"), &n); 540 if(result) 541 goto out; 542 } 543 544 result = Curl_bufq_cwrite(&ctx->chunkbuf, STRCONST("\r\n"), &n); 545 546 out: 547 curl_slist_free_all(trailers); 548 CURL_TRC_READ(data, "http_chunk, added last chunk with trailers " 549 "from client -> %d", result); 550 return result; 551 } 552 553 static CURLcode add_chunk(struct Curl_easy *data, 554 struct Curl_creader *reader, 555 char *buf, size_t blen) 556 { 557 struct chunked_reader *ctx = reader->ctx; 558 CURLcode result; 559 char tmp[CURL_CHUNKED_MINLEN]; 560 size_t nread; 561 bool eos; 562 563 DEBUGASSERT(!ctx->read_eos); 564 blen = CURLMIN(blen, CURL_CHUNKED_MAXLEN); /* respect our buffer pref */ 565 if(blen < sizeof(tmp)) { 566 /* small read, make a chunk of decent size */ 567 buf = tmp; 568 blen = sizeof(tmp); 569 } 570 else { 571 /* larger read, make a chunk that will fit when read back */ 572 blen -= (8 + 2 + 2); /* deduct max overhead, 8 hex + 2*crlf */ 573 } 574 575 result = Curl_creader_read(data, reader->next, buf, blen, &nread, &eos); 576 if(result) 577 return result; 578 if(eos) 579 ctx->read_eos = TRUE; 580 581 if(nread) { 582 /* actually got bytes, wrap them into the chunkbuf */ 583 char hd[11] = ""; 584 int hdlen; 585 size_t n; 586 587 hdlen = msnprintf(hd, sizeof(hd), "%zx\r\n", nread); 588 if(hdlen <= 0) 589 return CURLE_READ_ERROR; 590 /* On a soft-limited bufq, we do not need to check that all was written */ 591 result = Curl_bufq_cwrite(&ctx->chunkbuf, hd, hdlen, &n); 592 if(!result) 593 result = Curl_bufq_cwrite(&ctx->chunkbuf, buf, nread, &n); 594 if(!result) 595 result = Curl_bufq_cwrite(&ctx->chunkbuf, "\r\n", 2, &n); 596 CURL_TRC_READ(data, "http_chunk, made chunk of %zu bytes -> %d", 597 nread, result); 598 if(result) 599 return result; 600 } 601 602 if(ctx->read_eos) 603 return add_last_chunk(data, reader); 604 return CURLE_OK; 605 } 606 607 static CURLcode cr_chunked_read(struct Curl_easy *data, 608 struct Curl_creader *reader, 609 char *buf, size_t blen, 610 size_t *pnread, bool *peos) 611 { 612 struct chunked_reader *ctx = reader->ctx; 613 CURLcode result = CURLE_READ_ERROR; 614 615 *pnread = 0; 616 *peos = ctx->eos; 617 618 if(!ctx->eos) { 619 if(!ctx->read_eos && Curl_bufq_is_empty(&ctx->chunkbuf)) { 620 /* Still getting data form the next reader, buffer is empty */ 621 result = add_chunk(data, reader, buf, blen); 622 if(result) 623 return result; 624 } 625 626 if(!Curl_bufq_is_empty(&ctx->chunkbuf)) { 627 result = Curl_bufq_cread(&ctx->chunkbuf, buf, blen, pnread); 628 if(!result && ctx->read_eos && Curl_bufq_is_empty(&ctx->chunkbuf)) { 629 /* no more data, read all, done. */ 630 ctx->eos = TRUE; 631 *peos = TRUE; 632 } 633 return result; 634 } 635 } 636 /* We may get here, because we are done or because callbacks paused */ 637 DEBUGASSERT(ctx->eos || !ctx->read_eos); 638 return CURLE_OK; 639 } 640 641 static curl_off_t cr_chunked_total_length(struct Curl_easy *data, 642 struct Curl_creader *reader) 643 { 644 /* this reader changes length depending on input */ 645 (void)data; 646 (void)reader; 647 return -1; 648 } 649 650 /* HTTP chunked Transfer-Encoding encoder */ 651 const struct Curl_crtype Curl_httpchunk_encoder = { 652 "chunked", 653 cr_chunked_init, 654 cr_chunked_read, 655 cr_chunked_close, 656 Curl_creader_def_needs_rewind, 657 cr_chunked_total_length, 658 Curl_creader_def_resume_from, 659 Curl_creader_def_rewind, 660 Curl_creader_def_unpause, 661 Curl_creader_def_is_paused, 662 Curl_creader_def_done, 663 sizeof(struct chunked_reader) 664 }; 665 666 CURLcode Curl_httpchunk_add_reader(struct Curl_easy *data) 667 { 668 struct Curl_creader *reader = NULL; 669 CURLcode result; 670 671 result = Curl_creader_create(&reader, data, &Curl_httpchunk_encoder, 672 CURL_CR_TRANSFER_ENCODE); 673 if(!result) 674 result = Curl_creader_add(data, reader); 675 676 if(result && reader) 677 Curl_creader_free(data, reader); 678 return result; 679 } 680 681 #endif /* CURL_DISABLE_HTTP */