paivana

HTTP paywall reverse proxy
Log | Files | Refs | Submodules | README | LICENSE

commit c3c7932bc40daed2f3221f6cb59f6aad4faa96f0
parent 5b0299ab685c7a467f3c0fa7d15f7c94fb9ab087
Author: Christian Grothoff <christian@grothoff.org>
Date:   Thu, 23 Apr 2026 21:05:44 +0200

more comprehensive reverse proxy logic

Diffstat:
Msrc/backend/paivana-httpd_reverse.c | 372+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 327 insertions(+), 45 deletions(-)

diff --git a/src/backend/paivana-httpd_reverse.c b/src/backend/paivana-httpd_reverse.c @@ -32,7 +32,10 @@ #include <taler/taler_mhd_lib.h> #include "paivana-httpd_reverse.h" -#define REQUEST_BUFFER_MAX (1024 * 1024) +/** + * Maximum upload allowed. + */ +#define REQUEST_BUFFER_MAX (1024 * 1024 * 128) /** @@ -214,6 +217,28 @@ struct HttpRequest * Did we pause CURL processing? */ int curl_paused; + + /** + * Have we observed the initial `HEADERS_PROCESSED` (a.k.a. "first") + * access-handler call for this request yet? MHD invokes the + * handler once immediately after parsing the request headers with + * `upload_data_size == 0` (and no body data yet), then again later + * with body chunks (if any), and finally once more with + * `upload_data_size == 0` at `FULL_REQ_RECEIVED`. We must defer + * curl setup to the "final" call so that request bodies are + * available in `io_buf` by the time curl runs. + */ + bool accepted; + + /** + * Set when the request body exceeded `REQUEST_BUFFER_MAX`. We + * must drain the rest of the upload silently and queue the 413 + * response on the "final" access-handler call. + * + * FIXME: better: do not generate 100 continue in this case + * in the first place! 
+ */ + bool reject_upload; }; @@ -246,6 +271,13 @@ static struct GNUNET_SCHEDULER_Task *curl_download_task; bool PAIVANA_HTTPD_reverse_init (void) { + static const char *failure_body = + "<!DOCTYPE html>\n" + "<html><head><title>Bad Gateway</title></head>" + "<body><h1>502 Bad Gateway</h1>" + "<p>The upstream server could not be reached.</p>" + "</body></html>\n"; + if (0 != curl_global_init (CURL_GLOBAL_WIN32)) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, @@ -258,6 +290,19 @@ PAIVANA_HTTPD_reverse_init (void) "Failed to create cURL multi handle!\n"); return false; } + curl_failure_response + = MHD_create_response_from_buffer_static (strlen (failure_body), + failure_body); + if (NULL == curl_failure_response) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Failed to create cURL failure response!\n"); + return false; + } + GNUNET_break (MHD_YES == + MHD_add_response_header (curl_failure_response, + MHD_HTTP_HEADER_CONTENT_TYPE, + "text/html; charset=utf-8")); return true; } @@ -275,19 +320,59 @@ PAIVANA_HTTPD_reverse_shutdown (void) MHD_resume_connection (hr->con); } } + if (NULL != curl_download_task) + { + GNUNET_SCHEDULER_cancel (curl_download_task); + curl_download_task = NULL; + } if (NULL != curl_multi) { curl_multi_cleanup (curl_multi); curl_multi = NULL; } - if (NULL != curl_download_task) + if (NULL != curl_failure_response) { - GNUNET_SCHEDULER_cancel (curl_download_task); - curl_download_task = NULL; + MHD_destroy_response (curl_failure_response); + curl_failure_response = NULL; } } +/** + * Is @a name a hop-by-hop HTTP header name that a proxy must not + * forward (RFC 9110 section 7.6.1 and RFC 7230, section 6.1)? + * + * FIXME: Note that we *additionally* must remove all headers listed + * within the "Connection" header (for which we should keep that + * header around separately and do another pass after getting all the + * headers). 
+ * + * @param name header field name + * @return true if @a name is a hop-by-hop header + */ +static bool +is_hop_by_hop_header (const char *name) +{ + static const char *const hop_headers[] = { + MHD_HTTP_HEADER_CONNECTION, + MHD_HTTP_HEADER_KEEP_ALIVE, + MHD_HTTP_HEADER_PROXY_AUTHENTICATE, + MHD_HTTP_HEADER_PROXY_AUTHORIZATION, + MHD_HTTP_HEADER_TE, + MHD_HTTP_HEADER_TRAILER, + MHD_HTTP_HEADER_TRANSFER_ENCODING, + MHD_HTTP_HEADER_UPGRADE, + NULL + }; + + for (unsigned int i = 0; NULL != hop_headers[i]; i++) + if (0 == strcasecmp (name, + hop_headers[i])) + return true; + return false; +} + + /* *************** HTTP handling with cURL ***************** */ @@ -358,12 +443,10 @@ curl_check_hdr (void *buffer, GNUNET_free (ndup); return bytes; } - /* Skip "Connection: Keep-Alive" header, it will be - done by MHD if possible */ - if ( (0 == strcasecmp (hdr_type, - MHD_HTTP_HEADER_CONNECTION)) && - (0 == strcasecmp (hdr_val, - "Keep-Alive")) ) + /* Skip hop-by-hop headers. In particular Transfer-Encoding + must not leak through: libcurl has already dechunked the + body for us and MHD will decide whether to re-chunk. */ + if (is_hop_by_hop_header (hdr_type)) { GNUNET_free (ndup); return bytes; @@ -714,18 +797,39 @@ con_val_iter (void *cls, char *new_value = NULL; (void) kind; - if (0 == strcmp (MHD_HTTP_HEADER_HOST, - key)) + if (0 == strcasecmp (MHD_HTTP_HEADER_HOST, + key)) { /* We don't take the host header as given in the request. * We'll instead put the proxied service's hostname in it*/ return MHD_YES; } - if ((0 == strcmp (MHD_HTTP_HEADER_CONTENT_LENGTH, - key))) + if (0 == strcasecmp (MHD_HTTP_HEADER_CONTENT_LENGTH, + key)) { - PAIVANA_LOG_INFO ( - "Do not set Content-Length for request\n"); + /* libcurl sets Content-Length itself from CURLOPT_POSTFIELDSIZE + / CURLOPT_INFILESIZE. */ + return MHD_YES; + } + if (0 == strcasecmp (MHD_HTTP_HEADER_EXPECT, + key)) + { + /* libcurl manages Expect: 100-continue on its own. 
*/ + return MHD_YES; + } + if (is_hop_by_hop_header (key)) + return MHD_YES; + if ( (0 == strncasecmp ("X-Forwarded-", + key, + strlen ("X-Forwarded-"))) || + (0 == strcasecmp (MHD_HTTP_HEADER_FORWARDED, + key)) || + (0 == strcasecmp (MHD_HTTP_HEADER_VIA, + key)) ) + { + /* We will replace these with our own below. */ + /* FIXME: RFC 9110 says we should APPEND + our existence to the Via members, not replace them! */ return MHD_YES; } GNUNET_asprintf (&hdr, @@ -793,7 +897,7 @@ curl_task_download (void *cls) GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "CURL download completed.\n"); hr->state = REQUEST_STATE_PROXY_DOWNLOAD_DONE; - if (NULL == hr->response) + if (0 == hr->response_code) GNUNET_assert (GNUNET_OK == create_mhd_response_from_hr (hr)); break; @@ -801,19 +905,21 @@ curl_task_download (void *cls) GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Download curl failed: %s\n", curl_easy_strerror (msg->data.result)); - /* FIXME: indicate error somehow? - * close MHD connection badly as well? */ hr->state = REQUEST_STATE_PROXY_DOWNLOAD_DONE; + /* Surface upstream failures as 502 Bad Gateway. Drop + any partial body we might have accumulated and let + MHD return our pre-built failure page instead of + half a truncated upstream response. 
*/ + hr->response_code = MHD_HTTP_BAD_GATEWAY; + hr->io_len = 0; + hr->response = curl_failure_response; if (GNUNET_YES == hr->suspended) { MHD_resume_connection (hr->con); hr->suspended = GNUNET_NO; } - TALER_MHD_daemon_trigger (); break; - } - if (NULL == hr->response) - hr->response = curl_failure_response; + } /* end switch (msg->data.result) */ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Curl request for `%s' finished (got the response)\n", hr->url); @@ -827,8 +933,8 @@ curl_task_download (void *cls) /* unexpected status code */ GNUNET_break (0); break; - } - } + } /* end switch msg->msg */ + } /* end while (curl_multi_info_read()) */ } while (mret == CURLM_CALL_MULTI_PERFORM); if (CURLM_OK != mret) GNUNET_log (GNUNET_ERROR_TYPE_ERROR, @@ -944,6 +1050,29 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, const char *upload_data, size_t *upload_data_size) { + /* MHD's "first" call (immediately after headers) arrives with + `upload_data_size == 0` and no body. Just acknowledge it so + MHD will proceed to deliver the request body (if any) and then + make the "final" call. Setting up curl here would start the + upstream request with an empty io_buf and leave body chunks + nowhere to land. */ + if (! hr->accepted) + { + /* FIXME: we should check that the content-length header value is + acceptable here to not send the client 100 continue if it is + too big */ + hr->accepted = true; + return MHD_YES; + } + /* On the "final" access-handler call after we drained an + over-sized upload, queue the deferred 413 response now that + MHD is back in a state where that is allowed. */ + if (hr->reject_upload && 0 == *upload_data_size) + { + return MHD_queue_response (con, + MHD_HTTP_CONTENT_TOO_LARGE, + curl_failure_response); + } /* FIXME: make state machine more explicit by switching on hr->state here! 
*/ if (0 != *upload_data_size) @@ -955,6 +1084,28 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, "Processing %u bytes UPLOAD\n", (unsigned int) *upload_data_size); + /* Reject uploads that would exceed our buffering cap. The + entire request body is currently buffered before forwarding, + so this also bounds memory usage per request. + MHD does not allow queuing a response while BODY_RECEIVING + (MHD_queue_response returns MHD_NO outside of + HEADERS_PROCESSED / FULL_REQ_RECEIVED), so we mark the + request as over-limit, silently drain the remaining body, + and queue the 413 on the "final" access-handler call. */ + if (hr->reject_upload || + hr->io_len + *upload_data_size > REQUEST_BUFFER_MAX) + { + if (! hr->reject_upload) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + "Upload exceeds %u byte limit, rejecting\n", + (unsigned int) REQUEST_BUFFER_MAX); + hr->reject_upload = true; + } + *upload_data_size = 0; + return MHD_YES; + } + /* Grow the buffer if remaining space isn't enough. */ if (hr->io_size - hr->io_len < *upload_data_size) { @@ -1075,7 +1226,6 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, GNUNET_free (host_hdr); } - // FIXME: support PATCH, etc. if (0 == strcasecmp (meth, MHD_HTTP_METHOD_PUT)) { @@ -1095,12 +1245,45 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, curl_easy_setopt (hr->curl, CURLOPT_POST, 1L); + hr->state = REQUEST_STATE_PROXY_UPLOAD_STARTED; + } + else if (0 == strcasecmp (meth, + MHD_HTTP_METHOD_PATCH)) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Crafting a CURL PATCH request\n"); + /* CURLOPT_POST=1 turns on body upload via the read callback; + CURLOPT_CUSTOMREQUEST then overrides the verb on the wire. 
*/ curl_easy_setopt (hr->curl, - CURLOPT_VERBOSE, + CURLOPT_POST, 1L); + curl_easy_setopt (hr->curl, + CURLOPT_CUSTOMREQUEST, + "PATCH"); hr->state = REQUEST_STATE_PROXY_UPLOAD_STARTED; } else if (0 == strcasecmp (meth, + MHD_HTTP_METHOD_DELETE)) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Crafting a CURL DELETE request\n"); + curl_easy_setopt (hr->curl, + CURLOPT_CUSTOMREQUEST, + "DELETE"); + if (0 != hr->io_len) + { + /* DELETE with a request body is unusual but legal. */ + curl_easy_setopt (hr->curl, + CURLOPT_POST, + 1L); + hr->state = REQUEST_STATE_PROXY_UPLOAD_STARTED; + } + else + { + hr->state = REQUEST_STATE_PROXY_DOWNLOAD_STARTED; + } + } + else if (0 == strcasecmp (meth, MHD_HTTP_METHOD_HEAD)) { hr->state = REQUEST_STATE_PROXY_DOWNLOAD_STARTED; @@ -1126,12 +1309,17 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, } else { + /* TRACE leaks headers back to the client; CONNECT is for + TLS tunnelling and doesn't fit the reverse-proxy model. + Reject anything else with a proper 405. */ GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unsupported HTTP method `%s'\n", meth); curl_easy_cleanup (hr->curl); hr->curl = NULL; - return MHD_NO; + return MHD_queue_response (con, + MHD_HTTP_METHOD_NOT_ALLOWED, + curl_failure_response); } if (CURLM_OK != @@ -1141,7 +1329,9 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, GNUNET_break (0); curl_easy_cleanup (hr->curl); hr->curl = NULL; - return MHD_NO; + return MHD_queue_response (con, + MHD_HTTP_BAD_GATEWAY, + curl_failure_response); } MHD_get_connection_values (con, @@ -1149,6 +1339,84 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, &con_val_iter, hr); + /* Add standard reverse-proxy forwarding headers. We always + overwrite any client-supplied X-Forwarded-* / Forwarded / + Via (filtered out in con_val_iter) so upstream sees our + view of the connection, not anything the client made up. 
*/ + { + const union MHD_ConnectionInfo *ci; + char *hdr; + const char *proto; + const char *fhost; + + ci = MHD_get_connection_info (con, + MHD_CONNECTION_INFO_CLIENT_ADDRESS); + if ( (NULL != ci) && (NULL != ci->client_addr) ) + { + char ipbuf[INET6_ADDRSTRLEN]; + const char *ip = NULL; + + switch (ci->client_addr->sa_family) + { + case AF_INET: + ip = inet_ntop ( + AF_INET, + &((const struct sockaddr_in *) ci->client_addr)->sin_addr, + ipbuf, sizeof (ipbuf)); + break; + case AF_INET6: + ip = inet_ntop ( + AF_INET6, + &((const struct sockaddr_in6 *) ci->client_addr)->sin6_addr, + ipbuf, sizeof (ipbuf)); + break; + default: + break; + } + if (NULL != ip) + { + GNUNET_asprintf (&hdr, + "X-Forwarded-For: %s", + ip); + hr->headers = curl_slist_append (hr->headers, hdr); + GNUNET_free (hdr); + } + } + proto = (GNUNET_YES == TALER_mhd_is_https (con)) + ? "https" : "http"; + GNUNET_asprintf (&hdr, + "X-Forwarded-Proto: %s", + proto); + hr->headers = curl_slist_append (hr->headers, hdr); + GNUNET_free (hdr); + fhost = MHD_lookup_connection_value (con, + MHD_HEADER_KIND, + MHD_HTTP_HEADER_HOST); + if (NULL != fhost) + { + GNUNET_asprintf (&hdr, + "X-Forwarded-Host: %s", + fhost); + hr->headers = curl_slist_append (hr->headers, hdr); + GNUNET_free (hdr); + } + /* Via: pseudonym + protocol-version (RFC 7230 5.7.1); + MHD hands us e.g. "HTTP/1.1" but Via wants just "1.1". */ + { + /* FIXME: we should append our Via, not replace... 
*/ + const char *via_ver = "1.1"; + + if ( (NULL != ver) && + (0 == strncasecmp (ver, "HTTP/", 5)) ) + via_ver = ver + 5; + GNUNET_asprintf (&hdr, + "Via: %s paivana", + via_ver); + } + hr->headers = curl_slist_append (hr->headers, hdr); + GNUNET_free (hdr); + } + curl_easy_setopt (hr->curl, CURLOPT_HTTPHEADER, hr->headers); @@ -1167,23 +1435,37 @@ PAIVANA_HTTPD_reverse (struct HttpRequest *hr, GNUNET_assert (REQUEST_STATE_PROXY_DOWNLOAD_DONE == hr->state); - hr->response - = MHD_create_response_from_buffer_copy (hr->io_len, - hr->io_buf); - for (struct HttpResponseHeader *header = hr->header_head; - NULL != header; - header = header->next) + /* Response may already be set to curl_failure_response by the + curl task on upstream failure; in that case, don't build a + buffer response and don't attach per-request headers to the + shared failure response. */ + if (NULL == hr->response) { - const char *value = header->value; - - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, - "Adding MHD response header %s->%s\n", - header->type, - value); - GNUNET_break (MHD_YES == - MHD_add_response_header (hr->response, - header->type, - value)); + hr->response + = MHD_create_response_from_buffer_static (hr->io_len, + hr->io_buf); + if (NULL == hr->response) + { + GNUNET_break (0); + hr->response_code = MHD_HTTP_INTERNAL_SERVER_ERROR; + hr->response = curl_failure_response; + } + else + { + for (struct HttpResponseHeader *header = hr->header_head; + NULL != header; + header = header->next) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Adding MHD response header %s->%s\n", + header->type, + header->value); + GNUNET_break (MHD_YES == + MHD_add_response_header (hr->response, + header->type, + header->value)); + } + } } TALER_MHD_daemon_trigger ();