url.c (9210B)
1 /* 2 This file is part of TALER 3 Copyright (C) 2014-2020 Taler Systems SA 4 5 TALER is free software; you can redistribute it and/or modify it under the 6 terms of the GNU General Public License as published by the Free Software 7 Foundation; either version 3, or (at your option) any later version. 8 9 TALER is distributed in the hope that it will be useful, but WITHOUT ANY 10 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 11 A PARTICULAR PURPOSE. See the GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License along with 14 TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/> 15 */ 16 /** 17 * @file url.c 18 * @brief URL handling utility functions 19 * @author Florian Dold 20 */ 21 #include "taler/platform.h" 22 #include "taler/taler_util.h" 23 24 25 /** 26 * Check if a character is reserved and should 27 * be urlencoded. 28 * 29 * @param c character to look at 30 * @return true if @a c needs to be urlencoded, 31 * false otherwise 32 */ 33 static bool 34 is_reserved (char c) 35 { 36 switch (c) 37 { 38 case '0': case '1': case '2': case '3': case '4': 39 case '5': case '6': case '7': case '8': case '9': 40 case 'a': case 'b': case 'c': case 'd': case 'e': 41 case 'f': case 'g': case 'h': case 'i': case 'j': 42 case 'k': case 'l': case 'm': case 'n': case 'o': 43 case 'p': case 'q': case 'r': case 's': case 't': 44 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 45 case 'A': case 'B': case 'C': case 'D': case 'E': 46 case 'F': case 'G': case 'H': case 'I': case 'J': 47 case 'K': case 'L': case 'M': case 'N': case 'O': 48 case 'P': case 'Q': case 'R': case 'S': case 'T': 49 case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': 50 case '-': case '.': case '_': case '~': 51 return false; 52 default: 53 break; 54 } 55 return true; 56 } 57 58 59 /** 60 * Get the length of a string after it has been 61 * urlencoded. 62 * 63 * @param s the string 64 * @returns the size of the urlencoded @a s 65 */ 66 static size_t 67 urlencode_len (const char *s) 68 { 69 size_t len = 0; 70 for (; *s != '\0'; len++, s++) 71 if (is_reserved (*s)) 72 len += 2; 73 return len; 74 } 75 76 77 /** 78 * URL-encode a string according to rfc3986. 79 * 80 * @param buf buffer to write the result to 81 * @param s string to encode 82 */ 83 static void 84 buffer_write_urlencode (struct GNUNET_Buffer *buf, 85 const char *s) 86 { 87 size_t ulen; 88 89 ulen = urlencode_len (s); 90 GNUNET_assert (ulen < ulen + 1); 91 GNUNET_buffer_ensure_remaining (buf, 92 ulen + 1); 93 for (size_t i = 0; i < strlen (s); i++) 94 { 95 if (GNUNET_YES == is_reserved (s[i])) 96 GNUNET_buffer_write_fstr (buf, 97 "%%%02X", 98 s[i]); 99 else 100 buf->mem[buf->position++] = s[i]; 101 } 102 } 103 104 105 char * 106 TALER_urlencode (const char *s) 107 { 108 struct GNUNET_Buffer buf = { 0 }; 109 110 buffer_write_urlencode (&buf, 111 s); 112 return GNUNET_buffer_reap_str (&buf); 113 } 114 115 116 /** 117 * Compute the total length of the @a args given. The args are a 118 * NULL-terminated list of key-value pairs, where the values 119 * must be URL-encoded. When serializing, the pairs will be separated 120 * via '?' or '&' and an '=' between key and value. Hence each 121 * pair takes an extra 2 characters to encode. This function computes 122 * how many bytes are needed. It must match the #serialize_arguments() 123 * function. 124 * 125 * @param args NULL-terminated key-value pairs (char *) for query parameters 126 * @return number of bytes needed (excluding 0-terminator) for the string buffer 127 */ 128 static size_t 129 calculate_argument_length (va_list args) 130 { 131 size_t len = 0; 132 va_list ap; 133 134 va_copy (ap, 135 args); 136 while (1) 137 { 138 char *key; 139 char *value; 140 size_t vlen; 141 size_t klen; 142 143 key = va_arg (ap, 144 char *); 145 if (NULL == key) 146 break; 147 value = va_arg (ap, 148 char *); 149 if (NULL == value) 150 continue; 151 vlen = urlencode_len (value); 152 klen = strlen (key); 153 GNUNET_assert ( (len <= len + vlen) && 154 (len <= len + vlen + klen) && 155 (len < len + vlen + klen + 2) ); 156 len += vlen + klen + 2; 157 } 158 va_end (ap); 159 return len; 160 } 161 162 163 /** 164 * Take the key-value pairs in @a args and serialize them into 165 * @a buf, using URL encoding for the values. If a 'value' is 166 * given as NULL, both the key and the value are skipped. Note 167 * that a NULL value does not terminate the list, only a NULL 168 * key signals the end of the list of arguments. 169 * 170 * @param buf where to write the values 171 * @param args NULL-terminated key-value pairs (char *) for query parameters, 172 * the value will be url-encoded 173 */ 174 static void 175 serialize_arguments (struct GNUNET_Buffer *buf, 176 va_list args) 177 { 178 /* used to indicate if we are processing the initial 179 parameter which starts with '?' or subsequent 180 parameters which are separated with '&' */ 181 unsigned int iparam = 0; 182 183 while (1) 184 { 185 char *key; 186 char *value; 187 188 key = va_arg (args, 189 char *); 190 if (NULL == key) 191 break; 192 value = va_arg (args, 193 char *); 194 if (NULL == value) 195 continue; 196 GNUNET_buffer_write_str (buf, 197 (0 == iparam) ? "?" : "&"); 198 iparam = 1; 199 GNUNET_buffer_write_str (buf, 200 key); 201 GNUNET_buffer_write_str (buf, 202 "="); 203 buffer_write_urlencode (buf, 204 value); 205 } 206 } 207 208 209 char * 210 TALER_url_join (const char *base_url, 211 const char *path, 212 ...) 213 { 214 struct GNUNET_Buffer buf = { 0 }; 215 216 GNUNET_assert (NULL != base_url); 217 GNUNET_assert (NULL != path); 218 if (0 == strlen (base_url)) 219 { 220 /* base URL can't be empty */ 221 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, 222 "Empty base URL specified\n"); 223 return NULL; 224 } 225 if ('\0' != path[0]) 226 { 227 if ('/' != base_url[strlen (base_url) - 1]) 228 { 229 /* Must be an actual base URL! */ 230 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, 231 "Base URL `%s' does not end with '/', cannot join with `%s'\n", 232 base_url, 233 path); 234 return NULL; 235 } 236 if ('/' == path[0]) 237 { 238 /* The path must be relative. */ 239 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, 240 "Path `%s' is not relative\n", 241 path); 242 return NULL; 243 } 244 } 245 246 { 247 va_list args; 248 size_t len; 249 250 va_start (args, 251 path); 252 len = strlen (base_url) + strlen (path) + 1; 253 len += calculate_argument_length (args); 254 GNUNET_buffer_prealloc (&buf, 255 len); 256 GNUNET_buffer_write_str (&buf, 257 base_url); 258 GNUNET_buffer_write_str (&buf, 259 path); 260 serialize_arguments (&buf, 261 args); 262 va_end (args); 263 } 264 return GNUNET_buffer_reap_str (&buf); 265 } 266 267 268 char * 269 TALER_url_absolute_raw_va (const char *proto, 270 const char *host, 271 const char *prefix, 272 const char *path, 273 va_list args) 274 { 275 struct GNUNET_Buffer buf = { 0 }; 276 size_t len = 0; 277 278 len += strlen (proto) + strlen ("://") + strlen (host); 279 len += strlen (prefix) + strlen (path); 280 len += calculate_argument_length (args) + 1; /* 0-terminator */ 281 282 GNUNET_buffer_prealloc (&buf, 283 len); 284 GNUNET_buffer_write_str (&buf, 285 proto); 286 GNUNET_buffer_write_str (&buf, 287 "://"); 288 GNUNET_buffer_write_str (&buf, 289 host); 290 GNUNET_buffer_write_path (&buf, 291 prefix); 292 GNUNET_buffer_write_path (&buf, 293 path); 294 serialize_arguments (&buf, 295 args); 296 return GNUNET_buffer_reap_str (&buf); 297 } 298 299 300 char * 301 TALER_url_absolute_raw (const char *proto, 302 const char *host, 303 const char *prefix, 304 const char *path, 305 ...) 306 { 307 char *result; 308 va_list args; 309 310 va_start (args, 311 path); 312 result = TALER_url_absolute_raw_va (proto, 313 host, 314 prefix, 315 path, 316 args); 317 va_end (args); 318 return result; 319 } 320 321 322 bool 323 TALER_url_valid_charset (const char *url) 324 { 325 for (unsigned int i = 0; '\0' != url[i]; i++) 326 { 327 #define ALLOWED_CHARACTERS \ 328 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/:;&?-.,=_~%+#" 329 if (NULL == strchr (ALLOWED_CHARACTERS, 330 (int) url[i])) 331 return false; 332 #undef ALLOWED_CHARACTERS 333 } 334 return true; 335 } 336 337 338 bool 339 TALER_is_web_url (const char *url) 340 { 341 if ( (0 != strncasecmp (url, 342 "https://", 343 strlen ("https://"))) && 344 (0 != strncasecmp (url, 345 "http://", 346 strlen ("http://"))) ) 347 return false; 348 if (! TALER_url_valid_charset (url) ) 349 return false; 350 return true; 351 } 352 353 354 /* end of url.c */