validators.c (8934B)
1 /* 2 This file is part of TALER 3 (C) 2025 Taler Systems SA 4 5 TALER is free software; you can redistribute it and/or modify it under the 6 terms of the GNU Lesser General Public License as published by the Free Software 7 Foundation; either version 3, or (at your option) any later version. 8 9 TALER is distributed in the hope that it will be useful, but WITHOUT ANY 10 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 11 A PARTICULAR PURPOSE. See the GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License along with 14 TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/> 15 */ 16 /** 17 * @file validators.c 18 * @brief Input validators 19 * @author Christian Grothoff 20 */ 21 #include "taler/platform.h" 22 #include <gnunet/gnunet_util_lib.h> 23 #include <gnunet/gnunet_db_lib.h> 24 #include <taler/taler_json_lib.h> 25 #include "taler/taler_merchant_util.h" 26 #include <regex.h> 27 28 bool 29 TALER_MERCHANT_image_data_url_valid (const char *image_data_url) 30 { 31 if (0 == strcmp (image_data_url, 32 "")) 33 return true; 34 if (0 != strncasecmp ("data:image/", 35 image_data_url, 36 strlen ("data:image/"))) 37 { 38 GNUNET_break_op (0); 39 return false; 40 } 41 if (NULL == strstr (image_data_url, 42 ";base64,")) 43 { 44 GNUNET_break_op (0); 45 return false; 46 } 47 if (! TALER_url_valid_charset (image_data_url)) 48 { 49 GNUNET_break_op (0); 50 return false; 51 } 52 return true; 53 } 54 55 56 bool 57 TALER_MERCHANT_email_valid (const char *email) 58 { 59 regex_t regex; 60 bool is_valid; 61 62 /* 63 * Email regex pattern supporting: 64 * 65 * Local part (before @): 66 * - Dot-atom: alphanumeric, dots, hyphens, underscores 67 * (no leading/trailing dots, no consecutive dots) 68 * - Quoted-string: quoted text with escaped chars inside 69 * 70 * Domain part (after @): 71 * - Domain labels: alphanumeric and hyphens 72 * (no leading/trailing hyphens per label) 73 * - IP literals: [IPv4] or [IPv6:...] 74 * 75 * Pattern breakdown: 76 * Local part: 77 * ([a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+ 78 * (\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*) 79 * = dot-atom (atext chars, dots allowed between parts) 80 * 81 * |"([^"\\]|\\.)*" 82 * = quoted-string (anything in quotes with escaping) 83 * 84 * Domain part: 85 * ([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? 86 * (\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*) 87 * = domain labels (63 chars max, hyphens in middle) 88 * 89 * |\[([0-9]{1,3}\.){3}[0-9]{1,3}\] 90 * = IPv4 literal 91 * 92 * |\[IPv6:[0-9a-fA-F:]+\] 93 * = IPv6 literal 94 */ 95 const char *pattern = 96 "^(" 97 /* Local part: dot-atom-text or quoted-string */ 98 "([a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\\.)?)*[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+" 99 "|" 100 "\"([^\"\\\\]|\\\\.)*\"" 101 ")" 102 "@" 103 "(" 104 /* Domain: domain labels (with at least one dot) or IP literal */ 105 "([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)" 106 "|" 107 "\\[((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}" 108 "([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\\]" 109 "|" 110 "\\[IPv6:[0-9a-fA-F:]*[0-9a-fA-F]\\]" 111 ")$"; 112 113 if ('\0' == email[0]) 114 return false; 115 116 /* Maximum email length per RFC 5321 */ 117 if (strlen (email) > 254) 118 return false; 119 120 GNUNET_assert (0 == 121 regcomp (®ex, 122 pattern, 123 REG_EXTENDED | REG_NOSUB)); 124 is_valid = (0 == 125 regexec (®ex, 126 email, 127 0, 128 NULL, 129 0)); 130 regfree (®ex); 131 return is_valid; 132 } 133 134 135 char * 136 TALER_MERCHANT_phone_validate_normalize (const char *phone, 137 bool allow_letters) 138 { 139 if ('\0' == phone[0]) 140 return NULL; 141 142 /* Maximum phone length (reasonable practical limit) */ 143 if (strlen (phone) > 30) 144 return NULL; 145 146 { 147 regex_t regex; 148 int ret; 149 150 /* 151 * Phone number regex pattern with +CC prefix requirement: 152 * 153 * Supports: 154 * - Country codes (1-3 digits after +) 155 * - Variable length national numbers 156 * - Spaces, hyphens, and dots as separators 157 * - Parentheses for area codes 158 * - Optional extension notation (x, ext, extension) 159 * - Optional letters representing digits (2-9) if allow_letters is true 160 * 161 * Examples: 162 * +1-202-555-0173 163 * +33 1 42 68 53 00 164 * +44.20.7946.0958 165 * +1 (202) 555-0173 166 * +886 2 2345 6789 167 * +1-800-CALL-NOW (if allow_letters is true) 168 * +49-30-12345678x123 169 * 170 * Pattern breakdown: 171 * ^\+[0-9]{1,3} 172 * = Plus sign followed by 1-3 digit country code 173 * 174 * [-. ]? 175 * = Optional separator after country code 176 * 177 * (\([0-9]{1,4}\)[-. ]?)? 178 * = Optional parenthesized area code with separator 179 * 180 * [0-9A-Z] 181 * = Start with digit or letter 182 * 183 * ([-. ]?[0-9A-Z])* 184 * = Digit/letter groups with optional separators 185 * 186 * ([ ]?(x|ext|extension)[ ]?[0-9]{1,6})? 187 * = Optional extension 188 * 189 * $ 190 * = End of string 191 */ 192 const char *pattern_digits = 193 "^\\+[0-9]{1,3}" /* Plus and country code (1-3 digits) */ 194 "[-. ]?" /* Optional single separator */ 195 "(" /* Optional area code group */ 196 "\\([0-9]{1,4}\\)" /* Area code in parens */ 197 "[-. ]?" /* Optional separator after parens */ 198 ")?" 199 "[0-9]" /* Start national number with digit */ 200 "(" /* National number: alternating digits and separators */ 201 "[-. ]?[0-9]" /* Separator optionally followed by digit */ 202 ")*" 203 "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?" /* Optional extension */ 204 "$"; 205 206 const char *pattern_with_letters = 207 "^\\+[0-9]{1,3}" /* Plus and country code (1-3 digits) */ 208 "[-. ]?" /* Optional single separator */ 209 "(" /* Optional area code group */ 210 "\\([0-9]{1,4}\\)" /* Area code in parens */ 211 "[-. ]?" /* Optional separator after parens */ 212 ")?" 213 "[0-9A-Z]" /* Start national number with digit or letter */ 214 "(" /* National number: alternating digits/letters and separators */ 215 "[-. ]?[0-9A-Z]" /* Separator optionally followed by digit or letter */ 216 ")*" 217 "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?" /* Optional extension */ 218 "$"; 219 220 const char *pattern = allow_letters 221 ? pattern_with_letters 222 : pattern_digits; 223 224 GNUNET_assert (0 == 225 regcomp (®ex, 226 pattern, 227 REG_EXTENDED | REG_NOSUB | REG_ICASE)); 228 ret = regexec (®ex, 229 phone, 0, 230 NULL, 0); 231 regfree (®ex); 232 if (0 != ret) 233 return NULL; /* invalid number */ 234 } 235 236 /* Phone is valid - normalize it */ 237 { 238 char *normalized; 239 char *out; 240 241 normalized = GNUNET_malloc (strlen (phone) + 1); 242 out = normalized; 243 *out++ = '+'; /* Start with plus sign */ 244 245 for (const char *in = phone; 246 '\0' != *in; 247 in++) 248 { 249 if (isdigit ((unsigned char) *in)) 250 { 251 /* Copy digit as-is */ 252 *out++ = *in; 253 } 254 else if (allow_letters && isalpha ((unsigned char) *in)) 255 { 256 /* Convert letter to corresponding digit (A-Z maps to 2-9) */ 257 char upper = toupper ((unsigned char) *in); 258 /* T9 keypad mapping: 259 * 2: ABC 260 * 3: DEF 261 * 4: GHI 262 * 5: JKL 263 * 6: MNO 264 * 7: PQRS 265 * 8: TUV 266 * 9: WXYZ 267 */ 268 char digit; 269 270 if (upper >= 'A' && upper <= 'C') 271 digit = '2'; 272 else if (upper >= 'D' && upper <= 'F') 273 digit = '3'; 274 else if (upper >= 'G' && upper <= 'I') 275 digit = '4'; 276 else if (upper >= 'J' && upper <= 'L') 277 digit = '5'; 278 else if (upper >= 'M' && upper <= 'O') 279 digit = '6'; 280 else if (upper >= 'P' && upper <= 'S') 281 digit = '7'; 282 else if (upper >= 'T' && upper <= 'V') 283 digit = '8'; 284 else if (upper >= 'W' && upper <= 'Z') 285 digit = '9'; 286 else 287 digit = '0'; /* Fallback (shouldn't happen) */ 288 *out++ = digit; 289 } 290 /* Skip separators, parentheses, and spaces */ 291 /* Skip 'x', 'ext', 'extension' keywords and their extension digits */ 292 } 293 *out = '\0'; /* redundant, but helps analyzers... */ 294 return normalized; 295 } 296 }