merchant

Merchant backend to process payments, run by merchants
Log | Files | Refs | Submodules | README | LICENSE

validators.c (8934B)


      1 /*
      2   This file is part of TALER
      3   (C) 2025 Taler Systems SA
      4 
      5   TALER is free software; you can redistribute it and/or modify it under the
      6   terms of the GNU Lesser General Public License as published by the Free Software
      7   Foundation; either version 3, or (at your option) any later version.
      8 
      9   TALER is distributed in the hope that it will be useful, but WITHOUT ANY
     10   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
     11   A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
     12 
     13   You should have received a copy of the GNU General Public License along with
     14   TALER; see the file COPYING.  If not, see <http://www.gnu.org/licenses/>
     15 */
     16 /**
     17  * @file validators.c
     18  * @brief Input validators
     19  * @author Christian Grothoff
     20  */
     21 #include "taler/platform.h"
     22 #include <gnunet/gnunet_util_lib.h>
     23 #include <gnunet/gnunet_db_lib.h>
     24 #include <taler/taler_json_lib.h>
     25 #include "taler/taler_merchant_util.h"
     26 #include <regex.h>
     27 
     28 bool
     29 TALER_MERCHANT_image_data_url_valid (const char *image_data_url)
     30 {
     31   if (0 == strcmp (image_data_url,
     32                    ""))
     33     return true;
     34   if (0 != strncasecmp ("data:image/",
     35                         image_data_url,
     36                         strlen ("data:image/")))
     37   {
     38     GNUNET_break_op (0);
     39     return false;
     40   }
     41   if (NULL == strstr (image_data_url,
     42                       ";base64,"))
     43   {
     44     GNUNET_break_op (0);
     45     return false;
     46   }
     47   if (! TALER_url_valid_charset (image_data_url))
     48   {
     49     GNUNET_break_op (0);
     50     return false;
     51   }
     52   return true;
     53 }
     54 
     55 
     56 bool
     57 TALER_MERCHANT_email_valid (const char *email)
     58 {
     59   regex_t regex;
     60   bool is_valid;
     61 
     62   /*
     63    * Email regex pattern supporting:
     64    *
     65    * Local part (before @):
     66    * - Dot-atom: alphanumeric, dots, hyphens, underscores
     67    *   (no leading/trailing dots, no consecutive dots)
     68    * - Quoted-string: quoted text with escaped chars inside
     69    *
     70    * Domain part (after @):
     71    * - Domain labels: alphanumeric and hyphens
     72    *   (no leading/trailing hyphens per label)
     73    * - IP literals: [IPv4] or [IPv6:...]
     74    *
     75    * Pattern breakdown:
     76    * Local part:
     77    *   ([a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+
     78    *    (\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*)
     79    *   = dot-atom (atext chars, dots allowed between parts)
     80    *
     81    *   |"([^"\\]|\\.)*"
     82    *   = quoted-string (anything in quotes with escaping)
     83    *
     84    * Domain part:
     85    *   ([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
     86    *    (\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)
     87    *   = domain labels (63 chars max, hyphens in middle)
     88    *
     89    *   |\[([0-9]{1,3}\.){3}[0-9]{1,3}\]
     90    *   = IPv4 literal
     91    *
     92    *   |\[IPv6:[0-9a-fA-F:]+\]
     93    *   = IPv6 literal
     94    */
     95   const char *pattern =
     96     "^("
     97     /* Local part: dot-atom-text or quoted-string */
     98     "([a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\\.)?)*[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+"
     99     "|"
    100     "\"([^\"\\\\]|\\\\.)*\""
    101     ")"
    102     "@"
    103     "("
    104     /* Domain: domain labels (with at least one dot) or IP literal */
    105     "([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)"
    106     "|"
    107     "\\[((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}"
    108     "([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\\]"
    109     "|"
    110     "\\[IPv6:[0-9a-fA-F:]*[0-9a-fA-F]\\]"
    111     ")$";
    112 
    113   if ('\0' == email[0])
    114     return false;
    115 
    116   /* Maximum email length per RFC 5321 */
    117   if (strlen (email) > 254)
    118     return false;
    119 
    120   GNUNET_assert (0 ==
    121                  regcomp (&regex,
    122                           pattern,
    123                           REG_EXTENDED | REG_NOSUB));
    124   is_valid = (0 ==
    125               regexec (&regex,
    126                        email,
    127                        0,
    128                        NULL,
    129                        0));
    130   regfree (&regex);
    131   return is_valid;
    132 }
    133 
    134 
    135 char *
    136 TALER_MERCHANT_phone_validate_normalize (const char *phone,
    137                                          bool allow_letters)
    138 {
    139   if ('\0' == phone[0])
    140     return NULL;
    141 
    142   /* Maximum phone length (reasonable practical limit) */
    143   if (strlen (phone) > 30)
    144     return NULL;
    145 
    146   {
    147     regex_t regex;
    148     int ret;
    149 
    150     /*
    151      * Phone number regex pattern with +CC prefix requirement:
    152      *
    153      * Supports:
    154      * - Country codes (1-3 digits after +)
    155      * - Variable length national numbers
    156      * - Spaces, hyphens, and dots as separators
    157      * - Parentheses for area codes
    158      * - Optional extension notation (x, ext, extension)
    159      * - Optional letters representing digits (2-9) if allow_letters is true
    160      *
    161      * Examples:
    162      *   +1-202-555-0173
    163      *   +33 1 42 68 53 00
    164      *   +44.20.7946.0958
    165      *   +1 (202) 555-0173
    166      *   +886 2 2345 6789
    167      *   +1-800-CALL-NOW (if allow_letters is true)
    168      *   +49-30-12345678x123
    169      *
    170      * Pattern breakdown:
    171      * ^\+[0-9]{1,3}
    172      *   = Plus sign followed by 1-3 digit country code
    173      *
    174      * [-. ]?
    175      *   = Optional separator after country code
    176      *
    177      * (\([0-9]{1,4}\)[-. ]?)?
    178      *   = Optional parenthesized area code with separator
    179      *
    180      * [0-9A-Z]
    181      *   = Start with digit or letter
    182      *
    183      * ([-. ]?[0-9A-Z])*
    184      *   = Digit/letter groups with optional separators
    185      *
    186      * ([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?
    187      *   = Optional extension
    188      *
    189      * $
    190      *   = End of string
    191      */
    192     const char *pattern_digits =
    193       "^\\+[0-9]{1,3}"                     /* Plus and country code (1-3 digits) */
    194       "[-. ]?"                             /* Optional single separator */
    195       "("                                  /* Optional area code group */
    196       "\\([0-9]{1,4}\\)"                   /* Area code in parens */
    197       "[-. ]?"                             /* Optional separator after parens */
    198       ")?"
    199       "[0-9]"                              /* Start national number with digit */
    200       "("                                  /* National number: alternating digits and separators */
    201       "[-. ]?[0-9]"                        /* Separator optionally followed by digit */
    202       ")*"
    203       "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?" /* Optional extension */
    204       "$";
    205 
    206     const char *pattern_with_letters =
    207       "^\\+[0-9]{1,3}"                     /* Plus and country code (1-3 digits) */
    208       "[-. ]?"                             /* Optional single separator */
    209       "("                                  /* Optional area code group */
    210       "\\([0-9]{1,4}\\)"                   /* Area code in parens */
    211       "[-. ]?"                             /* Optional separator after parens */
    212       ")?"
    213       "[0-9A-Z]"                           /* Start national number with digit or letter */
    214       "("                                  /* National number: alternating digits/letters and separators */
    215       "[-. ]?[0-9A-Z]"                     /* Separator optionally followed by digit or letter */
    216       ")*"
    217       "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?" /* Optional extension */
    218       "$";
    219 
    220     const char *pattern = allow_letters
    221       ? pattern_with_letters
    222       : pattern_digits;
    223 
    224     GNUNET_assert (0 ==
    225                    regcomp (&regex,
    226                             pattern,
    227                             REG_EXTENDED | REG_NOSUB | REG_ICASE));
    228     ret = regexec (&regex,
    229                    phone, 0,
    230                    NULL, 0);
    231     regfree (&regex);
    232     if (0 != ret)
    233       return NULL; /* invalid number */
    234   }
    235 
    236   /* Phone is valid - normalize it */
    237   {
    238     char *normalized;
    239     char *out;
    240 
    241     normalized = GNUNET_malloc (strlen (phone) + 1);
    242     out = normalized;
    243     *out++ = '+';  /* Start with plus sign */
    244 
    245     for (const char *in = phone;
    246          '\0' != *in;
    247          in++)
    248     {
    249       if (isdigit ((unsigned char) *in))
    250       {
    251         /* Copy digit as-is */
    252         *out++ = *in;
    253       }
    254       else if (allow_letters && isalpha ((unsigned char) *in))
    255       {
    256         /* Convert letter to corresponding digit (A-Z maps to 2-9) */
    257         char upper = toupper ((unsigned char) *in);
    258         /* T9 keypad mapping:
    259          * 2: ABC
    260          * 3: DEF
    261          * 4: GHI
    262          * 5: JKL
    263          * 6: MNO
    264          * 7: PQRS
    265          * 8: TUV
    266          * 9: WXYZ
    267          */
    268         char digit;
    269 
    270         if (upper >= 'A' && upper <= 'C')
    271           digit = '2';
    272         else if (upper >= 'D' && upper <= 'F')
    273           digit = '3';
    274         else if (upper >= 'G' && upper <= 'I')
    275           digit = '4';
    276         else if (upper >= 'J' && upper <= 'L')
    277           digit = '5';
    278         else if (upper >= 'M' && upper <= 'O')
    279           digit = '6';
    280         else if (upper >= 'P' && upper <= 'S')
    281           digit = '7';
    282         else if (upper >= 'T' && upper <= 'V')
    283           digit = '8';
    284         else if (upper >= 'W' && upper <= 'Z')
    285           digit = '9';
    286         else
    287           digit = '0';  /* Fallback (shouldn't happen) */
    288         *out++ = digit;
    289       }
    290       /* Skip separators, parentheses, and spaces */
    291       /* Skip 'x', 'ext', 'extension' keywords and their extension digits */
    292     }
    293     *out = '\0'; /* redundant, but helps analyzers... */
    294     return normalized;
    295   }
    296 }