iban-tools

Tools / code generators for IBAN validation
Log | Files | Refs

parse_registry.py (6135B)


      1 #!/usr/bin/env python3
      2 
      3 import json
      4 import re
      5 
      6 with open("registry.txt", mode="r") as registry_file:
      7     registry_txt = registry_file.readlines()
      8 
      9 STRUCTURE_PART_PATTERN = "[0-9]+![nca]"
     10 STRUCTURE_PATTERN = f"^({STRUCTURE_PART_PATTERN})+$"
     11 
     12 assert len(registry_txt) == 57
     13 
     14 
     15 def parse_line(prefix):
     16     line = registry_txt.pop(0)
     17     parts = [None if x == "" or "N/A" in x else x.strip() for x in line.split("\t")]
     18     first = parts.pop(0)
     19     assert first == prefix, first
     20     return parts
     21 
     22 
     23 def parse_countries(prefix):
     24     def parse_countries(encoded):
     25         return [x.split(' ', 1)[0] for x in encoded.strip('"').split(", ")]
     26 
     27     return [[] if x is None else parse_countries(x) for x in parse_line(prefix)]
     28 
     29 
     30 def parse_bool_line(prefix):
     31     return [x == "Yes" for x in parse_line(prefix)]
     32 
     33 
     34 def parse_int_line(prefix):
     35     return [int(x.split("!")[0]) for x in parse_line(prefix)]
     36 
     37 
     38 def parse_pattern(encoded):
     39     if encoded is None:
     40         return (0, [], "")
     41     assert re.match(STRUCTURE_PATTERN, encoded), f"{STRUCTURE_PATTERN} {encoded}"
     42     pattern_len = 0
     43     rules = []
     44     for match in re.finditer(STRUCTURE_PART_PATTERN, encoded):
     45         [repetition, kind] = match.group().split("!", 1)
     46         repetition = int(repetition)
     47         pattern_len += repetition
     48         if len(rules) == 0:
     49             rules.append((repetition, kind))
     50         else:
     51             last = rules[-1]
     52             if last[1] == kind:
     53                 rules[-1] = (last[0] + repetition, kind)
     54             else:
     55                 rules.append((repetition, kind))
     56     regex = ""
     57     for [repetition, kind] in rules:
     58         if kind == "n":
     59             regex += f"[0-9]{{{repetition}}}"
     60         elif kind == "a":
     61             regex += f"[A-Z]{{{repetition}}}"
     62         else:
     63             regex += f"[0-9A-Z]{{{repetition}}}"
     64     return (pattern_len, rules, regex)
     65 
     66 
     67 def parse_range(range):
     68     if range is None:
     69         return None
     70     (start, end) = [int(x) for x in range.split("-", 1)]
     71     return (start - 1, end)
     72 
     73 
     74 parse_line("Data element")
     75 country_names = parse_line("Name of country")
     76 country_code = parse_line("IBAN prefix country code (ISO 3166)")
     77 country_code_include = parse_countries(
     78     "Country code includes other countries/territories"
     79 )
     80 sepa = parse_bool_line("SEPA country")
     81 sepa_include = parse_countries("SEPA country also includes")
     82 account_example = parse_line("Domestic account number example")
     83 
     84 parse_line("BBAN")
     85 bban_patterns = parse_line("BBAN structure")
     86 bban_len = parse_int_line("BBAN length")
     87 bank_range = parse_line("Bank identifier position within the BBAN")
     88 bank_patterns = parse_line("Bank identifier pattern")
     89 branch_range = parse_line("Branch identifier position within the BBAN")
     90 branch_patterns = parse_line("Branch identifier pattern")
     91 bban_bank_example = parse_line("Bank identifier example")
     92 bban_branch_example = parse_line("Branch identifier example")
     93 bban_example = parse_line("BBAN example")
     94 
     95 parse_line("IBAN")
     96 iban_pattern = parse_line("IBAN structure")
     97 iban_len = parse_int_line("IBAN length")
     98 parse_line("Effective date")
     99 iban_example = parse_line("IBAN electronic format example")
    100 iban_print_example = parse_line("IBAN print format example")
    101 
    102 parse_line("Contact details")
    103 # We ignore contact details
    104 
    105 registry = []
    106 for i in range(len(country_names)):
    107     code = country_code[i]
    108     if code == "IQ":
    109         bank_range[i] = "1-4"
    110         branch_range[i] = "5-7"
    111     elif code == "NO":
    112         bban_patterns[i] = "4!n6!n1!n"
    113     elif code == "AL":
    114         bank_patterns[i] = "3!n"
    115         branch_patterns[i] = "5!n"
    116     elif code == "EG":
    117         bank_patterns[i] += "n"
    118         branch_patterns[i] += "n"
    119     elif code == "FI":
    120         bank_patterns[i] = "3!n"
    121     elif code == "BA":
    122         # The BBAN does not match the IBAN. The bank and branch match
    123         # the BBAN. Manually fix all three to correspond to IBAN.
    124         assert bban_example[i] == "1990440001200279"
    125         assert bban_bank_example[i] == "199"
    126         assert bban_branch_example[i] == "044"
    127         assert account_example[i] == "199-044-00012002-79"
    128         bban_example[i] = "1290079401028494"
    129         bban_bank_example[i] = "129"
    130         bban_branch_example[i] = "007"
    131         account_example[i] = "129-007-94010284-94"
    132     elif code == "BR":
    133         # The BBAN differs by one letter. Fix.
    134         assert bban_example[i] == "00360305000010009795493P1"
    135         bban_example[i] = "00360305000010009795493C1"
    136     elif code == "CR":
    137         # The BBAN removes the leading '0'. Add it back.
    138         assert bban_example[i] == "15202001026284066"
    139         bban_example[i] = "015202001026284066"
    140     elif code == "IL":
    141         # This looks like a typo. There is one 0 missing in the BBAN.
    142         assert bban_example[i] == "010800000099999999"
    143         bban_example[i] = "0108000000099999999"
    144 
    145     print(code)
    146     bban_pattern = bban_patterns[i]
    147     (bban_length, bban_rules, bban_regex) = parse_pattern(bban_pattern)
    148     assert bban_len[i] == bban_length == iban_len[i] - 4
    149 
    150     (_, bank_rules, _) = parse_pattern(bank_patterns[i])
    151     (_, branch_rules, _) = parse_pattern(branch_patterns[i])
    152 
    153     # if bban_bank[i] is not None:
    154     #    assert range_len(bban_bank[i]) == structure_len(bban_bank_structure[i])
    155     # if bban_branch[i] is not None:
    156     #    assert range_len(bban_branch[i]) == structure_len(bban_branch_structure[i])
    157     registry.append(
    158         {
    159             "name": country_names[i],
    160             "code": code,
    161             "code_include": country_code_include[i],
    162             "sepa": sepa[i],
    163             "sepa_include": sepa_include[i],
    164             "iban_len": iban_len[i],
    165             "iban_example": iban_example[i],
    166             "bban_len": bban_length,
    167             "bban_pattern": bban_pattern,
    168             "bban_rules": bban_rules,
    169             "bban_regex": bban_regex,
    170             "bban_example": bban_example[i],
    171             "bank_range": parse_range(bank_range[i]),
    172             "bank_rules": bank_rules,
    173             "branch_range": parse_range(branch_range[i]),
    174             "branch_rules": branch_rules,
    175         }
    176     )
    177 
    178 with open("registry.json", "w") as json_file:
    179     json.dump(registry, json_file, indent=2)