parse_registry.py (6135B)
#!/usr/bin/env python3
"""Convert the tab-separated IBAN registry in ``registry.txt`` to JSON.

Every line of the input starts with a row label followed by one
tab-separated cell per country.  Rows are consumed strictly in file order
by the ``parse_*`` helpers below.  A handful of known inconsistencies in
the source data are patched by hand, and the result is written to
``registry.json`` as a list with one object per country.
"""

import json
import re

with open("registry.txt", mode="r") as registry_file:
    registry_txt = registry_file.readlines()

# One element of a structure description: a repeat count, "!", and a kind
# letter (for example "4!n").
STRUCTURE_PART_PATTERN = "[0-9]+![nca]"
STRUCTURE_PATTERN = f"^({STRUCTURE_PART_PATTERN})+$"

# Regex character class emitted for each structure kind letter.
_KIND_CHAR_CLASS = {"n": "[0-9]", "a": "[A-Z]", "c": "[0-9A-Z]"}

# Rows are consumed by position, so the expected row count is pinned here.
assert len(registry_txt) == 57


def parse_line(prefix):
    """Consume the next registry row and return its per-country cells.

    The first cell of the row must equal ``prefix`` (the row label); it is
    removed from the result.  Cells are stripped of surrounding whitespace,
    and cells that are empty or contain "N/A" are returned as ``None``.

    Raises:
        AssertionError: if the row label is not ``prefix``.
        IndexError: if no rows are left.
    """
    line = registry_txt.pop(0)
    # Strip *before* testing for emptiness: readlines() keeps the trailing
    # newline, so an empty final column arrives as "\n", not "".
    stripped = (x.strip() for x in line.split("\t"))
    parts = [None if not x or "N/A" in x else x for x in stripped]
    first = parts.pop(0)
    assert first == prefix, first
    return parts


def parse_countries(prefix):
    """Consume a row whose cells are comma-separated country lists.

    Returns one list per column; each entry is the text before the first
    space of a ", "-separated item (surrounding double quotes on the cell
    are dropped first).  Missing cells yield an empty list.
    """

    def leading_tokens(encoded):
        return [x.split(' ', 1)[0] for x in encoded.strip('"').split(", ")]

    return [[] if x is None else leading_tokens(x) for x in parse_line(prefix)]


def parse_bool_line(prefix):
    """Consume a row of flags; a cell is ``True`` only if it is "Yes"."""
    return [x == "Yes" for x in parse_line(prefix)]


def parse_int_line(prefix):
    """Consume a row of lengths, keeping the integer before any "!"."""
    return [int(x.split("!")[0]) for x in parse_line(prefix)]


def parse_pattern(encoded):
    """Parse a structure description such as ``"4!n6!n1!n"``.

    Returns a tuple ``(length, rules, regex)`` where ``length`` is the sum
    of all repeat counts, ``rules`` is a list of ``(count, kind)`` tuples
    with adjacent parts of the same kind merged, and ``regex`` is the
    matching regular expression text.  ``None`` yields ``(0, [], "")``.

    Raises:
        AssertionError: if ``encoded`` is not a sequence of structure parts.
    """
    if encoded is None:
        return (0, [], "")
    assert re.match(STRUCTURE_PATTERN, encoded), f"{STRUCTURE_PATTERN} {encoded}"

    rules = []
    for match in re.finditer(STRUCTURE_PART_PATTERN, encoded):
        repetition, kind = match.group().split("!", 1)
        repetition = int(repetition)
        if rules and rules[-1][1] == kind:
            # Same kind as the previous part: extend that run.
            rules[-1] = (rules[-1][0] + repetition, kind)
        else:
            rules.append((repetition, kind))

    pattern_len = sum(repetition for repetition, _ in rules)
    regex = "".join(
        f"{_KIND_CHAR_CLASS[kind]}{{{repetition}}}" for repetition, kind in rules
    )
    return (pattern_len, rules, regex)


def parse_range(range):
    """Turn a ``"start-end"`` string into the tuple ``(start - 1, end)``.

    ``None`` is passed through unchanged.  The parameter keeps its original
    name, which shadows the builtin only inside this function.
    """
    if range is None:
        return None
    (start, end) = [int(x) for x in range.split("-", 1)]
    return (start - 1, end)


# --- Consume the registry rows in file order -------------------------------

parse_line("Data element")
country_names = parse_line("Name of country")
country_code = parse_line("IBAN prefix country code (ISO 3166)")
country_code_include = parse_countries(
    "Country code includes other countries/territories"
)
sepa = parse_bool_line("SEPA country")
sepa_include = parse_countries("SEPA country also includes")
account_example = parse_line("Domestic account number example")

parse_line("BBAN")
bban_patterns = parse_line("BBAN structure")
bban_len = parse_int_line("BBAN length")
bank_range = parse_line("Bank identifier position within the BBAN")
bank_patterns = parse_line("Bank identifier pattern")
branch_range = parse_line("Branch identifier position within the BBAN")
branch_patterns = parse_line("Branch identifier pattern")
bban_bank_example = parse_line("Bank identifier example")
bban_branch_example = parse_line("Branch identifier example")
bban_example = parse_line("BBAN example")

parse_line("IBAN")
iban_pattern = parse_line("IBAN structure")
iban_len = parse_int_line("IBAN length")
parse_line("Effective date")
iban_example = parse_line("IBAN electronic format example")
iban_print_example = parse_line("IBAN print format example")

parse_line("Contact details")
# We ignore contact details

# --- Patch known data problems and build one record per country ------------

registry = []
for i, name in enumerate(country_names):
    code = country_code[i]
    if code == "IQ":
        bank_range[i] = "1-4"
        branch_range[i] = "5-7"
    elif code == "NO":
        bban_patterns[i] = "4!n6!n1!n"
    elif code == "AL":
        bank_patterns[i] = "3!n"
        branch_patterns[i] = "5!n"
    elif code == "EG":
        bank_patterns[i] += "n"
        branch_patterns[i] += "n"
    elif code == "FI":
        bank_patterns[i] = "3!n"
    elif code == "BA":
        # The BBAN does not match the IBAN. The bank and branch match
        # the BBAN. Manually fix all three to correspond to IBAN.
        assert bban_example[i] == "1990440001200279"
        assert bban_bank_example[i] == "199"
        assert bban_branch_example[i] == "044"
        assert account_example[i] == "199-044-00012002-79"
        bban_example[i] = "1290079401028494"
        bban_bank_example[i] = "129"
        bban_branch_example[i] = "007"
        account_example[i] = "129-007-94010284-94"
    elif code == "BR":
        # The BBAN differs by one letter. Fix.
        assert bban_example[i] == "00360305000010009795493P1"
        bban_example[i] = "00360305000010009795493C1"
    elif code == "CR":
        # The BBAN removes the leading '0'. Add it back.
        assert bban_example[i] == "15202001026284066"
        bban_example[i] = "015202001026284066"
    elif code == "IL":
        # This looks like a typo. There is one 0 missing in the BBAN.
        assert bban_example[i] == "010800000099999999"
        bban_example[i] = "0108000000099999999"

    print(code)
    bban_pattern = bban_patterns[i]
    (bban_length, bban_rules, bban_regex) = parse_pattern(bban_pattern)
    # The declared BBAN length, the length implied by its structure and the
    # IBAN length minus 4 must all agree.
    assert bban_len[i] == bban_length == iban_len[i] - 4

    (_, bank_rules, _) = parse_pattern(bank_patterns[i])
    (_, branch_rules, _) = parse_pattern(branch_patterns[i])

    # TODO: also check that the bank/branch range lengths match the lengths
    # implied by their patterns.
    registry.append(
        {
            "name": name,
            "code": code,
            "code_include": country_code_include[i],
            "sepa": sepa[i],
            "sepa_include": sepa_include[i],
            "iban_len": iban_len[i],
            "iban_example": iban_example[i],
            "bban_len": bban_length,
            "bban_pattern": bban_pattern,
            "bban_rules": bban_rules,
            "bban_regex": bban_regex,
            "bban_example": bban_example[i],
            "bank_range": parse_range(bank_range[i]),
            "bank_rules": bank_rules,
            "branch_range": parse_range(branch_range[i]),
            "branch_rules": branch_rules,
        }
    )

with open("registry.json", "w") as json_file:
    json.dump(registry, json_file, indent=2)