diff options
Diffstat (limited to 'codegen/codeset.py')
-rw-r--r-- | codegen/codeset.py | 88 |
1 files changed, 88 insertions, 0 deletions
"""Extract and generate Kotlin code for ISO 20022 code sets"""

from collections.abc import Iterable, Mapping
from dataclasses import dataclass
from io import BytesIO
from typing import Union
from zipfile import ZipFile

import polars as pl
import requests

# Archive published by ISO 20022 that contains the external code sets workbook.
_CODE_SETS_URL = (
    "https://www.iso20022.org/sites/default/files/media/file/ExternalCodeSets_XLSX.zip"
)

# Upper bound, in seconds, on how long the download may block.
_REQUEST_TIMEOUT = 60

# Literal marker left at the end of some workbook cells (looks like the
# spreadsheet's escaped carriage return); it must be removed as a whole suffix.
_CR_MARKER = "_x000D_"


@dataclass
class Code:
    """ISO 20022 code"""

    # Identifier used for the generated enum entry.
    value: str
    # Value of the "Code Name" column, emitted as the entry's isoCode argument.
    isoCode: str
    # First line of the code definition, with double quotes already escaped.
    description: str


@dataclass
class CodeSet:
    """ISO 20022 code set"""

    # Code set name, used as the generated enum class name.
    name: str
    # Codes belonging to the set, in generation order.
    values: list[Code]
    # First line of the code set definition.
    description: str


def _first_line(text: str) -> str:
    """Return the part of *text* that precedes its first newline."""
    return text.split("\n", 1)[0]


def _clean_set_description(raw: str) -> str:
    """Reduce a code set definition to a one-line summary without final dot."""
    text = _first_line(raw)
    # Remove the marker as a suffix. `str.rstrip` would treat it as a set of
    # characters and eat legitimate trailing letters such as a final "x".
    while text.endswith(_CR_MARKER):
        text = text.removesuffix(_CR_MARKER)
    return text.strip(".")


def _clean_code_description(raw: str) -> str:
    """Reduce a code definition to one line usable inside a Kotlin string."""
    return _first_line(raw).rstrip().replace('"', '\\"')


def _identifier(value: str) -> str:
    """Turn a code value into a name usable as an enum entry."""
    # ISO 20022 allows code values starting with a digit, which is not a valid
    # identifier start. The slice keeps an empty value from raising.
    if value[:1].isdigit():
        return f"_{value}"
    return value


def extract() -> dict[str, CodeSet]:
    """Extract latest code set from specification

    Downloads the external code sets archive, reads the workbook it contains
    and groups every code whose status is not "Obsolete" by code set.

    Returns:
        Code sets keyed by name, in ascending name order, each with its codes
        sorted by code value.

    Raises:
        requests.RequestException: if the download fails, times out or the
            server answers with an error status.
        ValueError: if the archive does not contain exactly one file.
    """
    # Get XLSX zip file from server
    response = requests.get(_CODE_SETS_URL, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()

    # Unzip the XLSX file
    with ZipFile(BytesIO(response.content)) as archive:
        members = archive.namelist()
        if len(members) != 1:
            raise ValueError(
                f"expected exactly one file in the archive, found {len(members)}"
            )
        workbook = archive.read(members[0])

    # Parse excel. Each sheet is read from its own buffer so the second read
    # never depends on where the first one left the stream position.
    descriptions = dict(
        pl.read_excel(BytesIO(workbook), sheet_name="CodeSetsDefinition")
        .select(["Code Set", "Code Set Definition"])
        .rows()
    )
    active_codes = (
        pl.read_excel(BytesIO(workbook), sheet_name="AllCodeSets")
        .lazy()
        .filter(pl.col("Status") != "Obsolete")
        .sort(["Code Set", "Code Value"])
        .collect()
    )

    sets: dict[str, CodeSet] = {}
    partitions = active_codes.partition_by("Code Set", as_dict=True)
    for key, frame in partitions.items():
        # Depending on the polars version the key is either the bare name or
        # a one-element tuple holding it.
        name = key[0] if isinstance(key, tuple) else key
        values = [
            Code(
                _identifier(row["Code Value"]),
                row["Code Name"],
                _clean_code_description(row["Code Definition"]),
            )
            for row in frame.rows(named=True)
        ]
        sets[name] = CodeSet(name, values, _clean_set_description(descriptions[name]))

    return sets


def codegen(sets: Union[Iterable[CodeSet], Mapping[str, CodeSet]]) -> str:
    """Generate kotlin code for the code sets

    Args:
        sets: The code sets to render, either as an iterable of `CodeSet` or
            as the name-to-set mapping returned by `extract`.

    Returns:
        Kotlin source declaring one documented enum class per code set, or an
        empty string when there is nothing to render.
    """
    # Iterating a mapping would yield its names, not the code sets themselves.
    if isinstance(sets, Mapping):
        sets = sets.values()

    chunks: list[str] = []
    for code_set in sets:
        chunks.append(f"\n/** {code_set.description} */")
        chunks.append(
            f"\nenum class {code_set.name}"
            "(val isoCode: String, val description: String) {"
        )
        chunks.extend(
            f'\n\t{code.value}("{code.isoCode}", "{code.description}"),'
            for code in code_set.values
        )
        chunks.append("\n}\n")
    return "".join(chunks)