summary | refs | log | tree | commit | diff
path: root/codegen/codeset.py
diff options
context:
space:
mode:
Diffstat (limited to 'codegen/codeset.py')
-rw-r--r-- codegen/codeset.py 88
1 files changed, 88 insertions, 0 deletions
diff --git a/codegen/codeset.py b/codegen/codeset.py
new file mode 100644
index 00000000..03f8a343
--- /dev/null
+++ b/codegen/codeset.py
@@ -0,0 +1,88 @@
+"""Extract and generate Kotlin code for ISO 20022 code sets"""
+
+from dataclasses import dataclass
+import requests
+from zipfile import ZipFile
+from io import BytesIO
+import polars as pl
+
+
@dataclass
class Code:
    """One entry of an ISO 20022 code set."""

    # Identifier emitted as the Kotlin enum entry name
    value: str
    # Passed as the first constructor argument (`isoCode`) of the enum entry
    isoCode: str
    # Passed as the second constructor argument (`description`) of the enum entry
    description: str
+
+
@dataclass
class CodeSet:
    """A named ISO 20022 code set together with the codes it contains."""

    # Name of the code set; used as the Kotlin enum class name
    name: str
    # Codes belonging to this set, in the order they should be emitted
    values: list[Code]
    # Human readable summary; emitted as the KDoc comment of the enum class
    description: str
+
+
def extract() -> dict[str, CodeSet]:
    """Download the latest ISO 20022 external code sets and parse them.

    Fetches the ``ExternalCodeSets_XLSX.zip`` archive, reads the
    ``CodeSetsDefinition`` and ``AllCodeSets`` sheets of the single workbook
    it contains and groups every code whose status is not ``Obsolete`` by its
    code set.

    Returns:
        A mapping from code set name to its ``CodeSet``. Sets are ordered by
        name and the codes inside each set by code value.

    Raises:
        requests.RequestException: if the download fails, times out or the
            server answers with an error status.
        ValueError: if the archive does not contain exactly one file.
        KeyError: if a code set has no entry in the definition sheet.
    """
    # Get XLSX zip file from server
    response = requests.get(
        "https://www.iso20022.org/sites/default/files/media/file/ExternalCodeSets_XLSX.zip",
        timeout=60,
    )
    # Explicit check instead of `assert`, which is stripped under `python -O`
    response.raise_for_status()

    # Unzip the XLSX file; read it fully so the archive can be closed and
    # every sheet is parsed from its own fresh buffer
    with ZipFile(BytesIO(response.content)) as archive:
        members = archive.namelist()
        if len(members) != 1:
            raise ValueError(
                f"expected exactly one file in the archive, found {len(members)}"
            )
        workbook = archive.read(members[0])

    # Parse excel
    definitions = pl.read_excel(BytesIO(workbook), sheet_name="CodeSetsDefinition")
    descriptions = dict(
        definitions.select(["Code Set", "Code Set Definition"]).rows()
    )
    all_codes = (
        pl.read_excel(BytesIO(workbook), sheet_name="AllCodeSets")
        .lazy()
        .filter(pl.col("Status") != "Obsolete")
        .sort(["Code Set", "Code Value"])
        .collect()
    )

    sets = {}
    partitions = all_codes.partition_by("Code Set", as_dict=True)
    for key, set_codes in partitions.items():
        # NOTE(review): newer polars releases appear to key the partitions by
        # tuple even for a single column - confirm against the pinned version.
        name = key[0] if isinstance(key, tuple) else key

        # Keep the first line only and drop the Excel carriage-return escape.
        # `removesuffix` removes the literal marker; `rstrip("_x000D_")` would
        # strip any trailing '_', 'x', '0' or 'D' characters instead.
        description = (
            descriptions[name].split("\n", 1)[0].removesuffix("_x000D_").strip(".")
        )

        values = []
        for row in set_codes.rows(named=True):
            value = row["Code Value"]
            # First line only, with double quotes escaped for a Kotlin string literal
            definition = (
                row["Code Definition"].split("\n", 1)[0].rstrip().replace('"', '\\"')
            )
            # ISO 20022 allows code values starting with a digit, which is
            # incompatible with Java identifiers
            if value[0].isdigit():
                value = f"_{value}"
            values.append(Code(value, row["Code Name"], definition))

        sets[name] = CodeSet(name, values, description)

    return sets
+
+
def codegen(sets: list[CodeSet]) -> str:
    """Generate Kotlin code for the code sets.

    Each code set becomes one ``enum class`` preceded by a KDoc comment
    holding its description; each code becomes one enum entry.

    Args:
        sets: The code sets to render, in output order. A ``dict`` of
            name -> code set (the shape returned by ``extract``) is accepted
            as well, in which case its values are rendered.

    Returns:
        The Kotlin source text, or an empty string when there are no sets.
        ``isoCode`` and ``description`` are inserted verbatim into Kotlin
        string literals, so they must already be escaped.
    """
    # Iterating a dict would yield its keys (names), not the code sets
    code_sets = sets.values() if isinstance(sets, dict) else sets

    # Collect fragments and join once instead of repeated string `+=`
    parts: list[str] = []
    for code_set in code_sets:
        parts.append(f"\n/** {code_set.description} */")
        parts.append(
            f"\nenum class {code_set.name}(val isoCode: String, val description: String) {{"
        )
        parts.extend(
            f'\n\t{code.value}("{code.isoCode}", "{code.description}"),'
            for code in code_set.values
        )
        parts.append("\n}\n")
    return "".join(parts)