robocop

Checks KYC attributes against sanction lists
Log | Files | Refs | Submodules | README | LICENSE

commit b9bab0a42664a81dbcdd15d793050050555544ae
parent ff9c8dd00275f9db4ca2d6d172279855ed0c66b0
Author: Vint Leenaars <vl.software@leenaa.rs>
Date:   Mon,  5 May 2025 12:37:05 +0200

Improve address matching

Diffstat:
Msrc/KYC/Check.hs | 51+++++++++++++++++++++++++++++++--------------------
Mtest/Tests/Check.hs | 2+-
Mtest/data/target_38925.xml | 3+++
Mtest/data/target_68815.xml | 6+++---
4 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/src/KYC/Check.hs b/src/KYC/Check.hs @@ -204,32 +204,40 @@ checkAddress :: Map Int (Quality SSL.Address) -> GLS.Address -> [WithSSID Qualit checkAddress addresses' address' = catMaybes $ map (compareAddress address') $ toList addresses' compareAddress :: GLS.Address -> (Int, Quality SSL.Address) -> Maybe (WithSSID QualityFloat) -compareAddress gls_address (ssid, quality) = if total_score >= threshold_float - then Just $ WithSSID ssid $ liftQuality (\_ -> total_score) quality +compareAddress gls_address (ssid, quality) = if country_score >= 0.75 + then Just $ WithSSID ssid $ liftQuality (\_ -> country_score) quality else Nothing where ssl_address = removeQuality quality - total_score = totalFromMaybes [ country_score - , details_score + total_score = totalFromMaybes [ details_score , area_score , location_score , zip_code_score ] country_score = case SSL.country ssl_address of - Just c -> Just $ if c == GLS.country gls_address then 1 else 0 - Nothing -> Nothing + Just c -> if c == GLS.country gls_address then 0.5 * (total_score + 1) else 0 + Nothing -> total_score details_score = case details ssl_address of Just det -> let - perms = permutateText $ catMaybes [ Just $ street_name gls_address - , Just $ street_number gls_address - , country_subdivision gls_address - , lines gls_address - , town_district gls_address - , town_location gls_address - , Just $ zipcode gls_address - ] - ratio = compareTexts (cleanText det) 0 perms + possible_numbers = catMaybes [ Just $ street_number gls_address + , building_number gls_address + , lines gls_address + ] + possible_info = catMaybes [ Just $ street_name gls_address + , building_name gls_address + , country_subdivision gls_address + , town_district gls_address + , town_location gls_address + , Just $ zipcode gls_address + ] + possible_details = possible_numbers ++ possible_info + perms = permutateText possible_details + clean_details = cleanText det + ratio = compareTexts clean_details 0 perms + perfect_matches = foldl (\c i -> if foldl (\b d -> b || i `T.isInfixOf` d) False clean_details then c + 1 else c) 0 possible_info in - Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0 + Just $ if ratio >= threshold_ratio + then ratioToFloat ratio + else if perfect_matches /= 0 then 0.5 else 0 Nothing -> Nothing area_score = case area ssl_address of Just areas -> let @@ -239,7 +247,7 @@ compareAddress gls_address (ssid, quality) = if total_score >= threshold_float ] ratio = compareTexts areas 0 perms in - Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0 + if ratio >= threshold_ratio then Just $ ratioToFloat ratio else Nothing Nothing -> Nothing location_score = case location ssl_address of Just lcs -> let @@ -249,18 +257,21 @@ compareAddress gls_address (ssid, quality) = if total_score >= threshold_float ] ratio = compareTexts lcs 0 perms in - Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0 + if ratio >= threshold_ratio then Just $ ratioToFloat ratio else Nothing Nothing -> Nothing zip_code_score = case zip_code ssl_address of Just zc -> let ratio = compareText zc 0 [zipcode gls_address] in - Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0 + if ratio >= threshold_ratio then Just $ ratioToFloat ratio else Nothing Nothing -> Nothing cleanText :: [Text] -> [Text] -cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm then new_text else snoc new_text char) start_text) text +cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm + then new_text + else snoc new_text char + ) start_text) text where chars_to_rm = ".,-" :: String start_text = "" :: Text diff --git a/test/Tests/Check.hs b/test/Tests/Check.hs @@ -139,7 +139,7 @@ personTests sanction_list = , testTarget False 38925 target_38925_v3 sanction_list $ distribution 0 100 0 0 0 0.75 , testTarget False 38925 target_38925_v4 sanction_list $ distribution 100 0 0 0 0 0.75 , testTarget False 38925 target_38925_v5 sanction_list $ distribution 100 100 0 0 0 0.75 - , testTarget False 68815 target_68815 sanction_list $ distribution 100 100 0 125 0 0.75 + , testTarget False 68815 target_68815 sanction_list $ distribution 75 100 0 125 50 0.75 ] , testGroup "Fake target with XML file" diff --git a/test/data/target_38925.xml b/test/data/target_38925.xml @@ -35,4 +35,7 @@ <justification ssid="38923">Secretary of the Crimea Electoral Commission. In this capacity she participated in the organisation of the Russian presidential elections of 18 Mar 2018 in the illegally annexed Crimea and Sevastopol, and thereby actively supported and implemented policies that undermine the territorial integrity, sovereignty and independence of Ukraine.</justification> </individual> </target> + <place ssid="38924"> + <location>Autonomous Republic of Crimea</location> + </place> </swiss-sanctions-list> diff --git a/test/data/target_68815.xml b/test/data/target_68815.xml @@ -24,8 +24,8 @@ <other-information ssid="68814">National identification no: Haiti 004-341-263-3</other-information> </individual> </target> - <place ssid="49815"> - <location>Saint-Petersburg</location> - <country iso-code="RU">Russian Federation</country> + <place ssid="66660"> + <location>Port-au-Prince</location> + <country iso-code="HT">Haiti</country> </place> </swiss-sanctions-list>