commit b9bab0a42664a81dbcdd15d793050050555544ae
parent ff9c8dd00275f9db4ca2d6d172279855ed0c66b0
Author: Vint Leenaars <vl.software@leenaa.rs>
Date: Mon, 5 May 2025 12:37:05 +0200
Improve address matching
Diffstat:
4 files changed, 38 insertions(+), 24 deletions(-)
diff --git a/src/KYC/Check.hs b/src/KYC/Check.hs
@@ -204,32 +204,40 @@ checkAddress :: Map Int (Quality SSL.Address) -> GLS.Address -> [WithSSID Qualit
checkAddress addresses' address' = catMaybes $ map (compareAddress address') $ toList addresses'
compareAddress :: GLS.Address -> (Int, Quality SSL.Address) -> Maybe (WithSSID QualityFloat)
-compareAddress gls_address (ssid, quality) = if total_score >= threshold_float
- then Just $ WithSSID ssid $ liftQuality (\_ -> total_score) quality
+compareAddress gls_address (ssid, quality) = if country_score >= 0.75
+ then Just $ WithSSID ssid $ liftQuality (\_ -> country_score) quality
else Nothing
where ssl_address = removeQuality quality
- total_score = totalFromMaybes [ country_score
- , details_score
+ total_score = totalFromMaybes [ details_score
, area_score
, location_score
, zip_code_score
]
country_score = case SSL.country ssl_address of
- Just c -> Just $ if c == GLS.country gls_address then 1 else 0
- Nothing -> Nothing
+ Just c -> if c == GLS.country gls_address then 0.5 * (total_score + 1) else 0
+ Nothing -> total_score
details_score = case details ssl_address of
Just det -> let
- perms = permutateText $ catMaybes [ Just $ street_name gls_address
- , Just $ street_number gls_address
- , country_subdivision gls_address
- , lines gls_address
- , town_district gls_address
- , town_location gls_address
- , Just $ zipcode gls_address
- ]
- ratio = compareTexts (cleanText det) 0 perms
+ possible_numbers = catMaybes [ Just $ street_number gls_address
+ , building_number gls_address
+ , lines gls_address
+ ]
+ possible_info = catMaybes [ Just $ street_name gls_address
+ , building_name gls_address
+ , country_subdivision gls_address
+ , town_district gls_address
+ , town_location gls_address
+ , Just $ zipcode gls_address
+ ]
+ possible_details = possible_numbers ++ possible_info
+ perms = permutateText possible_details
+ clean_details = cleanText det
+ ratio = compareTexts clean_details 0 perms
+ perfect_matches = foldl (\c i -> if foldl (\b d -> b || i `T.isInfixOf` d) False clean_details then c + 1 else c) 0 possible_info
in
- Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0
+ Just $ if ratio >= threshold_ratio
+ then ratioToFloat ratio
+ else if perfect_matches /= 0 then 0.5 else 0
Nothing -> Nothing
area_score = case area ssl_address of
Just areas -> let
@@ -239,7 +247,7 @@ compareAddress gls_address (ssid, quality) = if total_score >= threshold_float
]
ratio = compareTexts areas 0 perms
in
- Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0
+ if ratio >= threshold_ratio then Just $ ratioToFloat ratio else Nothing
Nothing -> Nothing
location_score = case location ssl_address of
Just lcs -> let
@@ -249,18 +257,21 @@ compareAddress gls_address (ssid, quality) = if total_score >= threshold_float
]
ratio = compareTexts lcs 0 perms
in
- Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0
+ if ratio >= threshold_ratio then Just $ ratioToFloat ratio else Nothing
Nothing -> Nothing
zip_code_score = case zip_code ssl_address of
Just zc -> let
ratio = compareText zc 0 [zipcode gls_address]
in
- Just $ if ratio >= threshold_ratio then ratioToFloat ratio else 0
+ if ratio >= threshold_ratio then Just $ ratioToFloat ratio else Nothing
Nothing -> Nothing
cleanText :: [Text] -> [Text]
-cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm then new_text else snoc new_text char) start_text) text
+cleanText text = map (T.foldl (\new_text char -> if char `elem` chars_to_rm
+ then new_text
+ else snoc new_text char
+ ) start_text) text
where chars_to_rm = ".,-" :: String
start_text = "" :: Text
diff --git a/test/Tests/Check.hs b/test/Tests/Check.hs
@@ -139,7 +139,7 @@ personTests sanction_list =
, testTarget False 38925 target_38925_v3 sanction_list $ distribution 0 100 0 0 0 0.75
, testTarget False 38925 target_38925_v4 sanction_list $ distribution 100 0 0 0 0 0.75
, testTarget False 38925 target_38925_v5 sanction_list $ distribution 100 100 0 0 0 0.75
- , testTarget False 68815 target_68815 sanction_list $ distribution 100 100 0 125 0 0.75
+ , testTarget False 68815 target_68815 sanction_list $ distribution 75 100 0 125 50 0.75
]
, testGroup "Fake target with XML file"
diff --git a/test/data/target_38925.xml b/test/data/target_38925.xml
@@ -35,4 +35,7 @@
<justification ssid="38923">Secretary of the Crimea Electoral Commission. In this capacity she participated in the organisation of the Russian presidential elections of 18 Mar 2018 in the illegally annexed Crimea and Sevastopol, and thereby actively supported and implemented policies that undermine the territorial integrity, sovereignty and independence of Ukraine.</justification>
</individual>
</target>
+ <place ssid="38924">
+ <location>Autonomous Republic of Crimea</location>
+ </place>
</swiss-sanctions-list>
diff --git a/test/data/target_68815.xml b/test/data/target_68815.xml
@@ -24,8 +24,8 @@
<other-information ssid="68814">National identification no: Haiti 004-341-263-3</other-information>
</individual>
</target>
- <place ssid="49815">
- <location>Saint-Petersburg</location>
- <country iso-code="RU">Russian Federation</country>
+ <place ssid="66660">
+ <location>Port-au-Prince</location>
+ <country iso-code="HT">Haiti</country>
</place>
</swiss-sanctions-list>