robocop

Checks KYC attributes against sanction lists
Log | Files | Refs | Submodules | README | LICENSE

commit 4aebf67215006ffb494dc7b6195d639d034e85a0
parent fe6daba521ed47be7b5d948b5fb0b076eda46185
Author: Vint Leenaars <vl.software@leenaa.rs>
Date:   Thu,  8 May 2025 13:02:36 +0200

More configuration possibilities

Diffstat:
Mapp/Main.hs | 41++++++-----------------------------------
Mconfig.dhall | 2+-
Mpackage.yaml | 1+
Msrc/KYCheck/Check.hs | 91+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Msrc/KYCheck/Config.hs | 123++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Asrc/KYCheck/Error.hs | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/KYCheck/Type.hs | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/Tests/Check.hs | 6+++---
Mtest/data/test.config.dhall | 4++--
Mtest/test-kycheck.hs | 7++++++-
10 files changed, 302 insertions(+), 122 deletions(-)

diff --git a/app/Main.hs b/app/Main.hs @@ -15,6 +15,7 @@ import Data.Time import KYCheck.Config import KYCheck.SSL import KYCheck.SSL.XML.Type +import KYCheck.Type import Options.Applicative (execParser) @@ -41,41 +42,11 @@ main = do Just _ -> do curr_dir <- getCurrentDirectory putStrLn $ "Searching for 'config.dhall' in " ++ curr_dir - dhall <- input auto "./config.dhall" - - let config = Config { verbosity = case cl_verbosity cl of - Just v -> v - Nothing -> verbosity dhall - , ssl_location = case cl_ssl_location cl of - Just fp -> fp - Nothing -> ssl_location dhall - , threshold_percentage = case cl_threshold_percentage cl of - Just p -> p - Nothing -> threshold_percentage dhall - , threshold_points = case cl_threshold_points cl of - Just p -> p - Nothing -> threshold_points dhall - , perfect_points = case cl_perfect_points cl of - Just p -> p - Nothing -> perfect_points dhall - , points_address = case cl_points_address cl of - Just p -> p - Nothing -> points_address dhall - , points_date = case cl_points_date cl of - Just p -> p - Nothing -> points_date dhall - , points_id = case cl_points_id cl of - Just p -> p - Nothing -> points_id dhall - , points_name = case cl_points_name cl of - Just p -> p - Nothing -> points_name dhall - , points_nationality = case cl_points_nationality cl of - Just p -> p - Nothing -> points_nationality dhall - } - - -- validConfig <- checkConfig config + dhall <- input auto "./config.dhall" :: IO DhallConfig + let config = inputToConfig dhall $ Just cl + + valid_config <- checkConfig config + let v = verbosity config start <- getCurrentTime diff --git a/config.dhall b/config.dhall @@ -4,7 +4,7 @@ -- SPDX-License-Identifier: EUPL-1.2 -- Verbosity levels -let Verbosity = < Silent | Info | Errors | Debug > +let Verbosity = < Test | Silent | Info | Errors | Debug > in { verbosity = Verbosity.Silent , ssl_location = "files/consolidated-list_2024-07-30.xml" diff --git a/package.yaml b/package.yaml @@ -34,6 +34,7 @@ dependencies: - country-codes - dhall - directory +- filepath - hxt - optparse-applicative - tasty diff --git a/src/KYCheck/Check.hs b/src/KYCheck/Check.hs @@ -31,6 +31,7 @@ import Data.Text.Metrics import Data.Time.Calendar import Data.Time.Calendar.OrdinalDate +import KYCheck.Config import KYCheck.Type import KYCheck.SSL.Type as SSL import KYCheck.GLS.Type as GLS @@ -50,8 +51,8 @@ threshold_ratio = 4 % 5 threshold_float :: Float threshold_float = 0.8 -threshold_points :: Float -threshold_points = 200 +-- threshold_points :: Float +-- threshold_points = 200 suspicious_dates :: (Int, Int) suspicious_dates = (3, 75) -- Difference in years and days that will be marked suspicious (exponential) @@ -60,24 +61,25 @@ type QualityFloat = Quality Float -checkEntity :: Map Int Entity -> LegalEntity -> [Score] -checkEntity entities' entity = map (\(confidence', reference') -> - Score { match_quality = 0 - , confidence = confidence' - , expiration = 0 - , references = [reference'] - } - ) $ findMatchingEntities entity $ toList entities' +checkEntity :: Config -> Map Int Entity -> LegalEntity -> [Score] +checkEntity config entities' entity = map (\(confidence', reference') -> + Score { match_quality = 0 + , confidence = confidence' + , expiration = 0 + , references = [reference'] + } + ) $ findMatchingEntities config entity $ toList entities' -findMatchingEntities :: LegalEntity -> [(Int, Entity)] -> [(Float, Int)] -findMatchingEntities _ [] = [] -findMatchingEntities entity ((ssid,ent):ents) = let - points = compareEntity entity ent - score = if points >= 300 then 1 else points / 300 - in - if points >= threshold_points - then (score,ssid):findMatchingEntities entity ents - else findMatchingEntities entity ents +findMatchingEntities :: Config -> LegalEntity -> [(Int, Entity)] -> [(Float, Int)] +findMatchingEntities _ _ [] = [] +findMatchingEntities config entity ((ssid,ent):ents) = + let + points = compareEntity entity ent + score = if points >= 300 then 1 else points / 300 + in + if points >= threshold_points config + then (score,ssid):findMatchingEntities config entity ents + else findMatchingEntities config entity ents compareEntity :: LegalEntity -> Entity -> Float compareEntity legal_entity entity = foldl1 (+) [ multFloats 300 address_score (removeQuality . removeSSID) @@ -90,31 +92,32 @@ compareEntity legal_entity entity = foldl1 (+) [ multFloats 300 address_score (r -checkPersons :: Map Int Individual -> NaturalPerson -> Score -checkPersons individuals' person = Score { match_quality = 0 - , confidence = confidence' - , expiration = 0 - , references = references' - } - where (confidence', references') = checkPersons' person (0, []) $ toList individuals' - -checkPersons' :: NaturalPerson -> (Float, [Int]) -> [(Int, Individual)] -> (Float, [Int]) -checkPersons' _ score [] = score -checkPersons' person (score, ssids) ((ssid,ind):inds) = let - points = checkPerson person ind - new_score = if max_points >= threshold_points - then if points >= 300 then 1 else points / 300 - else points / max_points - in - if new_score >= 2 / 3 - then checkPersons' person (max score new_score, ssid:ssids) inds - else checkPersons' person (score, ssids) inds - where max_points = foldl1 (+) [ if toList (addresses ind) == [] then 0 else 150 - , if toList (birth_dates ind) == [] then 0 else 100 - , if toList (ids ind) == [] then 0 else 200 - , if toList (names ind) == [] then 0 else 125 - , if toList (nationalities ind) == [] then 0 else 50 - ] +checkPersons :: Config -> Map Int Individual -> NaturalPerson -> Score +checkPersons config individuals' person = Score { match_quality = 0 + , confidence = confidence' + , expiration = 0 + , references = references' + } + where (confidence', references') = checkPersons' config person (0, []) $ toList individuals' + +checkPersons' :: Config -> NaturalPerson -> (Float, [Int]) -> [(Int, Individual)] -> (Float, [Int]) +checkPersons' _ _ score [] = score +checkPersons' config person (score, ssids) ((ssid,ind):inds) = + let + points = checkPerson person ind + max_points = foldl1 (+) [ if toList (addresses ind) == [] then 0 else points_address config + , if toList (birth_dates ind) == [] then 0 else points_date config + , if toList (ids ind) == [] then 0 else points_id config + , if toList (names ind) == [] then 0 else points_name config + , if toList (nationalities ind) == [] then 0 else points_nationality config + ] + new_score = if max_points >= threshold_points config + then if points >= 300 then 1 else points / 300 + else points / max_points + in + if new_score >= 2 / 3 + then checkPersons' config person (max score new_score, ssid:ssids) inds + else checkPersons' config person (score, ssids) inds checkPerson :: NaturalPerson -> Individual -> Float checkPerson person individual = foldl1 (+) [ address_points diff --git a/src/KYCheck/Config.hs b/src/KYCheck/Config.hs @@ -7,48 +7,99 @@ {-# LANGUAGE OverloadedStrings #-} module KYCheck.Config - ( CommandLine(..) - , Config(..) + ( inputToConfig + , checkConfig , opts - , Verbosity(..) ) where -import Dhall hiding (auto) +import KYCheck.Error +import KYCheck.Type + +import Control.Monad + +import qualified Data.Text as T import Options.Applicative --- | Data type containing all configuration information (config.dhall) -data Config = Config - { verbosity :: Verbosity - , ssl_location :: FilePath - , threshold_percentage :: Double - , threshold_points :: Natural - , perfect_points :: Natural - , points_address :: Natural - , points_date :: Natural - , points_id :: Natural - , points_name :: Natural - , points_nationality :: Natural - } deriving (Show, Eq, Generic) - -data Verbosity = Silent | Info | Errors | Debug deriving (Show, Eq, Generic, Ord) - -instance FromDhall Config -instance FromDhall Verbosity - --- | Data type for user configuration via commandline -data CommandLine = CommandLine - { cl_verbosity :: Maybe Verbosity - , cl_ssl_location :: Maybe FilePath - , cl_threshold_percentage :: Maybe Double - , cl_threshold_points :: Maybe Natural - , cl_perfect_points :: Maybe Natural - , cl_points_address :: Maybe Natural - , cl_points_date :: Maybe Natural - , cl_points_id :: Maybe Natural - , cl_points_name :: Maybe Natural - , cl_points_nationality :: Maybe Natural - } deriving (Show, Eq, Generic) +import System.Directory +import System.FilePath + +inputToConfig :: DhallConfig -> Maybe CommandLine -> Config +inputToConfig dhall commandline = + let + d_v = dhall_verbosity dhall + d_ssl = dhall_ssl_location dhall + d_pct = realToFrac $ dhall_threshold_percentage dhall + d_pts = fromIntegral $ dhall_threshold_points dhall + d_pp = fromIntegral $ dhall_perfect_points dhall + d_addr = fromIntegral $ dhall_points_address dhall + d_date = fromIntegral $ dhall_points_date dhall + d_id = fromIntegral $ dhall_points_id dhall + d_name = fromIntegral $ dhall_points_name dhall + d_nat = fromIntegral $ dhall_points_nationality dhall + in + case commandline of + Just cl -> Config { verbosity = case cl_verbosity cl of Just v -> v; Nothing -> d_v + , ssl_location = case cl_ssl_location cl of Just l -> l; Nothing -> d_ssl + , threshold_percentage = case cl_threshold_percentage cl of Just p -> p; Nothing -> d_pct + , threshold_points = case cl_threshold_points cl of Just p -> p; Nothing -> d_pts + , perfect_points = case cl_perfect_points cl of Just p -> p; Nothing -> d_pp + , points_address = case cl_points_address cl of Just p -> p; Nothing -> d_addr + , points_date = case cl_points_date cl of Just p -> p; Nothing -> d_date + , points_id = case cl_points_id cl of Just p -> p; Nothing -> d_id + , points_name = case cl_points_name cl of Just p -> p; Nothing -> d_name + , points_nationality = case cl_points_nationality cl of Just p -> p; Nothing -> d_nat + } + Nothing -> Config { verbosity = d_v + , ssl_location = d_ssl + , threshold_percentage = d_pct + , threshold_points = d_pts + , perfect_points = d_pp + , points_address = d_addr + , points_date = d_date + , points_id = d_id + , points_name = d_name + , points_nationality = d_nat + } + +checkConfig :: Config -> IO Bool +checkConfig config = do + valid_ssl <- checkSSL config + valid_points <- checkPoints config + + let valid = foldl1 (&&) [ valid_ssl + , valid_points + ] + return valid + +checkSSL :: Config -> IO Bool +checkSSL config = do + let fp = ssl_location config + let v = verbosity config + + bool <- do exists <- doesFileExist fp + when (not exists) $ do curr_dir <- getCurrentDirectory + (handleError v . Left) $ KYCheck_NotFound (T.pack $ curr_dir </> fp) ".xml file containing sanctions" + -- TODO: implement RelaxNG validation + return exists + + return bool + +checkPoints :: Config -> IO Bool +checkPoints config = do + let v = verbosity config + + bool <- do let total = foldl1 (+) [ points_address config + , points_date config + , points_id config + , points_name config + , points_nationality config + ] + threshold = threshold_points config + when (total < threshold) $ do (handleError v . Left) $ KYCheck_InvalidDistribution $ T.pack ("Total points obtainable (" ++ show total ++ ") lower than threshold (" ++ show threshold ++ ")") + return (total >= threshold) + return bool + -- | Custom commandline flag for verbosity-level Silent: --silent verbositySilent :: Parser Verbosity diff --git a/src/KYCheck/Error.hs b/src/KYCheck/Error.hs @@ -0,0 +1,88 @@ +-- SPDX-FileCopyrightText: 2025 LNRS +-- +-- SPDX-License-Identifier: AGPL-3.0-or-later +-- SPDX-License-Identifier: EUPL-1.2 + +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} + +module KYCheck.Error + ( KYCheck_Error(..) + , renderError + , handleError + , showError + ) where + +import KYCheck.Type + +import Control.Exception (Exception, SomeException) +import Control.Monad (when) +import Data.Text (Text) +import Data.Typeable (Typeable) +import GHC.Generics (Generic) +import System.Exit (ExitCode (..), exitWith) +import System.IO hiding (hPutStrLn) + +import qualified Data.Text as T (pack) +import qualified Data.Text.IO as TIO + +-- | Custom type to express exceptions in DMARC.* that cause errors +data KYCheck_Error = KYCheck_InvalidDistribution Text + | KYCheck_InvalidRelaxNG Text + | KYCheck_InvalidXML Text String + | KYCheck_NotFound Text Text + | KYCheck_NotImplemented Text + | KYCheck_XMLParsingError Text SomeException + deriving (Show, Typeable, Generic) + +instance Exception KYCheck_Error + +renderError :: KYCheck_Error -> Text +renderError e = + case e of + KYCheck_InvalidDistribution msg -> "Error: " <> msg + KYCheck_InvalidRelaxNG fp -> "Error: " <> fp <> " is not a valid relaxng-schema" + KYCheck_InvalidXML fp err' -> "Error: " <> fp <> " is not valid for relaxng-schema\n" <> tshow err' + KYCheck_NotImplemented msg -> "Error: " <> msg <> " has not been implemented yet, please change your config.dhall" + KYCheck_NotFound fp desc -> "Error: could not find " <> fp <> " (" <> desc <> ")" + KYCheck_XMLParsingError dat err' -> "Error: could not parse " <> dat <> "\n" <> tshow err' + +handleError :: Verbosity -> Either KYCheck_Error a -> IO a +handleError _ (Right r) = return r +handleError v (Left e) = + case e of + _ -> err v exit_code (renderError e) + where + exit_code = + case e of + KYCheck_InvalidDistribution{} -> 1 + KYCheck_InvalidRelaxNG{} -> 2 + KYCheck_InvalidXML{} -> 3 + KYCheck_NotImplemented{} -> 4 + KYCheck_NotFound{} -> 5 + KYCheck_XMLParsingError{} -> 6 + +-- | Function to show helpfull error/warning message +showError :: KYCheck_Error -> IO () +showError e = hPutStrLn stderr (renderError e) + +-- | Function to exit program with specific exit code and msg +err :: Verbosity -> Int -> Text -> IO a +err v exit_code msg = do + when (v /= Test) $ hPutStrLn stderr msg + _ <- exitWith $ ExitFailure exit_code + return undefined + +-- | Convert any Showable type to T.Text +tshow :: Show a => a -> Text +tshow = T.pack . show + +-- | Print T.Text nicely to commandline +hPutStrLnWith :: Newline -> Handle -> Text -> IO () +hPutStrLnWith eol h s = + hSetNewlineMode h (NewlineMode eol eol) >> + hSetEncoding h utf8 >> TIO.hPutStrLn h s + +hPutStrLn :: Handle -> Text -> IO () +hPutStrLn = hPutStrLnWith nativeNewline diff --git a/src/KYCheck/Type.hs b/src/KYCheck/Type.hs @@ -3,6 +3,9 @@ -- SPDX-License-Identifier: AGPL-3.0-or-later -- SPDX-License-Identifier: EUPL-1.2 + +{-# LANGUAGE DeriveGeneric #-} + module KYCheck.Type ( getSSID , liftQuality @@ -11,8 +14,17 @@ module KYCheck.Type , removeSSID , stringToQuality , WithSSID(..) + + , CommandLine(..) + , Config(..) + , DhallConfig(..) + , Verbosity(..) ) where +import Dhall + +import qualified Data.Text as T + data Quality a = Good a | Low a | UnknownQuality a deriving (Show, Eq) @@ -41,3 +53,52 @@ getSSID (WithSSID n _) = n removeSSID :: WithSSID a -> a removeSSID (WithSSID _ x) = x + + + +-- | Data type containing all configuration information (config.dhall) +data Config = Config + { verbosity :: Verbosity + , ssl_location :: FilePath + , threshold_percentage :: Float + , threshold_points :: Float + , perfect_points :: Float + , points_address :: Float + , points_date :: Float + , points_id :: Float + , points_name :: Float + , points_nationality :: Float + } deriving (Show, Eq, Generic) + +data Verbosity = Test | Silent | Info | Errors | Debug deriving (Show, Eq, Generic, Ord) + +data DhallConfig = DhallConfig + { dhall_verbosity :: Verbosity + , dhall_ssl_location :: FilePath + , dhall_threshold_percentage :: Double + , dhall_threshold_points :: Natural + , dhall_perfect_points :: Natural + , dhall_points_address :: Natural + , dhall_points_date :: Natural + , dhall_points_id :: Natural + , dhall_points_name :: Natural + , dhall_points_nationality :: Natural + } deriving (Show, Eq, Generic) + +instance FromDhall DhallConfig where + autoWith _ = genericAutoWith (defaultInterpretOptions { fieldModifier = T.drop 6 }) +instance FromDhall Verbosity + +-- | Data type for user configuration via commandline +data CommandLine = CommandLine + { cl_verbosity :: Maybe Verbosity + , cl_ssl_location :: Maybe FilePath + , cl_threshold_percentage :: Maybe Float + , cl_threshold_points :: Maybe Float + , cl_perfect_points :: Maybe Float + , cl_points_address :: Maybe Float + , cl_points_date :: Maybe Float + , cl_points_id :: Maybe Float + , cl_points_name :: Maybe Float + , cl_points_nationality :: Maybe Float + } deriving (Show, Eq, Generic) diff --git a/test/Tests/Check.hs b/test/Tests/Check.hs @@ -85,7 +85,7 @@ testTargetVersions config verbose number sanction_list versions = findRealTarget :: Config -> Bool -> Int -> NaturalPerson -> Distribution -> Map Int Individual -> TestTree findRealTarget config verbose number target distribution sanction_list = testCase ("Find target " ++ show number ++ " in Swiss Sanction List") $ do - let score = checkPersons sanction_list target + let score = checkPersons config sanction_list target when verbose $ print score compareScore (threshold_confidence distribution) (confidence score) "Score" @@ -95,7 +95,7 @@ findRealTarget config verbose number target distribution sanction_list = dontFindTarget :: Config -> Bool -> NaturalPerson -> Map Int Individual -> String -> TestTree dontFindTarget config verbose target sanction_list title = testCase ("Do not find " ++ title ++ " in Swiss Sanction List") $ do - let score = checkPersons sanction_list target + let score = checkPersons config sanction_list target when verbose $ print score compareScore (Just 0) (confidence score) "Score" @@ -124,7 +124,7 @@ testSingleTarget config verbose number target distribution ssl_target = nationality_score = 50 * checkCountryCode (nationalities individual) (nationality target) - total_score = checkPersons ssl target + total_score = checkPersons config ssl target when verbose $ print total_score compareScore (threshold_address distribution) address_score "Address" diff --git a/test/data/test.config.dhall b/test/data/test.config.dhall @@ -4,9 +4,9 @@ -- SPDX-License-Identifier: EUPL-1.2 -- Verbosity levels -let Verbosity = < Silent | Info | Errors | Debug > +let Verbosity = < Test | Silent | Info | Errors | Debug > -in { verbosity = Verbosity.Silent +in { verbosity = Verbosity.Test , ssl_location = "test-sanction-list.xml" , threshold_percentage = 0.8 -- Percentage , threshold_points = 200 -- Points needed to flag entry as match diff --git a/test/test-kycheck.hs b/test/test-kycheck.hs @@ -16,6 +16,7 @@ import Test.Tasty import KYCheck.SSL import KYCheck.Config +import KYCheck.Type import Dhall @@ -35,7 +36,11 @@ inDirectory path action = bracket main :: IO () main = do inDirectory "test/data" $ do - config <- input auto "./test.config.dhall" + dhall <- input auto "./test.config.dhall" :: IO DhallConfig + let config = inputToConfig dhall Nothing + + valid_config <- checkConfig config + targets <- xmlToSSL <$> parseSwissSanctionsList (ssl_location config) defaultMain (tests config targets)