commit 4aebf67215006ffb494dc7b6195d639d034e85a0
parent fe6daba521ed47be7b5d948b5fb0b076eda46185
Author: Vint Leenaars <vl.software@leenaa.rs>
Date: Thu, 8 May 2025 13:02:36 +0200
More configuration possibilities
Diffstat:
10 files changed, 302 insertions(+), 122 deletions(-)
diff --git a/app/Main.hs b/app/Main.hs
@@ -15,6 +15,7 @@ import Data.Time
import KYCheck.Config
import KYCheck.SSL
import KYCheck.SSL.XML.Type
+import KYCheck.Type
import Options.Applicative (execParser)
@@ -41,41 +42,11 @@ main = do
Just _ -> do curr_dir <- getCurrentDirectory
putStrLn $ "Searching for 'config.dhall' in " ++ curr_dir
- dhall <- input auto "./config.dhall"
-
- let config = Config { verbosity = case cl_verbosity cl of
- Just v -> v
- Nothing -> verbosity dhall
- , ssl_location = case cl_ssl_location cl of
- Just fp -> fp
- Nothing -> ssl_location dhall
- , threshold_percentage = case cl_threshold_percentage cl of
- Just p -> p
- Nothing -> threshold_percentage dhall
- , threshold_points = case cl_threshold_points cl of
- Just p -> p
- Nothing -> threshold_points dhall
- , perfect_points = case cl_perfect_points cl of
- Just p -> p
- Nothing -> perfect_points dhall
- , points_address = case cl_points_address cl of
- Just p -> p
- Nothing -> points_address dhall
- , points_date = case cl_points_date cl of
- Just p -> p
- Nothing -> points_date dhall
- , points_id = case cl_points_id cl of
- Just p -> p
- Nothing -> points_id dhall
- , points_name = case cl_points_name cl of
- Just p -> p
- Nothing -> points_name dhall
- , points_nationality = case cl_points_nationality cl of
- Just p -> p
- Nothing -> points_nationality dhall
- }
-
- -- validConfig <- checkConfig config
+ dhall <- input auto "./config.dhall" :: IO DhallConfig
+ let config = inputToConfig dhall $ Just cl
+
+ valid_config <- checkConfig config
+
let v = verbosity config
start <- getCurrentTime
diff --git a/config.dhall b/config.dhall
@@ -4,7 +4,7 @@
-- SPDX-License-Identifier: EUPL-1.2
-- Verbosity levels
-let Verbosity = < Silent | Info | Errors | Debug >
+let Verbosity = < Test | Silent | Info | Errors | Debug >
in { verbosity = Verbosity.Silent
, ssl_location = "files/consolidated-list_2024-07-30.xml"
diff --git a/package.yaml b/package.yaml
@@ -34,6 +34,7 @@ dependencies:
- country-codes
- dhall
- directory
+- filepath
- hxt
- optparse-applicative
- tasty
diff --git a/src/KYCheck/Check.hs b/src/KYCheck/Check.hs
@@ -31,6 +31,7 @@ import Data.Text.Metrics
import Data.Time.Calendar
import Data.Time.Calendar.OrdinalDate
+import KYCheck.Config
import KYCheck.Type
import KYCheck.SSL.Type as SSL
import KYCheck.GLS.Type as GLS
@@ -50,8 +51,8 @@ threshold_ratio = 4 % 5
threshold_float :: Float
threshold_float = 0.8
-threshold_points :: Float
-threshold_points = 200
+-- threshold_points :: Float
+-- threshold_points = 200
suspicious_dates :: (Int, Int)
suspicious_dates = (3, 75) -- Difference in years and days that will be marked suspicious (exponential)
@@ -60,24 +61,25 @@ type QualityFloat = Quality Float
-checkEntity :: Map Int Entity -> LegalEntity -> [Score]
-checkEntity entities' entity = map (\(confidence', reference') ->
- Score { match_quality = 0
- , confidence = confidence'
- , expiration = 0
- , references = [reference']
- }
- ) $ findMatchingEntities entity $ toList entities'
+checkEntity :: Config -> Map Int Entity -> LegalEntity -> [Score]
+checkEntity config entities' entity = map (\(confidence', reference') ->
+ Score { match_quality = 0
+ , confidence = confidence'
+ , expiration = 0
+ , references = [reference']
+ }
+ ) $ findMatchingEntities config entity $ toList entities'
-findMatchingEntities :: LegalEntity -> [(Int, Entity)] -> [(Float, Int)]
-findMatchingEntities _ [] = []
-findMatchingEntities entity ((ssid,ent):ents) = let
- points = compareEntity entity ent
- score = if points >= 300 then 1 else points / 300
- in
- if points >= threshold_points
- then (score,ssid):findMatchingEntities entity ents
- else findMatchingEntities entity ents
+findMatchingEntities :: Config -> LegalEntity -> [(Int, Entity)] -> [(Float, Int)]
+findMatchingEntities _ _ [] = []
+findMatchingEntities config entity ((ssid,ent):ents) =
+ let
+ points = compareEntity entity ent
+ score = if points >= 300 then 1 else points / 300
+ in
+ if points >= threshold_points config
+ then (score,ssid):findMatchingEntities config entity ents
+ else findMatchingEntities config entity ents
compareEntity :: LegalEntity -> Entity -> Float
compareEntity legal_entity entity = foldl1 (+) [ multFloats 300 address_score (removeQuality . removeSSID)
@@ -90,31 +92,32 @@ compareEntity legal_entity entity = foldl1 (+) [ multFloats 300 address_score (r
-checkPersons :: Map Int Individual -> NaturalPerson -> Score
-checkPersons individuals' person = Score { match_quality = 0
- , confidence = confidence'
- , expiration = 0
- , references = references'
- }
- where (confidence', references') = checkPersons' person (0, []) $ toList individuals'
-
-checkPersons' :: NaturalPerson -> (Float, [Int]) -> [(Int, Individual)] -> (Float, [Int])
-checkPersons' _ score [] = score
-checkPersons' person (score, ssids) ((ssid,ind):inds) = let
- points = checkPerson person ind
- new_score = if max_points >= threshold_points
- then if points >= 300 then 1 else points / 300
- else points / max_points
- in
- if new_score >= 2 / 3
- then checkPersons' person (max score new_score, ssid:ssids) inds
- else checkPersons' person (score, ssids) inds
- where max_points = foldl1 (+) [ if toList (addresses ind) == [] then 0 else 150
- , if toList (birth_dates ind) == [] then 0 else 100
- , if toList (ids ind) == [] then 0 else 200
- , if toList (names ind) == [] then 0 else 125
- , if toList (nationalities ind) == [] then 0 else 50
- ]
+checkPersons :: Config -> Map Int Individual -> NaturalPerson -> Score
+checkPersons config individuals' person = Score { match_quality = 0
+ , confidence = confidence'
+ , expiration = 0
+ , references = references'
+ }
+ where (confidence', references') = checkPersons' config person (0, []) $ toList individuals'
+
+checkPersons' :: Config -> NaturalPerson -> (Float, [Int]) -> [(Int, Individual)] -> (Float, [Int])
+checkPersons' _ _ score [] = score
+checkPersons' config person (score, ssids) ((ssid,ind):inds) =
+ let
+ points = checkPerson person ind
+ max_points = foldl1 (+) [ if toList (addresses ind) == [] then 0 else points_address config
+ , if toList (birth_dates ind) == [] then 0 else points_date config
+ , if toList (ids ind) == [] then 0 else points_id config
+ , if toList (names ind) == [] then 0 else points_name config
+ , if toList (nationalities ind) == [] then 0 else points_nationality config
+ ]
+ new_score = if max_points >= threshold_points config
+ then if points >= 300 then 1 else points / 300
+ else points / max_points
+ in
+ if new_score >= 2 / 3
+ then checkPersons' config person (max score new_score, ssid:ssids) inds
+ else checkPersons' config person (score, ssids) inds
checkPerson :: NaturalPerson -> Individual -> Float
checkPerson person individual = foldl1 (+) [ address_points
diff --git a/src/KYCheck/Config.hs b/src/KYCheck/Config.hs
@@ -7,48 +7,99 @@
{-# LANGUAGE OverloadedStrings #-}
module KYCheck.Config
- ( CommandLine(..)
- , Config(..)
+ ( inputToConfig
+ , checkConfig
, opts
- , Verbosity(..)
) where
-import Dhall hiding (auto)
+import KYCheck.Error
+import KYCheck.Type
+
+import Control.Monad
+
+import qualified Data.Text as T
import Options.Applicative
--- | Data type containing all configuration information (config.dhall)
-data Config = Config
- { verbosity :: Verbosity
- , ssl_location :: FilePath
- , threshold_percentage :: Double
- , threshold_points :: Natural
- , perfect_points :: Natural
- , points_address :: Natural
- , points_date :: Natural
- , points_id :: Natural
- , points_name :: Natural
- , points_nationality :: Natural
- } deriving (Show, Eq, Generic)
-
-data Verbosity = Silent | Info | Errors | Debug deriving (Show, Eq, Generic, Ord)
-
-instance FromDhall Config
-instance FromDhall Verbosity
-
--- | Data type for user configuration via commandline
-data CommandLine = CommandLine
- { cl_verbosity :: Maybe Verbosity
- , cl_ssl_location :: Maybe FilePath
- , cl_threshold_percentage :: Maybe Double
- , cl_threshold_points :: Maybe Natural
- , cl_perfect_points :: Maybe Natural
- , cl_points_address :: Maybe Natural
- , cl_points_date :: Maybe Natural
- , cl_points_id :: Maybe Natural
- , cl_points_name :: Maybe Natural
- , cl_points_nationality :: Maybe Natural
- } deriving (Show, Eq, Generic)
+import System.Directory
+import System.FilePath
+
+inputToConfig :: DhallConfig -> Maybe CommandLine -> Config
+inputToConfig dhall commandline =
+ let
+ d_v = dhall_verbosity dhall
+ d_ssl = dhall_ssl_location dhall
+ d_pct = realToFrac $ dhall_threshold_percentage dhall
+ d_pts = fromIntegral $ dhall_threshold_points dhall
+ d_pp = fromIntegral $ dhall_perfect_points dhall
+ d_addr = fromIntegral $ dhall_points_address dhall
+ d_date = fromIntegral $ dhall_points_date dhall
+ d_id = fromIntegral $ dhall_points_id dhall
+ d_name = fromIntegral $ dhall_points_name dhall
+ d_nat = fromIntegral $ dhall_points_nationality dhall
+ in
+ case commandline of
+ Just cl -> Config { verbosity = case cl_verbosity cl of Just v -> v; Nothing -> d_v
+ , ssl_location = case cl_ssl_location cl of Just l -> l; Nothing -> d_ssl
+ , threshold_percentage = case cl_threshold_percentage cl of Just p -> p; Nothing -> d_pct
+ , threshold_points = case cl_threshold_points cl of Just p -> p; Nothing -> d_pts
+ , perfect_points = case cl_perfect_points cl of Just p -> p; Nothing -> d_pp
+ , points_address = case cl_points_address cl of Just p -> p; Nothing -> d_addr
+ , points_date = case cl_points_date cl of Just p -> p; Nothing -> d_date
+ , points_id = case cl_points_id cl of Just p -> p; Nothing -> d_id
+ , points_name = case cl_points_name cl of Just p -> p; Nothing -> d_name
+ , points_nationality = case cl_points_nationality cl of Just p -> p; Nothing -> d_nat
+ }
+ Nothing -> Config { verbosity = d_v
+ , ssl_location = d_ssl
+ , threshold_percentage = d_pct
+ , threshold_points = d_pts
+ , perfect_points = d_pp
+ , points_address = d_addr
+ , points_date = d_date
+ , points_id = d_id
+ , points_name = d_name
+ , points_nationality = d_nat
+ }
+
+checkConfig :: Config -> IO Bool
+checkConfig config = do
+ valid_ssl <- checkSSL config
+ valid_points <- checkPoints config
+
+ let valid = foldl1 (&&) [ valid_ssl
+ , valid_points
+ ]
+ return valid
+
+checkSSL :: Config -> IO Bool
+checkSSL config = do
+ let fp = ssl_location config
+ let v = verbosity config
+
+ bool <- do exists <- doesFileExist fp
+ when (not exists) $ do curr_dir <- getCurrentDirectory
+ (handleError v . Left) $ KYCheck_NotFound (T.pack $ curr_dir </> fp) ".xml file containing sanctions"
+ -- TODO: implement RelaxNG validation
+ return exists
+
+ return bool
+
+checkPoints :: Config -> IO Bool
+checkPoints config = do
+ let v = verbosity config
+
+ bool <- do let total = foldl1 (+) [ points_address config
+ , points_date config
+ , points_id config
+ , points_name config
+ , points_nationality config
+ ]
+ threshold = threshold_points config
+ when (total < threshold) $ do (handleError v . Left) $ KYCheck_InvalidDistribution $ T.pack ("Total points obtainable (" ++ show total ++ ") lower than threshold (" ++ show threshold ++ ")")
+ return (total >= threshold)
+ return bool
+
-- | Custom commandline flag for verbosity-level Silent: --silent
verbositySilent :: Parser Verbosity
diff --git a/src/KYCheck/Error.hs b/src/KYCheck/Error.hs
@@ -0,0 +1,88 @@
+-- SPDX-FileCopyrightText: 2025 LNRS
+--
+-- SPDX-License-Identifier: AGPL-3.0-or-later
+-- SPDX-License-Identifier: EUPL-1.2
+
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module KYCheck.Error
+ ( KYCheck_Error(..)
+ , renderError
+ , handleError
+ , showError
+ ) where
+
+import KYCheck.Type
+
+import Control.Exception (Exception, SomeException)
+import Control.Monad (when)
+import Data.Text (Text)
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+import System.Exit (ExitCode (..), exitWith)
+import System.IO hiding (hPutStrLn)
+
+import qualified Data.Text as T (pack)
+import qualified Data.Text.IO as TIO
+
+-- | Custom type to express exceptions in DMARC.* that cause errors
+data KYCheck_Error = KYCheck_InvalidDistribution Text
+ | KYCheck_InvalidRelaxNG Text
+ | KYCheck_InvalidXML Text String
+ | KYCheck_NotFound Text Text
+ | KYCheck_NotImplemented Text
+ | KYCheck_XMLParsingError Text SomeException
+ deriving (Show, Typeable, Generic)
+
+instance Exception KYCheck_Error
+
+renderError :: KYCheck_Error -> Text
+renderError e =
+ case e of
+ KYCheck_InvalidDistribution msg -> "Error: " <> msg
+ KYCheck_InvalidRelaxNG fp -> "Error: " <> fp <> " is not a valid relaxng-schema"
+ KYCheck_InvalidXML fp err' -> "Error: " <> fp <> " is not valid for relaxng-schema\n" <> tshow err'
+ KYCheck_NotImplemented msg -> "Error: " <> msg <> " has not been implemented yet, please change your config.dhall"
+ KYCheck_NotFound fp desc -> "Error: could not find " <> fp <> " (" <> desc <> ")"
+ KYCheck_XMLParsingError dat err' -> "Error: could not parse " <> dat <> "\n" <> tshow err'
+
+handleError :: Verbosity -> Either KYCheck_Error a -> IO a
+handleError _ (Right r) = return r
+handleError v (Left e) =
+ case e of
+ _ -> err v exit_code (renderError e)
+ where
+ exit_code =
+ case e of
+ KYCheck_InvalidDistribution{} -> 1
+ KYCheck_InvalidRelaxNG{} -> 2
+ KYCheck_InvalidXML{} -> 3
+ KYCheck_NotImplemented{} -> 4
+ KYCheck_NotFound{} -> 5
+ KYCheck_XMLParsingError{} -> 6
+
+-- | Function to show helpfull error/warning message
+showError :: KYCheck_Error -> IO ()
+showError e = hPutStrLn stderr (renderError e)
+
+-- | Function to exit program with specific exit code and msg
+err :: Verbosity -> Int -> Text -> IO a
+err v exit_code msg = do
+ when (v /= Test) $ hPutStrLn stderr msg
+ _ <- exitWith $ ExitFailure exit_code
+ return undefined
+
+-- | Convert any Showable type to T.Text
+tshow :: Show a => a -> Text
+tshow = T.pack . show
+
+-- | Print T.Text nicely to commandline
+hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
+hPutStrLnWith eol h s =
+ hSetNewlineMode h (NewlineMode eol eol) >>
+ hSetEncoding h utf8 >> TIO.hPutStrLn h s
+
+hPutStrLn :: Handle -> Text -> IO ()
+hPutStrLn = hPutStrLnWith nativeNewline
diff --git a/src/KYCheck/Type.hs b/src/KYCheck/Type.hs
@@ -3,6 +3,9 @@
-- SPDX-License-Identifier: AGPL-3.0-or-later
-- SPDX-License-Identifier: EUPL-1.2
+
+{-# LANGUAGE DeriveGeneric #-}
+
module KYCheck.Type
( getSSID
, liftQuality
@@ -11,8 +14,17 @@ module KYCheck.Type
, removeSSID
, stringToQuality
, WithSSID(..)
+
+ , CommandLine(..)
+ , Config(..)
+ , DhallConfig(..)
+ , Verbosity(..)
) where
+import Dhall
+
+import qualified Data.Text as T
+
data Quality a = Good a
| Low a
| UnknownQuality a deriving (Show, Eq)
@@ -41,3 +53,52 @@ getSSID (WithSSID n _) = n
removeSSID :: WithSSID a -> a
removeSSID (WithSSID _ x) = x
+
+
+
+-- | Data type containing all configuration information (config.dhall)
+data Config = Config
+ { verbosity :: Verbosity
+ , ssl_location :: FilePath
+ , threshold_percentage :: Float
+ , threshold_points :: Float
+ , perfect_points :: Float
+ , points_address :: Float
+ , points_date :: Float
+ , points_id :: Float
+ , points_name :: Float
+ , points_nationality :: Float
+ } deriving (Show, Eq, Generic)
+
+data Verbosity = Test | Silent | Info | Errors | Debug deriving (Show, Eq, Generic, Ord)
+
+data DhallConfig = DhallConfig
+ { dhall_verbosity :: Verbosity
+ , dhall_ssl_location :: FilePath
+ , dhall_threshold_percentage :: Double
+ , dhall_threshold_points :: Natural
+ , dhall_perfect_points :: Natural
+ , dhall_points_address :: Natural
+ , dhall_points_date :: Natural
+ , dhall_points_id :: Natural
+ , dhall_points_name :: Natural
+ , dhall_points_nationality :: Natural
+ } deriving (Show, Eq, Generic)
+
+instance FromDhall DhallConfig where
+ autoWith _ = genericAutoWith (defaultInterpretOptions { fieldModifier = T.drop 6 })
+instance FromDhall Verbosity
+
+-- | Data type for user configuration via commandline
+data CommandLine = CommandLine
+ { cl_verbosity :: Maybe Verbosity
+ , cl_ssl_location :: Maybe FilePath
+ , cl_threshold_percentage :: Maybe Float
+ , cl_threshold_points :: Maybe Float
+ , cl_perfect_points :: Maybe Float
+ , cl_points_address :: Maybe Float
+ , cl_points_date :: Maybe Float
+ , cl_points_id :: Maybe Float
+ , cl_points_name :: Maybe Float
+ , cl_points_nationality :: Maybe Float
+ } deriving (Show, Eq, Generic)
diff --git a/test/Tests/Check.hs b/test/Tests/Check.hs
@@ -85,7 +85,7 @@ testTargetVersions config verbose number sanction_list versions =
findRealTarget :: Config -> Bool -> Int -> NaturalPerson -> Distribution -> Map Int Individual -> TestTree
findRealTarget config verbose number target distribution sanction_list =
testCase ("Find target " ++ show number ++ " in Swiss Sanction List") $ do
- let score = checkPersons sanction_list target
+ let score = checkPersons config sanction_list target
when verbose $ print score
compareScore (threshold_confidence distribution) (confidence score) "Score"
@@ -95,7 +95,7 @@ findRealTarget config verbose number target distribution sanction_list =
dontFindTarget :: Config -> Bool -> NaturalPerson -> Map Int Individual -> String -> TestTree
dontFindTarget config verbose target sanction_list title =
testCase ("Do not find " ++ title ++ " in Swiss Sanction List") $ do
- let score = checkPersons sanction_list target
+ let score = checkPersons config sanction_list target
when verbose $ print score
compareScore (Just 0) (confidence score) "Score"
@@ -124,7 +124,7 @@ testSingleTarget config verbose number target distribution ssl_target =
nationality_score = 50 * checkCountryCode (nationalities individual) (nationality target)
- total_score = checkPersons ssl target
+ total_score = checkPersons config ssl target
when verbose $ print total_score
compareScore (threshold_address distribution) address_score "Address"
diff --git a/test/data/test.config.dhall b/test/data/test.config.dhall
@@ -4,9 +4,9 @@
-- SPDX-License-Identifier: EUPL-1.2
-- Verbosity levels
-let Verbosity = < Silent | Info | Errors | Debug >
+let Verbosity = < Test | Silent | Info | Errors | Debug >
-in { verbosity = Verbosity.Silent
+in { verbosity = Verbosity.Test
, ssl_location = "test-sanction-list.xml"
, threshold_percentage = 0.8 -- Percentage
, threshold_points = 200 -- Points needed to flag entry as match
diff --git a/test/test-kycheck.hs b/test/test-kycheck.hs
@@ -16,6 +16,7 @@ import Test.Tasty
import KYCheck.SSL
import KYCheck.Config
+import KYCheck.Type
import Dhall
@@ -35,7 +36,11 @@ inDirectory path action = bracket
main :: IO ()
main = do
inDirectory "test/data" $ do
- config <- input auto "./test.config.dhall"
+ dhall <- input auto "./test.config.dhall" :: IO DhallConfig
+ let config = inputToConfig dhall Nothing
+
+ valid_config <- checkConfig config
+
targets <- xmlToSSL <$> parseSwissSanctionsList (ssl_location config)
defaultMain (tests config targets)