module Hledger.Read.CsvReader (
reader,
CsvRecord,
rulesFileFor,
parseRulesFile,
parseAndValidateCsvRules,
expandIncludes,
transactionFromCsvRecord,
tests_Hledger_Read_CsvReader
)
where
import Prelude ()
import Prelude.Compat hiding (getContents)
import Control.Exception hiding (try)
import Control.Monad
import Control.Monad.Except
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
import Data.Char (toLower, isDigit, isSpace)
import Data.List.Compat
import Data.Maybe
import Data.Ord
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Calendar (Day)
#if MIN_VERSION_time(1,5,0)
import Data.Time.Format (parseTimeM, defaultTimeLocale)
#else
import Data.Time.Format (parseTime)
import System.Locale (defaultTimeLocale)
#endif
import Safe
import System.Directory (doesFileExist)
import System.FilePath
import System.IO (stderr)
import Test.HUnit hiding (State)
import Text.CSV (parseCSV, CSV)
import Text.Megaparsec.Compat hiding (parse)
import qualified Text.Parsec as Parsec
import Text.Printf (hPrintf,printf)
import Hledger.Data
import Hledger.Utils.UTF8IOCompat (getContents)
import Hledger.Utils
import Hledger.Read.Common (amountp, statusp, genericSourcePos)
reader :: Reader
reader = Reader
{rFormat = "csv"
,rExtensions = ["csv"]
,rParser = parse
,rExperimental = False
}
parse :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal
parse rulesfile _ f t = do
r <- liftIO $ readJournalFromCsv rulesfile f t
case r of Left e -> throwError e
Right j -> return $ journalNumberAndTieTransactions j
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
handle (\e -> return $ Left $ show (e :: IOException)) $ do
let throwerr = throw.userError
let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
rulesfileexists <- doesFileExist rulesfile
rulestext <-
if rulesfileexists
then do
hPrintf stderr "using conversion rules file %s\n" rulesfile
liftIO $ (readFile' rulesfile >>= expandIncludes (takeDirectory rulesfile))
else return $ defaultRulesText rulesfile
rules <- liftIO (runExceptT $ parseAndValidateCsvRules rulesfile rulestext) >>= either throwerr return
dbg2IO "rules" rules
let skip = maybe 0 oneorerror $ getDirective "skip" rules
where
oneorerror "" = 1
oneorerror s = readDef (throwerr $ "could not parse skip value: " ++ show s) s
let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
records <- (either throwerr id .
dbg2 "validateCsv" . validateCsv skip .
dbg2 "parseCsv")
`fmap` parseCsv parsecfilename (T.unpack csvdata)
dbg1IO "first 3 csv records" $ take 3 records
let
txns = snd $ mapAccumL
(\pos r ->
let
SourcePos name line col = pos
line' = (mpMkPos . (+1) . mpUnPos) line
pos' = SourcePos name line' col
in
(pos, transactionFromCsvRecord pos' rules r)
)
(initialPos parsecfilename) records
txns' | length txns > 1 && tdate (head txns) > tdate (last txns) = reverse txns
| otherwise = txns
when (not rulesfileexists) $ do
hPrintf stderr "created default conversion rules file %s, edit this for better results\n" rulesfile
writeFile rulesfile $ T.unpack rulestext
return $ Right nulljournal{jtxns=sortBy (comparing tdate) txns'}
parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV)
parseCsv path csvdata =
case path of
"-" -> liftM (parseCSV "(stdin)") getContents
_ -> return $ parseCSV path csvdata
validateCsv :: Int -> Either Parsec.ParseError CSV -> Either String [CsvRecord]
validateCsv _ (Left e) = Left $ show e
validateCsv numhdrlines (Right rs) = validate $ drop numhdrlines $ filternulls rs
where
filternulls = filter (/=[""])
validate [] = Left "no CSV records found"
validate rs@(first:_)
| isJust lessthan2 = let r = fromJust lessthan2 in Left $ printf "CSV record %s has less than two fields" (show r)
| isJust different = let r = fromJust different in Left $ printf "the first CSV record %s has %d fields but %s has %d" (show first) length1 (show r) (length r)
| otherwise = Right rs
where
length1 = length first
lessthan2 = headMay $ filter ((<2).length) rs
different = headMay $ filter ((/=length1).length) rs
rulesFileFor :: FilePath -> FilePath
rulesFileFor = (++ ".rules")
csvFileFor :: FilePath -> FilePath
csvFileFor = reverse . drop 6 . reverse
defaultRulesText :: FilePath -> Text
defaultRulesText csvfile = T.pack $ unlines
["# hledger csv conversion rules for " ++ csvFileFor (takeFileName csvfile)
,"# cf http://hledger.org/manual#csv-files"
,""
,"account1 assets:bank:checking"
,""
,"fields date, description, amount"
,""
,"#skip 1"
,""
,"#date-format %-d/%-m/%Y"
,"#date-format %-m/%-d/%Y"
,"#date-format %Y-%h-%d"
,""
,"#currency $"
,""
,"if ITUNES"
," account2 expenses:entertainment"
,""
,"if (TO|FROM) SAVINGS"
," account2 assets:bank:savings\n"
]
data CsvRules = CsvRules {
rdirectives :: [(DirectiveName,String)],
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
rassignments :: [(JournalFieldName, FieldTemplate)],
rconditionalblocks :: [ConditionalBlock]
} deriving (Show, Eq)
type CsvRulesParser a = StateT CsvRules SimpleTextParser a
type DirectiveName = String
type CsvFieldName = String
type CsvFieldIndex = Int
type JournalFieldName = String
type FieldTemplate = String
type ConditionalBlock = ([RecordMatcher], [(JournalFieldName, FieldTemplate)])
type RecordMatcher = [RegexpPattern]
type DateFormat = String
type RegexpPattern = String
rules = CsvRules {
rdirectives=[],
rcsvfieldindexes=[],
rassignments=[],
rconditionalblocks=[]
}
addDirective :: (DirectiveName, String) -> CsvRules -> CsvRules
addDirective d r = r{rdirectives=d:rdirectives r}
addAssignment :: (JournalFieldName, FieldTemplate) -> CsvRules -> CsvRules
addAssignment a r = r{rassignments=a:rassignments r}
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
setIndexesAndAssignmentsFromList fs r = addAssignmentsFromList fs . setCsvFieldIndexesFromList fs $ r
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRules -> CsvRules
setCsvFieldIndexesFromList fs r = r{rcsvfieldindexes=zip fs [1..]}
addAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
where
maybeAddAssignment rules f = (maybe id addAssignmentFromIndex $ elemIndex f fs) rules
where
addAssignmentFromIndex i = addAssignment (f, "%"++show (i+1))
addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
getDirective directivename = lookup directivename . rdirectives
instance ShowErrorComponent String where
showErrorComponent = id
parseRulesFile :: FilePath -> ExceptT String IO CsvRules
parseRulesFile f =
liftIO (readFile' f >>= expandIncludes (takeDirectory f)) >>= parseAndValidateCsvRules f
expandIncludes :: FilePath -> T.Text -> IO T.Text
expandIncludes basedir content = do
let (ls,rest) = break (T.isPrefixOf "include") $ T.lines content
case rest of
[] -> return $ T.unlines ls
((T.stripPrefix "include" -> Just f):ls') -> do
let f' = basedir </> dropWhile isSpace (T.unpack f)
basedir' = takeDirectory f'
included <- readFile' f' >>= expandIncludes basedir'
return $ T.unlines [T.unlines ls, included, T.unlines ls']
ls' -> return $ T.unlines $ ls ++ ls'
parseAndValidateCsvRules :: FilePath -> T.Text -> ExceptT String IO CsvRules
parseAndValidateCsvRules rulesfile s = do
let rules = parseCsvRules rulesfile s
case rules of
Left e -> ExceptT $ return $ Left $ parseErrorPretty e
Right r -> do
r_ <- liftIO $ runExceptT $ validateRules r
ExceptT $ case r_ of
Left s -> return $ Left $ parseErrorPretty $ mpMkParseError rulesfile s
Right r -> return $ Right r
parseCsvRules :: FilePath -> T.Text -> Either (ParseError Char MPErr) CsvRules
parseCsvRules rulesfile s =
runParser (evalStateT rulesp rules) rulesfile s
validateRules :: CsvRules -> ExceptT String IO CsvRules
validateRules rules = do
unless (isAssigned "date") $ ExceptT $ return $ Left "Please specify (at top level) the date field. Eg: date %1\n"
unless ((amount && not (amountin || amountout)) ||
(not amount && (amountin && amountout)))
$ ExceptT $ return $ Left "Please specify (at top level) either the amount field, or both the amount-in and amount-out fields. Eg: amount %2\n"
ExceptT $ return $ Right rules
where
amount = isAssigned "amount"
amountin = isAssigned "amount-in"
amountout = isAssigned "amount-out"
isAssigned f = isJust $ getEffectiveAssignment rules [] f
rulesp :: CsvRulesParser CsvRules
rulesp = do
many $ choiceInState
[blankorcommentlinep <?> "blank or comment line"
,(directivep >>= modify' . addDirective) <?> "directive"
,(fieldnamelistp >>= modify' . setIndexesAndAssignmentsFromList) <?> "field name list"
,(fieldassignmentp >>= modify' . addAssignment) <?> "field assignment"
,(conditionalblockp >>= modify' . addConditionalBlock) <?> "conditional block"
]
eof
r <- get
return r{rdirectives=reverse $ rdirectives r
,rassignments=reverse $ rassignments r
,rconditionalblocks=reverse $ rconditionalblocks r
}
blankorcommentlinep :: CsvRulesParser ()
blankorcommentlinep = lift (pdbg 3 "trying blankorcommentlinep") >> choiceInState [blanklinep, commentlinep]
blanklinep :: CsvRulesParser ()
blanklinep = lift (many spacenonewline) >> newline >> return () <?> "blank line"
commentlinep :: CsvRulesParser ()
commentlinep = lift (many spacenonewline) >> commentcharp >> lift restofline >> return () <?> "comment line"
commentcharp :: CsvRulesParser Char
commentcharp = oneOf (";#*" :: [Char])
directivep :: CsvRulesParser (DirectiveName, String)
directivep = (do
lift $ pdbg 3 "trying directive"
d <- fmap T.unpack $ choiceInState $ map (lift . mptext . T.pack) directives
v <- (((char ':' >> lift (many spacenonewline)) <|> lift (some spacenonewline)) >> directivevalp)
<|> (optional (char ':') >> lift (many spacenonewline) >> lift eolof >> return "")
return (d, v)
) <?> "directive"
directives =
["date-format"
,"skip"
]
directivevalp :: CsvRulesParser String
directivevalp = anyChar `manyTill` lift eolof
fieldnamelistp :: CsvRulesParser [CsvFieldName]
fieldnamelistp = (do
lift $ pdbg 3 "trying fieldnamelist"
string "fields"
optional $ char ':'
lift (some spacenonewline)
let separator = lift (many spacenonewline) >> char ',' >> lift (many spacenonewline)
f <- fromMaybe "" <$> optional fieldnamep
fs <- some $ (separator >> fromMaybe "" <$> optional fieldnamep)
lift restofline
return $ map (map toLower) $ f:fs
) <?> "field name list"
fieldnamep :: CsvRulesParser String
fieldnamep = quotedfieldnamep <|> barefieldnamep
quotedfieldnamep :: CsvRulesParser String
quotedfieldnamep = do
char '"'
f <- some $ noneOf ("\"\n:;#~" :: [Char])
char '"'
return f
barefieldnamep :: CsvRulesParser String
barefieldnamep = some $ noneOf (" \t\n,;#~" :: [Char])
fieldassignmentp :: CsvRulesParser (JournalFieldName, FieldTemplate)
fieldassignmentp = do
lift $ pdbg 3 "trying fieldassignmentp"
f <- journalfieldnamep
assignmentseparatorp
v <- fieldvalp
return (f,v)
<?> "field assignment"
journalfieldnamep :: CsvRulesParser String
journalfieldnamep = do
lift (pdbg 2 "trying journalfieldnamep")
T.unpack <$> choiceInState (map (lift . mptext . T.pack) journalfieldnames)
journalfieldnames = [
"account1"
,"account2"
,"amount-in"
,"amount-out"
,"amount"
,"balance"
,"code"
,"comment"
,"currency"
,"date2"
,"date"
,"description"
,"status"
]
assignmentseparatorp :: CsvRulesParser ()
assignmentseparatorp = do
lift $ pdbg 3 "trying assignmentseparatorp"
choice [
try (lift (many spacenonewline) >> char ':'),
spaceChar
]
_ <- lift (many spacenonewline)
return ()
fieldvalp :: CsvRulesParser String
fieldvalp = do
lift $ pdbg 2 "trying fieldvalp"
anyChar `manyTill` lift eolof
conditionalblockp :: CsvRulesParser ConditionalBlock
conditionalblockp = do
lift $ pdbg 3 "trying conditionalblockp"
string "if" >> lift (many spacenonewline) >> optional newline
ms <- some recordmatcherp
as <- many (lift (some spacenonewline) >> fieldassignmentp)
when (null as) $
fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
return (ms, as)
<?> "conditional block"
recordmatcherp :: CsvRulesParser [String]
recordmatcherp = do
lift $ pdbg 2 "trying recordmatcherp"
_ <- optional (matchoperatorp >> lift (many spacenonewline) >> optional newline)
ps <- patternsp
when (null ps) $
fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n"
return ps
<?> "record matcher"
matchoperatorp :: CsvRulesParser String
matchoperatorp = fmap T.unpack $ choiceInState $ map mptext
["~"
]
patternsp :: CsvRulesParser [String]
patternsp = do
lift $ pdbg 3 "trying patternsp"
ps <- many regexp
return ps
regexp :: CsvRulesParser String
regexp = do
lift $ pdbg 3 "trying regexp"
notFollowedBy matchoperatorp
c <- lift nonspace
cs <- anyChar `manyTill` lift eolof
return $ strip $ c:cs
type CsvRecord = [String]
transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules record = t
where
mdirective = (`getDirective` rules)
mfieldtemplate = getEffectiveAssignment rules record
render = renderTemplate rules record
mskip = mdirective "skip"
mdefaultcurrency = mdirective "default-currency"
mparsedate = parseDateWithFormatOrDefaultFormats (mdirective "date-format")
mdateformat = mdirective "date-format"
date = render $ fromMaybe "" $ mfieldtemplate "date"
date' = fromMaybe (error' $ dateerror "date" date mdateformat) $ mparsedate date
mdate2 = maybe Nothing (Just . render) $ mfieldtemplate "date2"
mdate2' = maybe Nothing (maybe (error' $ dateerror "date2" (fromMaybe "" mdate2) mdateformat) Just . mparsedate) mdate2
dateerror datefield value mdateformat = unlines
["error: could not parse \""++value++"\" as a date using date format "++maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" show mdateformat
,"the CSV record is: "++intercalate ", " (map show record)
,"the "++datefield++" rule is: "++(fromMaybe "required, but missing" $ mfieldtemplate datefield)
,"the date-format is: "++fromMaybe "unspecified" mdateformat
,"you may need to "
++"change your "++datefield++" rule, "
++maybe "add a" (const "change your") mdateformat++" date-format rule, "
++"or "++maybe "add a" (const "change your") mskip++" skip rule"
,"for m/d/y or d/m/y dates, use date-format %-m/%-d/%Y or date-format %-d/%-m/%Y"
]
status =
case mfieldtemplate "status" of
Nothing -> Unmarked
Just str -> either statuserror id .
runParser (statusp <* eof) "" .
T.pack $ render str
where
statuserror err = error' $ unlines
["error: could not parse \""++str++"\" as a cleared status (should be *, ! or empty)"
,"the parse error is: "++show err
]
code = maybe "" render $ mfieldtemplate "code"
description = maybe "" render $ mfieldtemplate "description"
comment = maybe "" render $ mfieldtemplate "comment"
precomment = maybe "" render $ mfieldtemplate "precomment"
currency = maybe (fromMaybe "" mdefaultcurrency) render $ mfieldtemplate "currency"
amountstr = (currency++) $ simplifySign $ getAmountStr rules record
amount = either amounterror (Mixed . (:[])) $ runParser (evalStateT (amountp <* eof) mempty) "" $ T.pack amountstr
amounterror err = error' $ unlines
["error: could not parse \""++amountstr++"\" as an amount"
,showRecord record
,"the amount rule is: "++(fromMaybe "" $ mfieldtemplate "amount")
,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency")
,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
,"the parse error is: "++show err
,"you may need to "
++"change your amount or currency rules, "
++"or "++maybe "add a" (const "change your") mskip++" skip rule"
]
amount1 = costOfMixedAmount amount
amount2 = (amount)
s `or` def = if null s then def else s
defaccount1 = fromMaybe "unknown" $ mdirective "default-account1"
defaccount2 = case isNegativeMixedAmount amount2 of
Just True -> "income:unknown"
_ -> "expenses:unknown"
account1 = T.pack $ maybe "" render (mfieldtemplate "account1") `or` defaccount1
account2 = T.pack $ maybe "" render (mfieldtemplate "account2") `or` defaccount2
balance = maybe Nothing (parsebalance.render) $ mfieldtemplate "balance"
parsebalance str
| all isSpace str = Nothing
| otherwise = Just $ either (balanceerror str) id $ runParser (evalStateT (amountp <* eof) mempty) "" $ T.pack $ (currency++) $ simplifySign str
balanceerror str err = error' $ unlines
["error: could not parse \""++str++"\" as balance amount"
,showRecord record
,"the balance rule is: "++(fromMaybe "" $ mfieldtemplate "balance")
,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency")
,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
,"the parse error is: "++show err
]
t = nulltransaction{
tsourcepos = genericSourcePos sourcepos,
tdate = date',
tdate2 = mdate2',
tstatus = status,
tcode = T.pack code,
tdescription = T.pack description,
tcomment = T.pack comment,
tpreceding_comment_lines = T.pack precomment,
tpostings =
[posting {paccount=account2, pamount=amount2, ptransaction=Just t}
,posting {paccount=account1, pamount=amount1, ptransaction=Just t, pbalanceassertion=balance}
]
}
getAmountStr :: CsvRules -> CsvRecord -> String
getAmountStr rules record =
let
mamount = getEffectiveAssignment rules record "amount"
mamountin = getEffectiveAssignment rules record "amount-in"
mamountout = getEffectiveAssignment rules record "amount-out"
render = fmap (strip . renderTemplate rules record)
in
case (render mamount, render mamountin, render mamountout) of
(Just "", Nothing, Nothing) -> error' $ "amount has no value\n"++showRecord record
(Just a, Nothing, Nothing) -> a
(Nothing, Just "", Just "") -> error' $ "neither amount-in or amount-out has a value\n"++showRecord record
(Nothing, Just i, Just "") -> i
(Nothing, Just "", Just o) -> negateStr o
(Nothing, Just _, Just _) -> error' $ "both amount-in and amount-out have a value\n"++showRecord record
_ -> error' $ "found values for amount and for amount-in/amount-out - please use either amount or amount-in/amount-out\n"++showRecord record
type CsvAmountString = String
simplifySign :: CsvAmountString -> CsvAmountString
simplifySign ('(':s) | lastMay s == Just ')' = simplifySign $ negateStr $ init s
simplifySign ('-':'-':s) = s
simplifySign s = s
negateStr :: String -> String
negateStr ('-':s) = s
negateStr s = '-':s
showRecord :: CsvRecord -> String
showRecord r = "the CSV record is: "++intercalate ", " (map show r)
getEffectiveAssignment :: CsvRules -> CsvRecord -> JournalFieldName -> Maybe FieldTemplate
getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
where
assignmentsFor f = map snd $ filter ((==f).fst) $ toplevelassignments ++ conditionalassignments
where
toplevelassignments = rassignments rules
conditionalassignments = concatMap snd $ filter blockMatches $ blocksAssigning f
where
blocksAssigning f = filter (any ((==f).fst) . snd) $ rconditionalblocks rules
blockMatches :: ConditionalBlock -> Bool
blockMatches (matchers,_) = all matcherMatches matchers
where
matcherMatches :: RecordMatcher -> Bool
matcherMatches pats = patternMatches $ "(" ++ intercalate "|" pats ++ ")"
where
patternMatches :: RegexpPattern -> Bool
patternMatches pat = regexMatchesCI pat csvline
where
csvline = intercalate "," record
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
renderTemplate rules record t = regexReplaceBy "%[A-z0-9]+" replace t
where
replace ('%':pat) = maybe pat (\i -> atDef "" record (i1)) mi
where
mi | all isDigit pat = readMay pat
| otherwise = lookup pat $ rcsvfieldindexes rules
replace pat = pat
parseDateWithFormatOrDefaultFormats :: Maybe DateFormat -> String -> Maybe Day
parseDateWithFormatOrDefaultFormats mformat s = firstJust $ map parsewith formats
where
parsetime =
#if MIN_VERSION_time(1,5,0)
parseTimeM True
#else
parseTime
#endif
parsewith = flip (parsetime defaultTimeLocale) s
formats = maybe
["%Y/%-m/%-d"
,"%Y-%-m-%-d"
,"%Y.%-m.%-d"
]
(:[])
mformat
tests_Hledger_Read_CsvReader = TestList (test_parser)
test_parser = [
"convert rules parsing: empty file" ~: do
assertParseEqual (parseCsvRules "unknown" "") rules
,"convert rules parsing: trailing comments" ~: do
assertParse (parseWithState' rules rulesp "skip\n# \n#\n")
,"convert rules parsing: trailing blank lines" ~: do
assertParse (parseWithState' rules rulesp "skip\n\n \n")
,"convert rules parsing: empty field value" ~: do
assertParse (parseWithState' rules rulesp "account1 \nif foo\n account2 foo\n")
]