module Hledger.Read.CsvReader (
reader,
CsvRecord,
rulesFileFor,
parseRulesFile,
transactionFromCsvRecord,
tests_Hledger_Read_CsvReader
)
where
import Control.Applicative ((<$>))
import Control.Exception hiding (try)
import Control.Monad
import Control.Monad.Error
import Data.Char (toLower, isDigit, isSpace)
import Data.List
import Data.Maybe
import Data.Ord
import Data.Time.Calendar (Day)
import Data.Time.Format (parseTime)
import Safe
import System.Directory (doesFileExist)
import System.FilePath
import System.IO (stderr)
import System.Locale (defaultTimeLocale)
import Test.HUnit
import Text.CSV (parseCSV, CSV)
import Text.ParserCombinators.Parsec hiding (parse)
import Text.ParserCombinators.Parsec.Error
import Text.ParserCombinators.Parsec.Pos
import Text.Printf (hPrintf,printf)
import Hledger.Data
import Prelude hiding (getContents)
import Hledger.Utils.UTF8IOCompat (getContents)
import Hledger.Utils
import Hledger.Read.JournalReader (amountp)
reader :: Reader
reader = Reader format detect parse
format :: String
format = "csv"
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == '.':format
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse rulesfile f s =
do
r <- liftIO $ readJournalFromCsv rulesfile f s
case r of Left e -> throwError e
Right j -> return j
readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal)
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
handle (\e -> return $ Left $ show (e :: IOException)) $ do
let throwerr = throw.userError
records <- (either throwerr id . validateCsv) `fmap` parseCsv csvfile csvdata
return $ dbg "" $ take 3 records
let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
created <- records `seq` ensureRulesFileExists rulesfile
if created
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile
else hPrintf stderr "using conversion rules file %s\n" rulesfile
rules <- either (throwerr.show) id `fmap` parseRulesFile rulesfile
return $ dbg "" rules
let headerlines = maybe 0 oneorerror $ getDirective "skip" rules
where
oneorerror "" = 1
oneorerror s = readDef (throwerr $ "could not parse skip value: " ++ show s) s
records' = drop headerlines records
let txns = map (transactionFromCsvRecord rules) records'
return $ Right nulljournal{jtxns=sortBy (comparing tdate) txns}
parseCsv :: FilePath -> String -> IO (Either ParseError CSV)
parseCsv path csvdata =
case path of
"-" -> liftM (parseCSV "(stdin)") getContents
_ -> return $ parseCSV path csvdata
validateCsv :: Either ParseError CSV -> Either String [CsvRecord]
validateCsv (Left e) = Left $ show e
validateCsv (Right rs) = validate $ filternulls rs
where
filternulls = filter (/=[""])
validate [] = Left "no CSV records found"
validate rs@(first:_)
| isJust lessthan2 = let r = fromJust lessthan2 in Left $ printf "CSV record %s has less than two fields" (show r)
| isJust different = let r = fromJust different in Left $ printf "the first CSV record %s has %d fields but %s has %d" (show first) length1 (show r) (length r)
| otherwise = Right rs
where
length1 = length first
lessthan2 = headMay $ filter ((<2).length) rs
different = headMay $ filter ((/=length1).length) rs
rulesFileFor :: FilePath -> FilePath
rulesFileFor = (++ ".rules")
csvFileFor :: FilePath -> FilePath
csvFileFor = reverse . drop 6 . reverse
ensureRulesFileExists :: FilePath -> IO Bool
ensureRulesFileExists f = do
exists <- doesFileExist f
if exists
then return False
else do
writeFile f $ newRulesFileContent f
return True
newRulesFileContent :: FilePath -> String
newRulesFileContent f = unlines
["# hledger csv conversion rules for " ++ csvFileFor (takeFileName f)
,"# cf http://hledger.org/MANUAL.html"
,""
,"account1 assets:bank:checking"
,""
,"fields date, description, amount"
,""
,"#skip 1"
,""
,"#date-format %-d/%-m/%Y"
,"#date-format %-m/%-d/%Y"
,"#date-format %Y-%h-%d"
,""
,"#currency $"
,""
,"if ITUNES"
," account2 expenses:entertainment"
,""
,"if (TO|FROM) SAVINGS"
," account2 assets:bank:savings\n"
]
data CsvRules = CsvRules {
rdirectives :: [(DirectiveName,String)],
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
rassignments :: [(JournalFieldName, FieldTemplate)],
rconditionalblocks :: [ConditionalBlock]
} deriving (Show, Eq)
type DirectiveName = String
type CsvFieldName = String
type CsvFieldIndex = Int
type JournalFieldName = String
type FieldTemplate = String
type ConditionalBlock = ([RecordMatcher], [(JournalFieldName, FieldTemplate)])
type RecordMatcher = [Regexp]
type DateFormat = String
type Regexp = String
rules = CsvRules {
rdirectives=[],
rcsvfieldindexes=[],
rassignments=[],
rconditionalblocks=[]
}
addDirective :: (DirectiveName, String) -> CsvRules -> CsvRules
addDirective d r = r{rdirectives=d:rdirectives r}
addAssignment :: (JournalFieldName, FieldTemplate) -> CsvRules -> CsvRules
addAssignment a r = r{rassignments=a:rassignments r}
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
setIndexesAndAssignmentsFromList fs r = addAssignmentsFromList fs . setCsvFieldIndexesFromList fs $ r
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRules -> CsvRules
setCsvFieldIndexesFromList fs r = r{rcsvfieldindexes=zip fs [1..]}
addAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
where
maybeAddAssignment rules f = (maybe id addAssignmentFromIndex $ elemIndex f fs) rules
where
addAssignmentFromIndex i = addAssignment (f, "%"++show (i+1))
addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
getDirective directivename = lookup directivename . rdirectives
parseRulesFile :: FilePath -> IO (Either ParseError CsvRules)
parseRulesFile f = do
s <- readFile' f >>= expandIncludes
let rules = parseCsvRules f s
return $ case rules of
Left e -> Left e
Right r -> case validateRules r of
Left e -> Left $ toParseError e
Right r -> Right r
where
toParseError s = newErrorMessage (Message s) (initialPos "")
expandIncludes :: String -> IO String
expandIncludes s = do
let (ls,rest) = break (isPrefixOf "include") $ lines s
case rest of
[] -> return $ unlines ls
(('i':'n':'c':'l':'u':'d':'e':f):ls') -> do
let f' = dropWhile isSpace f
included <- readFile f' >>= expandIncludes
return $ unlines [unlines ls, included, unlines ls']
ls' -> return $ unlines $ ls ++ ls'
parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
parseCsvRules rulesfile s =
runParser rulesp rules rulesfile s
validateRules :: CsvRules -> Either String CsvRules
validateRules rules = do
unless (isAssigned "date") $ Left "Please specify (at top level) the date field. Eg: date %1\n"
unless ((amount && not (amountin || amountout)) ||
(not amount && (amountin && amountout)))
$ Left "Please specify (at top level) either the amount field, or both the amount-in and amount-out fields. Eg: amount %2\n"
Right rules
where
amount = isAssigned "amount"
amountin = isAssigned "amount-in"
amountout = isAssigned "amount-out"
isAssigned f = isJust $ getEffectiveAssignment rules [] f
rulesp :: GenParser Char CsvRules CsvRules
rulesp = do
many $ choice'
[blankorcommentline <?> "blank or comment line"
,(directive >>= updateState . addDirective) <?> "directive"
,(fieldnamelist >>= updateState . setIndexesAndAssignmentsFromList) <?> "field name list"
,(fieldassignment >>= updateState . addAssignment) <?> "field assignment"
,(conditionalblock >>= updateState . addConditionalBlock) <?> "conditional block"
]
eof
r <- getState
return r{rdirectives=reverse $ rdirectives r
,rassignments=reverse $ rassignments r
,rconditionalblocks=reverse $ rconditionalblocks r
}
blankorcommentline = pdbg 1 "trying blankorcommentline" >> choice' [blankline, commentline]
blankline = many spacenonewline >> newline >> return () <?> "blank line"
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
commentchar = oneOf ";#"
directive = do
pdbg 1 "trying directive"
d <- choice' $ map string directives
v <- (((char ':' >> many spacenonewline) <|> many1 spacenonewline) >> directiveval)
<|> (optional (char ':') >> many spacenonewline >> eolof >> return "")
return (d,v)
<?> "directive"
directives =
["date-format"
,"skip"
]
directiveval = anyChar `manyTill` eolof
fieldnamelist = (do
pdbg 1 "trying fieldnamelist"
string "fields"
optional $ char ':'
many1 spacenonewline
f <- fieldname
let separator = many spacenonewline >> char ',' >> many spacenonewline
fs <- many1 $ (separator >> fromMaybe "" <$> optionMaybe fieldname)
restofline
return $ map (map toLower) $ f:fs
) <?> "field name list"
fieldname = quotedfieldname <|> barefieldname
quotedfieldname = do
char '"'
f <- many1 $ noneOf "\"\n:;#~"
char '"'
return f
barefieldname = many1 $ noneOf " \t\n,;#~"
fieldassignment = do
pdbg 1 "trying fieldassignment"
f <- journalfieldname
assignmentseparator
v <- fieldval
return (f,v)
<?> "field assignment"
journalfieldname = pdbg 2 "trying journalfieldname" >> choice' (map string journalfieldnames)
journalfieldnames =
[
"amount-in"
,"amount-out"
,"currency"
,"date2"
,"date"
,"status"
,"code"
,"description"
,"amount"
,"account1"
,"account2"
,"comment"
]
assignmentseparator = do
pdbg 3 "trying assignmentseparator"
choice [
try (many spacenonewline >> char ':'),
space
]
many spacenonewline
fieldval = do
pdbg 2 "trying fieldval"
anyChar `manyTill` eolof
conditionalblock = do
pdbg 1 "trying conditionalblock"
string "if" >> many spacenonewline >> optional newline
ms <- many1 recordmatcher
as <- many (many1 spacenonewline >> fieldassignment)
when (null as) $
fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
return (ms, as)
<?> "conditional block"
recordmatcher = do
pdbg 2 "trying recordmatcher"
_ <- optional (matchoperator >> many spacenonewline >> optional newline)
ps <- patterns
when (null ps) $
fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n"
return ps
<?> "record matcher"
matchoperator = choice' $ map string
["~"
]
patterns = do
pdbg 3 "trying patterns"
ps <- many regexp
return ps
regexp = do
pdbg 3 "trying regexp"
notFollowedBy matchoperator
c <- nonspace
cs <- anyChar `manyTill` eolof
return $ strip $ c:cs
type CsvRecord = [String]
transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord rules record = t
where
mdirective = (`getDirective` rules)
mfieldtemplate = getEffectiveAssignment rules record
render = renderTemplate rules record
mskip = mdirective "skip"
mdefaultcurrency = mdirective "default-currency"
mparsedate = parseDateWithFormatOrDefaultFormats (mdirective "date-format")
mdateformat = mdirective "date-format"
date = render $ fromMaybe "" $ mfieldtemplate "date"
date' = fromMaybe (error' $ dateerror "date" date mdateformat) $ mparsedate date
mdate2 = maybe Nothing (Just . render) $ mfieldtemplate "date2"
mdate2' = maybe Nothing (maybe (error' $ dateerror "date2" (fromMaybe "" mdate2) mdateformat) Just . mparsedate) mdate2
dateerror datefield value mdateformat = unlines
["error: could not parse \""++value++"\" as a date using date format "++maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" show mdateformat
,"the CSV record is: "++intercalate ", " (map show record)
,"the "++datefield++" rule is: "++(fromMaybe "required, but missing" $ mfieldtemplate datefield)
,"the date-format is: "++fromMaybe "unspecified" mdateformat
,"you may need to "
++"change your "++datefield++" rule, "
++maybe "add a" (const "change your") mdateformat++" date-format rule, "
++"or "++maybe "add a" (const "change your") mskip++" skip rule"
,"for m/d/y or d/m/y dates, use date-format %-m/%-d/%Y or date-format %-d/%-m/%Y"
]
status = maybe False ((=="*") . render) $ mfieldtemplate "status"
code = maybe "" render $ mfieldtemplate "code"
description = maybe "" render $ mfieldtemplate "description"
comment = maybe "" render $ mfieldtemplate "comment"
precomment = maybe "" render $ mfieldtemplate "precomment"
currency = maybe (fromMaybe "" mdefaultcurrency) render $ mfieldtemplate "currency"
amountstr = (currency++) $ negateIfParenthesised $ getAmountStr rules record
amount = either amounterror mixed $ runParser (do {a <- amountp; eof; return a}) nullctx "" amountstr
amounterror err = error' $ unlines
["error: could not parse \""++amountstr++"\" as an amount"
,showRecord record
,"the amount rule is: "++(fromMaybe "" $ mfieldtemplate "amount")
,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency")
,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
,"the parse error is: "++show err
,"you may need to "
++"change your amount or currency rules, "
++"or "++maybe "add a" (const "change your") mskip++" skip rule"
]
amount1 = costOfMixedAmount amount
amount2 = (amount)
s `or` def = if null s then def else s
defaccount1 = fromMaybe "unknown" $ mdirective "default-account1"
defaccount2 = case isNegativeMixedAmount amount2 of
Just True -> "income:unknown"
_ -> "expenses:unknown"
account1 = maybe "" render (mfieldtemplate "account1") `or` defaccount1
account2 = maybe "" render (mfieldtemplate "account2") `or` defaccount2
t = nulltransaction{
tdate = date',
tdate2 = mdate2',
tstatus = status,
tcode = code,
tdescription = description,
tcomment = comment,
tpreceding_comment_lines = precomment,
tpostings =
[posting {paccount=account2, pamount=amount2, ptransaction=Just t}
,posting {paccount=account1, pamount=amount1, ptransaction=Just t}
]
}
getAmountStr :: CsvRules -> CsvRecord -> String
getAmountStr rules record =
let
mamount = getEffectiveAssignment rules record "amount"
mamountin = getEffectiveAssignment rules record "amount-in"
mamountout = getEffectiveAssignment rules record "amount-out"
render = fmap (strip . renderTemplate rules record)
in
case (render mamount, render mamountin, render mamountout) of
(Just "", Nothing, Nothing) -> error' $ "amount has no value\n"++showRecord record
(Just a, Nothing, Nothing) -> a
(Nothing, Just "", Just "") -> error' $ "neither amount-in or amount-out has a value\n"++showRecord record
(Nothing, Just i, Just "") -> i
(Nothing, Just "", Just o) -> negateStr o
(Nothing, Just _, Just _) -> error' $ "both amount-in and amount-out have a value\n"++showRecord record
_ -> error' $ "found values for amount and for amount-in/amount-out - please use either amount or amount-in/amount-out\n"++showRecord record
negateIfParenthesised :: String -> String
negateIfParenthesised ('(':s) | lastMay s == Just ')' = negateStr $ init s
negateIfParenthesised s = s
negateStr :: String -> String
negateStr ('-':s) = s
negateStr s = '-':s
showRecord :: CsvRecord -> String
showRecord r = "the CSV record is: "++intercalate ", " (map show r)
getEffectiveAssignment :: CsvRules -> CsvRecord -> JournalFieldName -> Maybe FieldTemplate
getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
where
assignmentsFor f = map snd $ filter ((==f).fst) $ toplevelassignments ++ conditionalassignments
where
toplevelassignments = rassignments rules
conditionalassignments = concatMap snd $ filter blockMatches $ blocksAssigning f
where
blocksAssigning f = filter (any ((==f).fst) . snd) $ rconditionalblocks rules
blockMatches :: ConditionalBlock -> Bool
blockMatches (matchers,_) = all matcherMatches matchers
where
matcherMatches :: RecordMatcher -> Bool
matcherMatches pats = patternMatches $ "(" ++ intercalate "|" pats ++ ")"
where
patternMatches :: Regexp -> Bool
patternMatches pat = regexMatchesCIRegexCompat pat csvline
where
csvline = intercalate "," record
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
renderTemplate rules record t = regexReplaceBy "%[A-z0-9]+" replace t
where
replace ('%':pat) = maybe pat (\i -> atDef "" record (i1)) mi
where
mi | all isDigit pat = readMay pat
| otherwise = lookup pat $ rcsvfieldindexes rules
replace pat = pat
parseDateWithFormatOrDefaultFormats :: Maybe DateFormat -> String -> Maybe Day
parseDateWithFormatOrDefaultFormats mformat s = firstJust $ map parsewith formats
where
parsewith = flip (parseTime defaultTimeLocale) s
formats = maybe
["%Y/%-m/%-d"
,"%Y-%-m-%-d"
,"%Y.%-m.%-d"
]
(:[])
mformat
tests_Hledger_Read_CsvReader = TestList (test_parser)
test_parser = [
"convert rules parsing: empty file" ~: do
assertParseEqual (parseCsvRules "unknown" "") rules
,"convert rules parsing: trailing comments" ~: do
assertParse (parseWithCtx rules rulesp "skip\n# \n#\n")
,"convert rules parsing: trailing blank lines" ~: do
assertParse (parseWithCtx rules rulesp "skip\n\n \n")
]