{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
             OverloadedStrings, CPP, TemplateHaskell #-}
-- | Parses U.S. federal Thrift Savings Plan (TSP) statements.
--
-- This module works with PDF TSP statements downloaded from the TSP
-- web site. It works with the statement format used as of July 2013.
-- The format recently changed to allow for Roth contributions.  This
-- works on civilian, FERS statements; maybe it works on others, but I
-- cannot test these (if you test these and find bugs, send me patches
-- and I will merge them.)
--
-- You need to have the pdftotext program installed and available on
-- your PATH.  This program is part of the poppler project.  On Debian
-- GNU/Linux systems, it is part of the poppler-utils package.
module TsParse

-- If in test mode, just export everything.
#ifdef test

  where

#else

  ( -- * Data types

    -- ** Basic types
    Dollars
  , Shares
  , TxnType

    -- ** Transaction Detail By Source
  , BySource(..)
  , BySourceSummary(..)
  , BySourceBeginningBal
  , BySourceGainLoss
  , BySourceEndingBal
  , BySourcePosting(..)

    -- ** Transaction Detail By Fund
  , FundName
  , ByFund(..)
  , ByFundBeginningBal(..)
  , ByFundGainLoss(..)
  , ByFundEndingBal(..)
  , ByFundPosting(..)

    -- ** TSP statement
  , TspStatement(..)

    -- * Parsing a TSP statement
  , parseTsp
  , parseTspFromFile

    -- * Pretty printing
  , Pretty(..)

  ) where
#endif


import qualified Data.Decimal as D
import qualified Data.Time as T
import Data.Decimal (Decimal)
import Prelude hiding (words)
import qualified Prelude
import Control.Applicative
import Data.List.Split (splitOn)
import qualified Text.Parsec as P
import Text.Read (readMaybe)
import Text.Parsec ((<?>))
import Text.Parsec.String (Parser)
import System.Process (readProcess)
import qualified Text.PrettyPrint as Y
import Data.Monoid ((<>))

#ifdef test
import qualified Test.QuickCheck as Q
import qualified Test.QuickCheck.Property as QP
import Test.QuickCheck.All (quickCheckAll)
import Test.QuickCheck (Gen, Arbitrary(..))
import Data.List (intersperse)
import Data.List.Split (chunksOf)
#endif

-- | Any data type that is Dollars on the TSP statement.
type Dollars = Decimal

-- | Any data type that is a number of shares on the TSP statement.
type Shares = Decimal

#ifdef test

genMantissa :: Gen Integer
genMantissa = fmap fromIntegral
  $ Q.oneof [ Q.arbitrarySizedIntegral :: Gen Int
            , Q.arbitrarySizedBoundedIntegral
            ]

genDollars :: Gen Dollars
genDollars = D.Decimal <$> pure 2 <*> genMantissa

newtype DollarsRen = DollarsRen { unDollarsRen :: Rendered Dollars }
  deriving (Eq, Ord, Show)

instance Arbitrary DollarsRen where
  arbitrary = do
    dec <- genDollars
    ren <- showDecimalWithSign dec
    return . DollarsRen $ Rendered dec ren

genShares :: Gen Shares
genShares = D.Decimal <$> pure 4 <*> genMantissa

newtype SharesRen = SharesRen { unSharesRen :: Rendered Dollars }
  deriving (Eq, Ord, Show)

instance Arbitrary SharesRen where
  arbitrary = do
    dec <- genShares
    ren <- showDecimalWithSign dec
    return . SharesRen $ Rendered dec ren

#endif

isNonSpaceNonControl :: Char -> Bool
isNonSpaceNonControl c = c >= '!' && c <= '~'

-- | A single word in a text column.
word :: Parser String
word = P.many1
  (P.satisfy isNonSpaceNonControl
    <?> "non-space, non-control character")

#ifdef test

genChar :: Gen Char
genChar = Q.choose ('\0', '\127')

genNonSpacePrintable :: Gen Char
genNonSpacePrintable = Q.choose ('!', '~')

data Rendered a = Rendered
  { ast :: a
  , rendering :: String
  } deriving (Eq, Ord, Show)


genWord :: Gen (Rendered String)
genWord = do
  cs <- Q.listOf1 genNonSpacePrintable
  return $ Rendered cs cs

#endif

-- | Multiple words in a text column. Separated by a single space.
words :: Parser [String]
words = P.sepBy1 word sep
  where
    sep = P.try (P.char ' ' *> P.notFollowedBy (P.char ' '))

#ifdef test

genWords :: Gen (Rendered [String])
genWords = do
  ws <- Q.listOf1 genWord
  let withSpaces = concat . intersperse " " . map ast $ ws
  return $ Rendered (map ast ws) withSpaces

newtype WordsRen = WordsRen { unWordsRen :: Rendered [String] }
  deriving (Eq, Ord, Show)

instance Arbitrary WordsRen where
  arbitrary = WordsRen <$> genWords

prop_words :: WordsRen -> QP.Result
prop_words = testRendered words . unWordsRen

#endif

dollarSign :: Parser ()
dollarSign = () <$ optional (P.char '$')

space :: Parser ()
space = () <$ optional (P.char ' ')

digits :: Parser String
digits = fmap (filter (/= ',')) (P.many1 (P.oneOf "0123456789,"))
         <?> "digits"

-- | Parses a single decimal value. Recognizes negative signs. Strips
-- out dollar signs and commas.
--
-- Use 'readMaybe' rather than 'reads'. The Read instance of Decimal
-- returns an ambiguous parse; 'readMaybe' will use the parse that
-- consumes the entire string if there is one.
decimal :: Parser D.Decimal
decimal = do
  dollarSign
  maybeNeg <- optional (P.char '-')
  let isNeg = maybe False (const True) maybeNeg
  dollarSign
  space
  dollarSign
  whole <- digits
  _ <- P.char '.'
  frac <- digits
  let numStr = whole ++ "." ++ frac
  dec <- case readMaybe (whole ++ "." ++ frac) of
    Nothing -> fail $ "could not parse decimal: " ++ numStr
    Just r -> return r
  return $ if isNeg then negate dec else dec

#ifdef test

prop_Decimal :: DecimalRen -> QP.Result
prop_Decimal = testRendered decimal . unDecimalRen

newtype DecimalRen = DecimalRen { unDecimalRen :: Rendered D.Decimal }
  deriving (Eq, Ord, Show)

instance Arbitrary DecimalRen where
  arbitrary = do
    dec <- Q.oneof [genDollars, genShares]
    ren <- showDecimalWithSign dec
    return . DecimalRen . Rendered dec $ ren

-- | Shows a decimal, with commas. Does not show the negative sign.

showDecimalNoSign :: D.Decimal -> String
showDecimalNoSign dec =
  let shown = show . abs $ dec
      (whole, frac) = case splitOn "." shown of
        x:xs:[] -> (x, xs)
        _ -> error "unexpected split result"
      commaed = reverse
              . concat
              . intersperse ","
              . chunksOf 3
              . reverse
              $ whole
  in commaed ++ "." ++ frac

-- | Show decimal, with sign. Randomly adds a dollar sign.
showDecimalWithSign :: D.Decimal -> Gen String
showDecimalWithSign d = fmap f arbitrary
  where
    f dolSign =
      let noSign = showDecimalNoSign d
          withDols = if dolSign then '$' : noSign else noSign
      in if d < 0 then "- " ++ withDols else withDols

testRendered :: (Eq a, Show a) => Parser a -> Rendered a -> QP.Result
testRendered p (Rendered tgt rend) =
  case P.parse p "" rend of
    Left e -> QP.failed { QP.reason = "parse failed: " ++ show e
                          ++ "target: " ++ show tgt
                          ++ "rendered: " ++ rend }
    Right g ->
      if g == tgt
      then QP.succeeded
      else QP.failed { QP.reason = "parsed not equal to original. "
                       ++ "original: " ++ show tgt
                       ++ " parsed: " ++ show g
                       ++ " rendered: " ++ rend }


#endif

-- | Parses a single date.
date :: Parser T.Day
date = do
  w <- word
  let ws = splitOn "/" w
  case ws of
    m:d:y:[] -> do
      maybeDy <- T.fromGregorianValid <$> safeRead y
                 <*> safeRead m <*> safeRead d
      case maybeDy of
        Nothing -> fail $ "invalid date: " ++ w
        Just dy -> return dy
    _ -> fail $ "could not parse date: " ++ w

#ifdef test

render2digits :: Int -> String
render2digits i = case show i of
  c:[] -> '0':c:[]
  c -> c

newtype DayRen = DayRen { unDayRen :: Rendered T.Day }
  deriving (Eq, Ord, Show)

instance Arbitrary DayRen where
  arbitrary =
    let lower = fromEnum $ T.fromGregorian 1900 1 1
        upper = fromEnum $ T.fromGregorian 2100 12 31
        f dtNum =
          let dt = toEnum dtNum
              (yr, mo, da) = T.toGregorian dt
              dtStr = render2digits mo ++ "/"
                      ++ render2digits da ++ "/"
                      ++ show yr
          in DayRen $ Rendered dt dtStr
    in fmap f $ Q.choose (lower, upper)

prop_day :: DayRen -> QP.Result
prop_day = testRendered date . unDayRen

#endif

safeRead :: Read x => String -> Parser x
safeRead s = case readMaybe s of
  Just x -> return x
  _ -> fail $ "could not read string: " ++ s

columnBreak :: Parser ()
columnBreak = () <$ P.many (P.char ' ')


class Pretty a where
  pretty :: a -> Y.Doc

instance Pretty D.Decimal where
  pretty = Y.text . show

instance Pretty String where
  pretty = Y.text

instance Pretty [String] where
  pretty = Y.hsep . map Y.text

instance Pretty T.Day where
  pretty = Y.text . show

-- | A list of words that indicates the transaction type.  Each string
-- in this list will not have any spaces in it.
type TxnType = [String]

data BySourcePosting = BySourcePosting
  { bspPayrollOffice :: String
  , bspPostingDate :: T.Day
  , bspTxnType :: TxnType
  , bspTraditional :: Dollars
  , bspRoth :: Dollars
  , bspAutomatic :: Dollars
  , bspMatching :: Dollars
  , bspTotal :: Dollars
  } deriving (Eq, Ord, Show)

#ifdef test


data RenTxnBySource = RenTxnBySource
  { unRenTxnBySource :: Rendered BySourcePosting }
  deriving (Eq, Ord, Show)


instance Arbitrary RenTxnBySource where
  arbitrary = do
    rPayroll <- genWord
    rPostingDate <- fmap unDayRen arbitrary
    rTxnType <- genWords
    rTrad <- fmap unDollarsRen arbitrary
    rRoth <- fmap unDollarsRen arbitrary
    rAuto <- fmap unDollarsRen arbitrary
    rMatch <- fmap unDollarsRen arbitrary
    rTot <- fmap unDollarsRen arbitrary
    let rAst = BySourcePosting (ast rPayroll) (ast rPostingDate)
          (ast rTxnType) (ast rTrad) (ast rRoth) (ast rAuto)
          (ast rMatch) (ast rTot)
        clmns = [ return $ rendering rPayroll       , columnSpaceOne
                , return $ rendering rPostingDate   , columnSpaceOne
                , return $ rendering rTxnType       , columnSpaceTwo
                , return $ rendering rTrad          , columnSpaceOne
                , return $ rendering rRoth          , columnSpaceOne
                , return $ rendering rAuto          , columnSpaceOne
                , return $ rendering rMatch         , columnSpaceOne
                , return $ rendering rTot
                ]
    ren <- fmap concat . sequence $ clmns
    leader <- fmap (flip replicate ' ') Q.arbitrarySizedIntegral
    return . RenTxnBySource $ Rendered rAst (leader ++ ren ++ "\n")

prop_txnBySource :: RenTxnBySource -> QP.Result
prop_txnBySource = testRendered txnBySource . unRenTxnBySource

genInterleaved :: Gen a -> [a] -> Gen [a]
genInterleaved g = sequence . intersperse g . map return

-- | Generates at least two spaces.
columnSpaceTwo :: Gen String
columnSpaceTwo = fmap (f . abs) Q.arbitrarySizedIntegral
  where
    f i = "  " ++ replicate i ' '

-- | Generates at least one space.
columnSpaceOne :: Gen String
columnSpaceOne = fmap (f . abs) Q.arbitrarySizedIntegral
  where
    f i = " " ++ replicate i ' '

columns :: [String] -> Gen String
columns = fmap concat . genInterleaved columnSpaceTwo


#endif

label :: Pretty p => String -> p -> Y.Doc
label l p = Y.text l <> Y.text ": " <> pretty p

instance Pretty BySourcePosting where
  pretty x = Y.vcat
    [ label "Payroll office" (bspPayrollOffice x)
    , label "Posting date" (bspPostingDate x)
    , label "Transaction type" (bspTxnType x)
    , label "Traditional" (bspTraditional x)
    , label "Roth" (bspRoth x)
    , label "Automatic" (bspAutomatic x)
    , label "Matching" (bspMatching x)
    , label "Total" (bspTotal x)
    ]

txnBySource :: Parser BySourcePosting
txnBySource
  = BySourcePosting
  <$ P.many (P.char ' ')
  <*> word                              -- payroll office
  <* columnBreak
  <*> date                              -- date
  <* columnBreak
  <*> words                             -- transaction type
  <* columnBreak
  <*> decimal                             -- traditional
  <* columnBreak
  <*> decimal                             -- roth (words - can be negative)
  <* columnBreak
  <*> decimal                             -- automatic (can be negative)
  <* columnBreak
  <*> decimal                             -- matching (can be negative)
  <* columnBreak
  <*> decimal                             -- total (can be negative)
  <* P.char '\n'

-- | The TSP statement has several lines in the @YOUR TRANSACTION
-- DETAIL BY SOURCE@ section that contain summary data: @Beginning
-- Balance@, @Gain or Loss This Quarter@, and @Ending Balance@. Since
-- the columns in these lines are all the same they are all
-- represented by this single type. Type synonyms
-- 'BySourceBeginningBal', 'BySourceGainLoss', and
-- 'BySourceEndingBal' are used as appropriate.
data BySourceSummary = BySourceSummary
  { bssTraditional :: Dollars
  , bssRoth :: Dollars
  , bssAuto :: Dollars
  , bssMatching :: Dollars
  , bssTotal :: Dollars
  } deriving (Eq, Ord, Show)

#ifdef test

genTxnBySourceSummary
  :: [String]
  -- ^ Description. This is a list of words. It should not contain any
  -- spaces.
  -> Gen (Rendered BySourceSummary)
genTxnBySourceSummary desc = do
  rTrad <- fmap unDollarsRen arbitrary
  rRoth <- fmap unDollarsRen arbitrary
  rAuto <- fmap unDollarsRen arbitrary
  rMatch <- fmap unDollarsRen arbitrary
  rTot <- fmap unDollarsRen arbitrary
  leader <- fmap (flip replicate ' ') Q.arbitrarySizedIntegral
  let rAst = BySourceSummary (ast rTrad) (ast rRoth) (ast rAuto)
        (ast rMatch) (ast rTot)
  ren <- columns [ concat . intersperse " " $ desc,
                   rendering rTrad, rendering rRoth,
                   rendering rAuto, rendering rMatch,
                   rendering rTot ]
  return $ Rendered rAst (leader ++ ren ++ "\n")

prop_txnsBySourceSummary :: WordsRen -> QP.Property
prop_txnsBySourceSummary wr = do
  let ws = ast . unWordsRen $ wr
      wordsStr = concat . intersperse " " $ ws
  QP.forAll (genTxnBySourceSummary ws)
    (testRendered (txnsBySourceSummary wordsStr))

#endif

instance Pretty BySourceSummary where
  pretty x = Y.vcat
    [ label "Traditional" (bssTraditional x)
    , label "Roth" (bssRoth x)
    , label "Automatic" (bssAuto x)
    , label "Matching" (bssMatching x)
    , label "Total" (bssTotal x)
    ]

-- | @YOUR TRANSACTION DETAIL BY SOURCE@ Beginning Balance.
type BySourceBeginningBal = BySourceSummary

-- | @YOUR TRANSACTION DETAIL BY SOURCE@ Gain or Loss This Quarter.
type BySourceGainLoss = BySourceSummary

-- | @YOUR TRANSACTION DETAIL BY SOURCE@ Ending Balance.
type BySourceEndingBal = BySourceSummary


txnsBySourceSummary :: String -> Parser BySourceSummary
txnsBySourceSummary s
  = BySourceSummary
  <$ P.many (P.char ' ')
  <* P.string s              -- Description
  <* columnBreak
  <*> decimal                -- Traditional
  <* columnBreak
  <*> decimal                -- Roth
  <* columnBreak
  <*> decimal                -- Automatic
  <* columnBreak
  <*> decimal                -- Matching
  <* columnBreak
  <*> decimal                -- Total
  <* P.char '\n'

--
-- Transaction detail by fund
--

fundName :: Parser [String]
fundName = do
  _ <- P.many (P.char ' ')
  ws <- words
  if last ws == "Fund"
    then P.char '\n' *> return ws
    else fail "not a fund name"

#ifdef test

newtype FundNameRen = FundNameRen
  { unFundNameRen :: Rendered [String] }
  deriving (Eq, Ord, Show)

instance Arbitrary FundNameRen where
  arbitrary = do
    ws <- fmap unWordsRen arbitrary
    let ws' = ws { ast = ast ws ++ ["Fund"] }
        ws'' = ws' { rendering = rendering ws ++ " Fund\n" }
    return $ FundNameRen ws''

prop_fundName :: FundNameRen -> QP.Result
prop_fundName = testRendered fundName . unFundNameRen

#endif

-- | A single posting in the @YOUR TRANSACTION DETAIL BY FUND@ section.
data ByFundPosting = ByFundPosting
  { bfpPostingDate :: T.Day
  , bfpTxnType :: [String]
  , bfpTraditional :: Dollars
  , bfpRoth :: Dollars
  , bfpTotal :: Dollars
  , bfpSharePrice :: Dollars
  , bfpNumShares :: Shares
  } deriving (Eq, Ord, Show)

#ifdef test

genTxnByFund :: Gen (Rendered ByFundPosting)
genTxnByFund = do
  rdy <- fmap unDayRen arbitrary
  rty <- genWords
  rtrad <- fmap unDollarsRen arbitrary
  rroth <- fmap unDollarsRen arbitrary
  rtot <- fmap unDollarsRen arbitrary
  rpri <- fmap unDollarsRen arbitrary
  rsha <- fmap unDollarsRen arbitrary
  leader <- fmap (flip replicate ' ') Q.arbitrarySizedIntegral
  let rAst = ByFundPosting (ast rdy) (ast rty) (ast rtrad) (ast rroth)
                       (ast rtot) (ast rpri) (ast rsha)
  let clmns = [ return $ rendering rdy            , columnSpaceOne
              , return $ rendering rty            , columnSpaceTwo
              , return $ rendering rtrad          , columnSpaceOne
              , return $ rendering rroth          , columnSpaceOne
              , return $ rendering rtot           , columnSpaceOne
              , return $ rendering rpri           , columnSpaceOne
              , return $ rendering rsha
              ]
  ren <- fmap concat . sequence $ clmns
  return $ Rendered rAst (leader ++ ren ++ "\n")

prop_txnByFund :: QP.Property
prop_txnByFund = QP.forAll genTxnByFund $ testRendered txnByFund

#endif

instance Pretty ByFundPosting where
  pretty x = Y.vcat
    [ label "Posting date" (bfpPostingDate x)
    , label "Transaction type" (bfpTxnType x)
    , label "Traditional" (bfpTraditional x)
    , label "Roth" (bfpRoth x)
    , label "Total" (bfpTotal x)
    , label "Share price" (bfpSharePrice x)
    , label "Number of shares" (bfpNumShares x)
    ]

txnByFund :: Parser ByFundPosting
txnByFund
  = ByFundPosting
  <$ P.many (P.char ' ')
  <*> date                    -- Posting date
  <* columnBreak
  <*> words                   -- Transaction type
  <* columnBreak
  <*> decimal                   -- Traditional
  <* columnBreak
  <*> decimal                   -- Roth
  <* columnBreak
  <*> decimal                   -- Total
  <* columnBreak
  <*> decimal                   -- Share price
  <* columnBreak
  <*> decimal                   -- Number of shares
  <* P.char '\n'

-- | The beginning balance in a @YOUR TRANSACTION DETAIL BY FUND@
-- section.
data ByFundBeginningBal = ByFundBeginningBal
  { bfbbSharePrice :: Dollars
  , bfbbNumShares :: Shares
  , bfbbDollarBalance :: Dollars
  } deriving (Eq, Ord, Show)

#ifdef test

genByFundBeginningBal :: Gen (Rendered ByFundBeginningBal)
genByFundBeginningBal = do
  rpr <- fmap unDollarsRen arbitrary
  rsha <- fmap unSharesRen arbitrary
  rbal <- fmap unDollarsRen arbitrary
  leader <- fmap (flip replicate ' ') Q.arbitrarySizedIntegral
  let rAst = ByFundBeginningBal (ast rpr) (ast rsha) (ast rbal)
  ren <- columns [ "Beginning Balance",
                   rendering rpr, rendering rsha, rendering rbal ]
  return $ Rendered rAst (leader ++ ren ++ "\n")

prop_byFundBeginningBal :: QP.Property
prop_byFundBeginningBal = QP.forAll genByFundBeginningBal
  $ testRendered byFundBeginningBal

#endif

instance Pretty ByFundBeginningBal where
  pretty x = Y.vcat
    [ label "Share price" (bfbbSharePrice x)
    , label "Number of shares" (bfbbNumShares x)
    , label "Dollar balance" (bfbbDollarBalance x)
    ]

byFundBeginningBal :: Parser ByFundBeginningBal
byFundBeginningBal
  = ByFundBeginningBal
  <$ P.many (P.char ' ')
  <* P.string "Beginning Balance"
  <* columnBreak
  <*> decimal                   -- Share Price
  <* columnBreak
  <*> decimal                   -- Number of Shares
  <* columnBreak
  <*> decimal                   -- Dollar Balance
  <* P.char '\n'

-- | Gain or Loss This Quarter in the @YOUR TRANSACTION DETAIL BY
-- FUND@ section.
data ByFundGainLoss = ByFundGainLoss
  { bfglDollarBalance :: Dollars }
  deriving (Eq, Ord, Show)

#ifdef test

genByFundGainLoss :: Gen (Rendered ByFundGainLoss)
genByFundGainLoss = do
  rbal <- fmap unDollarsRen arbitrary
  leader <- fmap (flip replicate ' ') Q.arbitrarySizedIntegral
  let rAst = ByFundGainLoss (ast rbal)
  ren <- columns [ "Gain or Loss This Quarter", rendering rbal ]
  return $ Rendered rAst (leader ++ ren ++ "\n")

prop_byFundGainLoss :: QP.Property
prop_byFundGainLoss = QP.forAll genByFundGainLoss
  $ testRendered byFundGainLoss

#endif

instance Pretty ByFundGainLoss where
  pretty x = label "Dollar balance" (bfglDollarBalance x)

byFundGainLoss :: Parser ByFundGainLoss
byFundGainLoss
  = ByFundGainLoss
  <$ P.many (P.char ' ')
  <* P.string "Gain or Loss This Quarter"
  <* columnBreak
  <*> decimal
  <* P.char '\n'

-- | Ending balance in the @YOUR TRANSACTION DETAIL BY FUND@ section.
data ByFundEndingBal = ByFundEndingBal
  { bfebSharePrice :: Dollars
  , bfebNumShares :: Shares
  , bfebDollarBalance :: Dollars
  } deriving (Eq, Ord, Show)

#ifdef test

genByFundEndingBal :: Gen (Rendered ByFundEndingBal)
genByFundEndingBal = do
  rpr <- fmap unDollarsRen arbitrary
  rsha <- fmap unSharesRen arbitrary
  rbal <- fmap unDollarsRen arbitrary
  leader <- fmap (flip replicate ' ') Q.arbitrarySizedIntegral
  let rAst = ByFundEndingBal (ast rpr) (ast rsha) (ast rbal)
  ren <- columns [ "Ending Balance",
                   rendering rpr, rendering rsha, rendering rbal ]
  return $ Rendered rAst (leader ++ ren ++ "\n")

prop_byFundEndingBal :: QP.Property
prop_byFundEndingBal = QP.forAll genByFundEndingBal
  $ testRendered byFundEndingBal

#endif

instance Pretty ByFundEndingBal where
  pretty x = Y.vcat
    [ label "Share price" (bfebSharePrice x)
    , label "Number of shares" (bfebNumShares x)
    , label "Dollar balance" (bfebDollarBalance x)
    ]

byFundEndingBal :: Parser ByFundEndingBal
byFundEndingBal
  = ByFundEndingBal
  <$ P.many (P.char ' ')
  <* P.string "Ending Balance"
  <* columnBreak
  <*> decimal                   -- Share Price
  <* columnBreak
  <*> decimal                   -- Number of shares
  <* columnBreak
  <*> decimal                   -- Dollar balance
  <* P.char '\n'  

-- | Represents the entire @YOUR TRANSACTION DETAIL BY SOURCE@ section.
data BySource = BySource
  { bsBeginningBal :: BySourceBeginningBal
  , bsTxns :: [BySourcePosting]
  , bsGainLoss :: BySourceGainLoss
  , bsEndingBal :: BySourceEndingBal
  } deriving (Eq, Ord, Show)

#ifdef test

maxSize :: Int -> Gen a -> Gen a
maxSize i g = Q.sized (\s -> Q.resize (min s i) g)

vectorMaxOf :: Int -> Gen a -> Gen [a]
vectorMaxOf i g = Q.sized $ \s -> do
  let maxLen = min s i
  len <- Q.choose (0, maxLen)
  Q.vectorOf len g

vectorMaxOf1 :: Int -> Gen a -> Gen [a]
vectorMaxOf1 i g = Q.sized $ \s -> do
  let maxLen = min s i
  len <- Q.choose (1, (max 1 maxLen))
  Q.vectorOf len g

-- | Generates a garbage line, which should be discarded. Some of
-- these are lines with only a form feed (ASCII 0x0C) as pdftotext
-- generates some of these.
genGarbageLine :: Gen String
genGarbageLine = Q.oneof
  [ fmap (++ "\n")
    $ maxSize 30 (Q.listOf
        (genChar `Q.suchThat` (not . (`elem` "\n\x0C"))))
  , return "\x0C"
  ]


addJunkLines :: [String] -> Gen [String]
addJunkLines ls = do
  firstLines <- maxSize 3 (Q.listOf genGarbageLine)
  restLines <- genInterleaved genGarbageLine ls
  return $ firstLines ++ restLines


genTxnDetailsBySource :: Gen (Rendered BySource)
genTxnDetailsBySource = do
  rBeginningBal <- genTxnBySourceSummary
    (Prelude.words "Beginning Balance")
  rTxnList <- Q.listOf arbitrary
  let rTxns = map unRenTxnBySource rTxnList
  rGainLoss <- genTxnBySourceSummary
    (Prelude.words "Gain or Loss This Quarter")
  rEndingBal <- genTxnBySourceSummary
    (Prelude.words "Ending Balance")
  let hdr = "YOUR TRANSACTION DETAIL BY SOURCE\n"
      renLines = hdr : rendering rBeginningBal : map rendering rTxns
                 ++ [rendering rGainLoss, rendering rEndingBal]
  renWithJunk <- addJunkLines renLines
  let rAst = BySource (ast rBeginningBal)
              (map ast rTxns) (ast rGainLoss) (ast rEndingBal)
  return $ Rendered rAst (concat renWithJunk)

prop_transactionDetailsBySource :: QP.Property
prop_transactionDetailsBySource = QP.forAll genTxnDetailsBySource
  $ testRendered txnDetailBySourceSection

#endif

instance Pretty BySource where
  pretty x = Y.vcat . Y.punctuate (Y.text "\n") $
    [ Y.hang "Beginning balance:" 2
             (pretty . bsBeginningBal $ x)

    , Y.hang "Transactions:" 2
             (Y.vcat . Y.punctuate (Y.text "\n")
                     . map pretty . bsTxns $ x)

    , Y.hang "Gain or loss:" 2
             (pretty . bsGainLoss $ x)

    , Y.hang "Ending balance:" 2
             (pretty . bsEndingBal $ x)
    ]

skipLine :: Parser ()
skipLine
  = P.many (P.noneOf "\n\x0C")
  >> (P.char '\n' <|> P.char '\x0C')
  >> return ()
  <?> "skip line"

-- | Runs the given parser. If it fails without consuming any input,
-- skip the current line. Keeps running the given parser until it
-- succeeds.
--
-- Do not wrap this parser in the Parsec many or many1 parsers or the
-- like; it probably will not do what you expect. When it gets to end
-- of file, it will consume the remaining junk lines, and then fail
-- after consuming input. This will cause many to fail while consuming
-- input, and it will not return the items that skipLinesThrough has
-- parsed so far.  You can wrap skipLinesThrough in 'try', but
-- remember that then the trailing last junk lines will not be parsed
-- (which might be what you want anyway.)
skipLinesThrough :: Parser a -> Parser a
skipLinesThrough p = do
  r <- optional p
  case r of
    Nothing -> skipLine *> skipLinesThrough p
    Just g -> return g

-- | Runs the end parser. If it succeeds, returns all values parsed so
-- far and the value of the end parser. If it fails without consuming
-- any input, runs the main parser. If the main parser succeeds,
-- recurses. If the main parser fails without consuming any input,
-- skips a line and recurses.
parseLinesThrough
  :: Parser body
  -> Parser end
  -> Parser ([body], end)
parseLinesThrough b e = do
  maybeE <- optional e
  case maybeE of
    Nothing -> do
      maybeB <- optional b
      case maybeB of
        Nothing -> skipLine >> parseLinesThrough b e
        Just bdy -> do
          (bs, end) <- parseLinesThrough b e
          return (bdy:bs, end)
    Just end -> return ([], end)
          

txnDetailBySourceSection :: Parser BySource
txnDetailBySourceSection = do
  _ <- skipLinesThrough
       (P.try (P.string "YOUR TRANSACTION DETAIL BY SOURCE")
              <?> "transaction detail header line")
  begBal <- skipLinesThrough
            (P.try (txnsBySourceSummary "Beginning Balance")
                   <?> "Beginning balance line")
  (txns, gainLoss) <- parseLinesThrough
    (P.try txnBySource <?> "transaction by source")
    (P.try (txnsBySourceSummary "Gain or Loss This Quarter")
           <?> "transaction by source summary")
  endBal <- skipLinesThrough
    (P.try (txnsBySourceSummary "Ending Balance")
           <?> "transactions by source summary")
  return $ BySource begBal txns gainLoss endBal

-- | The name of a fund, eg @C Fund@. This is a list of words; each
-- word will not contain any spaces.
type FundName = [String]

-- | A single fund in the @YOUR TRANSACTION DETAIL BY FUND@ section
-- (e.g. the @G Fund@, @L 2040 Fund@, etc.)
data ByFund = ByFund
  { bfFundName :: FundName
  , bfBeginningBal :: ByFundBeginningBal
  , bfPostings :: [ByFundPosting]
  , bfGainLoss :: ByFundGainLoss
  , bfEndingBal :: ByFundEndingBal
  } deriving (Eq, Ord, Show)

#ifdef test

genTxnDetailOneFund :: Gen (Rendered ByFund)
genTxnDetailOneFund = do
  rFund <- fmap unFundNameRen arbitrary
  rBeg <- genByFundBeginningBal
  rTxns <- Q.listOf genTxnByFund
  rGain <- genByFundGainLoss
  rEnd <- genByFundEndingBal
  let rAst = ByFund (ast rFund) (ast rBeg)
             (map ast rTxns) (ast rGain) (ast rEnd)
      renLines = rendering rFund : rendering rBeg
                 : map rendering rTxns
                 ++ [ rendering rGain, rendering rEnd]
  renWithJunk <- addJunkLines renLines
  return $ Rendered rAst (concat renWithJunk)

prop_transactionDetailOneFund :: QP.Property
prop_transactionDetailOneFund = QP.forAll genTxnDetailOneFund
  $ testRendered txnDetailOneFund

#endif

instance Pretty ByFund where
  pretty x = Y.vcat
    [ label "Fund name" (bfFundName x)
    , Y.hang "Beginning balance:" 2 (pretty . bfBeginningBal $ x)
    , Y.hang "Transactions:" 2
             (Y.vcat . Y.punctuate "\n" . map pretty
                     . bfPostings $ x)
    , Y.hang "Gain or loss:" 2 (pretty . bfGainLoss $ x)
    , Y.hang "Ending balance:" 2 (pretty . bfEndingBal $ x)
    ]

txnDetailOneFund :: Parser ByFund
txnDetailOneFund = do
  name <- skipLinesThrough (P.try fundName <?> "fund name")
  begBal <- skipLinesThrough
    (P.try byFundBeginningBal <?> "By fund beginning balance")
  (txns, gainLoss) <- parseLinesThrough
    (P.try txnByFund <?> "transaction by fund")
    (P.try byFundGainLoss <?> "by fund gain or loss")
  endBal <- skipLinesThrough
    (P.try byFundEndingBal <?> "by fund ending balance")
  return $ ByFund name begBal txns gainLoss endBal

#ifdef test

genTxnDetailsAllFunds :: Gen (Rendered [ByFund])
genTxnDetailsAllFunds = do
  rFunds <- vectorMaxOf1 5 genTxnDetailOneFund
  let hdr = "YOUR TRANSACTION DETAIL BY FUND\n"
      renLines = hdr : map rendering rFunds
      rAst = map ast rFunds
  withJunk <- addJunkLines renLines
  return $ Rendered rAst (concat withJunk)

prop_txnDetailsAllFunds :: QP.Property
prop_txnDetailsAllFunds = QP.forAll genTxnDetailsAllFunds
  $ testRendered txnDetailsAllFunds

#endif

txnDetailsAllFunds :: Parser [ByFund]
txnDetailsAllFunds
  = skipLinesThrough
      (P.try (P.string "YOUR TRANSACTION DETAIL BY FUND")
             <?> "transaction detail by fund header")
  *> P.many (P.try (skipLinesThrough (P.try txnDetailOneFund
                                     <?> "transaction details section")))

#ifdef test

genTspStatement :: Gen (Rendered TspStatement)
genTspStatement = do
  rBySource <- genTxnDetailsBySource
  rByFund <- genTxnDetailsAllFunds
  let rAst = TspStatement (ast rBySource) (ast rByFund)
      renLines = [rendering rBySource, rendering rByFund]
  withJunk <- addJunkLines renLines
  moreJunk <- vectorMaxOf 3 genGarbageLine
  return $ Rendered rAst ((concat withJunk) ++ concat moreJunk)

prop_parseTsp :: QP.Property
prop_parseTsp = QP.forAll genTspStatement
  $ testRendered parseTsp

#endif

-- | All data that is parsed from the TSP statement is in this
-- type. The parser does not attempt to parse any of the data that is
-- on Page 1 of the PDF; most of this data all appears elsewhere on
-- the statement and can be calculated using the data that is in this
-- type (and besides, the data on Page 1 is in a multi-column format
-- that would be difficult to parse; since the data is all elsewhere,
-- it's not worth the effort.) One exception is the investment
-- allocation for future contributions, which does not appear
-- elsewhere.
--
-- In addition, the statement contains a quarterly account summary.
-- This also is not parsed because it can be derived from all the data
-- that is elsewhere on the statement.
data TspStatement = TspStatement
  { tspDetailBySource :: BySource
  , tspDetailByFund :: [ByFund]
  } deriving (Eq, Ord, Show)

instance Pretty TspStatement where
  pretty x = Y.vcat
    [ "DETAIL BY SOURCE"
    , pretty (tspDetailBySource x)
    , "\n"
    , Y.vcat . Y.punctuate "\n" . map pretty
      . tspDetailByFund $ x
    ]

-- | Parses a plain text TSP statement.  The input must be generated
-- by the pdftotext program.  This library was tested against
-- pdftotext version 0.18.4, which came with Debian Wheezy.
parseTsp :: Parser TspStatement
parseTsp
  = TspStatement
  <$> txnDetailBySourceSection
  <*> txnDetailsAllFunds

readTspFile :: String -> IO String
readTspFile s = readProcess "pdftotext"
                            ["-layout", "-enc", "ASCII7", s, "-"] ""


-- | Parses a TSP statement from a file.  This function relies upon
-- the @pdftotext@ program.  This program must exist somewhere in your
-- PATH.  This library was tested against pdftotext version 0.18.4,
-- which came with Debian Wheezy.
parseTspFromFile
  :: String
  -- ^ Filename
  -> IO TspStatement
parseTspFromFile fn = do
  s <- readTspFile fn
  case P.parse parseTsp fn s of
    Left e -> fail . show $ e
    Right g -> return g

#ifdef test
runAllTests :: IO Bool
runAllTests = $quickCheckAll
#endif