-- | 
-- Module     : Finance.Treasury
-- Copyright  : Copyright (c) 2008, Steve lihn <stevelihn@gmail.com>
-- License    : BSD3
-- Maintainer : Steve lihn <stevelihn@gmail.com>
-- Stability  : experimental
-- Tested with: GHC 6.6.1
--
-- Finance.Treasury is a module to obtain yield curve data from Department
-- of Treasury website.
--
-- Error reporting is somewhat of a mixed model in this module. More
-- improvement is desired in future releases.
--
-- Here is a small complete program illustrating the use of this module:
--
-- >    module Main where
-- >    import Finance.Treasury
-- >    import qualified Data.Map as M
-- >    import Data.List (sort)
-- >    import Text.Printf
-- >    main = do
-- >        putStrLn "**************************************"
-- >        m <- getYieldCurveThisMonth 
-- >        putStrLn "*** pretty print 10y rates for all dates"
-- >        prettyYieldCurve m (Just "10y")
-- >        --
-- >        d <- getLatestYieldCurve 
-- >        putStrLn "*** pretty print the latest daily yield curve"
-- >        prettyYieldCurve (M.fromList [d]) Nothing
-- >        --
-- >        putStrLn "*** show some yield curves of past year"
-- >        h <- getYieldCurveHist
-- >        prettyYieldCurve (minmax h) (Just "10y")
-- >        mapM_ prt yrs 
-- >        putStrLn "**************************************"
-- >        return ()
-- >        where minmax h = M.fromList [ M.findMin h, M.findMax h ]
-- >              yrs = reverse [ 1992..2007 ]
-- >              prt :: Int -> IO ()
-- >              prt yr = do putStrLn $ "*** show some yield curves of "++(show yr)
-- >                          yc <- getYieldCurveYyyy yr
-- >                          prettyYieldCurve (minmax yc) (Just "10y")
-- >                          return ()
--
-- License info: The license is a simple BSD3-style license.  
--

module Finance.Treasury ( 
    -- * Yield curve storage
    YieldCurveMap, 
    DailyYieldCurve, 
    DailyYieldCurveList,
    -- ** Yield curve maturity
    yieldCurveHash, 
    -- * Fetching yield curve data
    getLatestYieldCurve, 
    getYieldCurveThisMonth,
    getYieldCurveHist,
    getYieldCurveYyyy,
    -- * Printing yield curve data
    prettyYieldCurve
  ) where

import Network.URI (parseURI, escapeURIString, isUnescapedInURI)
-- import Network.HTTP
import Network.HTTP.Simple (httpGet)
import Data.List (intersperse, sortBy, sort, null)
import Data.Maybe (listToMaybe, fromJust)
import Data.Char (isDigit)

import qualified Data.Time.Calendar as C
import qualified Data.Map as M
import qualified Data.Tree.Class as DT
import qualified Text.XML.HXT.Parser as XP
import qualified Control.Exception as E
import Text.Printf

baseURL = "http://www.treas.gov/offices/domestic-finance/debt-management/interest-rate/"

-- yield.xml (current month if traded)
-- yield_historical.xml for this year or last year (if January)
-- yield_historical_YYYY.xml for YYYY is last year or earlier
-- up to 1990

yieldURL     = baseURL++"yield.xml"
yieldHistURL = baseURL++"yield_historical.xml"
yieldYyyyURL :: Int -> String
yieldYyyyURL yr = baseURL++"yield_historical_"++s2++".xml"
    where s1 = show yr
          s2 = if length s1 == 4 then s1
                  else error "yieldYyyyURL: "++s1++" is not YYYYY"
         

-- | translates maturity from XML names to abbreviations.
-- E.g. BC_1MONTH becomes 1m.
-- List of all maturities: 1m 3m 6m 1y 2y 3y 5y 7y 10y 20y 30y.
-- However, 30y data may be lacking for some years when the bond was not in circulation. 
yieldCurveHash :: M.Map String String
yieldCurveHash = M.fromList
    [ ( "BC_1MONTH", "1m" ),
      ( "BC_3MONTH", "3m" ),
      ( "BC_6MONTH", "6m" ), 
      ( "BC_1YEAR",  "1y" ), 
      ( "BC_2YEAR",  "2y" ), 
      ( "BC_3YEAR",  "3y" ), 
      ( "BC_5YEAR",  "5y" ), 
      ( "BC_7YEAR",  "7y" ), 
      ( "BC_10YEAR", "10y" ),
      ( "BC_20YEAR", "20y" ),
      ( "BC_30YEAR", "30y" ) ] 

-- | a Map storing the assoc array of maturity to interest rate (in percent)
type DailyYieldCurve     = M.Map String Float
-- | a List storing the tuple of maturity and interest rate (in percent)
type DailyYieldCurveList = [ (String, Float) ]
-- | a Map storing all the daily yield curves
type YieldCurveMap       = M.Map C.Day DailyYieldCurve

-- | fetch the latest daily yield curve from the monthly data.
getLatestYieldCurve :: IO (C.Day, DailyYieldCurve)
getLatestYieldCurve =
    do ycs <- getYieldCurveThisMonth
       return $ M.findMax ycs

-- | fetch the latest monthly data.
-- There is no reason to fail, so it is an error if there is no XML returned
getYieldCurveThisMonth :: IO YieldCurveMap
getYieldCurveThisMonth =
    do jstr <- fetchXML yieldURL
       parseRawWrapper parseRawXML1 jstr

-- | fetch the latest yearly data (excluding current month).
-- There is no reason to fail, so it is an error if there is no XML returned
getYieldCurveHist :: IO YieldCurveMap
getYieldCurveHist =
    do jstr <- fetchXML yieldHistURL
       parseRawWrapper parseRawXML2 jstr

-- | fetch the historical yearly data (excluding current year).
-- If YYYY is not in range (since 1992), this call may fail.
-- Otherwise, there is no reason to fail.
getYieldCurveYyyy :: Int -> IO YieldCurveMap
getYieldCurveYyyy yr =
    do s <- fetchXML $ yieldYyyyURL yr
       parseRawWrapper parseRawXML2 s

--------------------------------------------------------------------
--  helper function
parseRawWrapper parseFunc s =
       case s of
       Nothing -> return M.empty
       Just ss -> do -- FIXME: May need to check if ss =~ <?xml..., error if not
                     -- debug: putStrLn $ take 3000 $ skiphead ss
                     return $ parseFunc $ skiphead ss

parseRawXML1 xmlstr = parseRawXML getGNewDateNodes   getBcNodes   xmlstr
parseRawXML2 xmlstr = parseRawXML getGCurveDateNodes getBc30Nodes xmlstr

parseRawXML getnodes getbc xmlstr = 
    if (not $ M.null m) then m else error "parseRawXML: error empty result" 
    where m = M.fromList $ dtlist $ getnodes xmlstr 
          dtlist :: [XP.XmlTree] -> [(C.Day,DailyYieldCurve)]
          dtlist [] = []
          dtlist (s:xs) = 
              let d  = showdt . getdt $ s
                  yc = parseYieldCurve $ DT.getChildren $ justhead s $ getbc $ s in 
              if ( length d == 10 ) then (mktuple d yc):(dtlist xs)
              else dtlist xs
          mktuple d yc = ( makeDay d , M.fromList yc )
          getdt node = let dt1 = nodematch $ map getNewDateNode $ DT.getChildren node
                           bc  = justhead node $ getbc $ node
                           dt2 = nodematch $ map getNewDateNode $ DT.getChildren $ bc 
                           justdt sx = justhead node sx in
                       if not (null dt1) then justdt dt1
                       else if not (null dt2) then justdt dt2
                            else error $ "getNewDateNode error at \n"++(XP.formatXmlTree node)
          showdt dt = showText $ justhead dt $ DT.getChildren dt
--------------------------------------------------------------------

justhead node (s:sx) = s
justhead node [] = error $ "parse error at \n"++(XP.formatXmlTree node)

-- | print yield curve data in a csv format for storage or testing.
prettyYieldCurve :: YieldCurveMap -> Maybe String -> IO ()
prettyYieldCurve ycm mmat =
    do let ds = sort $ M.keys ycm
           prt1 mat d = do curv <- M.lookup d ycm
                           rate <- M.lookup mat curv
                           printf fmt (show d) mat rate
           prt2 d = do curv <- M.lookup d ycm
                       mapM_ (prt4 d) $ sorted curv
           prt0 d = case mmat of
                        Nothing  -> prt2 d
                        Just mat -> prt1 mat d
       mapM_ prt0 ds
    where prt4 d (a,b) = printf fmt (show d) a b
          sorted yc = sortByMaturity $ M.toList yc
          fmt = "%s, %3s, %5.2f\n"
-- -----------------------------------------------------------------
-- -----------------------------------------------------------------
-- Private helper functions

-- expect MM-DD-YYYY
makeDay :: String -> C.Day
makeDay s = 
    let a = splitc '-' s in
    case (length s == 10 && length a == 3 && validint a a) of 
    False -> error("date field " ++ s ++ " malformed")
    True  -> let y = read (a!!2) :: Integer
                 m = read (a!!0) :: Int
                 d = read (a!!1) :: Int in
             C.fromGregorian y m d

fetchXML :: String -> IO (Maybe String)
fetchXML url = 
    case parseURI url of
        Nothing  -> error("uri malformed:" ++ url)
        Just uri -> httpGet uri

-- helper function to sort keys (maturity) in yield curve
sortByMaturity :: DailyYieldCurveList -> DailyYieldCurveList
sortByMaturity yc = sortBy yc_sort yc

-- helper function, sort by maturity in correct order (month then year)
yc_sort (a1,a2) (b1,b2) = compare (convkey a1) (convkey b1)

-- helper function for yc_sort
convkey :: String -> Int
convkey k = 
    if not $ validint [k1] [k1] then 0
    else k2 * (if d == 'y' then 1000 else 100)
    where k1 = reverse $ tail $ reverse k
          k2 = read k1 :: Int
          d = head $ reverse k
  

parseYieldCurve :: [ XP.XmlTree ] -> [(String,Float)]
parseYieldCurve [] = []
parseYieldCurve (s:sx) =
    case (M.lookup lp mx) of
      Nothing -> parseYieldCurve sx
      Just p  -> if (ts == [] || ls == "N/A" || (not $ validnum ls ls)) then parseYieldCurve sx
                 else if yd > 0 then (p,yd):(parseYieldCurve sx)
                      else parseYieldCurve sx
    where mx = yieldCurveHash 
          lp = XP.localPartOf s
          ts = DT.getChildren s
          ls = rmspace $ showText $ justhead s $ ts 
          yd = read (fixnum ls) :: Float 

-- HXT XML Parser combinators

getGNewDateNodes :: String -> [ XP.XmlTree ]
getGNewDateNodes s = gettags $ head $ if sx == [] then error err else sx
    where gettags = XP.deep ( XP.isTag "G_NEW_DATE")
          sx = XP.xread s
          err = "getGNewDateNodes: error empty result"
          
getGCurveDateNodes :: String -> [ XP.XmlTree ]
getGCurveDateNodes s = gettags $ head $ if sx == [] then error err else sx
    where gettags = XP.deep ( XP.isTag "G_BID_CURVE_DATE")
          sx = XP.xread s
          err = "getGCurveDateNodes: error empty result"

getNewDateNode = XP.isTag "NEW_DATE" 

-- FIXME: I am just lazy, this is quick, but may have performance cost
getBcNodes   = XP.deep ( XP.isTag "G_BC_CAT" )

getBc30Nodes = XP.deep ( XP.isTag "G_BC_30YEAR" )

-- FIXME: This is a hack to XHT, I just can't find the right function to show text
showText :: XP.XmlTree -> String
showText t = showt n
    where n = DT.getNode t
          showt (XP.XText a) = a
          showt _ = ""

-- FIXME: There is probably a better way to do this!
nodematch :: [[a]] -> [a]
nodematch [] = []
nodematch xs@(n:nx) = case n of
    []  -> nodematch $ nx
    [a] -> a:(nodematch $ nx)
    _   -> error "nodematch error, there is a problem with XML syntax"


------------------------------------------------------------
-- auxillary functions --

-- skip <?xml> and <!-- tags, HXT does not take them well
skiphead :: String -> String
skiphead [] = [] 
skiphead [a,b] = [a,b] 
skiphead s@(c1:c2:sx) = 
    if c1 == '<' && not ( c2 == '?' || c2 == '!' ) then s
        else skiphead (c2:sx)

splitc :: Char -> String -> [String]
splitc delim s = 
    if null rest then [token]
        else token : splitc delim (tail rest)
    where (token,rest) = span (/=delim) s

join :: String -> [String] -> String
join sep = concat . intersperse sep

validint y [] = True 
validint y x@(s:sx) = 
    if validc s then validint y sx
                else error $ "invalid date list: "++(show y)
    where validc [] = True
          validc (s:sx) = isDigit s && validc sx

validnum y x = 
    if validc x then True
                else error $ "invalid number: "++(show y)
    where validc [] = True
          validc (s:sx) = (isDigit s || s == '.') && validc sx

-- prepend 0 in numbers such as .9, otherwise read will fail
fixnum [] = [] 
fixnum s@(c:cx) = if c=='.' then ('0':s) else s

-- remove leading spaces
rmspace :: String -> String
rmspace [] = []
rmspace s@(c:sx) = if c == ' ' then rmspace sx else s