{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-| Module: Data.Gedcom Description: Parser for the GEDCOM genealogy format Copyright: (c) Callum Lowcay, 2017 License: BSD3 Maintainer: cwslowcay@gmail.com Stability: experimental Portability: GHC -} module Data.Gedcom ( -- * Functions parseGedcomString, parseGedcomFile, GDError (..), gdLookup, GDRef, XRefTable, GDRefError (..), GDXRefID, module Data.Gedcom.Structure ) where import Control.Applicative import Data.Dynamic import Data.Either import Data.Gedcom.Common import Data.Gedcom.LineParser import Data.Gedcom.ParseMonads import Data.Gedcom.Parser import Data.Gedcom.Structure import Data.Maybe import Data.Monoid import Data.Text.Encoding.ANSEL import qualified Data.ByteString as B import qualified Data.Map as M import qualified Data.Text.All as T import Text.Megaparsec -- | A table of cross references newtype XRefTable = XRefTable (M.Map GDXRefID Dynamic) deriving Show -- | Lookup up a reference in the cross reference table gdLookup :: forall a. Typeable a => GDRef a -- ^ The reference to look up -> XRefTable -- ^ The table to look up in -> Either GDRefError a -- ^ The value or an error gdLookup (GDStructure x) _ = Right x gdLookup (GDXRef thisID) (XRefTable table) = case M.lookup thisID table of Nothing -> Left$ RefNotPresent thisID Just (dynamic) -> case fromDynamic dynamic of Nothing -> Left$ WrongRefType (dynTypeRep dynamic) (typeRep (Proxy :: Proxy a)) Just v -> Right v -- | Parse Gedcom data from a ByteString parseGedcomString :: Maybe String -- ^ The filename from which the string was read -> B.ByteString -- ^ The string to parse -> Either GDError (Gedcom, XRefTable) -- ^ The Gedcom data and cross reference table, or an error parseGedcomString mfilename intext = let filename = fromMaybe "<>" mfilename anselTree = runParser gdRoot filename . decodeANSEL$ intext utf8Tree = runParser gdRoot filename . T.decodeUtf8$ intext utf16LETree = runParser gdRoot filename . T.decodeUtf16LE$ intext utf16BETree = runParser gdRoot filename . T.decodeUtf16BE$ intext encodings = [anselTree, utf8Tree, utf16LETree, utf16BETree] charset = foldr (<|>) Nothing . fmap getCharset$ encodings trees = case charset of Nothing -> [] Just (Charset "ANSEL" _) -> [anselTree] Just (Charset "UTF-8" _) -> [utf8Tree] Just (Charset "UNICODE" _) -> [utf8Tree, utf16LETree, utf16BETree] Just (Charset "ASCII" _) -> [utf8Tree, anselTree] Just (Charset _ _) -> [anselTree, utf8Tree] in case partitionEithers trees of ([], []) -> Left . LineFormatError$ "Invalid format (is " <> (T.show filename) <> " really a gedcom file?)" (err:_, []) -> Left . LineFormatError . T.show$ err (_, dtrees) -> case partitionEithers.fmap doParseGedcom$ dtrees of ([], []) -> Left . LineFormatError$ "Unknown character encoding" (err:_, []) -> Left err (_, (gd, table):_) -> Right (gd, XRefTable table) where doParseGedcom tree = case parseGedcom tree of (Left err, _) -> Left err (Right v, table) -> Right (v, table) getCharset (Right (GDRoot (headTree:_))) = case runStructure$ parseHeader headTree of (Right (Right header), _) -> Just$ headerCharset header _ -> Nothing getCharset _ = Nothing -- | Parse Gedcom data from a file parseGedcomFile :: FilePath -- ^ The file to read -> IO (Either GDError (Gedcom, XRefTable)) -- ^ The Gedcom data and cross reference table, or an error parseGedcomFile path = parseGedcomString (Just path) <$> B.readFile path