{-# LANGUAGE PatternGuards #-} {-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Input.MODS -- Copyright : (c) Andrea Rossato -- License : BSD-style (see LICENSE) -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- An ugly MODS parser -- ----------------------------------------------------------------------------- module Text.CSL.Input.MODS where import Text.CSL.Output.Plain ( (<+>) ) import Text.CSL.Parser ( xpIElem, readXmlFile ) import Text.CSL.Reference import Text.XML.HXT.Arrow.Pickle.Xml import Data.Char ( isDigit ) -- | Read a file with a single MODS record. readModsFile :: FilePath -> IO Reference readModsFile f = readXmlFile xpMods f -- | Read a file with a collection of MODS records. readModsColletionFile :: FilePath -> IO [Reference] readModsColletionFile f = readXmlFile xpModsCollection f xpModsCollection :: PU [Reference] xpModsCollection = xpElem "modsCollection" $ xpList xpMods xpMods :: PU Reference xpMods = xpElem "mods" xpReference xpReference :: PU Reference xpReference = xpWrap ( \ ( ref , (ck,ty,ti) ,((au,ed,tr),(re,it,pu'),(co,ce)) , (di,pu,pp) , (ac,uri) ) -> ref { citeKey = ck `betterThen` take 10 (concat $ words ti) , refType = if refType ref /= NoType then refType ref else ty , title = ti , author = au , editor = ed `betterThen` editor ref , translator = tr `betterThen` translator ref , recipient = re `betterThen` recipient ref , interviewer = it `betterThen` interviewer ref , publisher = pu `betterThen` publisher ref `betterThen` pu' , composer = co `betterThen` composer ref , collectionEditor = ce `betterThen` collectionEditor ref , containerAuthor = containerAuthor ref , issued = issued ref `betterThen` di , publisherPlace = pp `betterThen` publisherPlace ref , url = uri , accessed = ac , page = page ref , volume = volume ref , issue = issue ref , number = number ref , section = section ref , chapterNumber = chapterNumber ref } , \r -> ( r , (citeKey r, refType r, title r) ,((author r, editor r, translator r) ,(recipient r, interviewer r, publisher r) ,(composer r, collectionEditor r)) , (issued r, publisher r, publisherPlace r) , (accessed r, url r) )) $ xp5Tuple (xpDefault emptyReference xpRelatedItem) (xpTriple xpCiteKey xpRefType xpTitle ) xpAgents xpOrigin xpUrl xpCiteKey :: PU String xpCiteKey = xpDefault [] $ xpChoice (xpAttr "ID" xpText) (xpIElem "identifier" xpText) xpLift xpOrigin :: PU ([RefDate],[Agent],String) xpOrigin = xpDefault ([],[],[]) . xpIElem "originInfo" $ xpTriple (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateIssued" xpText0) (xpDefault [] $ xpList $ xpWrap (Entity, show) $ xpIElem "publisher" xpText0) (xpDefault [] $ xpIElem "place" xpText0) xpRefType :: PU RefType xpRefType = xpDefault NoType $ xpWrap (readType, show) xpGenre where readType t | "conference publication" <- t = PaperConference | "periodical" <- t = ArticleJournal | otherwise = Book xpRefType' :: PU RefType xpRefType' = xpDefault NoType $ xpWrap (readTypeIn, show) xpGenre where readTypeIn t | "book" <- t = Chapter | "conference publication" <- t = PaperConference | otherwise = ArticleJournal xpGenre :: PU String xpGenre = xpWrap (concat, return) $ xpList $ xpIElem "genre" $ xpChoice xpZero (xpPair (xpDefault [] $ xpAttr "authority" xpText) xpText) $ \(a,s) -> if a == "marcgt" then xpLift s else xpGenre xpRelatedItem :: PU Reference xpRelatedItem = xpIElem "relatedItem" . xpAddFixedAttr "type" "host" $ xpWrap ( \( (ty,ct) ,((ca,ed,tr),(re,it,pu'),(co,ce)) ,((di,pg,vl,is),(nu,sc,ch)) , (di',pu,pp) ) -> emptyReference { refType = ty , containerAuthor = ca , containerTitle = ct , editor = ed , translator = tr , recipient = re , interviewer = it , publisher = pu `betterThen` pu' , publisherPlace = pp , composer = co , collectionEditor = ce , issued = di `betterThen` di' , page = pg , volume = vl , issue = is , number = nu , section = sc , chapterNumber = ch } , \r -> ( (refType r, containerTitle r) ,((containerAuthor r, editor r, translator r) ,(recipient r, interviewer r, publisher r) ,(composer r, collectionEditor r)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r)) , (issued r, publisher r, publisherPlace r) )) $ xp4Tuple (xpPair xpRefType' xpTitle) xpAgents xpPart xpOrigin -- FIXME: join title and subtitle correctly xpTitle :: PU String xpTitle = xpWrap (\(t,s) -> t <+> s, \s -> (s,[])) $ xpIElem "titleInfo" $ xpPair (xpIElem "title" xpText0) (xpDefault [] $ xpIElem "subTitle" xpText0) xpAgents :: PU (([Agent],[Agent],[Agent]) ,([Agent],[Agent],[Agent]) ,([Agent],[Agent])) xpAgents = xpTriple (xpTriple (xpAgent "author" "aut") (xpAgent "editor" "edt") (xpAgent "translator" "trl")) (xpTriple (xpAgent "recipient" "rcp") (xpAgent "interviewer" "ivr") (xpAgent "publisher" "pbl")) (xpPair (xpAgent "composer" "cmp") (xpAgent "collector" "xol")) xpAgent :: String -> String -> PU [Agent] xpAgent sa sb = xpDefault [] $ xpList $ xpIElem "name" $ xpChoice xpZero (xpIElem "role" $ xpIElem "roleTerm" xpText0) (\x -> if x == sa || x == sb then xpickle else xpZero) instance XmlPickler Agent where xpickle = xpAlt tag ps where tag (Person {}) = 0 tag (Entity {}) = 1 ps = [ xpWrap ( \(gn,fm) -> Person "" gn "" "" fm "" , \(Person _ gn _ _ fn _) -> (gn,fn)) $ xpAddFixedAttr "type" "personal" xpNameData , xpWrap ( \(_,s) -> Entity s , \(Entity s) -> ([],s)) $ xpAddFixedAttr "type" "corporate" xpNameData ] xpNameData :: PU ([String],String) xpNameData = xpWrap (readName,const []) $ xpList $ xpElem "namePart" $ xpPair (xpAttr "type" xpText) xpText0 where readName x = (readg x, readf x) readf = foldr (\(k,v) xs -> if k == "family" then v else xs) [] readg = foldr (\(k,v) xs -> if k == "given" then v:xs else xs) [] xpPart :: PU (([RefDate],String,String,String) ,(String,String,String)) xpPart = xpDefault none . xpIElem "part" . xpWrap (readIt none,const []) $ xpList xpDetail where none = (([],"","",""),("","","")) readIt r [] = r readIt acc@((d,p,v,i),(n,s,c)) (x:xs) | Date y <- x = readIt ((y,p,v,i),(n,s,c)) xs | Page y <- x = readIt ((d,y,v,i),(n,s,c)) xs | Volume y <- x = readIt ((d,p,y,i),(n,s,c)) xs | Issue y <- x = readIt ((d,p,v,y),(n,s,c)) xs | Number y <- x = readIt ((d,p,v,i),(y,s,c)) xs | ChapterNr y <- x = readIt ((d,p,v,i),(n,s,y)) xs | Section y <- x = readIt ((d,p,v,i),(n,y,c)) xs | otherwise = acc data Detail = Date [RefDate] | Page String | Volume String | Issue String | Number String | ChapterNr String | Section String deriving ( Eq, Show ) xpDetail :: PU Detail xpDetail = xpAlt tag ps where tag _ = 0 ps = [ xpWrap (Date, const []) $ xpDate , xpWrap (Page, show) $ xpPage , xpWrap (Volume, show) $ xp "volume" , xpWrap (Issue, show) $ xp "issue" , xpWrap (Number, show) $ xp "number" , xpWrap (Section, show) $ xp "section" , xpWrap (ChapterNr,show) $ xp "chapter" ] xpDate = xpWrap (readDate,show) (xpElem "date" xpText0) xp s = xpElemWithAttrValue "detail" "type" s $ xpElem "number" xpText xpPage :: PU String xpPage = xpChoice (xpElemWithAttrValue "detail" "type" "page" $ xpIElem "number" xpText) (xpElemWithAttrValue "extent" "unit" "page" $ xpPair (xpElem "start" xpText) (xpElem "end" xpText)) (\(s,e) -> xpLift (s ++ "-" ++ e)) xpUrl :: PU ([RefDate],String) xpUrl = xpDefault ([],[]) . xpIElem "location" $ xpPair (xpWrap (readDate,show) $ xpDefault [] $ xpAttr "dateLastAccessed" xpText) (xpDefault [] $ xpElem "url" xpText) readDate :: String -> [RefDate] readDate s = if takeWhile isDigit s /= [] then return $ RefDate (read $ takeWhile isDigit s) 0 0 [] else [] betterThen :: Eq a => [a] -> [a] -> [a] betterThen a b = if a == [] then b else a