{- |
The parsers in this module also skip trailing spaces.
-}
module Text.BibTeX.Parse (
   file,
   comment,
   entry,
   assignment,
   value,
   texSequence,
   texBlock,
   identifier,
   bibIdentifier,

   -- utility functions
   skippingSpace,
   skippingLeadingSpace,

   splitCommaSepList,
   splitAuthorList,
   splitSepList,
   ) where

import qualified Text.BibTeX.Entry as Entry

import qualified Text.ParserCombinators.Parsec.Token as T
import qualified Text.ParserCombinators.Parsec.Language as L
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec
   (CharParser, Parser,
    (<|>), alphaNum, digit, letter, char, noneOf, oneOf,
    between, many, many1, sepEndBy, )

import Control.Monad (liftM, liftM2, liftM3, )

import Data.List.HT (chop, )


lexer :: T.TokenParser st
lexer :: forall st. TokenParser st
lexer =
   forall s (m :: * -> *) u.
Stream s m Char =>
GenLanguageDef s u m -> GenTokenParser s u m
T.makeTokenParser forall a b. (a -> b) -> a -> b
$ forall st. LanguageDef st
L.emptyDef {
      commentLine :: String
L.commentLine = String
"%",
      identStart :: ParsecT String st Identity Char
L.identStart = forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
alphaNum,
      identLetter :: ParsecT String st Identity Char
L.identLetter = forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
alphaNum
   }


identifier, comma, equals :: CharParser st String
identifier :: forall st. CharParser st String
identifier = forall s u (m :: * -> *).
GenTokenParser s u m -> ParsecT s u m String
T.identifier forall st. TokenParser st
lexer
comma :: forall st. CharParser st String
comma = forall s u (m :: * -> *).
GenTokenParser s u m -> ParsecT s u m String
T.comma forall st. TokenParser st
lexer
equals :: forall st. CharParser st String
equals = forall s u (m :: * -> *).
GenTokenParser s u m -> String -> ParsecT s u m String
T.symbol forall st. TokenParser st
lexer String
"="

braces, lexeme :: CharParser st a -> CharParser st a
braces :: forall st a. CharParser st a -> CharParser st a
braces = forall s u (m :: * -> *).
GenTokenParser s u m
-> forall a. ParsecT s u m a -> ParsecT s u m a
T.braces forall st. TokenParser st
lexer
lexeme :: forall st a. CharParser st a -> CharParser st a
lexeme = forall s u (m :: * -> *).
GenTokenParser s u m
-> forall a. ParsecT s u m a -> ParsecT s u m a
T.lexeme forall st. TokenParser st
lexer

{- |
Beware that this and all other parsers do not accept leading spaces,
cf. 'skippingSpace'.
That is when encountering leading white spaces
the parser will just return an empty list.
If you want to parse a file that contains entirely of BibTeX data
you better call @skippingLeadingSpace file@ instead.
However, the @file@ parser is more combinable
and can be used for files that contain both BibTeX and other data
or it can be used for automated filetype checking.
-}
file :: Parser [Entry.T]
file :: Parser [T]
file = Parser String
comment forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
sepEndBy Parser T
entry Parser String
comment


comment :: Parser String
comment :: Parser String
comment = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many forall a b. (a -> b) -> a -> b
$ forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
"@"


{- |
Parse a BibTeX entry like

> @article{author2010title,
>   author = {Firstname Surname},
>   title = {Title},
>   year = 2010,
>   month = jul,
> }

.
-}
entry :: Parser Entry.T
entry :: Parser T
entry =
   do String
entryType <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'@' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall st. CharParser st String
identifier
      forall st a. CharParser st a -> CharParser st a
braces forall a b. (a -> b) -> a -> b
$
         forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 (String -> String -> [(String, String)] -> T
Entry.Cons String
entryType)
            (forall tok st a. GenParser tok st a -> GenParser tok st a
Parsec.try Parser String
bibIdentifier)
            (forall st. CharParser st String
comma forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
sepEndBy Parser (String, String)
assignment forall st. CharParser st String
comma)

{- |
Parse an assignment like

> author = {Firstname Surname}

.
-}
assignment :: Parser (String, String)
assignment :: Parser (String, String)
assignment =
   forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 (,)
      Parser String
bibIdentifier
      (forall st. CharParser st String
equals forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser String
value)

{- |
Parse a value like

> jul

or

> 2010

or

> {Firstname Surname}

or

> "Firstname Surname"

.
-}
value :: Parser String
value :: Parser String
value =
   forall st a. CharParser st a -> CharParser st a
lexeme (forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
letter) forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> -- for fields like: month = jul
   forall st a. CharParser st a -> CharParser st a
lexeme (forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit)  forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> -- for fields like: year = 2010
   forall st a. CharParser st a -> CharParser st a
braces (Char -> Parser String
texSequence Char
'}') forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
   forall st a. CharParser st a -> CharParser st a
lexeme (forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'"') (forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'"') (Char -> Parser String
texSequence Char
'"'))

{- |
Parse a sequence of 'texBlock's until the occurrence of a closing character.
The closing character is not part of the result.
-}
texSequence :: Char -> Parser String
texSequence :: Char -> Parser String
texSequence Char
closeChar =
   forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (Char -> Parser String
texBlock Char
closeChar))

{- |
Parse a single character like @a@,
a LaTeX macro call like @\\alpha@
or a block enclosed in curly braces like @{\\\"{a}bc}@.
-}
texBlock :: Char -> Parser String
texBlock :: Char -> Parser String
texBlock Char
closeChar =
   forall (m :: * -> *) a1 a2 a3 r.
Monad m =>
(a1 -> a2 -> a3 -> r) -> m a1 -> m a2 -> m a3 -> m r
liftM3 (\Char
open String
body Char
close -> Char
open forall a. a -> [a] -> [a]
: String
body forall a. [a] -> [a] -> [a]
++ Char
close forall a. a -> [a] -> [a]
: [])
      (forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{') (Char -> Parser String
texSequence Char
'}') (forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}') forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
   forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence
      [forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\\',
       forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
oneOf String
"_{}[]$|'`^&%\".,~# " forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
letter] forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
   forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a. a -> [a] -> [a]
:[]) (forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf [Char
closeChar])


{- |
Parse a name of a BibTeX entry like @author2010title@.
-}
bibIdentifier :: Parser String
bibIdentifier :: Parser String
bibIdentifier =
   forall st a. CharParser st a -> CharParser st a
lexeme forall a b. (a -> b) -> a -> b
$
   forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 (:) (forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
alphaNum forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'_') (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
alphaNum forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
oneOf String
"&;:-_.?+/"))


{- |
Extends a parser, such that all trailing spaces are skipped.
It might be more comfortable to skip all leading spaces,
but parser written that way are hard to combine.
This is so, since if you run two parsers in parallel
and both of them expect leading spaces,
then the parser combinator does not know
which one of the parallel parsers to choose.

See also: 'lexeme'.
-}
skippingSpace :: Parser a -> Parser a
skippingSpace :: forall a. Parser a -> Parser a
skippingSpace Parser a
p =
   do a
x <- Parser a
p
      forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
Parsec.skipMany forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
Parsec.space
      forall (m :: * -> *) a. Monad m => a -> m a
return a
x

skippingLeadingSpace :: Parser a -> Parser a
skippingLeadingSpace :: forall a. Parser a -> Parser a
skippingLeadingSpace Parser a
p =
   forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
Parsec.skipMany forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
Parsec.space forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser a
p


-- * Convert contents of BibTeX fields into lists

{- |
Split a string at the commas and remove leading spaces.
-}
splitCommaSepList :: String -> [String]
splitCommaSepList :: String -> [String]
splitCommaSepList = Char -> String -> [String]
splitSepList Char
','

{- |
Split a string containing a list of authors in BibTeX notation.
-}
splitAuthorList :: String -> [String]
splitAuthorList :: String -> [String]
splitAuthorList =
   forall a b. (a -> b) -> [a] -> [b]
map [String] -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [[a]]
chop (String
"and" forall a. Eq a => a -> a -> Bool
==) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words

splitSepList :: Char -> String -> [String]
splitSepList :: Char -> String -> [String]
splitSepList Char
sep =
   forall a b. (a -> b) -> [a] -> [b]
map (forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char
' 'forall a. Eq a => a -> a -> Bool
==)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [[a]]
chop (Char
sepforall a. Eq a => a -> a -> Bool
==)