{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeSynonymInstances #-}
-- |
-- Module      :  Documentation.Haddock.Parser.Monad
-- Copyright   :  (c) Alec Theriault 2018-2019,
-- License     :  BSD-like
--
-- Maintainer  :  haddock@projects.haskell.org
-- Stability   :  experimental
-- Portability :  portable
--
-- Defines the Parsec monad over which all parsing is done and also provides
-- more efficient versions of the usual parsec combinator functions (but
-- specialized to 'Text').

module Documentation.Haddock.Parser.Monad where

import qualified Text.Parsec.Char as Parsec
import qualified Text.Parsec as Parsec
import           Text.Parsec.Pos             ( updatePosChar )
import           Text.Parsec                 ( State(..)
                                             , getParserState, setParserState )

import qualified Data.Text as T
import           Data.Text                   ( Text )

import           Control.Monad               ( mfilter )
import           Data.String                 ( IsString(..) )
import           Data.Bits                   ( Bits(..) )
import           Data.Char                   ( ord )
import           Data.List                   ( foldl' )
import           Control.Applicative as App

import           Documentation.Haddock.Types ( Version )

import           Prelude hiding (takeWhile)
import           CompatPrelude

-- | The only bit of information we really care about truding along with us
-- through parsing is the version attached to a @\@since@ annotation - if
-- the doc even contained one.
newtype ParserState = ParserState {
  ParserState -> Maybe Version
parserStateSince :: Maybe Version
} deriving (ParserState -> ParserState -> Bool
(ParserState -> ParserState -> Bool)
-> (ParserState -> ParserState -> Bool) -> Eq ParserState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ParserState -> ParserState -> Bool
$c/= :: ParserState -> ParserState -> Bool
== :: ParserState -> ParserState -> Bool
$c== :: ParserState -> ParserState -> Bool
Eq, Int -> ParserState -> ShowS
[ParserState] -> ShowS
ParserState -> String
(Int -> ParserState -> ShowS)
-> (ParserState -> String)
-> ([ParserState] -> ShowS)
-> Show ParserState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ParserState] -> ShowS
$cshowList :: [ParserState] -> ShowS
show :: ParserState -> String
$cshow :: ParserState -> String
showsPrec :: Int -> ParserState -> ShowS
$cshowsPrec :: Int -> ParserState -> ShowS
Show)

initialParserState :: ParserState
initialParserState :: ParserState
initialParserState = Maybe Version -> ParserState
ParserState Maybe Version
forall a. Maybe a
Nothing

setSince :: Version -> Parser ()
setSince :: Version -> Parser ()
setSince Version
since = (ParserState -> ParserState) -> Parser ()
forall (m :: * -> *) u s. Monad m => (u -> u) -> ParsecT s u m ()
Parsec.modifyState (\ParserState
st -> ParserState
st{ parserStateSince :: Maybe Version
parserStateSince = Version -> Maybe Version
forall a. a -> Maybe a
Just Version
since })

type Parser = Parsec.Parsec Text ParserState

instance (a ~ Text) => IsString (Parser a) where
  fromString :: String -> Parser a
fromString = (String -> Text)
-> ParsecT Text ParserState Identity String
-> ParsecT Text ParserState Identity Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Text
T.pack (ParsecT Text ParserState Identity String
 -> ParsecT Text ParserState Identity Text)
-> (String -> ParsecT Text ParserState Identity String)
-> String
-> ParsecT Text ParserState Identity Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ParsecT Text ParserState Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
Parsec.string

parseOnly :: Parser a -> Text -> Either String (ParserState, a)
parseOnly :: Parser a -> Text -> Either String (ParserState, a)
parseOnly Parser a
p Text
t = case Parsec Text ParserState (a, ParserState)
-> ParserState
-> String
-> Text
-> Either ParseError (a, ParserState)
forall s t u a.
Stream s Identity t =>
Parsec s u a -> u -> String -> s -> Either ParseError a
Parsec.runParser Parsec Text ParserState (a, ParserState)
p' ParserState
initialParserState String
"<haddock>" Text
t of
                  Left ParseError
e -> String -> Either String (ParserState, a)
forall a b. a -> Either a b
Left (ParseError -> String
forall a. Show a => a -> String
show ParseError
e)
                  Right (a
x,ParserState
s) -> (ParserState, a) -> Either String (ParserState, a)
forall a b. b -> Either a b
Right (ParserState
s,a
x)
  where p' :: Parsec Text ParserState (a, ParserState)
p' = (,) (a -> ParserState -> (a, ParserState))
-> Parser a
-> ParsecT
     Text ParserState Identity (ParserState -> (a, ParserState))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p ParsecT Text ParserState Identity (ParserState -> (a, ParserState))
-> ParsecT Text ParserState Identity ParserState
-> Parsec Text ParserState (a, ParserState)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParsecT Text ParserState Identity ParserState
forall (m :: * -> *) s u. Monad m => ParsecT s u m u
Parsec.getState

-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
-- consume input.
--
-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
-- more efficient.
peekChar :: Parser (Maybe Char)
peekChar :: Parser (Maybe Char)
peekChar = Text -> Maybe Char
headOpt (Text -> Maybe Char)
-> (State Text ParserState -> Text)
-> State Text ParserState
-> Maybe Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. State Text ParserState -> Text
forall s u. State s u -> s
stateInput (State Text ParserState -> Maybe Char)
-> ParsecT Text ParserState Identity (State Text ParserState)
-> Parser (Maybe Char)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u. Monad m => ParsecT s u m (State s u)
getParserState
  where headOpt :: Text -> Maybe Char
headOpt Text
t | Text -> Bool
T.null Text
t = Maybe Char
forall a. Maybe a
Nothing
                  | Bool
otherwise = Char -> Maybe Char
forall a. a -> Maybe a
Just (Text -> Char
T.head Text
t)
{-# INLINE peekChar #-}

-- | Fails if at the end of input. Does not consume input.
--
-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
peekChar' :: Parser Char
peekChar' :: Parser Char
peekChar' = Text -> Parser Char
forall s u (m :: * -> *). Text -> ParsecT s u m Char
headFail (Text -> Parser Char)
-> (State Text ParserState -> Text)
-> State Text ParserState
-> Parser Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. State Text ParserState -> Text
forall s u. State s u -> s
stateInput (State Text ParserState -> Parser Char)
-> ParsecT Text ParserState Identity (State Text ParserState)
-> Parser Char
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u. Monad m => ParsecT s u m (State s u)
getParserState
  where headFail :: Text -> ParsecT s u m Char
headFail Text
t | Text -> Bool
T.null Text
t = String -> ParsecT s u m Char
forall s u (m :: * -> *) a. String -> ParsecT s u m a
Parsec.parserFail String
"peekChar': reached EOF"
                   | Bool
otherwise = Char -> ParsecT s u m Char
forall (f :: * -> *) a. Applicative f => a -> f a
App.pure (Text -> Char
T.head Text
t)
{-# INLINE peekChar' #-}

-- | Parses the given string. Returns the parsed string.
--
-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
string :: Text -> Parser Text
string :: Text -> ParsecT Text ParserState Identity Text
string Text
t = do
  s :: State Text ParserState
s@State{ stateInput :: forall s u. State s u -> s
stateInput = Text
inp, statePos :: forall s u. State s u -> SourcePos
statePos = SourcePos
pos } <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u. Monad m => ParsecT s u m (State s u)
getParserState
  case Text -> Text -> Maybe Text
T.stripPrefix Text
t Text
inp of
    Maybe Text
Nothing -> String -> ParsecT Text ParserState Identity Text
forall s u (m :: * -> *) a. String -> ParsecT s u m a
Parsec.parserFail String
"string: Failed to match the input string"
    Just Text
inp' ->
      let pos' :: SourcePos
pos' = (SourcePos -> Char -> SourcePos) -> SourcePos -> Text -> SourcePos
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Text
t
          s' :: State Text ParserState
s' = State Text ParserState
s{ stateInput :: Text
stateInput = Text
inp', statePos :: SourcePos
statePos = SourcePos
pos' }
      in State Text ParserState
-> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u.
Monad m =>
State s u -> ParsecT s u m (State s u)
setParserState State Text ParserState
s' ParsecT Text ParserState Identity (State Text ParserState)
-> Text -> ParsecT Text ParserState Identity Text
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Text
t

-- | Keep matching characters as long as the predicate function holds (and
-- return them).
--
-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile :: (Char -> Bool) -> ParsecT Text ParserState Identity Text
takeWhile Char -> Bool
f = do
  s :: State Text ParserState
s@State{ stateInput :: forall s u. State s u -> s
stateInput = Text
inp, statePos :: forall s u. State s u -> SourcePos
statePos = SourcePos
pos } <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u. Monad m => ParsecT s u m (State s u)
getParserState
  let (Text
t, Text
inp') = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
f Text
inp
      pos' :: SourcePos
pos' = (SourcePos -> Char -> SourcePos) -> SourcePos -> Text -> SourcePos
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Text
t
      s' :: State Text ParserState
s' = State Text ParserState
s{ stateInput :: Text
stateInput = Text
inp', statePos :: SourcePos
statePos = SourcePos
pos' }
  State Text ParserState
-> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u.
Monad m =>
State s u -> ParsecT s u m (State s u)
setParserState State Text ParserState
s' ParsecT Text ParserState Identity (State Text ParserState)
-> Text -> ParsecT Text ParserState Identity Text
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Text
t

-- | Like 'takeWhile', but fails if no characters matched.
--
-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 :: (Char -> Bool) -> ParsecT Text ParserState Identity Text
takeWhile1 = (Text -> Bool)
-> ParsecT Text ParserState Identity Text
-> ParsecT Text ParserState Identity Text
forall (m :: * -> *) a. MonadPlus m => (a -> Bool) -> m a -> m a
mfilter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) (ParsecT Text ParserState Identity Text
 -> ParsecT Text ParserState Identity Text)
-> ((Char -> Bool) -> ParsecT Text ParserState Identity Text)
-> (Char -> Bool)
-> ParsecT Text ParserState Identity Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ParsecT Text ParserState Identity Text
takeWhile

-- | Scan the input text, accumulating characters as long as the scanning
-- function returns true.
scan :: (s -> Char -> Maybe s) -- ^ scan function
     -> s                      -- ^ initial state
     -> Parser Text 
scan :: (s -> Char -> Maybe s)
-> s -> ParsecT Text ParserState Identity Text
scan s -> Char -> Maybe s
f s
st = do
  s :: State Text ParserState
s@State{ stateInput :: forall s u. State s u -> s
stateInput = Text
inp, statePos :: forall s u. State s u -> SourcePos
statePos = SourcePos
pos } <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u. Monad m => ParsecT s u m (State s u)
getParserState
  Text
-> s
-> SourcePos
-> Int
-> (Text
    -> SourcePos -> Int -> ParsecT Text ParserState Identity Text)
-> ParsecT Text ParserState Identity Text
go Text
inp s
st SourcePos
pos Int
0 ((Text
  -> SourcePos -> Int -> ParsecT Text ParserState Identity Text)
 -> ParsecT Text ParserState Identity Text)
-> (Text
    -> SourcePos -> Int -> ParsecT Text ParserState Identity Text)
-> ParsecT Text ParserState Identity Text
forall a b. (a -> b) -> a -> b
$ \Text
inp' SourcePos
pos' Int
n ->
    let s' :: State Text ParserState
s' = State Text ParserState
s{ stateInput :: Text
Parsec.stateInput = Text
inp', statePos :: SourcePos
Parsec.statePos = SourcePos
pos' }
    in State Text ParserState
-> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u.
Monad m =>
State s u -> ParsecT s u m (State s u)
setParserState State Text ParserState
s' ParsecT Text ParserState Identity (State Text ParserState)
-> Text -> ParsecT Text ParserState Identity Text
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Int -> Text -> Text
T.take Int
n Text
inp
  where
    go :: Text
-> s
-> SourcePos
-> Int
-> (Text
    -> SourcePos -> Int -> ParsecT Text ParserState Identity Text)
-> ParsecT Text ParserState Identity Text
go Text
inp s
s !SourcePos
pos !Int
n Text -> SourcePos -> Int -> ParsecT Text ParserState Identity Text
cont
      = case Text -> Maybe (Char, Text)
T.uncons Text
inp of
          Maybe (Char, Text)
Nothing -> Text -> SourcePos -> Int -> ParsecT Text ParserState Identity Text
cont Text
inp SourcePos
pos Int
n        -- ran out of input
          Just (Char
c, Text
inp') ->
             case s -> Char -> Maybe s
f s
s Char
c of
               Maybe s
Nothing -> Text -> SourcePos -> Int -> ParsecT Text ParserState Identity Text
cont Text
inp SourcePos
pos Int
n   -- scan function failed
               Just s
s' -> Text
-> s
-> SourcePos
-> Int
-> (Text
    -> SourcePos -> Int -> ParsecT Text ParserState Identity Text)
-> ParsecT Text ParserState Identity Text
go Text
inp' s
s' (SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
c) (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Text -> SourcePos -> Int -> ParsecT Text ParserState Identity Text
cont


-- | Parse a decimal number.
decimal :: Integral a => Parser a
decimal :: Parser a
decimal = (a -> Char -> a) -> a -> String -> a
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' a -> Char -> a
forall a. Num a => a -> Char -> a
step a
0 (String -> a)
-> ParsecT Text ParserState Identity String -> Parser a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Parser Char -> ParsecT Text ParserState Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
Parsec.many1 Parser Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
Parsec.digit
  where step :: a -> Char -> a
step a
a Char
c = a
a a -> a -> a
forall a. Num a => a -> a -> a
* a
10 a -> a -> a
forall a. Num a => a -> a -> a
+ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
48)

-- | Parse a hexadecimal number.
hexadecimal :: (Integral a, Bits a) => Parser a
hexadecimal :: Parser a
hexadecimal = (a -> Char -> a) -> a -> String -> a
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' a -> Char -> a
forall a. (Bits a, Num a) => a -> Char -> a
step a
0 (String -> a)
-> ParsecT Text ParserState Identity String -> Parser a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Parser Char -> ParsecT Text ParserState Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
Parsec.many1 Parser Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
Parsec.hexDigit 
  where
  step :: a -> Char -> a
step a
a Char
c | Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
48 Bool -> Bool -> Bool
&& Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
57  = (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
48)
           | Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
97             = (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
87)
           | Bool
otherwise           = (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
55)
    where w :: Int
w = Char -> Int
ord Char
c