{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ViewPatterns #-}
-- |
-- Module      :  Documentation.Haddock.Parser.Identifier
-- Copyright   :  (c) Alec Theriault 2019,
-- License     :  BSD-like
--
-- Maintainer  :  haddock@projects.haskell.org
-- Stability   :  experimental
-- Portability :  portable
--
-- Functionality for parsing identifiers and operators

module Documentation.Haddock.Parser.Identifier (
  Identifier(..),
  parseValid,
) where

import Documentation.Haddock.Types           ( Namespace(..) )
import Documentation.Haddock.Parser.Monad
import qualified Text.Parsec as Parsec
import           Text.Parsec.Pos             ( updatePosChar )
import           Text.Parsec                 ( State(..)
                                             , getParserState, setParserState )

import Data.Text (Text)
import qualified Data.Text as T

import           Data.Char (isAlpha, isAlphaNum)
import Control.Monad (guard)
import Data.Maybe
import CompatPrelude

-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
data Identifier = Identifier !Namespace !Char String !Char
  deriving (Int -> Identifier -> ShowS
[Identifier] -> ShowS
Identifier -> String
(Int -> Identifier -> ShowS)
-> (Identifier -> String)
-> ([Identifier] -> ShowS)
-> Show Identifier
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Identifier] -> ShowS
$cshowList :: [Identifier] -> ShowS
show :: Identifier -> String
$cshow :: Identifier -> String
showsPrec :: Int -> Identifier -> ShowS
$cshowsPrec :: Int -> Identifier -> ShowS
Show, Identifier -> Identifier -> Bool
(Identifier -> Identifier -> Bool)
-> (Identifier -> Identifier -> Bool) -> Eq Identifier
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Identifier -> Identifier -> Bool
$c/= :: Identifier -> Identifier -> Bool
== :: Identifier -> Identifier -> Bool
$c== :: Identifier -> Identifier -> Bool
Eq)

parseValid :: Parser Identifier
parseValid :: Parser Identifier
parseValid = do
  s :: State Text ParserState
s@State{ stateInput :: forall s u. State s u -> s
stateInput = Text
inp, statePos :: forall s u. State s u -> SourcePos
statePos = SourcePos
pos } <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u. Monad m => ParsecT s u m (State s u)
getParserState

  case Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier Text
inp of
    Maybe (Namespace, Char, Text, Char, Text)
Nothing -> String -> Parser Identifier
forall s u (m :: * -> *) a. String -> ParsecT s u m a
Parsec.parserFail String
"parseValid: Failed to match a valid identifier"
    Just (Namespace
ns, Char
op, Text
ident, Char
cl, Text
inp') ->
      let posOp :: SourcePos
posOp = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
op
          posIdent :: SourcePos
posIdent = (SourcePos -> Char -> SourcePos) -> SourcePos -> Text -> SourcePos
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl SourcePos -> Char -> SourcePos
updatePosChar SourcePos
posOp Text
ident
          posCl :: SourcePos
posCl = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
posIdent Char
cl
          s' :: State Text ParserState
s' = State Text ParserState
s{ stateInput :: Text
stateInput = Text
inp', statePos :: SourcePos
statePos = SourcePos
posCl }
      in State Text ParserState
-> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: * -> *) s u.
Monad m =>
State s u -> ParsecT s u m (State s u)
setParserState State Text ParserState
s' ParsecT Text ParserState Identity (State Text ParserState)
-> Identifier -> Parser Identifier
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Namespace -> Char -> String -> Char -> Identifier
Identifier Namespace
ns Char
op (Text -> String
T.unpack Text
ident) Char
cl


-- | Try to parse a delimited identifier off the front of the given input.
--
-- This tries to match as many valid Haskell identifiers/operators as possible,
-- to the point of sometimes accepting invalid things (ex: keywords). Some
-- considerations:
--
--   - operators and identifiers can have module qualifications
--   - operators can be wrapped in parens (for prefix)
--   - identifiers can be wrapped in backticks (for infix)
--   - delimiters are backticks or regular ticks
--   - since regular ticks are also valid in identifiers, we opt for the
--     longest successful parse
--
-- This function should make /O(1)/ allocations
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier Text
input = [(Namespace, Char, Text, Char, Text)]
-> Maybe (Namespace, Char, Text, Char, Text)
forall a. [a] -> Maybe a
listToMaybe ([(Namespace, Char, Text, Char, Text)]
 -> Maybe (Namespace, Char, Text, Char, Text))
-> [(Namespace, Char, Text, Char, Text)]
-> Maybe (Namespace, Char, Text, Char, Text)
forall a b. (a -> b) -> a -> b
$ do

    -- Optional namespace
    let (Namespace
ns, Text
input') = case Text -> Maybe (Char, Text)
T.uncons Text
input of
                         Just (Char
'v', Text
i) -> (Namespace
Value, Text
i)
                         Just (Char
't', Text
i) -> (Namespace
Type, Text
i)
                         Maybe (Char, Text)
_             -> (Namespace
None, Text
input)

    -- Opening tick
    (Char
op, Text
input'') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
input')
    Bool -> [()]
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Char
op Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' Bool -> Bool -> Bool
|| Char
op Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'`')

    -- Identifier/operator
    (Text
ident, Text
input''') <- Text -> [(Text, Text)]
wrapped Text
input''

    -- Closing tick
    (Char
cl, Text
input'''') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
input''')
    Bool -> [()]
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Char
cl Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' Bool -> Bool -> Bool
|| Char
cl Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'`')

    (Namespace, Char, Text, Char, Text)
-> [(Namespace, Char, Text, Char, Text)]
forall (m :: * -> *) a. Monad m => a -> m a
return (Namespace
ns, Char
op, Text
ident, Char
cl, Text
input'''')

  where

    -- | Parse out a wrapped, possibly qualified, operator or identifier
    wrapped :: Text -> [(Text, Text)]
wrapped Text
t = do
      (Char
c, Text
t'  ) <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
t)
      -- Tuples
      case Char
c of
        Char
'(' | Just (Char
c', Text
_) <- Text -> Maybe (Char, Text)
T.uncons Text
t'
            , Char
c' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
',' Bool -> Bool -> Bool
|| Char
c' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
')'
            -> do let (Text
commas, Text
t'') = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
',') Text
t'
                  (Char
')', Text
t''') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
t'')
                  (Text, Text) -> [(Text, Text)]
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Text -> Text
T.take (Text -> Int
T.length Text
commas Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) Text
t, Text
t''')

        -- Parenthesized
        Char
'(' -> do (Int
n,   Text
t'' ) <- Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False Int
0 [] Text
t'
                  (Char
')', Text
t''') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
t'')
                  (Text, Text) -> [(Text, Text)]
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Text -> Text
T.take (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) Text
t, Text
t''')

        -- Backticked
        Char
'`' -> do (Int
n,   Text
t'' ) <- Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False Int
0 [] Text
t'
                  (Char
'`', Text
t''') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
t'')
                  (Text, Text) -> [(Text, Text)]
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Text -> Text
T.take (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) Text
t, Text
t''')

        -- Unadorned
        Char
_   -> do (Int
n,   Text
t'' ) <- Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False Int
0 [] Text
t
                  (Text, Text) -> [(Text, Text)]
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Text -> Text
T.take Int
n Text
t, Text
t'')

    -- | Parse out a possibly qualified operator or identifier
    general :: Bool           -- ^ refuse inputs starting with operators
            -> Int            -- ^ total characters \"consumed\" so far
            -> [(Int, Text)]  -- ^ accumulated results
            -> Text           -- ^ current input
            -> [(Int, Text)]  -- ^ total characters parsed & what remains
    general :: Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general !Bool
identOnly !Int
i [(Int, Text)]
acc Text
t
      -- Starts with an identifier (either just an identifier, or a module qual)
      | Just (Int
n, Text
rest) <- Text -> Maybe (Int, Text)
identLike Text
t
      = if Text -> Bool
T.null Text
rest
          then [(Int, Text)]
acc
          else case Text -> Char
T.head Text
rest of
                 Char
'`' -> (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest) (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc
                 Char
')' -> (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest) (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc
                 Char
'.' -> Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [(Int, Text)]
acc (Text -> Text
T.tail Text
rest)
                 Char
'\'' -> let (Int
m, Text
rest') = Text -> (Int, Text)
quotes Text
rest
                         in Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
True (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) ((Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest') (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc) (Text -> Text
T.tail Text
rest')
                 Char
_ -> [(Int, Text)]
acc

      -- An operator
      | Just (Int
n, Text
rest) <- Text -> Maybe (Int, Text)
optr Text
t
      , Bool -> Bool
not Bool
identOnly
      = (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest) (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc

      -- Anything else
      | Bool
otherwise
      = [(Int, Text)]
acc

    -- | Parse an identifier off the front of the input
    identLike :: Text -> Maybe (Int, Text)
identLike Text
t
      | Text -> Bool
T.null Text
t = Maybe (Int, Text)
forall a. Maybe a
Nothing
      | Char -> Bool
isAlpha (Text -> Char
T.head Text
t) Bool -> Bool -> Bool
|| Char
'_' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Char
T.head Text
t
      = let !(Text
idt, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span (\Char
c -> Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_') Text
t
            !(Text
octos, Text
rest') = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#') Text
rest
      in (Int, Text) -> Maybe (Int, Text)
forall a. a -> Maybe a
Just (Text -> Int
T.length Text
idt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
octos, Text
rest')
      | Bool
otherwise = Maybe (Int, Text)
forall a. Maybe a
Nothing

    -- | Parse all but the last quote off the front of the input
    -- PRECONDITION: T.head t == '\''
    quotes :: Text -> (Int, Text)
    quotes :: Text -> (Int, Text)
quotes Text
t = let !n :: Int
n = Text -> Int
T.length ((Char -> Bool) -> Text -> Text
T.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'') Text
t) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
               in (Int
n, Int -> Text -> Text
T.drop Int
n Text
t)

    -- | Parse an operator off the front of the input
    optr :: Text -> Maybe (Int, Text)
optr Text
t = let !(Text
op, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
isSymbolChar Text
t
             in if Text -> Bool
T.null Text
op then Maybe (Int, Text)
forall a. Maybe a
Nothing else (Int, Text) -> Maybe (Int, Text)
forall a. a -> Maybe a
Just (Text -> Int
T.length Text
op, Text
rest)