{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Stream
( Stream (..) )
where
import Data.Char (chr)
import Data.Foldable (foldl')
import Data.Kind (Type)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (fromMaybe)
import Data.Proxy
import Data.Word (Word8)
import Text.Megaparsec.Pos
import Text.Megaparsec.State
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.List.NonEmpty as NE
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
class (Ord (Token s), Ord (Tokens s)) => Stream s where
type Token s :: Type
type Tokens s :: Type
tokenToChunk :: Proxy s -> Token s -> Tokens s
tokenToChunk pxy = tokensToChunk pxy . pure
tokensToChunk :: Proxy s -> [Token s] -> Tokens s
chunkToTokens :: Proxy s -> Tokens s -> [Token s]
chunkLength :: Proxy s -> Tokens s -> Int
chunkEmpty :: Proxy s -> Tokens s -> Bool
chunkEmpty pxy ts = chunkLength pxy ts <= 0
take1_ :: s -> Maybe (Token s, s)
takeN_ :: Int -> s -> Maybe (Tokens s, s)
takeWhile_ :: (Token s -> Bool) -> s -> (Tokens s, s)
showTokens :: Proxy s -> NonEmpty (Token s) -> String
tokensLength :: Proxy s -> NonEmpty (Token s) -> Int
tokensLength Proxy = NE.length
reachOffset
:: Int
-> PosState s
-> (String, PosState s)
reachOffsetNoLine
:: Int
-> PosState s
-> PosState s
reachOffsetNoLine o pst =
snd (reachOffset o pst)
instance Stream String where
type Token String = Char
type Tokens String = String
tokenToChunk Proxy = pure
tokensToChunk Proxy = id
chunkToTokens Proxy = id
chunkLength Proxy = length
chunkEmpty Proxy = null
take1_ [] = Nothing
take1_ (t:ts) = Just (t, ts)
takeN_ n s
| n <= 0 = Just ("", s)
| null s = Nothing
| otherwise = Just (splitAt n s)
takeWhile_ = span
showTokens Proxy = stringPretty
reachOffset o pst =
reachOffset' splitAt foldl' id id ('\n','\t') o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' splitAt foldl' ('\n', '\t') o pst
instance Stream B.ByteString where
type Token B.ByteString = Word8
type Tokens B.ByteString = B.ByteString
tokenToChunk Proxy = B.singleton
tokensToChunk Proxy = B.pack
chunkToTokens Proxy = B.unpack
chunkLength Proxy = B.length
chunkEmpty Proxy = B.null
take1_ = B.uncons
takeN_ n s
| n <= 0 = Just (B.empty, s)
| B.null s = Nothing
| otherwise = Just (B.splitAt n s)
takeWhile_ = B.span
showTokens Proxy = stringPretty . fmap (chr . fromIntegral)
reachOffset o pst =
reachOffset' B.splitAt B.foldl' B8.unpack (chr . fromIntegral) (10, 9) o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' B.splitAt B.foldl' (10, 9) o pst
instance Stream BL.ByteString where
type Token BL.ByteString = Word8
type Tokens BL.ByteString = BL.ByteString
tokenToChunk Proxy = BL.singleton
tokensToChunk Proxy = BL.pack
chunkToTokens Proxy = BL.unpack
chunkLength Proxy = fromIntegral . BL.length
chunkEmpty Proxy = BL.null
take1_ = BL.uncons
takeN_ n s
| n <= 0 = Just (BL.empty, s)
| BL.null s = Nothing
| otherwise = Just (BL.splitAt (fromIntegral n) s)
takeWhile_ = BL.span
showTokens Proxy = stringPretty . fmap (chr . fromIntegral)
reachOffset o pst =
reachOffset' splitAtBL BL.foldl' BL8.unpack (chr . fromIntegral) (10, 9) o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' splitAtBL BL.foldl' (10, 9) o pst
instance Stream T.Text where
type Token T.Text = Char
type Tokens T.Text = T.Text
tokenToChunk Proxy = T.singleton
tokensToChunk Proxy = T.pack
chunkToTokens Proxy = T.unpack
chunkLength Proxy = T.length
chunkEmpty Proxy = T.null
take1_ = T.uncons
takeN_ n s
| n <= 0 = Just (T.empty, s)
| T.null s = Nothing
| otherwise = Just (T.splitAt n s)
takeWhile_ = T.span
showTokens Proxy = stringPretty
reachOffset o pst =
reachOffset' T.splitAt T.foldl' T.unpack id ('\n', '\t') o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' T.splitAt T.foldl' ('\n', '\t') o pst
instance Stream TL.Text where
type Token TL.Text = Char
type Tokens TL.Text = TL.Text
tokenToChunk Proxy = TL.singleton
tokensToChunk Proxy = TL.pack
chunkToTokens Proxy = TL.unpack
chunkLength Proxy = fromIntegral . TL.length
chunkEmpty Proxy = TL.null
take1_ = TL.uncons
takeN_ n s
| n <= 0 = Just (TL.empty, s)
| TL.null s = Nothing
| otherwise = Just (TL.splitAt (fromIntegral n) s)
takeWhile_ = TL.span
showTokens Proxy = stringPretty
reachOffset o pst =
reachOffset' splitAtTL TL.foldl' TL.unpack id ('\n', '\t') o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' splitAtTL TL.foldl' ('\n', '\t') o pst
data St = St SourcePos ShowS
reachOffset'
:: forall s. Stream s
=> (Int -> s -> (Tokens s, s))
-> (forall b. (b -> Token s -> b) -> b -> Tokens s -> b)
-> (Tokens s -> String)
-> (Token s -> Char)
-> (Token s, Token s)
-> Int
-> PosState s
-> (String, PosState s)
reachOffset' splitAt'
foldl''
fromToks
fromTok
(newlineTok, tabTok)
o
PosState {..} =
( case expandTab pstateTabWidth
. addPrefix
. f
. fromToks
. fst
$ takeWhile_ (/= newlineTok) post of
"" -> "<empty line>"
xs -> xs
, PosState
{ pstateInput = post
, pstateOffset = max pstateOffset o
, pstateSourcePos = spos
, pstateTabWidth = pstateTabWidth
, pstateLinePrefix =
if sameLine
then pstateLinePrefix ++ f ""
else f ""
}
)
where
addPrefix xs =
if sameLine
then pstateLinePrefix ++ xs
else xs
sameLine = sourceLine spos == sourceLine pstateSourcePos
(pre, post) = splitAt' (o - pstateOffset) pstateInput
St spos f = foldl'' go (St pstateSourcePos id) pre
go (St apos g) ch =
let SourcePos n l c = apos
c' = unPos c
w = unPos pstateTabWidth
in if | ch == newlineTok ->
St (SourcePos n (l <> pos1) pos1)
id
| ch == tabTok ->
St (SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w)))
(g . (fromTok ch :))
| otherwise ->
St (SourcePos n l (c <> pos1))
(g . (fromTok ch :))
{-# INLINE reachOffset' #-}
reachOffsetNoLine'
:: forall s. Stream s
=> (Int -> s -> (Tokens s, s))
-> (forall b. (b -> Token s -> b) -> b -> Tokens s -> b)
-> (Token s, Token s)
-> Int
-> PosState s
-> PosState s
reachOffsetNoLine' splitAt'
foldl''
(newlineTok, tabTok)
o
PosState {..} =
( PosState
{ pstateInput = post
, pstateOffset = max pstateOffset o
, pstateSourcePos = spos
, pstateTabWidth = pstateTabWidth
, pstateLinePrefix = pstateLinePrefix
}
)
where
spos = foldl'' go pstateSourcePos pre
(pre, post) = splitAt' (o - pstateOffset) pstateInput
go (SourcePos n l c) ch =
let c' = unPos c
w = unPos pstateTabWidth
in if | ch == newlineTok ->
SourcePos n (l <> pos1) pos1
| ch == tabTok ->
SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w))
| otherwise ->
SourcePos n l (c <> pos1)
{-# INLINE reachOffsetNoLine' #-}
splitAtBL :: Int -> BL.ByteString -> (BL.ByteString, BL.ByteString)
splitAtBL n = BL.splitAt (fromIntegral n)
{-# INLINE splitAtBL #-}
splitAtTL :: Int -> TL.Text -> (TL.Text, TL.Text)
splitAtTL n = TL.splitAt (fromIntegral n)
{-# INLINE splitAtTL #-}
stringPretty :: NonEmpty Char -> String
stringPretty (x:|[]) = charPretty x
stringPretty ('\r':|"\n") = "crlf newline"
stringPretty xs = "\"" <> concatMap f (NE.toList xs) <> "\""
where
f ch =
case charPretty' ch of
Nothing -> [ch]
Just pretty -> "<" <> pretty <> ">"
charPretty :: Char -> String
charPretty ' ' = "space"
charPretty ch = fromMaybe ("'" <> [ch] <> "'") (charPretty' ch)
charPretty' :: Char -> Maybe String
charPretty' = \case
'\NUL' -> Just "null"
'\SOH' -> Just "start of heading"
'\STX' -> Just "start of text"
'\ETX' -> Just "end of text"
'\EOT' -> Just "end of transmission"
'\ENQ' -> Just "enquiry"
'\ACK' -> Just "acknowledge"
'\BEL' -> Just "bell"
'\BS' -> Just "backspace"
'\t' -> Just "tab"
'\n' -> Just "newline"
'\v' -> Just "vertical tab"
'\f' -> Just "form feed"
'\r' -> Just "carriage return"
'\SO' -> Just "shift out"
'\SI' -> Just "shift in"
'\DLE' -> Just "data link escape"
'\DC1' -> Just "device control one"
'\DC2' -> Just "device control two"
'\DC3' -> Just "device control three"
'\DC4' -> Just "device control four"
'\NAK' -> Just "negative acknowledge"
'\SYN' -> Just "synchronous idle"
'\ETB' -> Just "end of transmission block"
'\CAN' -> Just "cancel"
'\EM' -> Just "end of medium"
'\SUB' -> Just "substitute"
'\ESC' -> Just "escape"
'\FS' -> Just "file separator"
'\GS' -> Just "group separator"
'\RS' -> Just "record separator"
'\US' -> Just "unit separator"
'\DEL' -> Just "delete"
'\160' -> Just "non-breaking space"
_ -> Nothing
expandTab
:: Pos
-> String
-> String
expandTab w' = go 0
where
go 0 [] = []
go 0 ('\t':xs) = go w xs
go 0 (x:xs) = x : go 0 xs
go n xs = ' ' : go (n - 1) xs
w = unPos w'