{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE PostfixOperators #-}
{-# LANGUAGE Safe #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module Data.YAML.Token
( tokenize
, Token(..)
, Code(..)
) where
import qualified Data.ByteString.Lazy.Char8 as BLC
import Data.Char (chr, ord)
import qualified Data.DList as D
import Prelude hiding ((*), (+), (-), (/), (^))
import qualified Prelude
import Util hiding (empty)
infixl 6 .+
(.+) :: Int -> Int -> Int
(.+) = (Prelude.+)
infixl 6 .-
(.-) :: Int -> Int -> Int
(.-) = (Prelude.-)
infixl 7 .*
(.*) :: Int -> Int -> Int
(.*) = (Prelude.*)
infixl 9 |>
(|>) :: record -> (record -> value) -> value
record |> field = field record
data Encoding = UTF8
| UTF16LE
| UTF16BE
| UTF32LE
| UTF32BE
instance Show Encoding where
show UTF8 = "UTF-8"
show UTF16LE = "UTF-16LE"
show UTF16BE = "UTF-16BE"
show UTF32LE = "UTF-32LE"
show UTF32BE = "UTF-32BE"
decode :: BLC.ByteString -> (Encoding, [(Int, Char)])
decode text = (encoding, undoEncoding encoding text)
where encoding = detectEncoding $ BLC.unpack $ BLC.take 4 text
detectEncoding :: [Char] -> Encoding
detectEncoding text =
case text of
'\x00' : '\x00' : '\xFE' : '\xFF' : _ -> UTF32BE
'\x00' : '\x00' : '\x00' : _ : _ -> UTF32BE
'\xFF' : '\xFE' : '\x00' : '\x00' : _ -> UTF32LE
_ : '\x00' : '\x00' : '\x00' : _ -> UTF32LE
'\xFE' : '\xFF' : _ -> UTF16BE
'\x00' : _ : _ -> UTF16BE
'\xFF' : '\xFE' : _ -> UTF16LE
_ : '\x00' : _ -> UTF16LE
'\xEF' : '\xBB' : '\xBF' : _ -> UTF8
_ -> UTF8
undoEncoding :: Encoding -> BLC.ByteString -> [(Int, Char)]
undoEncoding encoding bytes =
case encoding of
UTF8 -> undoUTF8 bytes 0
UTF16LE -> combinePairs $ undoUTF16LE bytes 0
UTF16BE -> combinePairs $ undoUTF16BE bytes 0
UTF32LE -> combinePairs $ undoUTF32LE bytes 0
UTF32BE -> combinePairs $ undoUTF32BE bytes 0
hasFewerThan :: Int -> BLC.ByteString -> Bool
hasFewerThan n bytes
| n == 1 = BLC.null bytes
| n > 1 = BLC.null bytes || hasFewerThan (n .- 1) (BLC.tail bytes)
| otherwise = False
undoUTF32LE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF32LE bytes offset
| BLC.null bytes = []
| hasFewerThan 4 bytes = error "UTF-32LE input contains invalid number of bytes"
| otherwise = let first = BLC.head bytes
bytes' = BLC.tail bytes
second = BLC.head bytes'
bytes'' = BLC.tail bytes'
third = BLC.head bytes''
bytes''' = BLC.tail bytes''
fourth = BLC.head bytes'''
rest = BLC.tail bytes'''
in (offset .+ 4,
chr $ (ord first)
.+ 256 .* ((ord second)
.+ 256 .* ((ord third)
.+ 256 .* ((ord fourth))))):(undoUTF32LE rest $ offset .+ 4)
undoUTF32BE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF32BE bytes offset
| BLC.null bytes = []
| hasFewerThan 4 bytes = error "UTF-32BE input contains invalid number of bytes"
| otherwise = let first = BLC.head bytes
bytes' = BLC.tail bytes
second = BLC.head bytes'
bytes'' = BLC.tail bytes'
third = BLC.head bytes''
bytes''' = BLC.tail bytes''
fourth = BLC.head bytes'''
rest = BLC.tail bytes'''
in (offset .+ 4,
chr $ (ord fourth)
.+ 256 .* ((ord third)
.+ 256 .* ((ord second)
.+ 256 .* ((ord first))))):(undoUTF32BE rest $ offset .+ 4)
combinePairs :: [(Int, Char)] -> [(Int, Char)]
combinePairs [] = []
combinePairs (head@(_, head_char):tail)
| '\xD800' <= head_char && head_char <= '\xDBFF' = combineLead head tail
| '\xDC00' <= head_char && head_char <= '\xDFFF' = error "UTF-16 contains trail surrogate without lead surrogate"
| otherwise = head:(combinePairs tail)
combineLead :: (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
combineLead _lead [] = error "UTF-16 contains lead surrogate as final character"
combineLead (_, lead_char) ((trail_offset, trail_char):rest)
| '\xDC00' <= trail_char && trail_char <= '\xDFFF' = (trail_offset, combineSurrogates lead_char trail_char):combinePairs rest
| otherwise = error "UTF-16 contains lead surrogate without trail surrogate"
surrogateOffset :: Int
surrogateOffset = 0x10000 .- (0xD800 .* 1024) .- 0xDC00
combineSurrogates :: Char -> Char -> Char
combineSurrogates lead trail = chr $ (ord lead) .* 1024 .+ (ord trail) .+ surrogateOffset
undoUTF16LE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF16LE bytes offset
| BLC.null bytes = []
| hasFewerThan 2 bytes = error "UTF-16LE input contains odd number of bytes"
| otherwise = let low = BLC.head bytes
bytes' = BLC.tail bytes
high = BLC.head bytes'
rest = BLC.tail bytes'
in (offset .+ 2, chr $ (ord low) .+ (ord high) .* 256):(undoUTF16LE rest $ offset .+ 2)
undoUTF16BE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF16BE bytes offset
| BLC.null bytes = []
| hasFewerThan 2 bytes = error "UTF-16BE input contains odd number of bytes"
| otherwise = let high = BLC.head bytes
bytes' = BLC.tail bytes
low = BLC.head bytes'
rest = BLC.tail bytes'
in (offset .+ 2, chr $ (ord low) .+ (ord high) .* 256):(undoUTF16BE rest $ offset .+ 2)
undoUTF8 :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF8 bytes offset
| BLC.null bytes = []
| otherwise = let first = BLC.head bytes
rest = BLC.tail bytes
in case () of
_ | first < '\x80' -> (offset .+ 1, first):(undoUTF8 rest $ offset .+ 1)
| first < '\xC0' -> error $ "UTF-8 input contains invalid first byte"
| first < '\xE0' -> decodeTwoUTF8 first offset rest
| first < '\xF0' -> decodeThreeUTF8 first offset rest
| first < '\xF8' -> decodeFourUTF8 first offset rest
| otherwise -> error $ "UTF-8 input contains invalid first byte"
decodeTwoUTF8 :: Char -> Int -> BLC.ByteString -> [(Int, Char)]
decodeTwoUTF8 first offset bytes
| BLC.null bytes = error "UTF-8 double byte char is missing second byte at eof"
| otherwise = let second = BLC.head bytes
rest = BLC.tail bytes
in case () of
_ | second < '\x80' || '\xBF' < second -> error $ "UTF-8 double byte char has invalid second byte"
| otherwise -> (offset .+ 2, combineTwoUTF8 first second):(undoUTF8 rest $ offset .+ 2)
combineTwoUTF8 :: Char -> Char -> Char
combineTwoUTF8 first second = chr(((ord first) .- 0xC0) .* 64
.+ ((ord second) .- 0x80))
decodeThreeUTF8 :: Char -> Int -> BLC.ByteString -> [(Int, Char)]
decodeThreeUTF8 first offset bytes
| hasFewerThan 2 bytes = error "UTF-8 triple byte char is missing bytes at eof"
| otherwise = let second = BLC.head bytes
bytes' = BLC.tail bytes
third = BLC.head bytes'
rest = BLC.tail bytes'
in case () of
_ | second < '\x80' || '\xBF' < second -> error "UTF-8 triple byte char has invalid second byte"
| third < '\x80' || '\xBF' < third -> error "UTF-8 triple byte char has invalid third byte"
| otherwise -> (offset .+ 3, combineThreeUTF8 first second third):(undoUTF8 rest $ offset .+ 3)
combineThreeUTF8 :: Char -> Char -> Char -> Char
combineThreeUTF8 first second third = chr(((ord first) .- 0xE0) .* 4096
.+ ((ord second) .- 0x80) .* 64
.+ ((ord third) .- 0x80))
decodeFourUTF8 :: Char -> Int -> BLC.ByteString -> [(Int, Char)]
decodeFourUTF8 first offset bytes
| hasFewerThan 3 bytes = error "UTF-8 quad byte char is missing bytes at eof"
| otherwise = let second = BLC.head bytes
bytes' = BLC.tail bytes
third = BLC.head bytes'
bytes'' = BLC.tail bytes'
fourth = BLC.head bytes''
rest = BLC.tail bytes''
in case () of
_ | second < '\x80' || '\xBF' < second -> error "UTF-8 quad byte char has invalid second byte"
| third < '\x80' || '\xBF' < third -> error "UTF-8 quad byte char has invalid third byte"
| third < '\x80' || '\xBF' < third -> error "UTF-8 quad byte char has invalid fourth byte"
| otherwise -> (offset .+ 4, combineFourUTF8 first second third fourth):(undoUTF8 rest $ offset .+ 4)
combineFourUTF8 :: Char -> Char -> Char -> Char -> Char
combineFourUTF8 first second third fourth = chr(((ord first) .- 0xF0) .* 262144
.+ ((ord second) .- 0x80) .* 4096
.+ ((ord third) .- 0x80) .* 64
.+ ((ord fourth) .- 0x80))
data Code = Bom
| Text
| Meta
| Break
| LineFeed
| LineFold
| Indicator
| White
| Indent
| DirectivesEnd
| DocumentEnd
| BeginEscape
| EndEscape
| BeginComment
| EndComment
| BeginDirective
| EndDirective
| BeginTag
| EndTag
| BeginHandle
| EndHandle
| BeginAnchor
| EndAnchor
| BeginProperties
| EndProperties
| BeginAlias
| EndAlias
| BeginScalar
| EndScalar
| BeginSequence
| EndSequence
| BeginMapping
| EndMapping
| BeginPair
| EndPair
| BeginNode
| EndNode
| BeginDocument
| EndDocument
| BeginStream
| EndStream
| Error
| Unparsed
| Detected
deriving (Show,Eq)
data Token = Token {
tByteOffset :: Int,
tCharOffset :: Int,
tLine :: Int,
tLineChar :: Int,
tCode :: Code,
tText :: String
} deriving Show
data Parser result = Parser (State -> Reply result)
data Result result = Failed String
| Result result
| More (Parser result)
instance (Show result) => Show (Result result) where
show result = case result of
Failed message -> "Failed " ++ message
Result result -> "Result " ++ (show result)
More _ -> "More"
data Reply result = Reply {
rResult :: !(Result result),
rTokens :: !(D.DList Token),
rCommit :: !(Maybe String),
rState :: !State
}
instance (Show result) => Show (Reply result) where
show reply = "Result: " ++ (show $ reply|>rResult)
++ ", Tokens: " ++ (show $ D.toList $ reply|>rTokens)
++ ", Commit: " ++ (show $ reply|>rCommit)
++ ", State: { " ++ (show $ reply|>rState) ++ "}"
type Pattern = Parser ()
data State = State {
sEncoding :: !Encoding,
sDecision :: !String,
sLimit :: !Int,
sForbidden :: !(Maybe Pattern),
sIsPeek :: !Bool,
sIsSol :: !Bool,
sChars :: ![Char],
sCharsByteOffset :: !Int,
sCharsCharOffset :: !Int,
sCharsLine :: !Int,
sCharsLineChar :: !Int,
sByteOffset :: !Int,
sCharOffset :: !Int,
sLine :: !Int,
sLineChar :: !Int,
sCode :: !Code,
sLast :: !Char,
sInput :: ![(Int, Char)]
}
instance Show State where
show state = "Encoding: " ++ (show $ state|>sEncoding)
++ ", Decision: " ++ (show $ state|>sDecision)
++ ", Limit: " ++ (show $ state|>sLimit)
++ ", IsPeek: " ++ (show $ state|>sIsPeek)
++ ", IsSol: " ++ (show $ state|>sIsSol)
++ ", Chars: >>>" ++ (reverse $ state|>sChars) ++ "<<<"
++ ", CharsByteOffset: " ++ (show $ state|>sCharsByteOffset)
++ ", CharsCharOffset: " ++ (show $ state|>sCharsCharOffset)
++ ", CharsLine: " ++ (show $ state|>sCharsLine)
++ ", CharsLineChar: " ++ (show $ state|>sCharsLineChar)
++ ", ByteOffset: " ++ (show $ state|>sByteOffset)
++ ", CharOffset: " ++ (show $ state|>sCharOffset)
++ ", Line: " ++ (show $ state|>sLine)
++ ", LineChar: " ++ (show $ state|>sLineChar)
++ ", Code: " ++ (show $ state|>sCode)
++ ", Last: " ++ (show $ state|>sLast)
initialState :: BLC.ByteString -> State
initialState input
= State { sEncoding = encoding
, sDecision = ""
, sLimit = -1
, sForbidden = Nothing
, sIsPeek = False
, sIsSol = True
, sChars = []
, sCharsByteOffset = -1
, sCharsCharOffset = -1
, sCharsLine = -1
, sCharsLineChar = -1
, sByteOffset = 0
, sCharOffset = 0
, sLine = 1
, sLineChar = 0
, sCode = Unparsed
, sLast = ' '
, sInput = decoded
}
where
(encoding, decoded) = decode input
setLimit :: Int -> State -> State
setLimit limit state = state { sLimit = limit }
setForbidden :: Maybe Pattern -> State -> State
setForbidden forbidden state = state { sForbidden = forbidden }
setCode :: Code -> State -> State
setCode code state = state { sCode = code }
class Match parameter result | parameter -> result where
match :: parameter -> Parser result
instance Match (Parser result) result where
match = id
instance Match Char () where
match code = nextIf (== code)
instance Match (Char, Char) () where
match (low, high) = nextIf $ \ code -> low <= code && code <= high
instance Match String () where
match = foldr (&) empty
returnReply :: State -> result -> Reply result
returnReply state result = Reply { rResult = Result result,
rTokens = D.empty,
rCommit = Nothing,
rState = state }
tokenReply :: State -> Token -> Reply ()
tokenReply state token = Reply { rResult = Result (),
rTokens = D.singleton token,
rCommit = Nothing,
rState = state { sCharsByteOffset = -1,
sCharsCharOffset = -1,
sCharsLine = -1,
sCharsLineChar = -1,
sChars = [] } }
failReply :: State -> String -> Reply result
failReply state message = Reply { rResult = Failed message,
rTokens = D.empty,
rCommit = Nothing,
rState = state }
unexpectedReply :: State -> Reply result
unexpectedReply state = case state|>sInput of
((_, char):_) -> failReply state $ "Unexpected '" ++ [char] ++ "'"
[] -> failReply state "Unexpected end of input"
instance Functor Parser where
fmap = liftM
instance Applicative Parser where
pure = return
(<*>) = ap
instance Monad Parser where
return result = Parser $ \ state -> returnReply state result
left >>= right = bindParser left right
where bindParser (Parser left) right = Parser $ \ state ->
let reply = left state
in case reply|>rResult of
Failed message -> reply { rResult = Failed message }
Result value -> reply { rResult = More $ right value }
More parser -> reply { rResult = More $ bindParser parser right }
fail message = Parser $ \ state -> failReply state message
infix 3 ^
infix 3 %
infix 3 <%
infix 3 !
infix 3 ?!
infixl 3 -
infixr 2 &
infixr 1 /
infix 0 ?
infix 0 *
infix 0 +
infix 0 <?
infix 0 >?
infix 0 >!
(%) :: (Match match result) => match -> Int -> Pattern
parser % n
| n <= 0 = empty
| n > 0 = parser & parser % n .- 1
(<%) :: (Match match result) => match -> Int -> Pattern
parser <% n
| n < 1 = fail "Fewer than 0 repetitions"
| n == 1 = reject parser Nothing
| n > 1 = "<%" ^ ( parser ! "<%" & parser <% n .- 1 / empty )
(^) :: (Match match result) => String -> match -> Parser result
decision ^ parser = choice decision $ match parser
(!) :: (Match match result) => match -> String -> Pattern
parser ! decision = parser & commit decision
(?!) :: (Match match result) => match -> String -> Pattern
parser ?! decision = peek parser & commit decision
(<?) :: (Match match result) => match -> Parser result
(<?) lookbehind = prev lookbehind
(>?) :: (Match match result) => match -> Parser result
(>?) lookahead = peek lookahead
(>!) :: (Match match result) => match -> Pattern
(>!) lookahead = reject lookahead Nothing
(-) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
parser - rejected = reject rejected Nothing & parser
(&) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result2
before & after = (match before) >> (match after)
(/) :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result
first / second = Parser $ \ state ->
let Parser parser = decide (match first) (match second)
in parser state
(?) :: (Match match result) => match -> Pattern
(?) optional = (optional & empty) / empty
(*) :: (Match match result) => match -> Pattern
(*) parser = "*" ^ zomParser
where zomParser = (parser ! "*" & zomParser) / empty
(+) :: (Match match result) => match -> Pattern
(+) parser = parser & (parser *)
decide :: Parser result -> Parser result -> Parser result
decide left right = Parser $ \ state ->
let Parser parser = decideParser state D.empty left right
in parser state
where decideParser point tokens (Parser left) right = Parser $ \state ->
let reply = left state
tokens' reply = D.append tokens $ reply|>rTokens
in case (reply|>rResult, reply|>rCommit) of
(Failed _, _) -> Reply { rState = point,
rTokens = D.empty,
rResult = More right,
rCommit = Nothing }
(Result _, _) -> reply { rTokens = tokens' reply }
(More left', Just _) -> reply { rTokens = tokens' reply,
rResult = More left' }
(More left', Nothing) -> let Parser parser = decideParser point (tokens' reply) left' right
in parser $ reply|>rState
choice :: String -> Parser result -> Parser result
choice decision parser = Parser $ \ state ->
let Parser parser' = choiceParser (state|>sDecision) decision parser
in parser' state { sDecision = decision }
where choiceParser parentDecision makingDecision (Parser parser) = Parser $ \ state ->
let reply = parser state
commit' = case reply|>rCommit of
Nothing -> Nothing
Just decision | decision == makingDecision -> Nothing
| otherwise -> reply|>rCommit
reply' = case reply|>rResult of
More parser' -> reply { rCommit = commit',
rResult = More $ choiceParser parentDecision makingDecision parser' }
_ -> reply { rCommit = commit',
rState = (reply|>rState) { sDecision = parentDecision } }
in reply'
recovery :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result
recovery pattern recover =
Parser $ \ state ->
let (Parser parser) = match pattern
reply = parser state
in if state|>sIsPeek
then reply
else case reply|>rResult of
Result _ -> reply
More more -> reply { rResult = More $ more `recovery` recover }
Failed message -> reply { rResult = More $ fake Error message & unparsed & recover }
where unparsed = let (Parser parser) = match finishToken
in Parser $ \ state -> parser $ state { sCode = Unparsed }
prev :: (Match match result) => match -> Parser result
prev parser = Parser $ \ state ->
prevParser state (match parser) state { sIsPeek = True, sInput = (-1, state|>sLast) : state|>sInput }
where prevParser point (Parser parser) state =
let reply = parser state
in case reply|>rResult of
Failed message -> failReply point message
Result value -> returnReply point value
More parser' -> prevParser point parser' $ reply|>rState
peek :: (Match match result) => match -> Parser result
peek parser = Parser $ \ state ->
peekParser state (match parser) state { sIsPeek = True }
where peekParser point (Parser parser) state =
let reply = parser state
in case reply|>rResult of
Failed message -> failReply point message
Result value -> returnReply point value
More parser' -> peekParser point parser' $ reply|>rState
reject :: (Match match result) => match -> Maybe String -> Pattern
reject parser name = Parser $ \ state ->
rejectParser state name (match parser) state { sIsPeek = True }
where rejectParser point name (Parser parser) state =
let reply = parser state
in case reply|>rResult of
Failed _message -> returnReply point ()
Result _value -> case name of
Nothing -> unexpectedReply point
Just text -> failReply point $ "Unexpected " ++ text
More parser' -> rejectParser point name parser' $ reply|>rState
upto :: Pattern -> Pattern
upto parser = ( ( parser >!) & nextIf (const True) *)
nonEmpty :: (Match match result) => match -> Parser result
nonEmpty parser = Parser $ \ state ->
let Parser parser' = nonEmptyParser (state|>sCharOffset) (match parser)
in parser' state
where nonEmptyParser offset (Parser parser) = Parser $ \ state ->
let reply = parser state
state' = reply|>rState
in case reply|>rResult of
Failed _message -> reply
Result _value -> if state'|>sCharOffset > offset
then reply
else failReply state' "Matched empty pattern"
More parser' -> reply { rResult = More $ nonEmptyParser offset parser' }
empty :: Pattern
empty = return ()
eof :: Pattern
eof = Parser $ \ state ->
if state|>sInput == []
then returnReply state ()
else unexpectedReply state
sol :: Pattern
sol = Parser $ \ state ->
if state|>sIsSol
then returnReply state ()
else failReply state "Expected start of line"
commit :: String -> Pattern
commit decision = Parser $ \ state ->
Reply { rState = state,
rTokens = D.empty,
rResult = Result (),
rCommit = Just decision }
nextLine :: Pattern
nextLine = Parser $ \ state ->
returnReply state { sIsSol = True,
sLine = state|>sLine .+ 1,
sLineChar = 0 }
()
with :: (value -> State -> State) -> (State -> value) -> value -> Parser result -> Parser result
with setField getField value parser = Parser $ \ state ->
let value' = getField state
Parser parser' = value' `seq` withParser value' parser
in parser' $ setField value state
where withParser parentValue (Parser parser) = Parser $ \ state ->
let reply = parser state
in case reply|>rResult of
Failed _ -> reply { rState = setField parentValue $ reply|>rState }
Result _ -> reply { rState = setField parentValue $ reply|>rState }
More parser' -> reply { rResult = More $ withParser parentValue parser' }
forbidding :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
forbidding parser forbidden = with setForbidden sForbidden (Just $ forbidden & empty) (match parser)
limitedTo :: (Match match result) => match -> Int -> Parser result
limitedTo parser limit = with setLimit sLimit limit (match parser)
nextIf :: (Char -> Bool) -> Pattern
nextIf test = Parser $ \ state ->
case state|>sForbidden of
Nothing -> limitedNextIf state
Just parser -> let Parser parser' = reject parser $ Just "forbidden pattern"
reply = parser' state { sForbidden = Nothing }
in case reply|>rResult of
Failed _ -> reply
Result _ -> limitedNextIf state
where limitedNextIf state =
case state|>sLimit of
-1 -> consumeNextIf state
0 -> failReply state "Lookahead limit reached"
_limit -> consumeNextIf state { sLimit = state|>sLimit .- 1 }
consumeNextIf state =
case state|>sInput of
((offset, char):rest) | test char -> let chars = if state|>sIsPeek
then []
else char:(state|>sChars)
byte_offset = charsOf sByteOffset sCharsByteOffset
char_offset = charsOf sCharOffset sCharsCharOffset
line = charsOf sLine sCharsLine
line_char = charsOf sLineChar sCharsLineChar
is_sol = if char == '\xFEFF'
then state|>sIsSol
else False
state' = state { sInput = rest,
sLast = char,
sChars = chars,
sCharsByteOffset = byte_offset,
sCharsCharOffset = char_offset,
sCharsLine = line,
sCharsLineChar = line_char,
sIsSol = is_sol,
sByteOffset = offset,
sCharOffset = state|>sCharOffset .+ 1,
sLineChar = state|>sLineChar .+ 1 }
in returnReply state' ()
| otherwise -> unexpectedReply state
[] -> unexpectedReply state
where charsOf field charsField = if state|>sIsPeek
then -1
else if state|>sChars == []
then state|>field
else state|>charsField
finishToken :: Pattern
finishToken = Parser $ \ state ->
let state' = state { sChars = [],
sCharsByteOffset = -1,
sCharsCharOffset = -1,
sCharsLine = -1,
sCharsLineChar = -1 }
in if state|>sIsPeek
then returnReply state' ()
else case state|>sChars of
[] -> returnReply state' ()
chars@(_:_) -> tokenReply state' Token { tByteOffset = state|>sCharsByteOffset,
tCharOffset = state|>sCharsCharOffset,
tLine = state|>sCharsLine,
tLineChar = state|>sCharsLineChar,
tCode = state|>sCode,
tText = reverse chars }
wrap :: (Match match result) => match -> Parser result
wrap parser = do result <- match parser
finishToken
eof
return result
token :: (Match match result) => Code -> match -> Pattern
token code parser = finishToken & with setCode sCode code (parser & finishToken)
fake :: Code -> String -> Pattern
fake code text = Parser $ \ state ->
if state|>sIsPeek
then returnReply state ()
else tokenReply state Token { tByteOffset = value state sByteOffset sCharsByteOffset,
tCharOffset = value state sCharOffset sCharsCharOffset,
tLine = value state sLine sCharsLine,
tLineChar = value state sLineChar sCharsLineChar,
tCode = code,
tText = text }
where value state field1 field2 =
if field2 state == -1
then field1 state
else field2 state
meta :: (Match match result) => match -> Pattern
meta parser = token Meta parser
indicator :: (Match match result) => match -> Pattern
indicator parser = token Indicator $ parser
text :: (Match match result) => match -> Pattern
text parser = token Text parser
emptyToken :: Code -> Pattern
emptyToken code = finishToken & parser code
where parser code = Parser $ \ state ->
if state|>sIsPeek
then returnReply state ()
else tokenReply state Token { tByteOffset = state|>sByteOffset,
tCharOffset = state|>sCharOffset,
tLine = state|>sLine,
tLineChar = state|>sLineChar,
tCode = code,
tText = "" }
wrapTokens :: Code -> Code -> Pattern -> Pattern
wrapTokens beginCode endCode pattern = emptyToken beginCode
& prefixErrorWith pattern (emptyToken endCode)
& emptyToken endCode
prefixErrorWith :: (Match match result) => match -> Pattern -> Parser result
prefixErrorWith pattern prefix =
Parser $ \ state ->
let (Parser parser) = match pattern
reply = parser state
in case reply|>rResult of
Result _ -> reply
More more -> reply { rResult = More $ prefixErrorWith more prefix }
Failed message -> reply { rResult = More $ prefix & (fail message :: Parser result) }
data Context = BlockOut
| BlockIn
| FlowOut
| FlowIn
| BlockKey
| FlowKey
instance Show Context where
show context = case context of
BlockOut -> "block-out"
BlockIn -> "block-in"
FlowOut -> "flow-out"
FlowIn -> "flow-in"
BlockKey -> "block-key"
FlowKey -> "flow-key"
instance Read Context where
readsPrec _ text = [ ((r word), tail) | (word, tail) <- lex text ]
where r word = case word of
"block_out" -> BlockOut
"block_in" -> BlockIn
"flow_out" -> FlowOut
"flow_in" -> FlowIn
"block_key" -> BlockKey
"flow_key" -> FlowKey
_ -> error $ "unknown context: " ++ word
data Chomp = Strip
| Clip
| Keep
instance Show Chomp where
show chomp = case chomp of
Strip -> "strip"
Clip -> "clip"
Keep -> "keep"
instance Read Chomp where
readsPrec _ text = [ ((r word), tail) | (word, tail) <- lex text ]
where r word = case word of
"strip" -> Strip
"clip" -> Clip
"keep" -> Keep
_ -> error $ "unknown chomp: " ++ word
type Tokenizer = BLC.ByteString -> Bool -> [Token]
patternTokenizer :: Pattern -> Tokenizer
patternTokenizer pattern input withFollowing =
D.toList $ patternParser (wrap pattern) (initialState input)
where patternParser (Parser parser) state =
let reply = parser state
tokens = commitBugs reply
state' = reply|>rState
in case reply|>rResult of
Failed message -> errorTokens tokens state' message withFollowing
Result _ -> tokens
More parser' -> D.append tokens $ patternParser parser' state'
errorTokens :: D.DList Token -> State -> String -> Bool -> D.DList Token
errorTokens tokens state message withFollowing =
let tokens' = D.append tokens $ D.singleton Token { tByteOffset = state|>sByteOffset,
tCharOffset = state|>sCharOffset,
tLine = state|>sLine,
tLineChar = state|>sLineChar,
tCode = Error,
tText = message }
in if withFollowing && state|>sInput /= []
then D.append tokens' $ D.singleton Token { tByteOffset = state|>sByteOffset,
tCharOffset = state|>sCharOffset,
tLine = state|>sLine,
tLineChar = state|>sLineChar,
tCode = Unparsed,
tText = map snd $ state|>sInput }
else tokens'
commitBugs :: Reply result -> D.DList Token
commitBugs reply =
let tokens = reply|>rTokens
state = reply|>rState
in case reply|>rCommit of
Nothing -> tokens
Just commit -> D.append tokens $ D.singleton Token { tByteOffset = state|>sByteOffset,
tCharOffset = state|>sCharOffset,
tLine = state|>sLine,
tLineChar = state|>sLineChar,
tCode = Error,
tText = "Commit to '" ++ commit ++ "' was made outside it" }
tokenize :: BLC.ByteString -> Bool -> [Token]
tokenize = patternTokenizer l_yaml_stream
bom :: Match match1 result1 => match1 -> Parser ()
bom code = code
& (Parser $ \ state -> let text = case state|>sEncoding of
UTF8 -> "TF-8"
UTF16LE -> "TF-16LE"
UTF16BE -> "TF-16BE"
UTF32LE -> "TF-32LE"
UTF32BE -> "TF-32BE"
Parser parser = fake Bom text
in parser state)
na :: Int
na = error "Accessing non-applicable indentation"
asInteger :: Parser Int
asInteger = Parser $ \ state -> returnReply state $ ord (state|>sLast) .- 48
result :: result -> Parser result
result = return
c_printable = '\x9' / '\xA' / '\xD' / ('\x20', '\x7E')
/ '\x85' / ('\xA0', '\xD7FF') / ('\xE000', '\xFFFD')
/ ('\x10000', '\x10FFFF')
nb_json = '\x9' / ('\x20', '\x10FFFF')
c_byte_order_mark = bom '\xFEFF'
c_sequence_entry = indicator '-'
c_mapping_key = indicator '?'
c_mapping_value = indicator ':'
c_collect_entry = indicator ','
c_sequence_start = indicator '['
c_sequence_end = indicator ']'
c_mapping_start = indicator '{'
c_mapping_end = indicator '}'
c_comment = indicator '#'
c_anchor = indicator '&'
c_alias = indicator '*'
c_tag = indicator '!'
c_literal = indicator '|'
c_folded = indicator '>'
c_single_quote = indicator '\''
c_double_quote = indicator '"'
c_directive = indicator '%'
c_reserved = indicator ( '@' / '`' )
c_indicator = c_sequence_entry / c_mapping_key / c_mapping_value / c_collect_entry
/ c_sequence_start / c_sequence_end / c_mapping_start / c_mapping_end
/ c_comment / c_anchor / c_alias / c_tag
/ c_literal / c_folded / c_single_quote / c_double_quote
/ c_directive / c_reserved
c_flow_indicator = c_collect_entry / c_sequence_start / c_sequence_end / c_mapping_start / c_mapping_end
b_line_feed = '\xA'
b_carriage_return = '\xD'
b_char = b_line_feed / b_carriage_return
nb_char = c_printable - b_char - c_byte_order_mark
b_break = ( b_carriage_return & b_line_feed
/ b_carriage_return
/ b_line_feed )
& nextLine
b_as_line_feed = token LineFeed b_break
b_non_content = token Break b_break
s_space = '\x20'
s_tab = '\x9'
s_white = s_space / s_tab
ns_char = nb_char - s_white
ns_dec_digit = ('\x30', '\x39')
ns_hex_digit = ns_dec_digit
/ ('\x41', '\x46') / ('\x61', '\x66')
ns_ascii_letter = ('\x41', '\x5A') / ('\x61', '\x7A')
ns_word_char = ns_dec_digit / ns_ascii_letter / '-'
ns_uri_char = "escape"
^ ( '%' ! "escape" & ns_hex_digit & ns_hex_digit / ns_word_char / '#'
/ ';' / '/' / '?' / ':' / '@' / '&' / '=' / '+' / '$' / ','
/ '_' / '.' / '!' / '~' / '*' / '\'' / '(' / ')' / '[' / ']' )
ns_tag_char = ns_uri_char - c_tag - c_flow_indicator
c_escape = indicator '\\'
ns_esc_null = meta '0'
ns_esc_bell = meta 'a'
ns_esc_backspace = meta 'b'
ns_esc_horizontal_tab = meta ( 't' / '\x9' )
ns_esc_line_feed = meta 'n'
ns_esc_vertical_tab = meta 'v'
ns_esc_form_feed = meta 'f'
ns_esc_carriage_return = meta 'r'
ns_esc_escape = meta 'e'
ns_esc_space = meta '\x20'
ns_esc_double_quote = meta '"'
ns_esc_slash = meta '/'
ns_esc_backslash = meta '\\'
ns_esc_next_line = meta 'N'
ns_esc_non_breaking_space = meta '_'
ns_esc_line_separator = meta 'L'
ns_esc_paragraph_separator = meta 'P'
ns_esc_8_bit = indicator 'x' ! "escaped" & meta ( ns_hex_digit % 2 )
ns_esc_16_bit = indicator 'u' ! "escaped" & meta ( ns_hex_digit % 4 )
ns_esc_32_bit = indicator 'U' ! "escaped" & meta ( ns_hex_digit % 8 )
c_ns_esc_char = wrapTokens BeginEscape EndEscape
$ c_escape ! "escape"
& "escaped"
^ ( ns_esc_null / ns_esc_bell / ns_esc_backspace
/ ns_esc_horizontal_tab / ns_esc_line_feed
/ ns_esc_vertical_tab / ns_esc_form_feed
/ ns_esc_carriage_return / ns_esc_escape / ns_esc_space
/ ns_esc_double_quote / ns_esc_slash / ns_esc_backslash
/ ns_esc_next_line / ns_esc_non_breaking_space
/ ns_esc_line_separator / ns_esc_paragraph_separator
/ ns_esc_8_bit / ns_esc_16_bit / ns_esc_32_bit )
s_indent n = token Indent ( s_space % n )
s_indent_lt n = token Indent ( s_space <% n )
s_indent_le n = token Indent ( s_space <% (n .+ 1) )
s_separate_in_line = token White ( s_white +) / sol
s_line_prefix n c = case c of
BlockOut -> s_block_line_prefix n
BlockIn -> s_block_line_prefix n
FlowOut -> s_flow_line_prefix n
FlowIn -> s_flow_line_prefix n
s_block_line_prefix n = s_indent n
s_flow_line_prefix n = s_indent n & ( s_separate_in_line ?)
l_empty n c = ( s_line_prefix n c / s_indent_lt n )
& b_as_line_feed
b_l_trimmed n c = b_non_content & ( l_empty n c +)
b_as_space = token LineFold b_break
b_l_folded n c = b_l_trimmed n c / b_as_space
s_flow_folded n = ( s_separate_in_line ?) & b_l_folded n FlowIn
& s_flow_line_prefix n
c_nb_comment_text = wrapTokens BeginComment EndComment
$ c_comment & meta ( nb_char *)
b_comment = b_non_content / eof
s_b_comment = ( s_separate_in_line & ( c_nb_comment_text ?) ?)
& b_comment
l_comment = s_separate_in_line & ( c_nb_comment_text ?) & b_comment
s_l_comments = ( s_b_comment / sol )
& ( nonEmpty l_comment *)
s_separate n c = case c of
BlockOut -> s_separate_lines n
BlockIn -> s_separate_lines n
FlowOut -> s_separate_lines n
FlowIn -> s_separate_lines n
BlockKey -> s_separate_in_line
FlowKey -> s_separate_in_line
s_separate_lines n = s_l_comments & s_flow_line_prefix n
/ s_separate_in_line
l_directive = ( wrapTokens BeginDirective EndDirective
$ c_directive ! "doc"
& "directive"
^ ( ns_yaml_directive
/ ns_tag_directive
/ ns_reserved_directive ) )
& s_l_comments
ns_reserved_directive = ns_directive_name
& ( s_separate_in_line & ns_directive_parameter *)
ns_directive_name = meta ( ns_char +)
ns_directive_parameter = meta ( ns_char +)
ns_yaml_directive = meta [ 'Y', 'A', 'M', 'L' ] ! "directive"
& s_separate_in_line & ns_yaml_version
ns_yaml_version = meta ( ( ns_dec_digit +) & '.' & ( ns_dec_digit +) )
ns_tag_directive = meta [ 'T', 'A', 'G' ] ! "directive"
& s_separate_in_line & c_tag_handle
& s_separate_in_line & ns_tag_prefix
c_tag_handle = c_named_tag_handle
/ c_secondary_tag_handle
/ c_primary_tag_handle
c_primary_tag_handle = wrapTokens BeginHandle EndHandle
$ c_tag
c_secondary_tag_handle = wrapTokens BeginHandle EndHandle
$ c_tag & c_tag
c_named_tag_handle = wrapTokens BeginHandle EndHandle
$ c_tag & meta ( ns_word_char +) & c_tag
ns_tag_prefix = wrapTokens BeginTag EndTag
$ ( c_ns_local_tag_prefix / ns_global_tag_prefix )
c_ns_local_tag_prefix = c_tag & meta ( ns_uri_char *)
ns_global_tag_prefix = meta ( ns_tag_char & ( ns_uri_char *) )
c_ns_properties n c = wrapTokens BeginProperties EndProperties
$ ( c_ns_tag_property
& ( s_separate n c & c_ns_anchor_property ?) )
/ ( c_ns_anchor_property
& ( s_separate n c & c_ns_tag_property ?) )
c_ns_tag_property = wrapTokens BeginTag EndTag
$ c_verbatim_tag
/ c_ns_shorthand_tag
/ c_non_specific_tag
c_verbatim_tag = c_tag & indicator '<' & meta ( ns_uri_char +) & indicator '>'
c_ns_shorthand_tag = c_tag_handle & meta ( ns_tag_char +)
c_non_specific_tag = c_tag
c_ns_anchor_property = wrapTokens BeginAnchor EndAnchor
$ c_anchor & ns_anchor_name
ns_anchor_char = ns_char - c_flow_indicator
ns_anchor_name = meta ( ns_anchor_char +)
c_ns_alias_node = wrapTokens BeginAlias EndAlias
$ c_alias ! "node" & ns_anchor_name
e_scalar = wrapTokens BeginScalar EndScalar empty
e_node = wrapTokens BeginNode EndNode e_scalar
nb_double_char = "escape" ^ ( c_ns_esc_char / ( nb_json - c_escape - c_double_quote ) )
ns_double_char = nb_double_char - s_white
c_double_quoted n c = wrapTokens BeginScalar EndScalar
$ c_double_quote ! "node" & text ( nb_double_text n c ) & c_double_quote
nb_double_text n c = case c of
FlowOut -> nb_double_multi_line n
FlowIn -> nb_double_multi_line n
BlockKey -> nb_double_one_line
FlowKey -> nb_double_one_line
nb_double_one_line = ( nb_double_char *)
s_double_escaped n = ( s_white *)
& wrapTokens BeginEscape EndEscape ( c_escape ! "escape" & b_non_content )
& ( l_empty n FlowIn *)
& s_flow_line_prefix n
s_double_break n = "escape" ^ ( s_double_escaped n / s_flow_folded n )
nb_ns_double_in_line = ( ( s_white *) & ns_double_char *)
s_double_next_line n = s_double_break n
& ( ns_double_char & nb_ns_double_in_line
& ( s_double_next_line n / ( s_white *) ) ?)
nb_double_multi_line n = nb_ns_double_in_line
& ( s_double_next_line n / ( s_white *) )
c_quoted_quote = wrapTokens BeginEscape EndEscape
$ c_single_quote ! "escape" & meta '\''
nb_single_char = "escape" ^ ( c_quoted_quote / ( nb_json - c_single_quote ) )
ns_single_char = nb_single_char - s_white
c_single_quoted n c = wrapTokens BeginScalar EndScalar
$ c_single_quote ! "node" & text ( nb_single_text n c ) & c_single_quote
nb_single_text n c = case c of
FlowOut -> nb_single_multi_line n
FlowIn -> nb_single_multi_line n
BlockKey -> nb_single_one_line
FlowKey -> nb_single_one_line
nb_single_one_line = ( nb_single_char *)
nb_ns_single_in_line = ( ( s_white *) & ns_single_char *)
s_single_next_line n = s_flow_folded n
& ( ns_single_char & nb_ns_single_in_line
& ( s_single_next_line n / ( s_white *) ) ?)
nb_single_multi_line n = nb_ns_single_in_line
& ( s_single_next_line n / ( s_white *) )
ns_plain_first _c = ns_char - c_indicator
/ ( ':' / '?' / '-' ) & ( ns_char >?)
ns_plain_safe c = case c of
FlowOut -> ns_plain_safe_out
FlowIn -> ns_plain_safe_in
BlockKey -> ns_plain_safe_out
FlowKey -> ns_plain_safe_in
ns_plain_safe_out = ns_char - c_mapping_value - c_comment
ns_plain_safe_in = ns_plain_safe_out - c_flow_indicator
ns_plain_char c = ns_plain_safe c
/ ( ns_char <?) & '#'
/ ':' & ( ns_char >?)
ns_plain n c = wrapTokens BeginScalar EndScalar
$ text (case c of
FlowOut -> ns_plain_multi_line n c
FlowIn -> ns_plain_multi_line n c
BlockKey -> ns_plain_one_line c
FlowKey -> ns_plain_one_line c)
nb_ns_plain_in_line c = ( ( s_white *) & ns_plain_char c *)
ns_plain_one_line c = ns_plain_first c ! "node" & nb_ns_plain_in_line c
s_ns_plain_next_line n c = s_flow_folded n
& ns_plain_char c & nb_ns_plain_in_line c
ns_plain_multi_line n c = ns_plain_one_line c
& ( s_ns_plain_next_line n c *)
in_flow c = case c of
FlowOut -> FlowIn
FlowIn -> FlowIn
BlockKey -> FlowKey
FlowKey -> FlowKey
c_flow_sequence n c = wrapTokens BeginSequence EndSequence
$ c_sequence_start ! "node" & ( s_separate n c ?)
& ( ns_s_flow_seq_entries n (in_flow c) ?) & c_sequence_end
ns_s_flow_seq_entries n c = ns_flow_seq_entry n c & ( s_separate n c ?)
& ( c_collect_entry & ( s_separate n c ?)
& ( ns_s_flow_seq_entries n c ?) ?)
ns_flow_seq_entry n c = "pair" ^ ( ns_flow_pair n c / "node" ^ ns_flow_node n c )
c_flow_mapping n c = wrapTokens BeginMapping EndMapping
$ c_mapping_start ! "node" & ( s_separate n c ?)
& ( ns_s_flow_map_entries n (in_flow c) ?) & c_mapping_end
ns_s_flow_map_entries n c = ns_flow_map_entry n c & ( s_separate n c ?)
& ( c_collect_entry & ( s_separate n c ?)
& ( ns_s_flow_map_entries n c ?) ?)
ns_flow_map_entry n c = wrapTokens BeginPair EndPair
$ "key" ^ ( ( c_mapping_key ! "key" & s_separate n c
& ns_flow_map_explicit_entry n c )
/ ns_flow_map_implicit_entry n c )
ns_flow_map_explicit_entry n c = ns_flow_map_implicit_entry n c
/ ( e_node
& e_node )
ns_flow_map_implicit_entry n c = "pair"
^ ( ns_flow_map_yaml_key_entry n c
/ c_ns_flow_map_empty_key_entry n c
/ c_ns_flow_map_json_key_entry n c )
ns_flow_map_yaml_key_entry n c = ( "node" ^ ns_flow_yaml_node n c ) ! "pair"
& ( ( ( s_separate n c ?)
& c_ns_flow_map_separate_value n c )
/ e_node )
c_ns_flow_map_empty_key_entry n c = e_node
& c_ns_flow_map_separate_value n c
c_ns_flow_map_separate_value n c = c_mapping_value & ( ns_char >!) ! "pair"
& ( ( s_separate n c & ns_flow_node n c )
/ e_node )
c_ns_flow_map_json_key_entry n c = ( "node" ^ c_flow_json_node n c ) ! "pair"
& ( ( ( s_separate n c ?)
& c_ns_flow_map_adjacent_value n c )
/ e_node )
c_ns_flow_map_adjacent_value n c = c_mapping_value ! "pair"
& ( ( ( s_separate n c ?)
& ns_flow_node n c )
/ e_node )
ns_flow_pair n c = wrapTokens BeginMapping EndMapping
$ wrapTokens BeginPair EndPair
$ ( ( c_mapping_key ! "pair" & s_separate n c
& ns_flow_map_explicit_entry n c )
/ ns_flow_pair_entry n c )
ns_flow_pair_entry n c = ( ns_flow_pair_yaml_key_entry n c
/ c_ns_flow_map_empty_key_entry n c
/ c_ns_flow_pair_json_key_entry n c )
ns_flow_pair_yaml_key_entry n c = ns_s_implicit_yaml_key FlowKey
& c_ns_flow_map_separate_value n c
c_ns_flow_pair_json_key_entry n c = c_s_implicit_json_key FlowKey
& c_ns_flow_map_adjacent_value n c
ns_s_implicit_yaml_key c = ( "node" ^ ( ns_flow_yaml_node na c ) & ( s_separate_in_line ?) )
`limitedTo` 1024
c_s_implicit_json_key c = ( "node" ^ ( c_flow_json_node na c ) & ( s_separate_in_line ?) )
`limitedTo` 1024
ns_flow_yaml_content n c = ns_plain n c
c_flow_json_content n c = c_flow_sequence n c / c_flow_mapping n c
/ c_single_quoted n c / c_double_quoted n c
ns_flow_content n c = ns_flow_yaml_content n c / c_flow_json_content n c
ns_flow_yaml_node n c = wrapTokens BeginNode EndNode
$ c_ns_alias_node
/ ns_flow_yaml_content n c
/ ( c_ns_properties n c
& ( ( s_separate n c & ns_flow_yaml_content n c )
/ e_scalar ) )
c_flow_json_node n c = wrapTokens BeginNode EndNode
$ ( c_ns_properties n c & s_separate n c ?)
& c_flow_json_content n c
ns_flow_node n c = wrapTokens BeginNode EndNode
$ c_ns_alias_node
/ ns_flow_content n c
/ ( c_ns_properties n c
& ( ( s_separate n c & ns_flow_content n c )
/ e_scalar ) )
c_b_block_header n = "header"
^ ( do m <- c_indentation_indicator n
t <- c_chomping_indicator
( s_white / b_char ) ?! "header"
s_b_comment
result (m, t)
/ do t <- c_chomping_indicator
m <- c_indentation_indicator n
s_b_comment
result (m, t) )
c_indentation_indicator n = indicator ( ns_dec_digit - '0' ) & asInteger
/ detect_scalar_indentation n
detect_scalar_indentation n = peek $ ( nb_char *)
& ( b_non_content & ( l_empty n BlockIn *) ?)
& count_spaces (-n)
count_spaces n = (s_space & count_spaces (n .+ 1))
/ result (max 1 n)
c_chomping_indicator = indicator '-' & result Strip
/ indicator '+' & result Keep
/ result Clip
end_block_scalar t = case t of
Strip -> emptyToken EndScalar
Clip -> emptyToken EndScalar
Keep -> empty
b_chomped_last t = case t of
Strip -> emptyToken EndScalar & b_non_content
Clip -> b_as_line_feed & emptyToken EndScalar
Keep -> b_as_line_feed
l_chomped_empty n t = case t of
Strip -> l_strip_empty n
Clip -> l_strip_empty n
Keep -> l_keep_empty n
l_strip_empty n = ( s_indent_le n & b_non_content *)
& ( l_trail_comments n ?)
l_keep_empty n = ( l_empty n BlockIn *)
& emptyToken EndScalar
& ( l_trail_comments n ?)
l_trail_comments n = s_indent_lt n & c_nb_comment_text & b_comment
& ( nonEmpty l_comment *)
c_l__literal n = do emptyToken BeginScalar
c_literal ! "node"
(m, t) <- c_b_block_header n `prefixErrorWith` emptyToken EndScalar
text ( l_literal_content (n .+ m) t )
l_nb_literal_text n = ( l_empty n BlockIn *)
& s_indent n & ( nb_char +)
b_nb_literal_next n = b_as_line_feed
& l_nb_literal_text n
l_literal_content n t = ( ( l_nb_literal_text n & ( b_nb_literal_next n *) & b_chomped_last t )
/ end_block_scalar t )
& l_chomped_empty n t
c_l__folded n = do emptyToken BeginScalar
c_folded ! "node"
(m, t) <- c_b_block_header n `prefixErrorWith` emptyToken EndScalar
text ( l_folded_content (n .+ m) t )
s_nb_folded_text n = s_indent n & ns_char ! "fold" & ( nb_char *)
l_nb_folded_lines n = s_nb_folded_text n
& ( b_l_folded n BlockIn & s_nb_folded_text n *)
s_nb_spaced_text n = s_indent n & s_white ! "fold" & ( nb_char *)
b_l_spaced n = b_as_line_feed
& ( l_empty n BlockIn *)
l_nb_spaced_lines n = s_nb_spaced_text n
& ( b_l_spaced n & s_nb_spaced_text n *)
l_nb_same_lines n = ( l_empty n BlockIn *)
& "fold" ^ ( l_nb_folded_lines n / l_nb_spaced_lines n )
l_nb_diff_lines n = l_nb_same_lines n
& ( b_as_line_feed & l_nb_same_lines n *)
l_folded_content n t = ( ( l_nb_diff_lines n & b_chomped_last t )
/ end_block_scalar t )
& l_chomped_empty n t
detect_collection_indentation n = peek $ ( nonEmpty l_comment* ) & count_spaces (-n)
detect_inline_indentation = peek $ count_spaces 0
l__block_sequence n = do m <- detect_collection_indentation n
wrapTokens BeginSequence EndSequence $ ( s_indent (n .+ m) & c_l_block_seq_entry (n .+ m) +)
c_l_block_seq_entry n = c_sequence_entry & ( ns_char >!) ! "node"
& s_l__block_indented n BlockIn
s_l__block_indented n c = do m <- detect_inline_indentation
"node" ^ ( ( s_indent m
& ( ns_l_in_line_sequence (n .+ 1 .+ m)
/ ns_l_in_line_mapping (n .+ 1 .+ m) ) )
/ s_l__block_node n c
/ ( e_node & ( s_l_comments ?) & unparsed (n .+ 1) ) ) `recovery` unparsed (n .+ 1)
ns_l_in_line_sequence n = wrapTokens BeginNode EndNode
$ wrapTokens BeginSequence EndSequence
$ c_l_block_seq_entry n
& ( s_indent n & c_l_block_seq_entry n *)
l__block_mapping n = do m <- detect_collection_indentation n
wrapTokens BeginMapping EndMapping $ ( s_indent (n .+ m) & ns_l_block_map_entry (n .+ m) +)
ns_l_block_map_entry n = wrapTokens BeginPair EndPair
$ c_l_block_map_explicit_entry n
/ ns_l_block_map_implicit_entry n
c_l_block_map_explicit_entry n = c_l_block_map_explicit_key n
& ( l_block_map_explicit_value n
/ e_node )
c_l_block_map_explicit_key n = c_mapping_key ! "node" & s_l__block_indented n BlockOut
l_block_map_explicit_value n = s_indent n & c_mapping_value & s_l__block_indented n BlockOut
ns_l_block_map_implicit_entry n = ( ns_s_block_map_implicit_key
/ e_node )
& c_l_block_map_implicit_value n
ns_s_block_map_implicit_key = c_s_implicit_json_key BlockKey
/ ns_s_implicit_yaml_key BlockKey
c_l_block_map_implicit_value n = c_mapping_value ! "node"
& ( ( s_l__block_node n BlockOut
/ ( e_node & ( s_l_comments ?) & unparsed (n .+ 1) ) ) `recovery` unparsed (n .+ 1) )
ns_l_in_line_mapping n = wrapTokens BeginNode EndNode
$ wrapTokens BeginMapping EndMapping
$ ns_l_block_map_entry n
& ( s_indent n & ns_l_block_map_entry n *)
unparsed n = ( sol / unparsed_text & unparsed_break )
& ( nonEmpty ( unparsed_indent n & unparsed_text & unparsed_break ) *)
unparsed_indent n = token Unparsed ( s_space % n )
unparsed_text = token Unparsed ( upto ( eof / c_forbidden / b_break ) )
unparsed_break = eof / peek c_forbidden / token Unparsed b_break / empty
s_l__block_node n c = s_l__block_in_block n c / s_l__flow_in_block n
s_l__flow_in_block n = s_separate (n .+ 1) FlowOut
& ns_flow_node (n .+ 1) FlowOut & s_l_comments
s_l__block_in_block n c = wrapTokens BeginNode EndNode
$ ( s_l__block_scalar n c / s_l__block_collection n c )
s_l__block_scalar n c = s_separate (n .+ 1) c
& ( c_ns_properties (n .+ 1) c & s_separate (n .+ 1) c ?)
& ( c_l__literal n / c_l__folded n )
s_l__block_collection n c = ( s_separate (n .+ 1) c & c_ns_properties (n .+ 1) c & ( s_l_comments >?) ?)
& s_l_comments
& ( l__block_sequence (seq_spaces n c)
/ l__block_mapping n )
seq_spaces n c = case c of
BlockOut -> n .- 1
BlockIn -> n
l_document_prefix = ( c_byte_order_mark ?) & ( nonEmpty l_comment *)
c_directives_end = token DirectivesEnd [ '-', '-', '-' ]
c_document_end = token DocumentEnd [ '.', '.', '.' ]
l_document_suffix = c_document_end & s_l_comments
c_forbidden = sol
& ( c_directives_end / c_document_end )
& ( b_char / s_white / eof )
l_bare_document = "node" ^ s_l__block_node (-1) BlockIn
`forbidding` c_forbidden
l_explicit_document = c_directives_end ! "doc"
& ( ( l_bare_document
/ e_node & ( s_l_comments ?) & unparsed 0 ) `recovery` unparsed 0 )
l_directives_document = ( l_directive +)
& l_explicit_document
l_any_document = wrapTokens BeginDocument EndDocument
$ "doc" ^ ( l_directives_document
/ l_explicit_document
/ l_bare_document ) `recovery` unparsed 0
l_yaml_stream = ( nonEmpty l_document_prefix *)
& ( eof / ( c_document_end & ( b_char / s_white / eof ) >?) / l_any_document )
& ( nonEmpty ( "more" ^ ( ( l_document_suffix ! "more" +) & ( nonEmpty l_document_prefix *) & ( eof / l_any_document )
/ ( nonEmpty l_document_prefix *) & "doc" ^ ( wrapTokens BeginDocument EndDocument l_explicit_document ?) ) ) *)