module Iri.Parsing.Attoparsec.ByteString
(
uri,
httpUri,
)
where
import Iri.Prelude hiding (foldl, hash)
import Iri.Data
import Data.Attoparsec.ByteString hiding (try)
import qualified Data.Attoparsec.ByteString.Char8 as F
import qualified Data.ByteString as K
import qualified Data.Text as T
import qualified Data.Text.Punycode as A
import qualified Data.Text.Encoding as B
import qualified Data.Text.Encoding.Error as L
import qualified Data.HashMap.Strict as O
import qualified Data.Vector as S
import qualified VectorBuilder.Builder as P
import qualified VectorBuilder.Vector as Q
import qualified VectorBuilder.MonadPlus as E
import qualified Iri.PercentEncoding as I
import qualified Iri.CodePointPredicates.Rfc3986 as C
import qualified Iri.MonadPlus as R
import qualified Ptr.Poking as G
import qualified Ptr.ByteString as H
import qualified Text.Builder as J
import qualified Net.IPv4 as M
import qualified Net.IPv6 as N
percent :: Parser Word8
percent =
word8 37
plus :: Parser Word8
plus =
word8 43
colon :: Parser Word8
colon =
word8 58
at :: Parser Word8
at =
word8 64
forwardSlash :: Parser Word8
forwardSlash =
word8 47
question :: Parser Word8
question =
word8 63
hash :: Parser Word8
hash =
word8 35
equality :: Parser Word8
equality =
word8 61
ampersand :: Parser Word8
ampersand =
word8 38
semicolon :: Parser Word8
semicolon =
word8 59
labeled :: String -> Parser a -> Parser a
labeled label parser =
parser <?> label
uri :: Parser Iri
uri =
labeled "URI" $ do
parsedScheme <- scheme
colon
parsedHierarchy <- hierarchy
parsedQuery <- query
parsedFragment <- fragment
return (Iri parsedScheme parsedHierarchy parsedQuery parsedFragment)
httpUri :: Parser HttpIri
httpUri =
labeled "HTTP URI" $ do
satisfy (\ x -> x == 104 || x == 72)
satisfy (\ x -> x == 116 || x == 84)
satisfy (\ x -> x == 116 || x == 84)
satisfy (\ x -> x == 112 || x == 80)
secure <- satisfy (\ b -> b == 115 || b == 83) $> True <|> pure False
string "://"
parsedHost <- host
parsedPort <- PresentPort <$> (colon *> port) <|> pure MissingPort
parsedPath <- (forwardSlash *> path) <|> pure (Path mempty)
parsedQuery <- query
parsedFragment <- fragment
return (HttpIri (Security secure) parsedHost parsedPort parsedPath parsedQuery parsedFragment)
hierarchy :: Parser Hierarchy
hierarchy =
do
slashPresent <- forwardSlash $> True <|> pure False
if slashPresent
then do
slashPresent <- forwardSlash $> True <|> pure False
if slashPresent
then authorisedHierarchyBody AuthorisedHierarchy
else AbsoluteHierarchy <$> path
else RelativeHierarchy <$> path
authorisedHierarchyBody :: (Authority -> Path -> body) -> Parser body
authorisedHierarchyBody body =
do
parsedUserInfo <- (presentUserInfo PresentUserInfo <* at) <|> pure MissingUserInfo
parsedHost <- host
parsedPort <- PresentPort <$> (colon *> port) <|> pure MissingPort
parsedPath <- (forwardSlash *> path) <|> pure (Path mempty)
return (body (Authority parsedUserInfo parsedHost parsedPort) parsedPath)
scheme :: Parser Scheme
scheme =
labeled "Scheme" $
fmap Scheme (takeWhile1 (C.scheme . fromIntegral))
presentUserInfo :: (User -> Password -> a) -> Parser a
presentUserInfo result =
labeled "User info" $
do
user <- User <$> urlEncodedString (C.unencodedUserInfoComponent . fromIntegral)
passwordFollows <- True <$ colon <|> pure False
if passwordFollows
then do
password <- PresentPassword <$> urlEncodedString (C.unencodedUserInfoComponent . fromIntegral)
return (result user password)
else return (result user MissingPassword)
host :: Parser Host
host =
labeled "Host" $
IpV6Host <$> ipV6 <|>
IpV4Host <$> M.parserUtf8 <|>
NamedHost <$> domainName
ipV6 :: Parser IPv6
ipV6 =
do
a <- F.hexadecimal
colon
b <- F.hexadecimal
colon
c <- F.hexadecimal
colon
d <- F.hexadecimal
colon
mplus
(do
e <- F.hexadecimal
colon
f <- F.hexadecimal
colon
g <- F.hexadecimal
colon
h <- F.hexadecimal
return (N.fromWord16s a b c d e f g h))
(do
colon
return (N.fromWord16s a b c d 0 0 0 0))
domainName :: Parser RegName
domainName =
fmap RegName (E.sepBy1 domainLabel (word8 46))
domainLabel :: Parser DomainLabel
domainLabel =
labeled "Domain label" $ do
punycodeFollows <- True <$ string "xn--" <|> pure False
ascii <- takeWhile1 (C.domainLabel . fromIntegral)
if punycodeFollows
then case A.decode ascii of
Right text -> return (DomainLabel text)
Left exception -> fail (showString "Punycode decoding exception: " (show exception))
else return (DomainLabel (B.decodeUtf8 ascii))
port :: Parser Word16
port =
F.decimal
path :: Parser Path
path =
do
segments <- E.sepBy pathSegment forwardSlash
if segmentsAreEmpty segments
then return (Path mempty)
else return (Path segments)
where
segmentsAreEmpty segments =
S.length segments == 1 &&
(case S.unsafeHead segments of PathSegment headSegmentText -> T.null headSegmentText)
pathSegment :: Parser PathSegment
pathSegment =
fmap PathSegment (urlEncodedString (C.unencodedPathSegment . fromIntegral))
utf8Chunks :: Parser ByteString -> Parser Text
utf8Chunks chunk =
labeled "UTF8 chunks" $
R.foldlM progress (mempty, mempty, B.streamDecodeUtf8) chunk >>= finish
where
progress (!builder, _, decode) bytes =
case unsafeDupablePerformIO (try (evaluate (decode bytes))) of
Right (B.Some decodedChunk undecodedBytes newDecode) ->
return (builder <> J.text decodedChunk, undecodedBytes, newDecode)
Left (L.DecodeError error _) ->
fail (showString "UTF8 decoding: " error)
finish (builder, undecodedBytes, _) =
if K.null undecodedBytes
then return (J.run builder)
else fail (showString "UTF8 decoding: Bytes remaining: " (show undecodedBytes))
urlEncodedString :: (Word8 -> Bool) -> Parser Text
urlEncodedString unencodedBytesPredicate =
labeled "URL-encoded string" $
utf8Chunks $
takeWhile1 unencodedBytesPredicate <|> encoded
where
encoded =
K.singleton <$> percentEncodedByte
percentEncodedByte :: Parser Word8
percentEncodedByte =
labeled "Percent-encoded byte" $ do
percent
byte1 <- anyWord8
byte2 <- anyWord8
I.matchPercentEncodedBytes (fail "Broken percent encoding") return byte1 byte2
query :: Parser Query
query =
labeled "Query" $
(question *> (Query <$> queryOrFragmentBody)) <|> pure (Query mempty)
fragment :: Parser Fragment
fragment =
labeled "Fragment" $
(hash *> (Fragment <$> queryOrFragmentBody)) <|> pure (Fragment mempty)
queryOrFragmentBody :: Parser Text
queryOrFragmentBody =
utf8Chunks $
takeWhile1 (C.unencodedQuery . fromIntegral) <|>
" " <$ plus <|>
K.singleton <$> percentEncodedByte