module Text.URI (
URI(..)
, dereferencePath
, dereferencePathString
, escapeString
, isReference
, isRelative
, nullURI
, okInFragment
, okInPath
, okInQuery
, okInQueryItem
, okInUserinfo
, mergePaths
, mergePathStrings
, mergeURIs
, mergeURIStrings
, pairsToQuery
, parseURI
, pathToSegments
, segmentsToPath
, queryToPairs
, unescapeString
, uriPathSegments
, uriQueryItems
) where
import Data.Char
import Data.List
import Data.Maybe
import Data.Word
import Codec.Binary.UTF8.String
import Safe
import Text.Parsec
import Text.Printf
data URI = URI {
uriScheme :: Maybe String
, uriUserInfo :: Maybe String
, uriRegName :: Maybe String
, uriPort :: Maybe Integer
, uriPath :: String
, uriQuery :: Maybe String
, uriFragment :: Maybe String
} deriving (Eq)
nullURI :: URI
nullURI = URI {
uriScheme = Nothing
, uriRegName = Nothing
, uriUserInfo = Nothing
, uriPort = Nothing
, uriPath = ""
, uriQuery = Nothing
, uriFragment = Nothing
}
instance Show URI where
show u = concat [
maybe "" (++ ":") $ uriScheme u
, if not (isReference u) then "//" else ""
, maybe "" (++ "@") $ uriUserInfo u
, maybe "" (id) $ uriRegName u
, maybe "" (\s -> ":" ++ show s) $ uriPort u
, uriPath u
, maybe "" ("?" ++) $ uriQuery u
, maybe "" ("#" ++) $ uriFragment u
]
okInUserinfo :: Char -> Bool
okInUserinfo = satisfiesAny [isUnreserved, isSubDelim, (==':')]
okInQuery :: Char -> Bool
okInQuery = satisfiesAny [isPChar, (`elem` "/?")]
okInQueryItem :: Char -> Bool
okInQueryItem = isUnreserved
okInFragment :: Char -> Bool
okInFragment = okInQuery
okInPath :: Char -> Bool
okInPath = satisfiesAny [isPChar, (`elem` "/@")]
okInPathSegment :: Char -> Bool
okInPathSegment = satisfiesAny [isPChar, (== '@')]
parseURI :: String -> Maybe URI
parseURI s = either (const Nothing) (Just) $ parse uriP "user input" s
escapeChar :: (Char -> Bool) -> Char -> String
escapeChar f c = if f c && c /= '%' then [c] else concat $ map (printf "%%%0.2X") (encode [c])
escapeString :: (Char -> Bool) -> String -> String
escapeString f s = concat $ map (escapeChar f) s
isReference :: URI -> Bool
isReference u = all (isNothing) [uriRegName u, uriScheme u]
isRelative :: URI -> Bool
isRelative u = isReference u && (headDef ' ' (uriPath u) /= '/')
pairsToQuery :: [(String, String)] -> String
pairsToQuery = initSafe . foldl (\rest (k,v) -> concat [
rest
, escapeString (okInQueryItem) k
, "="
, escapeString (okInQueryItem) v
, "&"
]) ""
queryToPairs :: String -> [(String, String)]
queryToPairs q = either (const []) (id) $ parse urlEncodedPairsP "query" q
unescapeString :: String -> String
unescapeString s = either (const s) (id) $ parse (many $ percentEncodedP <|> anyChar) "escaped text" s
uriQueryItems :: URI -> [(String, String)]
uriQueryItems = maybe [] (queryToPairs) . uriQuery
pathToSegments :: String -> [String]
pathToSegments = explode '/'
uriPathSegments :: URI -> [String]
uriPathSegments = pathToSegments . uriPath
segmentsToPath :: [String] -> String
segmentsToPath ss = intercalate "/" $ map (escapeString (okInPathSegment)) ss
mergeURIs :: URI
-> URI
-> URI
mergeURIs t r = if isJust (uriScheme r) then
t { uriScheme = uriScheme r
, uriRegName = uriRegName r
, uriPort = uriPort r
, uriUserInfo = uriUserInfo r
, uriPath = dereferencePathString (uriPath r)
, uriQuery = uriQuery r
, uriFragment = uriFragment r
}
else
if isJust (uriRegName r) then
t { uriRegName = uriRegName r
, uriPort = uriPort r
, uriUserInfo = uriUserInfo r
, uriPath = dereferencePathString (uriPath r)
, uriQuery = uriQuery r
, uriFragment = uriFragment r
}
else
t { uriQuery = maybe (uriQuery r) (Just) (uriQuery t)
, uriPath = mergePathStrings (uriPath t) (uriPath r)
, uriFragment = uriFragment r
}
mergeURIStrings :: String -> String -> String
mergeURIStrings s1 s2 = show $ mergeURIs (fromMaybe nullURI $ parseURI s1) (fromMaybe nullURI $ parseURI s2)
mergePathStrings :: String -> String -> String
mergePathStrings p1 p2 = segmentsToPath $ mergePaths (pathToSegments p1) (pathToSegments p2)
mergePaths :: [String] -> [String] -> [String]
mergePaths p1 ("":p2) = ("":p2)
mergePaths p1 p2 = dereferencePath (p1 ++ p2)
dereferencePath :: [String] -> [String]
dereferencePath = dereferencePath' []
dereferencePathString :: String -> String
dereferencePathString = segmentsToPath . dereferencePath . pathToSegments
dereferencePath' :: [String] -> [String] -> [String]
dereferencePath' processed [] = processed
dereferencePath' processed ("..":ps) = dereferencePath' (initSafe processed) (ps)
dereferencePath' processed (".":ps) = dereferencePath' processed ps
dereferencePath' processed (p:ps) = dereferencePath' (processed ++ [p]) ps
sepByWSep p sep = sepByWSep1 p sep <|> return []
isGenDelim = (`elem` ":/?#[]@")
isSubDelim = (`elem` "!$&'()*+,;=")
isReserved c = isGenDelim c || isSubDelim c
isUnreserved c = isAlphaNum c || c `elem` "-._~"
isPChar = satisfiesAny [isUnreserved, isSubDelim, (`elem` "%:@")]
satisfiesAny :: [a -> Bool] -> a -> Bool
satisfiesAny fs a = or (map ($ a) fs)
sepByWSep1 p sep = do
first <- p
rest <- many $ do
sepV <- sep
pV <- p
return $ sepV ++ pV
return $ concat (first : rest)
percentEncodedP = do
string "%"
d1 <- hexDigit
d2 <- hexDigit
return $ chr (read $ "0x" ++ [d1,d2])
reservedP :: Stream s m Char => ParsecT s u m Char
reservedP = satisfy isReserved
unreservedP = satisfy isUnreserved
genDelimP :: Stream s m Char => ParsecT s u m Char
genDelimP = satisfy isGenDelim
subDelimP = satisfy isSubDelim
pCharP = satisfy isPChar
uriP = do
schemeV <- optionMaybe $ try schemeP
(authorityV, pathV) <- hierPartP
let (userinfoV, hostV, portV) = fromMaybe (Nothing, Nothing, Nothing) authorityV
queryV <- optionMaybe $ try $ do
string "?"
queryP
fragmentV <- optionMaybe $ try $ do
string "#"
fragmentP
return $ URI {
uriScheme = schemeV
, uriRegName = hostV
, uriPort = portV
, uriPath = pathV
, uriUserInfo = userinfoV
, uriQuery = queryV
, uriFragment = fragmentV
}
schemeP = do
l <- letter
ls <- many1 (alphaNum <|> oneOf "+-.")
string ":"
return (l:ls)
hierPartP = do
authorityV <- optionMaybe $ try $ do
string "//"
authorityP
pathV <- pathP
return (authorityV, pathV)
pathP = pathRootlessP <|> pathAbsoluteP <|> pathNoSchemeP <|> pathABEmptyP <|> pathEmptyP
pathABEmptyP = do
segs <- many $ do
string "/"
segmentV <- segmentP
return $ "/" ++ segmentV
return (concat segs)
pathAbsoluteP = do
string "/"
rest <- option "" $ do
s1 <- segmentNZP
segs <- many $ do
string "/"
v <- segmentP
return $ "/" ++ v
return $ concat (s1 : segs)
return $ "/" ++ rest
pathNoSchemeP = do
first <- segmentNZNCP
rest <- sepByWSep segmentP (string "/")
return $ first ++ rest
pathRootlessP = do
first <- segmentNZP
rest <- sepByWSep segmentP (string "/")
return $ first ++ rest
pathEmptyP = string ""
segmentP = many $ pCharP
segmentNZP = many1 $ pCharP
segmentNZNCP = many1 (subDelimP <|> unreservedP <|> oneOf "@%")
authorityP = do
userinfoV <- optionMaybe (try $ do
result <- userinfoP
string "@"
return result)
hostV <- hostP
portV <- optionMaybe (try $ do
string ":"
portP)
return (userinfoV, Just hostV, portV)
hostP = ipLiteralP <|> ipv4AddressP <|> regNameP
ipLiteralP = do
string "["
result <- ipv6AddressP <|> ipvFutureP
string "]"
return result
ipvFutureP = do
v <- string "v"
versionV <- many1 hexDigit
dot <- string "."
datV <- many1 (satisfy $ satisfiesAny [isUnreserved, isSubDelim, (==':')])
return $ concat [v, versionV, dot, datV]
ipv6AddressP = do
many1 $ oneOf "ABCDEF1234567890:"
ipv4AddressP = do
d1 <- decOctetP
string "."
d2 <- decOctetP
string "."
d3 <- decOctetP
string "."
d4 <- decOctetP
return $ concat [d1, ".", d2, ".", d3, ".", d4]
decOctetP = do
a1 <- countMinMax 1 3 digit
if read a1 > 255 then
fail "Decimal octet value too large"
else
return a1
regNameP = many (unreservedP <|> subDelimP <|> oneOf "%")
countMinMax m n p | m > 0 = do
a1 <- p
ar <- countMinMax (m1) (n1) p
return (a1:ar)
countMinMax _ n _ | n <= 0 = return []
countMinMax _ n p = option [] $ do
a1 <- p
ar <- countMinMax 0 (n1) p
return (a1:ar)
portP = do
digitV <- many digit
return $ read digitV
userinfoP = many $ satisfy $ satisfiesAny [isUnreserved, isSubDelim, (==':')]
queryP = many $ satisfy (isPChar) <|> oneOf "/?"
fragmentP = queryP
urlEncodedPairsP = many urlEncodedPairP
urlEncodedPairP = do
keyV <- manyTill (percentEncodedP <|> plusP <|> unreservedP) (char '=')
valueV <- manyTill (percentEncodedP <|> plusP <|> unreservedP) (skip (char '&') <|> eof)
return (keyV, valueV)
plusP = do
char '+'
return ' '
skip a = do
a
return ()
explode :: Char -> String -> [String]
explode c = unfoldr (\s -> if null s then Nothing else Just (takeWhile (/=c) s, (tailSafe . dropWhile (/=c)) s))