-- | Parser defintions -- -- Defines parsers used by the Tor Control protocol -- -- __Warning__: This function is used internally by 'Network.Anonymous.Tor' -- and using these functions directly is unsupported. The -- interface of these functions might change at any time without -- prior notice. -- module Network.Anonymous.Tor.Protocol.Parser ( quotedString , unquotedString , reply , key , keyValue , value , token , tokens ) where import Control.Applicative ((*>), (<$>), (<*), (<*>), (<|>)) import qualified Data.Attoparsec.ByteString as Atto import qualified Data.Attoparsec.ByteString.Char8 as Atto8 import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS8 import Data.Word (Word8) import qualified Network.Anonymous.Tor.Protocol.Parser.Ast as A -- | Ascii offset representation of a double quote. doubleQuote :: Word8 doubleQuote = 34 -- | Ascii offset representation of a single quote. singleQuote :: Word8 singleQuote = 39 -- | Ascii offset representation of a backslash. backslash :: Word8 backslash = 92 -- | Ascii offset representation of a minus '-' symbol minus :: Word8 minus = 45 -- | Ascii offset representation of a plus '+' symbol plus :: Word8 plus = 43 -- | Ascii offset representation of a space ' ' character space :: Word8 space = 32 -- | Ascii offset representation of an equality sign. equals :: Word8 equals = 61 -- | Parses a single- or double-quoted string, and returns all bytes within the -- value; the unescaping is beyond the scope of this function (since different -- unescaping mechanisms might be desired). quotedString :: Atto.Parser BS.ByteString quotedString = let quoted :: Word8 -- ^ The character used for quoting -> Atto.Parser BS.ByteString -- ^ The value inside the quotes, without the surrounding quotes quoted c = (Atto.word8 c *> escaped c <* Atto.word8 c) -- | Parses an escaped string, with an arbitrary surrounding quote type. escaped :: Word8 -> Atto.Parser BS.ByteString escaped c = BS8.concat <$> Atto8.many' -- Make sure that we eat pairs of backslashes; this will make sure -- that a string such as "\\\\" is interpreted correctly, and the -- ending quoted will not be interpreted as escaped. ( Atto8.string (BS8.pack "\\\\") -- This eats all escaped quotes and leaves them in tact; the unescaping -- is beyond the scope of this function. <|> Atto8.string (BS.pack [backslash, c]) -- And for the rest: eat everything that is not a quote. <|> (BS.singleton <$> Atto.satisfy (/= c))) in quoted doubleQuote <|> quoted singleQuote -- | An unquoted string is "everything until a whitespace or newline is reached". unquotedString :: Atto.Parser BS.ByteString unquotedString = Atto8.takeWhile1 (not . Atto8.isSpace) reply :: Atto.Parser [A.Line] reply = do -- A reply is a series of lines that look like 250-Foo or 250+Bar and then -- followed by a line that uses a space like 250 Wombat. -- -- Let's parse all these lines into a reply. replies <- Atto.many' (replyLine minus <|> replyLine plus) lastReply <- replyLine space return (replies ++ [lastReply]) where replyLine :: Word8 -> Atto.Parser A.Line replyLine c = A.Line <$> Atto8.decimal <*> (Atto.word8 c *> tokens) <* Atto8.endOfLine -- | Parses either a quoted value or an unquoted value value :: Atto.Parser BS.ByteString value = quotedString <|> unquotedString -- | Parses key and value keyValue :: Atto.Parser A.Token keyValue = do A.Token k _ <- key _ <- Atto.word8 equals v <- value return (A.Token k (Just v)) -- | Parses a key, which is anything until either a space has been reached, or -- an '=' is reached. key :: Atto.Parser A.Token key = let isKeyEnd '=' = True isKeyEnd c = Atto8.isSpace c in flip A.Token Nothing <$> Atto8.takeWhile1 (not . isKeyEnd) -- | A Token is either a Key or a Key/Value combination. token :: Atto.Parser A.Token token = Atto.skipWhile Atto8.isHorizontalSpace *> (keyValue <|> key) -- | Parser that reads keys or key/values tokens :: Atto.Parser [A.Token] tokens = Atto.many' token