module Main where import BasePrelude import Test.Hspec import Conversion import Conversion.Text import Data.Text (Text) import qualified HTMLTokenizer.Parser import qualified ListT.Attoparsec import qualified ListT.HTMLParser import qualified ListT.Text import qualified ListT.HTMLParser as P main = hspec $ do it "Backtracking" $ do let text = "" parser = a <|> b where a = do P.openingTag P.openingTag P.openingTag return 1 b = do P.openingTag P.openingTag P.closingTag P.closingTag return 2 result <- parse parser text shouldBe result (Right 2) it "Complex" $ do let text = "" parser = a <|> b where a = do P.openingTag b <|> c P.openingTag return 1 where b = do ("b", _, _) <- P.openingTag P.closingTag c = do ("b", _, _) <- P.openingTag ("c", _, _) <- P.openingTag P.closingTag b = do P.openingTag P.openingTag P.openingTag P.closingTag P.closingTag P.closingTag return 2 result <- parse parser text shouldBe result (Right 2) -- | Scrape the body of a GET response using an HTML parser. parse :: ListT.HTMLParser.Parser IO a -> Text -> IO (Either Error a) parse parser = fmap (either (Left . parseSomeException) id) . try . fmap (either (Left . Error_Parsing) Right) . ListT.HTMLParser.run parser . ListT.Attoparsec.textParser HTMLTokenizer.Parser.token . ListT.Text.stream 2 where parseSomeException e = fromMaybe (error $ showString "Unexpected exception: " $ shows e $ "") $ Error_Lexing <$> fromException e data Error = -- | A tokenization failure Error_Lexing ListT.Attoparsec.ParsingFailure | -- | A token-stream parsing failure Error_Parsing ListT.HTMLParser.Error deriving (Show, Eq)