-- -- Copyright 2018, akashche at redhat.com -- -- Licensed under the Apache License, Version 2.0 (the "License"); -- you may not use this file except in compliance with the License. -- You may obtain a copy of the License at -- -- http://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, software -- distributed under the License is distributed on an "AS IS" BASIS, -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- See the License for the specific language governing permissions and -- limitations under the License. -- -- | -- Additional combinators and utilities for @Parsec@ library -- {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE Strict #-} module VtUtils.Parsec ( Parser -- combinators , parsecLineContains , parsecLinePrefix , parsecLineNoPrefix , parsecSkipLines , parsecSkipManyTill , parsecTry , parsecWhitespace -- non-combinator utils , parsecErrorToText , parsecParseFile , parsecParseText ) where import Prelude (Either(..), Int, IO, (-), (>), (.), ($), (<$>), error, return) import Data.List (foldl', intersperse) import Data.Monoid ((<>)) import Data.Text (Text, isInfixOf, isPrefixOf, pack, stripStart, unpack) import Data.Text.Lazy (fromChunks, toStrict) import Data.Text.Lazy.Builder (fromString, fromText, toLazyText) import Text.Parsec (ParseError, (<|>), char, lookAhead, manyTill, noneOf, oneOf, parse, skipMany, try) import Text.Parsec.Char (anyChar, string) import Text.Parsec.Error (Message(..), errorMessages, errorPos, messageString) import Text.Parsec.Pos (sourceColumn, sourceLine, sourceName) import Text.Parsec.Text.Lazy (Parser) import VtUtils.IO import VtUtils.Text -- combinators -- | Finds a line containing a specified substring -- -- Uses @LF@ as a line separator -- -- Resulting line doesn't contain a line separator -- -- Arguments: -- -- * @needle :: Text@: Substring to find -- -- Return value: Line that contains a specified substring -- parsecLineContains :: Text -> Parser Text parsecLineContains needle = do line <- pack <$> manyTill (noneOf ['\n']) (char '\n') if isInfixOf needle line then do return line else parsecLineContains needle -- | Finds a line with a specified prefix -- -- Uses @LF@ as a line separator -- -- Whitespace is stripped from the start of each line before checking for prefix -- -- Resulting line doesn't contain a line separator -- -- Arguments: -- -- * @prefix :: Text@: Prefix to find -- -- Return value: Line with the specified prefix -- parsecLinePrefix :: Text -> Parser Text parsecLinePrefix prefix = do line <- pack <$> manyTill (noneOf ['\n']) (char '\n') if isPrefixOf prefix (stripStart line) then do return line else parsecLinePrefix prefix -- | Finds a line that does not have a specified prefix -- -- Uses @LF@ as a line separator -- -- Whitespace is stripped from the start of each line before checking for prefix -- -- Resulting line doesn't contain a line separator -- -- Arguments: -- -- * @prefix :: Text@: Prefix that should be skipped -- -- Return value: First line that does not have a specified prefix -- parsecLineNoPrefix :: Text -> Parser Text parsecLineNoPrefix prefix = do line <- pack <$> manyTill (noneOf ['\n']) (char '\n') if isPrefixOf prefix (stripStart line) then parsecLineNoPrefix prefix else do return line -- | Skips a specified number of lines -- -- Uses @LF@ as a line separator -- -- Does not consume additional whitespace after the last line skipped (or between the lines) -- -- Arguments: -- -- * @count :: Int@: Number of lines to skip -- parsecSkipLines :: Int -> Parser () parsecSkipLines count = if count > 0 then do _ <- manyTill (noneOf ['\n']) (char '\n') parsecSkipLines (count - 1) else do return () -- | Skips all input until the specified substring is found -- -- Warning: all look-ahead data is kept in memory -- -- Arguments: -- -- * @needle :: Text@: Substring to find -- -- Return value: First line that does not have a specified prefix -- parsecSkipManyTill :: Text -> Parser () parsecSkipManyTill needle = do scan return () where scan = done <|> recur done = do _ <- try (lookAhead (string (unpack needle))) return () recur = do _ <- anyChar scan return () -- | The parser @parsecTry p@ behaves like parser p, except that it pretends -- that it hasn't consumed any input when an error occurs -- -- This is a re-export of [Text.Parsec.try](https://hackage.haskell.org/package/parsec-3.1.13.0/docs/Text-Parsec.html#v:try) -- under a different name to not conflict with [Control.Exception.try](https://hackage.haskell.org/package/base-4.12.0.0/docs/Control-Exception.html#v:try) -- -- Arguments: -- -- * @parser :: Parser a@: Parser to wrap into @try@ -- -- Return value: Resulting value from the specified parser -- parsecTry :: Parser a -> Parser a parsecTry = try -- | Skips one or more whitespace characters -- -- Note: Lexemes from [Text.Parsec.Token.TokenParser](https://hackage.haskell.org/package/parsec-3.1.13.0/docs/Text-Parsec-Token.html#v:TokenParser) -- can be used instead -- parsecWhitespace :: Parser () parsecWhitespace = skipMany (oneOf [' ', '\t', '\n', '\r']) -- | Formats @ParseError@ into @Text@ string -- -- Arguments: -- -- * @err :: ParseError@: @ParseError@ thrown by @Parsec@ -- -- Return value: @Text@ representation of a specified error -- parsecErrorToText :: ParseError -> Text parsecErrorToText err = toStrict $ toLazyText $ fromText "ParseError:" <> fromText " file: [" <> fromString (sourceName pos) <> fromText "]," <> fromText " line: [" <> fromText (textShow (sourceLine pos)) <> fromText "]," <> fromText " column: [" <> fromText (textShow (sourceColumn pos)) <> fromText "]," <> fromText " messages: [" <> msg <> "]" where prefix ms = case ms of (SysUnExpect _) -> "unexpected: " (UnExpect _) -> "unexpected: " (Expect _) -> "expected: " (Message _) -> "message: " errMsgToBuilder ms = fromText (prefix ms) <> fromString (messageString ms) pos = errorPos err msgList = errorMessages err builderList = errMsgToBuilder <$> msgList commaList = intersperse (fromText ", ") builderList msg = foldl' (<>) (fromText "") commaList -- | Lazily reads contents from a specified file and parses it using the specified parser -- -- File contents are decoded as @UTF-8@ -- -- Throws an error on file IO error or parsing error -- -- Arguments: -- -- * @parser :: Parser a@: Parser to use for the contents of the file -- * @path :: ParseError@: Path to a file to parse -- -- Return value: Resulting value from the specified parser -- parsecParseFile :: Parser a -> Text -> IO a parsecParseFile parser path = ioWithFileText path $ \tx -> case parse parser (unpack path) tx of Left err -> (error . unpack) (parsecErrorToText err) Right res -> return res -- | Parser a specified strict @Text@ string using a specified parser -- -- Note: parser is typed on a lazy @Text@ input (so it can also be used with @parsecParseFile@) -- -- Throws an error on parsing error -- -- Arguments: -- -- * @parser :: Parser a@: Parser to use for the contents of the file -- * @text :: Text@: @Text@ string to parse -- -- Return value: Resulting value from the specified parser -- parsecParseText :: Parser a -> Text -> a parsecParseText parser text = case parse parser "" (fromChunks [text]) of Left err -> (error . unpack) (parsecErrorToText err) Right res -> res