{- |
Copyright   : (c) 2024 Pierre Le Marre
Maintainer  : dev@wismill.eu
Stability   : experimental

Miscellaneous bits common to various parsers
-}
module UCD.Parser.Common (
    readCodePoint,
    readCodePointM,
    UnicodeRange (..),
    parseRange,
    pattern Comma,
    pattern HashTag,
    pattern NewLine,
    pattern Period,
    pattern SemiColon,
    pattern Slash,
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.Char (chr)
import Data.Word (Word8)

--------------------------------------------------------------------------------
-- Code point parser
--------------------------------------------------------------------------------

{- | Convert the hexadecimal spelling of a code point into the corresponding
'Char'.

The bytes are prefixed with @0x@ and handed to 'read', and the resulting number
is converted with 'chr'.

/Warning:/ this function is partial: it raises an error on invalid input.

>>> readCodePoint "0061"
'a'

@since 0.1.0
-}
readCodePoint ∷ B.ByteString → Char
readCodePoint raw = chr (read hexLiteral)
  where
    -- Prefixing with "0x" lets 'read' interpret the digits as hexadecimal.
    hexLiteral = B8.unpack ("0x" <> raw)

{- | Variant of 'readCodePoint' that maps the empty string to 'Nothing'.

Any non-empty input is passed to 'readCodePoint' unchanged.

/Warning:/ a non-empty invalid input still raises an error.

>>> readCodePointM "0061"
Just 'a'

>>> readCodePointM ""
Nothing

See also: 'readCodePoint'.

@since 0.1.0
-}
readCodePointM ∷ B.ByteString → Maybe Char
readCodePointM raw =
    if B.null raw
        then Nothing
        else Just (readCodePoint raw)

--------------------------------------------------------------------------------
-- Code point range parser
--------------------------------------------------------------------------------

{- | Either a single code point or an inclusive-looking pair of code points
tagged with a value of type @a@.

Note that '_last' and '_rangeName' only exist on 'CharRange'.

@since 0.1.0
-}
data UnicodeRange a
    = -- | A lone code point.
      SingleChar
        { _first ∷ !Char
        -- ^ First code point (the only one for 'SingleChar').
        }
    | -- | A pair of code points with an attached name.
      CharRange
        { _first ∷ !Char
        , _last ∷ !Char
        -- ^ Last code point.
        , _rangeName ∷ !a
        -- ^ Value attached to the range.
        }
    deriving (Eq, Show)

{- | Parse a single code point @AAAA@ or a range @AAAA..BBBB@.

The input is split at the first period. When nothing follows the first part,
the result is a 'SingleChar'. Otherwise two bytes (expected to be the @..@
separator, which is not checked) are skipped and the remainder is parsed as
the last code point of a 'CharRange' named @()@.

Code points are parsed with 'readCodePoint', so invalid input raises an error.

@since 0.1.0
-}
parseRange ∷ B.ByteString → UnicodeRange ()
parseRange raw
    | B.null afterFirst = SingleChar firstChar
    | otherwise = CharRange firstChar lastChar ()
  where
    (beforePeriod, afterFirst) = B.span (/= Period) raw
    firstChar = readCodePoint beforePeriod
    -- Skip the two separator bytes before reading the upper bound.
    lastChar = readCodePoint (B.drop 2 afterFirst)

--------------------------------------------------------------------------------
-- Char8 patterns
--------------------------------------------------------------------------------

-- | Byte @0x0a@: @'\\n'@
pattern NewLine ∷ Word8
pattern NewLine = 0x0a

-- | Byte @0x23@: @#@
pattern HashTag ∷ Word8
pattern HashTag = 0x23

-- | Byte @0x2c@: @,@
pattern Comma ∷ Word8
pattern Comma = 0x2c

-- | Byte @0x2e@: @.@
pattern Period ∷ Word8
pattern Period = 0x2e

-- | Byte @0x2f@: @\/@
pattern Slash ∷ Word8
pattern Slash = 0x2f

-- | Byte @0x3b@: @;@
pattern SemiColon ∷ Word8
pattern SemiColon = 0x3b