{-# LANGUAGE OverloadedStrings #-}
module Hasmin.Parser.Primitives
( ident
, escape
, unicode
, nmstart
, nmchar
, int
, int'
, digits
) where
import Control.Applicative ((<|>), some, many)
import Data.Attoparsec.Text (Parser, (<?>))
import Data.Monoid ((<>))
import Data.Text.Lazy.Builder (Builder)
import Data.Text (Text)
import qualified Data.Attoparsec.Text as A
import qualified Data.Char as C
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as B
escape :: Parser Builder
escape = unicode
<|> (mappend <$> (B.singleton <$> A.char '\\') <*> (B.singleton <$> A.satisfy cond))
<?> "not an escape token: {unicode}|\\\\[^\\n\\r\\f0-9a-f]"
where cond c = c /= '\n'
&& c /= '\r'
&& c /= '\f'
&& (not . C.isHexDigit) c
unicode :: Parser Builder
unicode = do
backslash <- A.char '\\'
hexChars <- A.takeWhile1 C.isHexDigit
_ <- A.option mempty (A.string "\r\n" <|> (T.singleton <$> A.satisfy ws))
if T.length hexChars <= 6
then pure $ B.singleton backslash <> B.fromText hexChars
else fail "unicode escaped character with length greater than 6"
where ws x = x == ' ' || x == '\n' || x == '\r' || x == '\t' || x == '\f'
ident :: Parser Text
ident = do
dash <- (B.fromText <$> A.string "--") <|> (B.singleton <$> A.char '-') <|> pure mempty
ns <- nmstart
nc <- mconcat <$> many nmchar
pure $ TL.toStrict (B.toLazyText (dash <> ns <> nc))
nmstart :: Parser Builder
nmstart = B.singleton <$> A.satisfy (\c -> C.isAlpha c || (not . C.isAscii) c || c == '_')
<|> escape
<?> "not an nmstart token: [_a-z]|{nonascii}|{escape}"
nmchar :: Parser Builder
nmchar = B.singleton <$> A.satisfy cond <|> escape
where cond x = C.isAlphaNum x || x == '_' || x == '-'
|| (not . C.isAscii) x
int' :: Parser String
int' = do
sign <- A.char '-' <|> pure '+'
d <- digits
case sign of
'+' -> pure d
'-' -> pure (sign:d)
_ -> error "int': parsed a number starting with other than [+|-]"
int :: Parser Int
int = read <$> int'
digits :: Parser String
digits = some A.digit