module Language.Symantic.Grammar.Terminal where
import Control.Monad
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Prelude hiding (any)
import qualified Data.Bool as Bool
import qualified Data.Char as Char
import qualified Data.List as List
import qualified Data.Text as Text
import Language.Symantic.Grammar.Fixity
import Language.Symantic.Grammar.EBNF
newtype Terminal g a
= Terminal { unTerminal :: g a }
deriving (Functor, Gram_Terminal)
deriving instance Gram_Rule g => Gram_Rule (Terminal g)
class Gram_Terminal g where
any :: g Char
but :: Terminal g Char -> Terminal g Char -> Terminal g Char
eoi :: g ()
char :: Char -> g Char
string :: String -> g String
unicat :: Unicat -> g Char
range :: (Char, Char) -> g Char
deriving instance Gram_Terminal RuleEBNF
instance Gram_Terminal EBNF where
any = ebnf_const "_"
Terminal (EBNF f) `but` Terminal (EBNF g) =
Terminal $ EBNF $ \bo po -> parenInfix po op $
f bo (op, SideL) <> " - " <> g bo (op, SideR)
where op = infixL 6
eoi = ebnf_const "eoi"
char = ebnf_const . escape
where
escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
escape c = Text.concat ["U+", Text.pack $ show $ Char.ord c]
string s =
case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
(ps, "") -> raw ps
("", [c]) -> "" <$ char c
(ps, [c]) -> "" <$ raw ps <* char c
("", c:rs) -> "" <$ char c <* string rs
(ps, c:rs) -> "" <$ raw ps <* char c <* string rs
where
raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
unicat = ebnf_const . Text.pack . show
range (l, h) = ebnf_const $ Text.concat
[ runEBNF $ char l
, "…"
, runEBNF $ char h
]
instance IsString (EBNF String) where
fromString = string
data Unicat
= Unicat_Letter
| Unicat_Mark
| Unicat_Number
| Unicat_Punctuation
| Unicat_Symbol
| Unicat Char.GeneralCategory
deriving (Eq, Show)
unicode_categories :: Unicat -> [Char.GeneralCategory]
unicode_categories c =
case c of
Unicat_Letter ->
[ Char.UppercaseLetter
, Char.LowercaseLetter
, Char.TitlecaseLetter
, Char.ModifierLetter
, Char.OtherLetter
]
Unicat_Mark ->
[ Char.NonSpacingMark
, Char.SpacingCombiningMark
, Char.EnclosingMark
]
Unicat_Number ->
[ Char.DecimalNumber
, Char.LetterNumber
, Char.OtherNumber
]
Unicat_Punctuation ->
[ Char.ConnectorPunctuation
, Char.DashPunctuation
, Char.OpenPunctuation
, Char.ClosePunctuation
, Char.OtherPunctuation
]
Unicat_Symbol ->
[ Char.MathSymbol
, Char.CurrencySymbol
, Char.ModifierSymbol
, Char.OtherSymbol
]
Unicat cat -> [cat]