{-| Module : Language.Change.Quote Copyright : (c) Owen Bechtel, 2023 License : MIT Maintainer : ombspring@gmail.com Stability : experimental -} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE TupleSections #-} module Language.Change.Quote (ch, chs) where import Data.Data (Data) import Data.Void (Void) import Data.List.NonEmpty (NonEmpty(..), toList, cons) import Data.Char (isAlphaNum, isAsciiUpper, isSpace) import Data.Bifunctor (first) import Control.Monad (void) import qualified Data.Map as Map import qualified Data.Set as Set import Data.Set (Set) import qualified Text.Megaparsec as M import Text.Megaparsec ((<|>)) import qualified Text.Megaparsec.Char as MC import qualified Text.Megaparsec.Char.Lexer as Lexer import qualified Control.Monad.Combinators.NonEmpty as NE import Control.Monad.Reader (ReaderT(..), local) import qualified Language.Haskell.TH as TH import Language.Haskell.TH.Quote(QuasiQuoter(..), dataToExpQ) import Language.Haskell.TH.Lib (appE, conE, varE) import Data.Generics.Aliases (extQ) import Language.Change (Change(..), Env(..), PSet(..), Pattern(..), applyChanges) data CharS = Lit Char | AntiQ String -- antiquoted identifier | CList [Char] -- does not exist in syntax; used for compilation of antiquotes deriving (Data) newtype SetS = SetS (NonEmpty CharS) deriving (Data) data PSetS = PSetS SetS Bool deriving (Data) data PatternS = OneS PSetS | OptionalS PSetS | ManyS PSetS deriving (Data) data EnvS = EnvS [PatternS] [PatternS] deriving (Data) type Pairing a b = NonEmpty (a, b) type SoundList = NonEmpty CharS type EnvList = NonEmpty EnvS newtype Rep = Rep { convertRep :: String } deriving (Data) data StatementS = Simple SoundList Rep EnvList | SoundSplit SoundList (Pairing Rep EnvList) | EnvSplit (Pairing SoundList Rep) EnvList deriving (Data) newtype ChangeS = ChangeS (NonEmpty StatementS) deriving (Data) type Parser = ReaderT Bool (M.Parsec Void String) -- True = allow newlines allowNewlines :: Parser a -> Parser a allowNewlines = local (const True) isSound, isIdentifier :: Char -> Bool isSound c = not (isSpace c || isAsciiUpper c || c `elem` ",>/;{}[]%_!?*") isIdentifier c = isAlphaNum c || c == '_' || c == '\'' spaces :: Parser () spaces = ReaderT $ \allowNs -> Lexer.space (if allowNs then MC.space1 else MC.hspace1) lineComment M.empty where lineComment = Lexer.skipLineComment "//" symbol :: Char -> Parser () symbol c = M.single c >> spaces charS :: Parser CharS charS = M.choice [ lit, oneChar, multipleChar ] where lit = Lit <$> M.satisfy isSound <* spaces oneChar = AntiQ . pure <$> M.satisfy isAsciiUpper <* spaces multipleChar = AntiQ <$ symbol '[' <*> M.takeWhile1P (Just "Haskell identifier") isIdentifier <* spaces <* symbol ']' setS :: Parser SetS setS = M.choice [ SetS . pure <$> charS , SetS <$ symbol '{' <*> NE.some charS <* symbol '}' ] pSetS :: Parser PSetS pSetS = PSetS <$> setS <*> M.choice [ False <$ symbol '!', return True ] patternS :: Parser PatternS patternS = do pset <- pSetS M.choice [ OptionalS pset <$ symbol '?' , ManyS pset <$ symbol '*' , return (OneS pset) ] envS :: Parser EnvS envS = EnvS <$> (reverse <$> M.many patternS) <* symbol '_' <*> M.many patternS pairing :: Parser a -> Parser () -> Parser b -> Parser (Pairing a b) pairing pa psep pb = do allowNewlines (symbol '{') ps <- allowNewlines (NE.sepBy1 pair (symbol ';')) symbol '}' return ps where pair = (,) <$> pa <* psep <*> pb soundList :: Parser SoundList soundList = NE.sepBy1 charS (symbol ',') envList :: Parser EnvList envList = NE.sepBy1 envS (symbol ',') rep :: Parser Rep rep = M.choice [ Rep <$> M.takeWhile1P (Just "phoneme") isSound <* spaces , Rep "" <$ symbol '%' ] statementS :: Parser StatementS statementS = envSplit <|> other where envSplit = EnvSplit <$> pairing soundList (symbol '>') rep <* symbol '/' <*> envList other = do sounds <- soundList symbol '>' M.choice [ SoundSplit sounds <$> pairing rep (symbol '/') envList , Simple sounds <$> rep <* symbol '/' <*> envList ] someNewlines :: Parser () someNewlines = void (M.some (symbol '\n' <|> symbol '\r')) changeS :: Parser ChangeS changeS = ChangeS <$> NE.sepEndBy1 statementS someNewlines bulletedChanges :: Parser (NonEmpty ChangeS) bulletedChanges = do symbol '*' stmt <- statementS newline <- M.option False (True <$ someNewlines) if newline then M.choice [ (ChangeS (stmt :| []) `cons`) <$> bulletedChanges , (\(stmts, cs) -> ChangeS (stmt `cons` stmts) :| cs) <$> nonBulleted , return (ChangeS (stmt :| []) :| []) ] else return (ChangeS (stmt :| []) :| []) where nonBulleted :: Parser (NonEmpty StatementS, [ChangeS]) nonBulleted = do stmt <- statementS newline <- M.option False (True <$ someNewlines) if newline then M.choice [ (\cs -> (stmt :| [], toList cs)) <$> bulletedChanges , first (stmt `cons`) <$> nonBulleted , return (stmt :| [], []) ] else return (stmt :| [], []) total :: Parser a -> Parser a total p = allowNewlines spaces >> (p <* M.eof) {-| Compile a sound change specification into a value of type 'Change' 'Char'. Example: @ setV = "aeiou" change1 = [ch| m, n, ŋ > { m \/ _{mpb}; n \/ _{ntd}; ŋ \/ _{ŋkg} } { p > b; t > d; k > g } \/ V_V |] @ The sound change in this example consists of two statements separated by a newline. The first statement says \"m, n, and ŋ become \/m\/ when followed by m, p, or b, \/n\/ when followed by n, t, or d, and \/ŋ\/ when followed by ŋ, k, or g.\" The second statement says that voiceless stops (p, t, and k) become voiced when between two vowels. The uppercase @V@ here is shorthand for \"any element of @setV@.\" All uppercase letters are interpreted in this way, which means they are not allowed as phonemes. Whitespace characters, as well as the following symbols ( @,>\/;{}[]%_!?*@ ) are also not allowed as phonemes. All other unicode characters are allowed. (For example, you can use @#@ as a \"phoneme\" representing the start or end of a word.) Here's another example: @ change2 = [ch| V > % \/ s_{ptk} \/\/ vowel loss after \/s\/ before stops { o > ø; u > y } \/ _V!*{ji} \/\/ umlaut before \/j\/ and \/i\/ (with optional consonants in between) y > i \/ _ \/\/ unconditional shift |] @ This example illustrates several features: * You can create line comments with @\/\/@. * The left side of @>@ need not be a single phoneme; it can also be a set of phonemes (in this case @V@). * The symbol @%@ represents the empty string. * Certain suffixes can be added to phonemes and phoneme-sets to create more complex patterns: * @X!@ matches anything /not/ in the set @X@. * @X*@ matches zero or more elements of @X@. * @X?@ matches zero or one elements of @X@. * There are also the combinations @X!*@ and @X!?@. * To create an unconditional sound change, use the empty environment @_@. * The statements in a sound change are applied in a single traversal. This means that the same phoneme cannot be affected by more than one statement. For example, the change above would convert \"yni\" into \"ini,\" and \"uni\" into \"yni.\" (If you want to sequence multiple changes, use 'chs'.) * If multiple statements are applicable to the same phoneme, the first one is applied. For example, the change above would convert \"soki\" into \"ski\", not \"søki.\" Other noteworthy things: * As mentioned above, capital letters refer to sets of characters: @V@ refers to @setV@, @N@ refers to @setN@, and so on. To refer to sets with longer names, you can use square brackets; for example, if you define @setNasal :: [Char]@, you can refer to it with @[Nasal]@. * It is possible to have multiple conditions in a single statement. For example, @e > i / _a, a_@ means \"\/e\/ becomes \/i\/ when before \/a\/ /or/ after \/a\/.\" * The parser is whitespace-sensitive. Horizontal space characters can go almost anywhere, but newlines are only allowed between statements (where they are required) and inside of @{}@ blocks. -} ch :: QuasiQuoter ch = QuasiQuoter { quoteExp = makeQuoter (total changeS) (varE 'convertChangeS) , quotePat = undefined , quoteType = undefined , quoteDec = undefined } {-| Compile a sequence of sound changes into a value of type @['Change' 'Char']@. Each change is preceded by a bullet @*@. For example: @ changes = [chs| * p > f \/ _{tk} t > θ \/ _{pk} k > x \/ _{pt} \/\/ these three statements are applied in a single traversal * x > ç \/ i_ \/\/ this statement is applied after the ones before it * i > j \/ _V |] @ For example, @'applyChanges' changes "ikt"@ would return @"içt"@. -} chs :: QuasiQuoter chs = QuasiQuoter { quoteExp = makeQuoter (toList <$> total bulletedChanges) (appE (varE 'map) (varE 'convertChangeS)) , quotePat = undefined , quoteType = undefined , quoteDec = undefined } initialState :: s -> M.SourcePos -> M.State s Void initialState input position = M.State { M.stateInput = input , M.stateOffset = 0 , M.statePosState = M.PosState { M.pstateInput = input , M.pstateOffset = 0 , M.pstateSourcePos = position , M.pstateTabWidth = M.defaultTabWidth , M.pstateLinePrefix = "" } , M.stateParseErrors = [] } makeQuoter :: (Data a) => Parser a -> TH.ExpQ -> String -> TH.ExpQ makeQuoter parse convert input = do loc <- TH.location let file = TH.loc_filename loc (line, col) = TH.loc_start loc state = initialState input (M.SourcePos file (M.mkPos line) (M.mkPos col)) case snd (M.runParser' (runReaderT parse False) state) of Left errors -> fail (M.errorBundlePretty errors) Right change -> let change' = dataToExpQ (const Nothing `extQ` antiquote) change in appE convert change' antiquote :: CharS -> Maybe TH.ExpQ antiquote = \case Lit _ -> Nothing AntiQ s -> Just (appE (conE 'CList) (varE (TH.mkName ("set" ++ s)))) CList _ -> Nothing badAntiQ :: a badAntiQ = error "Language.Change.Quote: Unexpected AntiQ at runtime.\nThis error should never happen." convertCharList :: CharS -> [Char] convertCharList = \case Lit c -> [c] AntiQ _ -> badAntiQ CList cs -> cs convertCharSet :: CharS -> Set Char convertCharSet = \case Lit c -> Set.singleton c AntiQ _ -> badAntiQ CList cs -> Set.fromList cs convertSetS :: SetS -> Set Char convertSetS (SetS cs) = foldMap convertCharSet cs convertPSetS :: PSetS -> PSet Char convertPSetS (PSetS set b) = PSet (convertSetS set) b convertPatternS :: PatternS -> Pattern Char convertPatternS = \case OneS set -> One (convertPSetS set) OptionalS set -> Optional (convertPSetS set) ManyS set -> Many (convertPSetS set) convertEnvS :: EnvS -> Env Char convertEnvS (EnvS e1 e2) = Env (map convertPatternS e1) (map convertPatternS e2) convertEnvPair :: (Rep, EnvList) -> [(String, Env Char)] convertEnvPair (rp, envs) = let rp' = convertRep rp in map (\env -> (rp', convertEnvS env)) (toList envs) convertStatementS :: StatementS -> [(Char, [(String, Env Char)])] convertStatementS = \case Simple sounds rp envs -> let pairs = convertEnvPair (rp, envs) in map (, pairs) (concatMap convertCharList sounds) SoundSplit sounds envPairs -> let pairs = concatMap convertEnvPair envPairs in map (, pairs) (concatMap convertCharList sounds) EnvSplit soundPairs envs -> let envs' = map convertEnvS (toList envs) in concatMap (\(sounds, rp) -> let rp' = convertRep rp pairs = map (rp', ) envs' in map (, pairs) (concatMap convertCharList sounds)) soundPairs convertChangeS :: ChangeS -> Change Char convertChangeS (ChangeS stmts) = Change (Map.fromListWith (flip (++)) (concatMap convertStatementS stmts))