{-# LANGUAGE ScopedTypeVariables #-} -- | -- Module : Language.IPA.Parser -- Copyright : (c) 2021 Rory Tyler Hayford -- -- License : BSD-3-Clause -- Maintainer : rory.hayford@protonmail.com -- Stability : experimental -- Portability : GHC -- -- Parser for IPA literals module Language.IPA.Parser ( -- * IPA parsing -- ** Parsing single segments parseSegment , segmentP -- ** Parsing syllables , parseSyllable , syllableP , parseSyllables , syllablesP -- * X-SAMPA parsing -- ** Parsing single segments , parseSegmentXSampa , segmentXSampaP -- ** Parsing syllables , parseSyllableXSampa , parseSyllablesXSampa , syllableXSampaP ) where import Control.Applicative ( Alternative((<|>), some, many) ) import Data.Attoparsec.Text ( IResult(Fail, Done, Partial), Parser ) import qualified Data.Attoparsec.Text as P import Data.Foldable ( asum, foldl' ) import Data.Functor ( ($>), (<&>) ) import Data.Text ( Text ) import Data.Text.Normalize ( NormalizationMode(NFD), normalize ) import Language.IPA.Types -- | Parse a single literal segment in IPA notation, returning 'InvalidIPA' upon -- failure and a 'Segment' upon success. Note that supplying two or more segment -- literals will cause the parse to fail -- -- >>> parseSegment "ɨːː" -- Right (WithSegmentalFeature (Length OverLong) (Vowel Close Central Unrounded)) -- -- >>> parseSegment "ɨːːb" -- Left (InvalidIPA "Failed to parse character 'b'") parseSegment :: Text -> Either IPAException Segment parseSegment t = handleResult InvalidIPA msg (P.parse (segmentP <* P.endOfInput) (normalize NFD t)) where msg = "Failed to parse character(s):" -- | Parse a literal syllable in IPA notation, returning 'InvalidIPA' upon -- failure and a 'Syllable' upon success, where the syllable is parameterized by -- some type @t@ satisfying a 'MultiSegment' constraint. Note that, as with -- 'parseSegment', supplying two or more syllable literals will cause the parse -- to fail -- -- >>> parseSyllable @[] "ma˧˥" -- Right (WithSuprasegmentalFeature (LexicalToneContour HighRising) -- (Syllable [Consonant (Pulmonic Voiced Bilabial Nasal),Vowel Open Front Unrounded])) parseSyllable :: MultiSegment t => Text -> Either IPAException (Syllable t) parseSyllable t = handleResult InvalidIPA msg (P.parse (syllableP <* P.endOfInput) (normalize NFD t)) where msg = "Failed to parse segment(s):" -- | Parse several syllables into a container parameterized by the same type as that -- parameterizing the individual 'Syllable's. Whitespace between syllables is interpreted -- as a syllable boundary -- -- >>> parseSyllables @[] "haːj˧ ɓaː˧" -- Right [ WithSuprasegmentalFeature (LevelLexicalTone MidTone) -- (Syllable [ Consonant (Pulmonic Voiceless Glottal (Fricative NonSibilant)) -- , WithSegmentalFeature (Length Long) (Vowel Open Front Unrounded) -- , Consonant (Pulmonic Voiced Palatal Approximant) -- ]) -- , WithSuprasegmentalFeature (LevelLexicalTone MidTone) -- (Syllable [ Consonant (Implosive Voiced Bilabial) -- , WithSegmentalFeature (Length Long) (Vowel Open Front Unrounded) -- ]) -- ] parseSyllables :: (MultiSegment t, Monoid (t (Syllable t))) => Text -> Either IPAException (t (Syllable t)) parseSyllables t = handleResult InvalidIPA msg (P.parse (syllablesP <* P.endOfInput) (normalize NFD t)) where msg = "Failed to parse syllable:" -- | Parser for syllables. This function is exposed to allow users to create -- more specific parsers for aggregations of multiple syllables syllableP :: MultiSegment t => Parser (Syllable t) syllableP = withStressP <|> withFeaturesP <|> Syllable <$> justSegments where justSegments = someT segmentP withFeaturesP = foldr WithSuprasegmentalFeature <$> (Syllable <$> justSegments) <*> some suprasegmentalFeatureP withStressP = foldr WithSuprasegmentalFeature <$> withStress <*> many suprasegmentalFeatureP where withStress = WithSuprasegmentalFeature <$> stressP <*> (Syllable <$> justSegments) -- | Parser for segments. This function is exposed to allow users to create -- more specific parsers for multi-segment sequences segmentP :: Parser Segment segmentP = withFeatureP <|> pureSegmentP <|> optionalP where pureSegmentP = vowelP <|> Consonant <$> consonantP withFeatureP = foldr WithSegmentalFeature <$> pureSegmentP <*> some segmentalFeatureP optionalP = Optional <$> ("(" *> (withFeatureP <|> pureSegmentP) <* ")") -- | Parser for poly-syllabic sequences, with individual syllables separated -- by (optional) whitespace syllablesP :: (MultiSegment t, Monoid (t (Syllable t))) => Parser (t (Syllable t)) syllablesP = someT withWS where withWS = syllableP <* many P.space consonantP :: Parser Consonant consonantP = clickP <|> implosiveP <|> ejectiveP <|> doublyArticulatedP <|> pulmonicP vowelP :: Parser Segment vowelP = triphthongP <|> diphthongP <|> Vowel <$> pureP where diphthongP = Diphthong <$> pureP <*> pureP triphthongP = Triphthong <$> pureP <*> pureP <*> pureP pv = Pure lowered = diacriticP '\x031e' pureP = asum [ "e" <* lowered $> pv Mid Front Unrounded , "ø" <* lowered $> pv Mid Front Rounded , "ɤ" <* lowered $> pv Mid Back Unrounded , "o" <* lowered $> pv Mid Back Rounded , "i" $> pv Close Front Unrounded , "y" $> pv Close Front Rounded , "ɨ" $> pv Close Central Unrounded , "ʉ" $> pv Close Central Rounded , "ɯ" $> pv Close Back Unrounded , "u" $> pv Close Back Rounded , "ɪ" $> pv NearClose Front Unrounded , "ʏ" $> pv NearClose Front Rounded , "ʊ" $> pv NearClose Back Rounded , "e" $> pv CloseMid Front Unrounded , "ø" $> pv CloseMid Front Rounded , "ɘ" $> pv CloseMid Central Unrounded , "ɵ" $> pv CloseMid Central Rounded , "ɤ" $> pv CloseMid Back Unrounded , "o" $> pv CloseMid Back Rounded , "ə" $> pv Mid Central Unrounded , "ɛ" $> pv OpenMid Front Unrounded , "œ" $> pv OpenMid Front Rounded , "ɜ" $> pv OpenMid Central Unrounded , "ɞ" $> pv OpenMid Central Rounded , "ʌ" $> pv OpenMid Back Unrounded , "ɔ" $> pv OpenMid Back Rounded , "æ" $> pv NearOpen Front Unrounded , "ɐ" $> pv NearOpen Central Unrounded , "a" $> pv Open Front Unrounded , "ɶ" $> pv Open Front Rounded , "ä" $> pv Open Central Unrounded , "ɑ" $> pv Open Back Unrounded , "ɒ" $> pv Open Back Rounded ] pulmonicP :: Parser Consonant pulmonicP = asum [ biliabialP , labioDentalP , dentalP , postAlveolarP , retroflexP , palatalP , velarP , uvularP , pharyngealP , glottalP -- Alveolars should go at the end; alveolar plosives appear in -- several affricates in other places of articulation , alveolarP ] where biliabialP = asum cs where bc v = Pulmonic v Bilabial cs = [ "m" $> bc Voiced Nasal , doubleArticulated "p" "ɸ" $> bc Voiced (Affricate NonSibilant) , doubleArticulated "b" "β" $> bc Voiceless (Affricate NonSibilant) , "β" $> bc Voiced (Fricative NonSibilant) , "ɸ" $> bc Voiceless (Fricative NonSibilant) , "ʙ" $> bc Voiced Trill , "b" $> bc Voiced Plosive , "p" $> bc Voiceless Plosive ] labioDentalP = asum cs where ldc v = Pulmonic v LabioDental cs = [ "ɱ" $> ldc Voiced Nasal , "f" $> ldc Voiceless (Fricative NonSibilant) , "v" $> ldc Voiced (Fricative NonSibilant) , "ʋ" $> ldc Voiced Approximant , "ⱱ" $> ldc Voiced Flap ] dentalP = "θ" $> dc Voiceless (Fricative NonSibilant) <|> "ð" $> dc Voiced (Fricative NonSibilant) where dc v = Pulmonic v Dental alveolarP = asum cs where ac v = Pulmonic v Alveolar cs = [ "n" $> ac Voiced Nasal , doubleArticulated "t" "s" $> ac Voiced (Affricate Sibilant) , doubleArticulated "d" "z" $> ac Voiceless (Affricate Sibilant) , "s" $> ac Voiceless (Fricative Sibilant) , "z" $> ac Voiced (Fricative Sibilant) , "ɹ" $> ac Voiced Approximant , "ɾ" $> ac Voiced Flap , "r" $> ac Voiced Trill , "ɬ" $> ac Voiceless LateralFricative , "ɮ" $> ac Voiced LateralFricative , "l" $> ac Voiced LateralApproximant , "ɺ" $> ac Voiced LateralFlap , "tɬ" $> ac Voiceless LateralAffricate , "dɮ" $> ac Voiced LateralAffricate , "t" $> ac Voiceless Plosive , "d" $> ac Voiced Plosive ] postAlveolarP = doubleArticulated "t" "ʃ" $> pc Voiceless (Affricate Sibilant) <|> doubleArticulated "d" "ʒ" $> pc Voiced (Affricate Sibilant) <|> "ʃ" $> pc Voiceless (Fricative Sibilant) <|> "ʒ" $> pc Voiced (Fricative Sibilant) where pc v = Pulmonic v PostAlveolar retroflexP = asum cs where rc v = Pulmonic v Retroflex cs = [ "ɳ" $> rc Voiced Nasal , doubleArticulated "ʈ" "ʂ" $> rc Voiceless (Affricate Sibilant) , doubleArticulated "ɖ" "ʐ" $> rc Voiced (Affricate Sibilant) , "ʂ" $> rc Voiceless (Fricative Sibilant) , "ʐ" $> rc Voiced (Fricative Sibilant) , "ɻ" $> rc Voiced Approximant , "ɽ" $> rc Voiced Flap , "ɽr" $> rc Voiced Trill , "ɭ" $> rc Voiced LateralApproximant , "ʈ" $> rc Voiceless Plosive , "ɖ" $> rc Voiced Plosive ] palatalP = asum cs where pc v = Pulmonic v Palatal cs = [ "ɲ" $> pc Voiced Nasal , doubleArticulated "t" "ɕ" $> pc Voiceless (Affricate Sibilant) , doubleArticulated "d" "ʑ" $> pc Voiced (Affricate Sibilant) , doubleArticulated "c" "ç" $> pc Voiceless (Affricate NonSibilant) , doubleArticulated "ɟ" "ʝ" $> pc Voiced (Affricate NonSibilant) , "ɕ" $> pc Voiceless (Fricative Sibilant) , "ʑ" $> pc Voiced (Fricative Sibilant) , "ç" $> pc Voiceless (Fricative NonSibilant) , "ʝ" $> pc Voiced (Fricative NonSibilant) , "j" $> pc Voiced Approximant , "ʎ" $> pc Voiced LateralApproximant , "c" $> pc Voiceless Plosive , "ɟ" $> pc Voiced Plosive ] velarP = asum cs where vc v = Pulmonic v Velar cs = [ "ŋ" $> vc Voiced Nasal , doubleArticulated "k" "x" $> vc Voiceless (Affricate NonSibilant) , doubleArticulated "g" "ɣ" $> vc Voiced (Affricate NonSibilant) , "x" $> vc Voiceless (Fricative NonSibilant) , "ɣ" $> vc Voiced (Fricative NonSibilant) , "ɰ" $> vc Voiced Approximant , "ʟ" $> vc Voiced LateralApproximant , "k" $> vc Voiceless Plosive , "g" $> vc Voiced Plosive ] uvularP = asum cs where uc v = Pulmonic v Uvular cs = [ "ɴ" $> uc Voiced Nasal , doubleArticulated "q" "χ" $> uc Voiceless (Fricative NonSibilant) , doubleArticulated "ɢ" "ʁ" $> uc Voiced (Fricative NonSibilant) , "χ" $> uc Voiceless (Fricative NonSibilant) , "ʁ" $> uc Voiced (Fricative NonSibilant) , "ʀ" $> uc Voiced Trill , "q" $> uc Voiceless Plosive , "ɢ" $> uc Voiced Plosive ] pharyngealP = asum cs where pc v = Pulmonic v Pharyngeal cs = [ doubleArticulated "ʡ" "ʢ" $> pc Voiced (Affricate NonSibilant) , "ħ" $> pc Voiceless (Fricative NonSibilant) , "ʕ" $> pc Voiced (Fricative NonSibilant) , "ʡ" $> pc Voiced Flap , "ʜ" $> pc Voiced Trill , "ʢ" $> pc Voiceless Trill , "ʡ" $> pc Voiceless Plosive ] glottalP = doubleArticulated "ʔ" "h" $> gc Voiceless (Affricate NonSibilant) <|> "ʔ" $> gc Voiceless Plosive <|> "h" $> gc Voiceless (Fricative NonSibilant) <|> "ɦ" $> gc Voiced (Fricative NonSibilant) where gc v = Pulmonic v Glottal ejectiveP :: Parser Consonant ejectiveP = asum cs <* ejective where ejective = diacriticP '\x02bc' ej = Ejective cs = [ doubleArticulated "t" "ɬ" $> ej Alveolar LateralAffricate , doubleArticulated "q" "χ" $> ej Uvular (Affricate NonSibilant) , doubleArticulated "t" "s" $> ej Alveolar (Affricate Sibilant) , doubleArticulated "t" "ʃ" $> ej PostAlveolar (Affricate Sibilant) , doubleArticulated "ʈ" "ʂ" $> ej Retroflex (Affricate Sibilant) , doubleArticulated "t" "ɕ" $> ej Palatal (Affricate Sibilant) , doubleArticulated "k" "x" $> ej Velar (Affricate NonSibilant) , "p" $> ej Bilabial Plosive , "ɸ" $> ej Bilabial (Fricative NonSibilant) , "f" $> ej LabioDental (Fricative NonSibilant) , "θ" $> ej Dental (Fricative NonSibilant) , "ɬ" $> ej Alveolar LateralFricative , "t" $> ej Alveolar Plosive , "s" $> ej Alveolar (Fricative Sibilant) , "ʃ" $> ej PostAlveolar (Fricative Sibilant) , "ʈ" $> ej Retroflex Plosive , "ʂ" $> ej Retroflex (Fricative Sibilant) , "c" $> ej Palatal Plosive , "ɕ" $> ej Palatal (Fricative Sibilant) , "k" $> ej Velar Plosive , "x" $> ej Velar (Fricative NonSibilant) , "q" $> ej Uvular Plosive , "χ" $> ej Uvular (Fricative NonSibilant) , "ʡ" $> ej Pharyngeal Plosive ] implosiveP :: Parser Consonant implosiveP = Implosive Voiceless <$> asum cs <* voiceless <|> Implosive Voiced <$> asum cs where voiceless = diacriticP '\x030a' cs = [ "ɓ" $> Bilabial , "ɗ" $> Alveolar , "ᶑ" $> Retroflex , "ʄ" $> Palatal , "ɠ" $> Velar , "ʛ" $> Uvular ] clickP :: Parser Consonant clickP = Click <$> asum [ "ʘ" $> Bilabial , "ǀ" $> Dental , "ǃ" $> Alveolar , "ǁ" $> PostAlveolar , "ǂ" $> Palatal ] doublyArticulatedP :: Parser Consonant doublyArticulatedP = asum cs where da = DoublyArticulated cs = [ doubleArticulated "n" "m" $> da Voiced Bilabial Alveolar Nasal , doubleArticulated "t" "p" $> da Voiceless Bilabial Alveolar Plosive , doubleArticulated "d" "b" $> da Voiced Bilabial Alveolar Plosive , doubleArticulated "ŋ" "m" $> da Voiced Bilabial Velar Nasal , doubleArticulated "k" "p" $> da Voiceless Bilabial Velar Plosive , doubleArticulated "g" "b" $> da Voiced Bilabial Velar Plosive , doubleArticulated "q" "ʡ" $> da Voiceless Uvular Pharyngeal Plosive , "ɥ" <* P.char '\x030a' $> da Voiceless Bilabial Palatal (Fricative NonSibilant) , "ɥ" $> da Voiced Bilabial Palatal Approximant , "ʍ" $> da Voiceless Bilabial Velar (Fricative NonSibilant) , "w" $> da Voiced Bilabial Velar Approximant , "ɫ" $> da Voiced Alveolar Velar LateralApproximant , "ɧ" $> da Voiceless PostAlveolar Velar (Fricative Sibilant) ] segmentalFeatureP :: Parser SegmentalFeature segmentalFeatureP = secondaryArticulationP <|> asum fs where fs = [ "\x030a" $> Voicing Voiceless , "\x030c" $> Voicing Voiced , "\x2d0\x2d0" $> Length OverLong , "\x2d1" $> Length HalfLong , "\x2d0" $> Length Long , "\x0306" $> Length ExtraShort , "\x036a" $> Aspirated , "\x0339" $> MoreRounded , "\x031c" $> LessRounded , "\x031f" $> Advanced , "\x0320" $> Retracted , "\x0308" $> Centralized , "\x033d" $> MidCentralized , "\x1d5d" $> Compressed , "\x0329" $> Syllabic , "\x032f" $> NonSyllabic , "\x02de" $> Rhotacized , "\x0324" $> BreathyVoice , "\x0330" $> CreakyVoice , "\x033c" $> LinguoLabialized , "\x02b7" $> Labialized , "\x02b2" $> Palatalized , "\x02e0" $> Velarized , "\x02e4" $> Pharyngealized , "\x031d" $> Raised , "\x031e" $> Lowered , "\x0318" $> AdvancedTongueRoot , "\x0319" $> RetractedTongueRoot , "\x032a" $> Dentalized , "\x033a" $> Apical , "\x033b" $> Laminal , "\x0303" $> Nasalized , "\x207f" $> NasalRelease , "\x02e1" $> LateralRelease , "\x031a" $> NoAudibleRelease ] secondaryArticulationP :: Parser SegmentalFeature secondaryArticulationP = SecondaryArticulation <$> secondaryP where secondaryP = asum segments pc = PulmonicConsonant pv = PureVowel segments = [ "\x1d50" $> pc Voiced Bilabial Nasal , "\x1dac" $> pc Voiced LabioDental Nasal , "\x207f" $> pc Voiced Alveolar Nasal , "\x1daf" $> pc Voiced Retroflex Nasal , "\x1dae" $> pc Voiced Palatal Nasal , "\x1d51" $> pc Voiced Velar Nasal , "\x1db0" $> pc Voiced Uvular Nasal , "\x1d56" $> pc Voiced Bilabial Plosive , "\x1d47" $> pc Voiceless Bilabial Plosive , "\x1d57" $> pc Voiceless Alveolar Plosive , "\x1d48" $> pc Voiced Alveolar Plosive , "\x1d9c" $> pc Voiceless Palatal Plosive , "\x1da1" $> pc Voiced Palatal Plosive , "\x1d4f" $> pc Voiceless Velar Plosive , "\x1da2" $> pc Voiced Velar Plosive , "\x02c0" $> pc Voiceless Glottal Plosive , "\x1db2" $> pc Voiced Bilabial (Fricative NonSibilant) , "\x1d5d" $> pc Voiceless Bilabial (Fricative NonSibilant) , "\x1da0" $> pc Voiced LabioDental (Fricative NonSibilant) , "\x1d5b" $> pc Voiceless LabioDental (Fricative NonSibilant) , "\x1dbf" $> pc Voiceless Dental (Fricative NonSibilant) , "\x1d9e" $> pc Voiced Dental (Fricative NonSibilant) , "\x02e2" $> pc Voiceless Alveolar (Fricative Sibilant) , "\x1dbb" $> pc Voiced Alveolar (Fricative Sibilant) , "\x1db4" $> pc Voiceless PostAlveolar (Fricative Sibilant) , "\x1dbe" $> pc Voiced PostAlveolar (Fricative Sibilant) , "\x1d9d" $> pc Voiceless Palatal (Fricative Sibilant) , "\x1dbd" $> pc Voiced Palatal (Fricative Sibilant) , "\x1d9c\x0327" $> pc Voiceless Palatal (Fricative NonSibilant) , "\x1da8" $> pc Voiced Palatal (Fricative NonSibilant) , "\x02e3" $> pc Voiceless Velar (Fricative NonSibilant) , "\x02e0" $> pc Voiced Velar (Fricative NonSibilant) , "\x1d61" $> pc Voiceless Uvular (Fricative NonSibilant) , "\x02b6" $> pc Voiced Uvular (Fricative NonSibilant) , "\x02b0" $> pc Voiceless Glottal (Fricative NonSibilant) , "\x02b1" $> pc Voiced Glottal (Fricative NonSibilant) , "\x1db9" $> pc Voiced LabioDental Approximant , "\x02b4" $> pc Voiced Alveolar Approximant , "\x02b5" $> pc Voiced Retroflex Approximant , "\x02b2" $> pc Voiced Palatal Approximant , "\xab69" $> pc Voiceless Velar Approximant , "\x1dad" $> pc Voiced Velar Approximant , "\x02b3" $> pc Voiced Alveolar Trill , "\x1df1" $> DoublyArticulatedConsonant Voiced Bilabial Velar Approximant , "\x2071" $> pv Close Front Unrounded , "\x02b8" $> pv Close Front Rounded , "\x1da4" $> pv Close Central Unrounded , "\x1db6" $> pv Close Central Rounded , "\x1d5a" $> pv Close Back Unrounded , "\x1d58" $> pv Close Back Rounded , "\x1da6" $> pv NearClose Front Unrounded , "\x1da7" $> pv NearClose Central Unrounded , "\x1db7" $> pv NearClose Back Rounded , "\x1d4a" $> pv Mid Central Unrounded , "\x1d4a" $> pv Mid Central Rounded , "\x1d4b" $> pv OpenMid Front Unrounded , "\xa7f9" $> pv OpenMid Front Rounded , "\x1d9f" $> pv OpenMid Central Unrounded , "\x1dba" $> pv OpenMid Back Unrounded , "\x1d53" $> pv OpenMid Back Rounded , "\x1d46" $> pv NearOpen Front Unrounded , "\x1d44" $> pv NearOpen Central Unrounded , "\x1d45" $> pv NearOpen Back Unrounded , "\x1d9b" $> pv NearOpen Back Rounded , "\x1d43" $> pv Open Front Unrounded , "\x1d44" $> pv Open Back Rounded ] diacriticP :: Char -> Parser Char diacriticP = P.char doubleArticulated :: Text -> Text -> Parser Text doubleArticulated x y = P.string $ x <> invertedBreve <> y where invertedBreve = "\x0361" suprasegmentalFeatureP :: Parser SuprasegmentalFeature suprasegmentalFeatureP = asum fs where fs = [ toneContourP , toneContourDiacriticP , levelToneP , levelToneDiacriticP , toneNumberP digits , "." $> Break , "\x203f" $> Linking ] levelToneP = LevelLexicalTone <$> asum [ "\x02e5" $> ExtraHighTone , "\x02e6" $> HighTone , "\x02e7" $> MidTone , "\x02e8" $> LowTone , "\x02e9" $> ExtraLowTone ] levelToneDiacriticP = LevelLexicalToneDiacritic <$> asum [ "\x030b" $> ExtraHighTone , "\x0341" $> HighTone , "\x0304" $> MidTone , "\x0340" $> LowTone , "\x030f" $> ExtraLowTone , "\xa71c" $> DownStep , "\xa71b" $> UpStep ] toneContourP = LexicalToneContour <$> asum [ "\x02e9\x02e5" $> Rising , "\x02e5\x02e9" $> Falling , "\x02e7\x02e5" $> HighRising , "\x02e9\x02e7" $> LowRising , "\x02e5\x02e7" $> HighFalling , "\x02e7\x02e9" $> LowFalling , "\x02e7\x02e6\x02e8" $> RisingFalling , "\x02e7\x02e8\x02e6" $> FallingRising , "\x2197" $> GlobalRise , "\x2199" $> GlobalFall ] toneContourDiacriticP = LexicalToneContourDiacritic <$> asum [ "\x0302" $> Rising , "\x030c" $> Falling , "\x1dc9" $> HighRising , "\x1dc5" $> LowRising , "\x1dc7" $> HighFalling , "\x1dc6" $> LowFalling , "\x1dc8" $> RisingFalling , "\x1dc9" $> FallingRising ] digits = [ "\x2070" $> 0 , "\x00b9" $> 1 , "\x00b2" $> 2 , "\x00b3" $> 3 , "\x2074" $> 4 , "\x2075" $> 5 , "\x2076" $> 6 , "\x2077" $> 7 , "\x2078" $> 8 , "\x2079" $> 9 ] stressP :: Parser SuprasegmentalFeature stressP = "\x02c8" $> Stress Primary <|> "\x02cc" $> Stress Secondary toneNumberP :: (Foldable t) => t (Parser Int) -> Parser SuprasegmentalFeature toneNumberP digits = ToneNumber <$> (some (asum digits) <&> foldl' (\a b -> b + a * 10) 0) consT :: (Applicative t, Monoid (t a)) => a -> t a -> t a consT x xs = pure x <> xs manyT :: (Applicative t, Monoid (t a), Alternative f) => f a -> f (t a) manyT v = manyT' where manyT' = someT' <|> pure mempty someT' = consT <$> v <*> manyT' someT :: (Applicative t, Monoid (t a), Alternative f) => f a -> f (t a) someT v = consT <$> v <*> manyT v handleResult :: (Text -> IPAException) -> Text -> IResult Text b -> Either IPAException b handleResult ty t i = case i of Done _ r -> Right r Fail ch _ _ -> Left . ty $ mconcat [ t, " ", "'", ch, "'" ] Partial p -> handleResult ty t (p mempty) ------------------------------------------------------------------------------- -- X-SAMPA Parser -- ------------------------------------------------------------------------------- -- | As 'parseSegment', but in X-SAMPA notation parseSegmentXSampa :: Text -> Either IPAException Segment parseSegmentXSampa t = handleResult InvalidXSampa msg (P.parse (segmentXSampaP <* P.endOfInput) t) where msg = "Failed to parse character(s):" -- | As 'parseSyllable', but in X-SAMPA notation parseSyllableXSampa :: MultiSegment t => Text -> Either IPAException (Syllable t) parseSyllableXSampa t = handleResult InvalidXSampa msg (P.parse (syllableXSampaP <* P.endOfInput) t) where msg = "Failed to parse segment(s):" -- | As 'parseSyllables', but in X-SAMPA notation parseSyllablesXSampa :: (MultiSegment t, Monoid (t (Syllable t))) => Text -> Either IPAException (t (Syllable t)) parseSyllablesXSampa t = handleResult InvalidXSampa msg (P.parse (syllablesXSampaP <* P.endOfInput) t) where msg = "Failed to parse syllable:" -- | As 'segmentP', but in X-SAMPA notation segmentXSampaP :: Parser Segment segmentXSampaP = withFeatureP <|> pureSegmentP <|> optionalP where pureSegmentP = vowelXSampaP <|> Consonant <$> consonantXSampaP withFeatureP = foldr WithSegmentalFeature <$> pureSegmentP <*> some segmentalFeatureXSampaP optionalP = Optional <$> ("(" *> (withFeatureP <|> pureSegmentP) <* ")") -- | As 'syllableP', but in X-SAMPA notation syllableXSampaP :: MultiSegment t => Parser (Syllable t) syllableXSampaP = withStressP <|> withFeaturesP <|> Syllable <$> justSegments where justSegments = someT segmentXSampaP withFeaturesP = foldr WithSuprasegmentalFeature <$> (Syllable <$> justSegments) <*> some suprasegmentalFeatureXSampaP withStressP = foldr WithSuprasegmentalFeature <$> withStress <*> many suprasegmentalFeatureXSampaP where withStress = WithSuprasegmentalFeature <$> stressXSampaP <*> (Syllable <$> justSegments) syllablesXSampaP :: (MultiSegment t, Monoid (t (Syllable t))) => Parser (t (Syllable t)) syllablesXSampaP = someT withWS where withWS = syllableXSampaP <* many P.space consonantXSampaP :: Parser Consonant consonantXSampaP = clickXSampaP <|> implosiveXSampaP <|> ejectiveXSampaP <|> doublyArticulatedXSampaP <|> pulmonicXSampaP vowelXSampaP :: Parser Segment vowelXSampaP = triphthongP <|> diphthongP <|> Vowel <$> pureP where diphthongP = Diphthong <$> pureP <*> pureP triphthongP = Triphthong <$> pureP <*> pureP <*> pureP pv = Pure lowered = "_o" pureP = asum [ "e" <* lowered $> pv Mid Front Unrounded , "2" <* lowered $> pv Mid Front Rounded , "7" <* lowered $> pv Mid Back Unrounded , "o" <* lowered $> pv Mid Back Rounded , "i" $> pv Close Front Unrounded , "y" $> pv Close Front Rounded , "1" $> pv Close Central Unrounded , "}" $> pv Close Central Rounded , "M" $> pv Close Back Unrounded , "u" $> pv Close Back Rounded , "I" $> pv NearClose Front Unrounded , "Y" $> pv NearClose Front Rounded , "U" $> pv NearClose Back Rounded , "e" $> pv CloseMid Front Unrounded , "2" $> pv CloseMid Front Rounded , "@" *> slash $> pv CloseMid Central Unrounded , "8" $> pv CloseMid Central Rounded , "7" $> pv CloseMid Back Unrounded , "o" $> pv CloseMid Back Rounded , "@" $> pv Mid Central Unrounded , "E" $> pv OpenMid Front Unrounded , "9" $> pv OpenMid Front Rounded , "3" *> slash $> pv OpenMid Central Unrounded , "3" *> slash $> pv OpenMid Central Rounded , "V" $> pv OpenMid Back Unrounded , "O" $> pv OpenMid Back Rounded , "{" $> pv NearOpen Front Unrounded , "6" $> pv NearOpen Central Unrounded , "a" $> pv Open Front Unrounded , "&" $> pv Open Front Rounded , "A" $> pv Open Back Unrounded , "Q" $> pv Open Back Rounded ] pulmonicXSampaP :: Parser Consonant pulmonicXSampaP = asum [ biliabialP , labioDentalP , dentalP , postAlveolarP , retroflexP , palatalP , velarP , uvularP , pharyngealP , glottalP , alveolarP ] where biliabialP = asum cs where bc v = Pulmonic v Bilabial cs = [ "m" $> bc Voiced Nasal , doubleArticulatedXSampa "p" "p" *> slash $> bc Voiceless (Affricate NonSibilant) , doubleArticulatedXSampa "b" "B" $> bc Voiced (Affricate NonSibilant) , "B" $> bc Voiced (Fricative NonSibilant) , "p" *> slash $> bc Voiceless (Fricative NonSibilant) , "B" *> slash $> bc Voiced Trill , "b" $> bc Voiced Plosive , "p" $> bc Voiceless Plosive ] labioDentalP = asum cs where ldc v = Pulmonic v LabioDental cs = [ "F" $> ldc Voiced Nasal , "f" $> ldc Voiceless (Fricative NonSibilant) , "v" $> ldc Voiced (Fricative NonSibilant) , "P" $> ldc Voiced Approximant ] dentalP = "T" $> dc Voiceless (Fricative NonSibilant) <|> "D" $> dc Voiced (Fricative NonSibilant) where dc v = Pulmonic v Dental alveolarP = asum cs where ac v = Pulmonic v Alveolar cs = [ "n" $> ac Voiced Nasal , doubleArticulatedXSampa "t" "s" $> ac Voiced (Affricate Sibilant) , doubleArticulatedXSampa "d" "z" $> ac Voiceless (Affricate Sibilant) , "s" $> ac Voiceless (Fricative Sibilant) , "z" $> ac Voiced (Fricative Sibilant) , "r" *> slash $> ac Voiced Approximant , "4" $> ac Voiced Flap , "r" $> ac Voiced Trill , "K" $> ac Voiceless LateralFricative , "K" *> slash $> ac Voiced LateralFricative , "l" $> ac Voiced LateralApproximant , "t" $> ac Voiceless Plosive , "d" $> ac Voiced Plosive ] postAlveolarP = doubleArticulatedXSampa "t" "S" $> pc Voiceless (Affricate Sibilant) <|> doubleArticulatedXSampa "d" "Z" $> pc Voiced (Affricate Sibilant) <|> "S" $> pc Voiceless (Fricative Sibilant) <|> "Z" $> pc Voiced (Fricative Sibilant) where pc v = Pulmonic v PostAlveolar retroflexP = asum cs where rc v = Pulmonic v Retroflex retroflexDiacritic = diacriticP '`' cs = [ "n" *> retroflexDiacritic $> rc Voiced Nasal , doubleArticulatedXSampa "t`" "s" *> retroflexDiacritic $> rc Voiceless (Affricate Sibilant) , doubleArticulatedXSampa "d`" "s" *> retroflexDiacritic $> rc Voiced (Affricate Sibilant) , "s" *> retroflexDiacritic $> rc Voiceless (Fricative Sibilant) , "z" *> retroflexDiacritic $> rc Voiced (Fricative Sibilant) , "r" *> slash *> retroflexDiacritic $> rc Voiced Approximant , "r" *> retroflexDiacritic $> rc Voiced Flap , "l" *> retroflexDiacritic $> rc Voiced LateralApproximant , "t" *> retroflexDiacritic $> rc Voiceless Plosive , "d" *> retroflexDiacritic $> rc Voiced Plosive ] palatalP = asum cs where pc v = Pulmonic v Palatal cs = [ "J" $> pc Voiced Nasal , doubleArticulatedXSampa "t" "s" *> slash $> pc Voiceless (Affricate Sibilant) , doubleArticulatedXSampa "d" "z" *> slash $> pc Voiced (Affricate Sibilant) , "s" *> slash $> pc Voiceless (Fricative Sibilant) , "z" *> slash $> pc Voiced (Fricative Sibilant) , "C" $> pc Voiceless (Fricative NonSibilant) , "j" *> slash $> pc Voiced (Fricative NonSibilant) , "j" $> pc Voiced Approximant , "L" $> pc Voiced LateralApproximant , "c" $> pc Voiceless Plosive , "J" *> slash $> pc Voiced Plosive ] velarP = asum cs where vc v = Pulmonic v Velar cs = [ "N" $> vc Voiced Nasal , doubleArticulatedXSampa "k" "x" $> vc Voiceless (Affricate NonSibilant) , doubleArticulatedXSampa "g" "G" $> vc Voiced (Affricate NonSibilant) , "x" $> vc Voiceless (Fricative NonSibilant) , "G" $> vc Voiced (Fricative NonSibilant) , "m" *> slash $> vc Voiced Approximant , "L" *> slash $> vc Voiced LateralApproximant , "k" $> vc Voiceless Plosive , "g" $> vc Voiced Plosive ] uvularP = asum cs where uc v = Pulmonic v Uvular cs = [ "N" *> slash $> uc Voiced Nasal , doubleArticulatedXSampa "q" "X" $> uc Voiceless (Fricative NonSibilant) , doubleArticulatedXSampa "G\\" "R" $> uc Voiced (Fricative NonSibilant) , "X" $> uc Voiceless (Fricative NonSibilant) , "R" $> uc Voiced (Fricative NonSibilant) , "R" *> slash $> uc Voiced Trill , "q" $> uc Voiceless Plosive , "G" *> slash $> uc Voiced Plosive ] pharyngealP = "X" *> slash $> pc Voiceless (Fricative NonSibilant) <|> "?" *> slash $> pc Voiced (Fricative NonSibilant) where pc v = Pulmonic v Pharyngeal glottalP = doubleArticulatedXSampa "?" "h" *> slash $> gc Voiceless (Affricate NonSibilant) <|> "?" $> gc Voiceless Plosive <|> "h" $> gc Voiceless (Fricative NonSibilant) <|> "h" *> slash $> gc Voiced (Fricative NonSibilant) where gc v = Pulmonic v Glottal ejectiveXSampaP :: Parser Consonant ejectiveXSampaP = asum cs <* ejective where ejective = "_>" ej = Ejective cs = [ doubleArticulatedXSampa "q" "X" $> ej Uvular (Affricate NonSibilant) , doubleArticulatedXSampa "t" "s" $> ej Alveolar (Affricate Sibilant) , doubleArticulatedXSampa "t" "S" $> ej PostAlveolar (Affricate Sibilant) , doubleArticulatedXSampa "t`" "s`" $> ej Retroflex (Affricate Sibilant) , doubleArticulatedXSampa "t" "s" *> slash $> ej Palatal (Affricate Sibilant) , doubleArticulatedXSampa "k" "x" $> ej Velar (Affricate NonSibilant) , "p" $> ej Bilabial Plosive , "p" *> slash $> ej Bilabial (Fricative NonSibilant) , "f" $> ej LabioDental (Fricative NonSibilant) , "T" $> ej Dental (Fricative NonSibilant) , "t" $> ej Alveolar Plosive , "s" $> ej Alveolar (Fricative Sibilant) , "S" $> ej PostAlveolar (Fricative Sibilant) , "t`" $> ej Retroflex Plosive , "s`" $> ej Retroflex (Fricative Sibilant) , "c" $> ej Palatal Plosive , "s" *> slash $> ej Palatal (Fricative Sibilant) , "k" $> ej Velar Plosive , "x" $> ej Velar (Fricative NonSibilant) , "q" $> ej Uvular Plosive , "X" $> ej Uvular (Fricative NonSibilant) ] implosiveXSampaP :: Parser Consonant implosiveXSampaP = Implosive Voiceless <$> asum cs <* implosive <* voiceless <|> Implosive Voiced <$> asum cs <* implosive where voiceless = "_0" implosive = "_<" cs = [ "b" $> Bilabial , "d" $> Alveolar , "d`" $> Retroflex , "f" $> Palatal , "g" $> Velar , "G" $> Uvular ] clickXSampaP :: Parser Consonant clickXSampaP = Click <$> asum [ "O" $> Bilabial , "|" $> Dental , "!" $> Alveolar , "|" *> slash *> "|" $> PostAlveolar , "=" $> Palatal ] <* slash doublyArticulatedXSampaP :: Parser Consonant doublyArticulatedXSampaP = asum cs where da = DoublyArticulated cs = [ doubleArticulatedXSampa "n" "m" $> da Voiced Bilabial Alveolar Nasal , doubleArticulatedXSampa "t" "p" $> da Voiceless Bilabial Alveolar Plosive , doubleArticulatedXSampa "d" "b" $> da Voiced Bilabial Alveolar Plosive , doubleArticulatedXSampa "N" "m" $> da Voiced Bilabial Velar Nasal , doubleArticulatedXSampa "k" "p" $> da Voiceless Bilabial Velar Plosive , doubleArticulatedXSampa "g" "b" $> da Voiced Bilabial Velar Plosive , "H_0" $> da Voiceless Bilabial Palatal (Fricative NonSibilant) , "H" $> da Voiced Bilabial Palatal Approximant , "W" $> da Voiceless Bilabial Velar (Fricative NonSibilant) , "w" $> da Voiced Bilabial Velar Approximant , "x" *> slash $> da Voiceless PostAlveolar Velar (Fricative Sibilant) ] segmentalFeatureXSampaP :: Parser SegmentalFeature segmentalFeatureXSampaP = secondaryArticulationP <|> asum fs where fs = [ "_0" $> Voicing Voiceless , "_v" $> Voicing Voiced , "::" $> Length OverLong , ":" *> slash $> Length HalfLong , ":" $> Length Long , "_X" $> Length ExtraShort , "_h" $> Aspirated , "_O" $> MoreRounded , "_c" $> LessRounded , "_+" $> Advanced , "_-" $> Retracted , "_\"" $> Centralized , "_x" $> MidCentralized , "=" $> Syllabic , "_^" $> NonSyllabic , "`" $> Rhotacized , "_t" $> BreathyVoice , "_k" $> CreakyVoice , "_w" $> Labialized , "'" $> Palatalized , "_G" $> Velarized , "_?" *> slash $> Pharyngealized , "_r" $> Raised , "_o" $> Lowered , "_A" $> AdvancedTongueRoot , "_q" $> RetractedTongueRoot , "_d" $> Dentalized , "_a" $> Apical , "_m" $> Laminal , "~" $> Nasalized , "_l" $> LateralRelease , "_}" $> NoAudibleRelease ] suprasegmentalFeatureXSampaP :: Parser SuprasegmentalFeature suprasegmentalFeatureXSampaP = asum fs where fs = [ toneContourP, levelToneP, "." $> Break, "-" *> slash $> Linking ] levelToneP = LevelLexicalTone <$> asum [ "_T" $> ExtraHighTone , "_H" $> HighTone , "_M" $> MidTone , "_L" $> LowTone , "_B" $> ExtraLowTone , "!" $> DownStep , "^" $> UpStep ] toneContourP = LexicalToneContour <$> asum [ "_R" $> Rising , "_F" $> Falling , "_H_T" $> HighRising , "_B_L" $> LowRising , "_H_F" $> HighFalling , "_L_B" $> LowFalling , "_R_F" $> RisingFalling , "_F_R" $> FallingRising , "" $> GlobalRise , "" $> GlobalFall ] slash :: Parser Char slash = P.char '\\' doubleArticulatedXSampa :: Text -> Text -> Parser Text doubleArticulatedXSampa x y = P.string $ x <> "_" <> y stressXSampaP :: Parser SuprasegmentalFeature stressXSampaP = ("\"" <|> "\'") $> Stress Primary <|> "%" $> Stress Secondary