{-# LANGUAGE ScopedTypeVariables #-}

-- |
-- Module      : Language.IPA.Parser
-- Copyright   : (c) 2021 Rory Tyler Hayford
--
-- License     : BSD-3-Clause
-- Maintainer  : rory.hayford@protonmail.com
-- Stability   : experimental
-- Portability : GHC
--
-- Parser for IPA literals
module Language.IPA.Parser
    ( -- * IPA parsing
      -- ** Parsing single segments
      parseSegment
    , segmentP
      -- ** Parsing syllables
    , parseSyllable
    , syllableP
    , parseSyllables
    , syllablesP
      -- * X-SAMPA parsing
      -- ** Parsing single segments
    , parseSegmentXSampa
    , segmentXSampaP
      -- ** Parsing syllables
    , parseSyllableXSampa
    , parseSyllablesXSampa
    , syllableXSampaP
    ) where

import           Control.Applicative  ( Alternative((<|>), some, many) )

import           Data.Attoparsec.Text ( IResult(Fail, Done, Partial), Parser )
import qualified Data.Attoparsec.Text as P
import           Data.Foldable        ( asum, foldl' )
import           Data.Functor         ( ($>), (<&>) )
import           Data.Text            ( Text )
import           Data.Text.Normalize  ( NormalizationMode(NFD), normalize )

import           Language.IPA.Types

-- | Parse a single literal segment in IPA notation, returning 'InvalidIPA' upon
-- failure and a 'Segment' upon success. Note that supplying two or more segment
-- literals will cause the parse to fail
--
-- >>> parseSegment "ɨːː"
-- Right (WithSegmentalFeature (Length OverLong) (Vowel Close Central Unrounded))
--
-- >>> parseSegment "ɨːːb"
-- Left (InvalidIPA "Failed to parse character 'b'")
parseSegment :: Text -> Either IPAException Segment
parseSegment t =
    handleResult InvalidIPA
                 msg
                 (P.parse (segmentP <* P.endOfInput) (normalize NFD t))
  where
    msg = "Failed to parse character(s):"

-- | Parse a literal syllable in IPA notation, returning 'InvalidIPA' upon
-- failure and a 'Syllable' upon success, where the syllable is parameterized by
-- some type @t@ satisfying a 'MultiSegment' constraint. Note that, as with
-- 'parseSegment', supplying two or more syllable literals will cause the parse
-- to fail
--
-- >>> parseSyllable @[] "ma˧˥"
-- Right (WithSuprasegmentalFeature (LexicalToneContour HighRising)
--       (Syllable [Consonant (Pulmonic Voiced Bilabial Nasal),Vowel Open Front Unrounded]))
parseSyllable :: MultiSegment t => Text -> Either IPAException (Syllable t)
parseSyllable t =
    handleResult InvalidIPA
                 msg
                 (P.parse (syllableP <* P.endOfInput) (normalize NFD t))
  where
    msg = "Failed to parse segment(s):"

-- | Parse several syllables into a container parameterized by the same type as that
-- parameterizing the individual 'Syllable's. Whitespace between syllables is interpreted
-- as a syllable boundary
--
-- >>> parseSyllables @[] "haːj˧ ɓaː˧"
-- Right [ WithSuprasegmentalFeature (LevelLexicalTone MidTone)
--           (Syllable [ Consonant (Pulmonic Voiceless Glottal (Fricative NonSibilant))
--                     , WithSegmentalFeature (Length Long) (Vowel Open Front Unrounded)
--                     , Consonant (Pulmonic Voiced Palatal Approximant)
--                     ])
--       , WithSuprasegmentalFeature (LevelLexicalTone MidTone)
--           (Syllable [ Consonant (Implosive Voiced Bilabial)
--                     , WithSegmentalFeature (Length Long) (Vowel Open Front Unrounded)
--                     ])
--       ]
parseSyllables :: (MultiSegment t, Monoid (t (Syllable t)))
               => Text
               -> Either IPAException (t (Syllable t))
parseSyllables t =
    handleResult InvalidIPA
                 msg
                 (P.parse (syllablesP <* P.endOfInput) (normalize NFD t))
  where
    msg = "Failed to parse syllable:"

-- | Parser for syllables. This function is exposed to allow users to create
-- more specific parsers for aggregations of multiple syllables
syllableP :: MultiSegment t => Parser (Syllable t)
syllableP = withStressP <|> withFeaturesP <|> Syllable <$> justSegments
  where
    justSegments = someT segmentP

    withFeaturesP = foldr WithSuprasegmentalFeature
        <$> (Syllable <$> justSegments)
        <*> some suprasegmentalFeatureP

    withStressP = foldr WithSuprasegmentalFeature <$> withStress
        <*> many suprasegmentalFeatureP
      where
        withStress = WithSuprasegmentalFeature <$> stressP
            <*> (Syllable <$> justSegments)

-- | Parser for segments. This function is exposed to allow users to create
-- more specific parsers for multi-segment sequences
segmentP :: Parser Segment
segmentP = withFeatureP <|> pureSegmentP <|> optionalP
  where
    pureSegmentP = vowelP <|> Consonant <$> consonantP

    withFeatureP =
        foldr WithSegmentalFeature <$> pureSegmentP <*> some segmentalFeatureP

    optionalP    = Optional
        <$> ("(" *> (withFeatureP <|> pureSegmentP) <* ")")

-- | Parser for poly-syllabic sequences, with individual syllables separated
-- by (optional) whitespace
syllablesP :: (MultiSegment t, Monoid (t (Syllable t)))
           => Parser (t (Syllable t))
syllablesP = someT withWS
  where
    withWS = syllableP <* many P.space

consonantP :: Parser Consonant
consonantP =
    clickP <|> implosiveP <|> ejectiveP <|> doublyArticulatedP <|> pulmonicP

vowelP :: Parser Segment
vowelP = triphthongP <|> diphthongP <|> Vowel <$> pureP
  where
    diphthongP  = Diphthong <$> pureP <*> pureP

    triphthongP = Triphthong <$> pureP <*> pureP <*> pureP

    pv          = Pure

    lowered     = diacriticP '\x031e'

    pureP       =
        asum [ "e" <* lowered $> pv Mid Front Unrounded
             , "ø" <* lowered $> pv Mid Front Rounded
             , "ɤ" <* lowered $> pv Mid Back Unrounded
             , "o" <* lowered $> pv Mid Back Rounded
             , "i" $> pv Close Front Unrounded
             , "y" $> pv Close Front Rounded
             , "ɨ" $> pv Close Central Unrounded
             , "ʉ" $> pv Close Central Rounded
             , "ɯ" $> pv Close Back Unrounded
             , "u" $> pv Close Back Rounded
             , "ɪ" $> pv NearClose Front Unrounded
             , "ʏ" $> pv NearClose Front Rounded
             , "ʊ" $> pv NearClose Back Rounded
             , "e" $> pv CloseMid Front Unrounded
             , "ø" $> pv CloseMid Front Rounded
             , "ɘ" $> pv CloseMid Central Unrounded
             , "ɵ" $> pv CloseMid Central Rounded
             , "ɤ" $> pv CloseMid Back Unrounded
             , "o" $> pv CloseMid Back Rounded
             , "ə" $> pv Mid Central Unrounded
             , "ɛ" $> pv OpenMid Front Unrounded
             , "œ" $> pv OpenMid Front Rounded
             , "ɜ" $> pv OpenMid Central Unrounded
             , "ɞ" $> pv OpenMid Central Rounded
             , "ʌ" $> pv OpenMid Back Unrounded
             , "ɔ" $> pv OpenMid Back Rounded
             , "æ" $> pv NearOpen Front Unrounded
             , "ɐ" $> pv NearOpen Central Unrounded
             , "a" $> pv Open Front Unrounded
             , "ɶ" $> pv Open Front Rounded
             , "ä" $> pv Open Central Unrounded
             , "ɑ" $> pv Open Back Unrounded
             , "ɒ" $> pv Open Back Rounded
             ]

pulmonicP :: Parser Consonant
pulmonicP =
    asum [ biliabialP
         , labioDentalP
         , dentalP
         , postAlveolarP
         , retroflexP
         , palatalP
         , velarP
         , uvularP
         , pharyngealP
         , glottalP
           -- Alveolars should go at the end; alveolar plosives appear in
           -- several affricates in other places of articulation
         , alveolarP
         ]
  where
    biliabialP = asum cs
      where
        bc v = Pulmonic v Bilabial

        cs   =
            [ "m" $> bc Voiced Nasal
            , doubleArticulated "p" "ɸ" $> bc Voiced (Affricate NonSibilant)
            , doubleArticulated "b" "β"
              $> bc Voiceless (Affricate NonSibilant)
            , "β" $> bc Voiced (Fricative NonSibilant)
            , "ɸ" $> bc Voiceless (Fricative NonSibilant)
            , "ʙ" $> bc Voiced Trill
            , "b" $> bc Voiced Plosive
            , "p" $> bc Voiceless Plosive
            ]

    labioDentalP = asum cs
      where
        ldc v = Pulmonic v LabioDental

        cs    = [ "ɱ" $> ldc Voiced Nasal
                , "f" $> ldc Voiceless (Fricative NonSibilant)
                , "v" $> ldc Voiced (Fricative NonSibilant)
                , "ʋ" $> ldc Voiced Approximant
                , "ⱱ" $> ldc Voiced Flap
                ]

    dentalP = "θ" $> dc Voiceless (Fricative NonSibilant)
        <|> "ð" $> dc Voiced (Fricative NonSibilant)
      where
        dc v = Pulmonic v Dental

    alveolarP = asum cs
      where
        ac v = Pulmonic v Alveolar

        cs   =
            [ "n" $> ac Voiced Nasal
            , doubleArticulated "t" "s" $> ac Voiced (Affricate Sibilant)
            , doubleArticulated "d" "z" $> ac Voiceless (Affricate Sibilant)
            , "s" $> ac Voiceless (Fricative Sibilant)
            , "z" $> ac Voiced (Fricative Sibilant)
            , "ɹ" $> ac Voiced Approximant
            , "ɾ" $> ac Voiced Flap
            , "r" $> ac Voiced Trill
            , "ɬ" $> ac Voiceless LateralFricative
            , "ɮ" $> ac Voiced LateralFricative
            , "l" $> ac Voiced LateralApproximant
            , "ɺ" $> ac Voiced LateralFlap
            , "tɬ" $> ac Voiceless LateralAffricate
            , "dɮ" $> ac Voiced LateralAffricate
            , "t" $> ac Voiceless Plosive
            , "d" $> ac Voiced Plosive
            ]

    postAlveolarP = doubleArticulated "t" "ʃ"
        $> pc Voiceless (Affricate Sibilant)
        <|> doubleArticulated "d" "ʒ" $> pc Voiced (Affricate Sibilant)
        <|> "ʃ" $> pc Voiceless (Fricative Sibilant)
        <|> "ʒ" $> pc Voiced (Fricative Sibilant)
      where
        pc v = Pulmonic v PostAlveolar

    retroflexP = asum cs
      where
        rc v = Pulmonic v Retroflex

        cs   =
            [ "ɳ" $> rc Voiced Nasal
            , doubleArticulated "ʈ" "ʂ" $> rc Voiceless (Affricate Sibilant)
            , doubleArticulated "ɖ" "ʐ" $> rc Voiced (Affricate Sibilant)
            , "ʂ" $> rc Voiceless (Fricative Sibilant)
            , "ʐ" $> rc Voiced (Fricative Sibilant)
            , "ɻ" $> rc Voiced Approximant
            , "ɽ" $> rc Voiced Flap
            , "ɽr" $> rc Voiced Trill
            , "ɭ" $> rc Voiced LateralApproximant
            , "ʈ" $> rc Voiceless Plosive
            , "ɖ" $> rc Voiced Plosive
            ]

    palatalP = asum cs
      where
        pc v = Pulmonic v Palatal

        cs   =
            [ "ɲ" $> pc Voiced Nasal
            , doubleArticulated "t" "ɕ" $> pc Voiceless (Affricate Sibilant)
            , doubleArticulated "d" "ʑ" $> pc Voiced (Affricate Sibilant)
            , doubleArticulated "c" "ç"
              $> pc Voiceless (Affricate NonSibilant)
            , doubleArticulated "ɟ" "ʝ" $> pc Voiced (Affricate NonSibilant)
            , "ɕ" $> pc Voiceless (Fricative Sibilant)
            , "ʑ" $> pc Voiced (Fricative Sibilant)
            , "ç" $> pc Voiceless (Fricative NonSibilant)
            , "ʝ" $> pc Voiced (Fricative NonSibilant)
            , "j" $> pc Voiced Approximant
            , "ʎ" $> pc Voiced LateralApproximant
            , "c" $> pc Voiceless Plosive
            , "ɟ" $> pc Voiced Plosive
            ]

    velarP = asum cs
      where
        vc v = Pulmonic v Velar

        cs   =
            [ "ŋ" $> vc Voiced Nasal
            , doubleArticulated "k" "x"
              $> vc Voiceless (Affricate NonSibilant)
            , doubleArticulated "g" "ɣ" $> vc Voiced (Affricate NonSibilant)
            , "x" $> vc Voiceless (Fricative NonSibilant)
            , "ɣ" $> vc Voiced (Fricative NonSibilant)
            , "ɰ" $> vc Voiced Approximant
            , "ʟ" $> vc Voiced LateralApproximant
            , "k" $> vc Voiceless Plosive
            , "g" $> vc Voiced Plosive
            ]

    uvularP = asum cs
      where
        uc v = Pulmonic v Uvular

        cs   =
            [ "ɴ" $> uc Voiced Nasal
            , doubleArticulated "q" "χ"
              $> uc Voiceless (Fricative NonSibilant)
            , doubleArticulated "ɢ" "ʁ" $> uc Voiced (Fricative NonSibilant)
            , "χ" $> uc Voiceless (Fricative NonSibilant)
            , "ʁ" $> uc Voiced (Fricative NonSibilant)
            , "ʀ" $> uc Voiced Trill
            , "q" $> uc Voiceless Plosive
            , "ɢ" $> uc Voiced Plosive
            ]

    pharyngealP = asum cs
      where
        pc v = Pulmonic v Pharyngeal

        cs   =
            [ doubleArticulated "ʡ" "ʢ" $> pc Voiced (Affricate NonSibilant)
            , "ħ" $> pc Voiceless (Fricative NonSibilant)
            , "ʕ" $> pc Voiced (Fricative NonSibilant)
            , "ʡ" $> pc Voiced Flap
            , "ʜ" $> pc Voiced Trill
            , "ʢ" $> pc Voiceless Trill
            , "ʡ" $> pc Voiceless Plosive
            ]

    glottalP = doubleArticulated "ʔ" "h"
        $> gc Voiceless (Affricate NonSibilant)
        <|> "ʔ" $> gc Voiceless Plosive
        <|> "h" $> gc Voiceless (Fricative NonSibilant)
        <|> "ɦ" $> gc Voiced (Fricative NonSibilant)
      where
        gc v = Pulmonic v Glottal

ejectiveP :: Parser Consonant
ejectiveP = asum cs <* ejective
  where
    ejective = diacriticP '\x02bc'

    ej       = Ejective

    cs       =
        [ doubleArticulated "t" "ɬ" $> ej Alveolar LateralAffricate
        , doubleArticulated "q" "χ" $> ej Uvular (Affricate NonSibilant)
        , doubleArticulated "t" "s" $> ej Alveolar (Affricate Sibilant)
        , doubleArticulated "t" "ʃ" $> ej PostAlveolar (Affricate Sibilant)
        , doubleArticulated "ʈ" "ʂ" $> ej Retroflex (Affricate Sibilant)
        , doubleArticulated "t" "ɕ" $> ej Palatal (Affricate Sibilant)
        , doubleArticulated "k" "x" $> ej Velar (Affricate NonSibilant)
        , "p" $> ej Bilabial Plosive
        , "ɸ" $> ej Bilabial (Fricative NonSibilant)
        , "f" $> ej LabioDental (Fricative NonSibilant)
        , "θ" $> ej Dental (Fricative NonSibilant)
        , "ɬ" $> ej Alveolar LateralFricative
        , "t" $> ej Alveolar Plosive
        , "s" $> ej Alveolar (Fricative Sibilant)
        , "ʃ" $> ej PostAlveolar (Fricative Sibilant)
        , "ʈ" $> ej Retroflex Plosive
        , "ʂ" $> ej Retroflex (Fricative Sibilant)
        , "c" $> ej Palatal Plosive
        , "ɕ" $> ej Palatal (Fricative Sibilant)
        , "k" $> ej Velar Plosive
        , "x" $> ej Velar (Fricative NonSibilant)
        , "q" $> ej Uvular Plosive
        , "χ" $> ej Uvular (Fricative NonSibilant)
        , "ʡ" $> ej Pharyngeal Plosive
        ]

implosiveP :: Parser Consonant
implosiveP = Implosive Voiceless <$> asum cs <* voiceless
    <|> Implosive Voiced <$> asum cs
  where
    voiceless = diacriticP '\x030a'

    cs        = [ "ɓ" $> Bilabial
                , "ɗ" $> Alveolar
                , "ᶑ" $> Retroflex
                , "ʄ" $> Palatal
                , "ɠ" $> Velar
                , "ʛ" $> Uvular
                ]

clickP :: Parser Consonant
clickP = Click
    <$> asum [ "ʘ" $> Bilabial
             , "ǀ" $> Dental
             , "ǃ" $> Alveolar
             , "ǁ" $> PostAlveolar
             , "ǂ" $> Palatal
             ]

doublyArticulatedP :: Parser Consonant
doublyArticulatedP = asum cs
  where
    da = DoublyArticulated

    cs =
        [ doubleArticulated "n" "m" $> da Voiced Bilabial Alveolar Nasal
        , doubleArticulated "t" "p" $> da Voiceless Bilabial Alveolar Plosive
        , doubleArticulated "d" "b" $> da Voiced Bilabial Alveolar Plosive
        , doubleArticulated "ŋ" "m" $> da Voiced Bilabial Velar Nasal
        , doubleArticulated "k" "p" $> da Voiceless Bilabial Velar Plosive
        , doubleArticulated "g" "b" $> da Voiced Bilabial Velar Plosive
        , doubleArticulated "q" "ʡ" $> da Voiceless Uvular Pharyngeal Plosive
        , "ɥ" <* P.char '\x030a'
          $> da Voiceless Bilabial Palatal (Fricative NonSibilant)
        , "ɥ" $> da Voiced Bilabial Palatal Approximant
        , "ʍ" $> da Voiceless Bilabial Velar (Fricative NonSibilant)
        , "w" $> da Voiced Bilabial Velar Approximant
        , "ɫ" $> da Voiced Alveolar Velar LateralApproximant
        , "ɧ" $> da Voiceless PostAlveolar Velar (Fricative Sibilant)
        ]

segmentalFeatureP :: Parser SegmentalFeature
segmentalFeatureP = secondaryArticulationP <|> asum fs
  where
    fs = [ "\x030a" $> Voicing Voiceless
         , "\x030c" $> Voicing Voiced
         , "\x2d0\x2d0" $> Length OverLong
         , "\x2d1" $> Length HalfLong
         , "\x2d0" $> Length Long
         , "\x0306" $> Length ExtraShort
         , "\x036a" $> Aspirated
         , "\x0339" $> MoreRounded
         , "\x031c" $> LessRounded
         , "\x031f" $> Advanced
         , "\x0320" $> Retracted
         , "\x0308" $> Centralized
         , "\x033d" $> MidCentralized
         , "\x1d5d" $> Compressed
         , "\x0329" $> Syllabic
         , "\x032f" $> NonSyllabic
         , "\x02de" $> Rhotacized
         , "\x0324" $> BreathyVoice
         , "\x0330" $> CreakyVoice
         , "\x033c" $> LinguoLabialized
         , "\x02b7" $> Labialized
         , "\x02b2" $> Palatalized
         , "\x02e0" $> Velarized
         , "\x02e4" $> Pharyngealized
         , "\x031d" $> Raised
         , "\x031e" $> Lowered
         , "\x0318" $> AdvancedTongueRoot
         , "\x0319" $> RetractedTongueRoot
         , "\x032a" $> Dentalized
         , "\x033a" $> Apical
         , "\x033b" $> Laminal
         , "\x0303" $> Nasalized
         , "\x207f" $> NasalRelease
         , "\x02e1" $> LateralRelease
         , "\x031a" $> NoAudibleRelease
         ]

secondaryArticulationP :: Parser SegmentalFeature
secondaryArticulationP = SecondaryArticulation <$> secondaryP
  where
    secondaryP = asum segments

    pc         = PulmonicConsonant

    pv         = PureVowel

    segments   =
        [ "\x1d50" $> pc Voiced Bilabial Nasal
        , "\x1dac" $> pc Voiced LabioDental Nasal
        , "\x207f" $> pc Voiced Alveolar Nasal
        , "\x1daf" $> pc Voiced Retroflex Nasal
        , "\x1dae" $> pc Voiced Palatal Nasal
        , "\x1d51" $> pc Voiced Velar Nasal
        , "\x1db0" $> pc Voiced Uvular Nasal
        , "\x1d56" $> pc Voiced Bilabial Plosive
        , "\x1d47" $> pc Voiceless Bilabial Plosive
        , "\x1d57" $> pc Voiceless Alveolar Plosive
        , "\x1d48" $> pc Voiced Alveolar Plosive
        , "\x1d9c" $> pc Voiceless Palatal Plosive
        , "\x1da1" $> pc Voiced Palatal Plosive
        , "\x1d4f" $> pc Voiceless Velar Plosive
        , "\x1da2" $> pc Voiced Velar Plosive
        , "\x02c0" $> pc Voiceless Glottal Plosive
        , "\x1db2" $> pc Voiced Bilabial (Fricative NonSibilant)
        , "\x1d5d" $> pc Voiceless Bilabial (Fricative NonSibilant)
        , "\x1da0" $> pc Voiced LabioDental (Fricative NonSibilant)
        , "\x1d5b" $> pc Voiceless LabioDental (Fricative NonSibilant)
        , "\x1dbf" $> pc Voiceless Dental (Fricative NonSibilant)
        , "\x1d9e" $> pc Voiced Dental (Fricative NonSibilant)
        , "\x02e2" $> pc Voiceless Alveolar (Fricative Sibilant)
        , "\x1dbb" $> pc Voiced Alveolar (Fricative Sibilant)
        , "\x1db4" $> pc Voiceless PostAlveolar (Fricative Sibilant)
        , "\x1dbe" $> pc Voiced PostAlveolar (Fricative Sibilant)
        , "\x1d9d" $> pc Voiceless Palatal (Fricative Sibilant)
        , "\x1dbd" $> pc Voiced Palatal (Fricative Sibilant)
        , "\x1d9c\x0327" $> pc Voiceless Palatal (Fricative NonSibilant)
        , "\x1da8" $> pc Voiced Palatal (Fricative NonSibilant)
        , "\x02e3" $> pc Voiceless Velar (Fricative NonSibilant)
        , "\x02e0" $> pc Voiced Velar (Fricative NonSibilant)
        , "\x1d61" $> pc Voiceless Uvular (Fricative NonSibilant)
        , "\x02b6" $> pc Voiced Uvular (Fricative NonSibilant)
        , "\x02b0" $> pc Voiceless Glottal (Fricative NonSibilant)
        , "\x02b1" $> pc Voiced Glottal (Fricative NonSibilant)
        , "\x1db9" $> pc Voiced LabioDental Approximant
        , "\x02b4" $> pc Voiced Alveolar Approximant
        , "\x02b5" $> pc Voiced Retroflex Approximant
        , "\x02b2" $> pc Voiced Palatal Approximant
        , "\xab69" $> pc Voiceless Velar Approximant
        , "\x1dad" $> pc Voiced Velar Approximant
        , "\x02b3" $> pc Voiced Alveolar Trill
        , "\x1df1"
          $> DoublyArticulatedConsonant Voiced Bilabial Velar Approximant
        , "\x2071" $> pv Close Front Unrounded
        , "\x02b8" $> pv Close Front Rounded
        , "\x1da4" $> pv Close Central Unrounded
        , "\x1db6" $> pv Close Central Rounded
        , "\x1d5a" $> pv Close Back Unrounded
        , "\x1d58" $> pv Close Back Rounded
        , "\x1da6" $> pv NearClose Front Unrounded
        , "\x1da7" $> pv NearClose Central Unrounded
        , "\x1db7" $> pv NearClose Back Rounded
        , "\x1d4a" $> pv Mid Central Unrounded
        , "\x1d4a" $> pv Mid Central Rounded
        , "\x1d4b" $> pv OpenMid Front Unrounded
        , "\xa7f9" $> pv OpenMid Front Rounded
        , "\x1d9f" $> pv OpenMid Central Unrounded
        , "\x1dba" $> pv OpenMid Back Unrounded
        , "\x1d53" $> pv OpenMid Back Rounded
        , "\x1d46" $> pv NearOpen Front Unrounded
        , "\x1d44" $> pv NearOpen Central Unrounded
        , "\x1d45" $> pv NearOpen Back Unrounded
        , "\x1d9b" $> pv NearOpen Back Rounded
        , "\x1d43" $> pv Open Front Unrounded
        , "\x1d44" $> pv Open Back Rounded
        ]

diacriticP :: Char -> Parser Char
diacriticP = P.char

doubleArticulated :: Text -> Text -> Parser Text
doubleArticulated x y = P.string $ x <> invertedBreve <> y
  where
    invertedBreve = "\x0361"

suprasegmentalFeatureP :: Parser SuprasegmentalFeature
suprasegmentalFeatureP = asum fs
  where
    fs = [ toneContourP
         , toneContourDiacriticP
         , levelToneP
         , levelToneDiacriticP
         , toneNumberP digits
         , "." $> Break
         , "\x203f" $> Linking
         ]

    levelToneP = LevelLexicalTone
        <$> asum [ "\x02e5" $> ExtraHighTone
                 , "\x02e6" $> HighTone
                 , "\x02e7" $> MidTone
                 , "\x02e8" $> LowTone
                 , "\x02e9" $> ExtraLowTone
                 ]

    levelToneDiacriticP = LevelLexicalToneDiacritic
        <$> asum [ "\x030b" $> ExtraHighTone
                 , "\x0341" $> HighTone
                 , "\x0304" $> MidTone
                 , "\x0340" $> LowTone
                 , "\x030f" $> ExtraLowTone
                 , "\xa71c" $> DownStep
                 , "\xa71b" $> UpStep
                 ]

    toneContourP = LexicalToneContour
        <$> asum [ "\x02e9\x02e5" $> Rising
                 , "\x02e5\x02e9" $> Falling
                 , "\x02e7\x02e5" $> HighRising
                 , "\x02e9\x02e7" $> LowRising
                 , "\x02e5\x02e7" $> HighFalling
                 , "\x02e7\x02e9" $> LowFalling
                 , "\x02e7\x02e6\x02e8" $> RisingFalling
                 , "\x02e7\x02e8\x02e6" $> FallingRising
                 , "\x2197" $> GlobalRise
                 , "\x2199" $> GlobalFall
                 ]

    toneContourDiacriticP = LexicalToneContourDiacritic
        <$> asum [ "\x0302" $> Rising
                 , "\x030c" $> Falling
                 , "\x1dc9" $> HighRising
                 , "\x1dc5" $> LowRising
                 , "\x1dc7" $> HighFalling
                 , "\x1dc6" $> LowFalling
                 , "\x1dc8" $> RisingFalling
                 , "\x1dc9" $> FallingRising
                 ]

    digits = [ "\x2070" $> 0
             , "\x00b9" $> 1
             , "\x00b2" $> 2
             , "\x00b3" $> 3
             , "\x2074" $> 4
             , "\x2075" $> 5
             , "\x2076" $> 6
             , "\x2077" $> 7
             , "\x2078" $> 8
             , "\x2079" $> 9
             ]

stressP :: Parser SuprasegmentalFeature
stressP = "\x02c8" $> Stress Primary <|> "\x02cc" $> Stress Secondary

toneNumberP :: (Foldable t) => t (Parser Int) -> Parser SuprasegmentalFeature
toneNumberP digits = ToneNumber
    <$> (some (asum digits) <&> foldl' (\a b -> b + a * 10) 0)

consT :: (Applicative t, Monoid (t a)) => a -> t a -> t a
consT x xs = pure x <> xs

manyT :: (Applicative t, Monoid (t a), Alternative f) => f a -> f (t a)
manyT v = manyT'
  where
    manyT' = someT' <|> pure mempty

    someT' = consT <$> v <*> manyT'

someT :: (Applicative t, Monoid (t a), Alternative f) => f a -> f (t a)
someT v = consT <$> v <*> manyT v

handleResult :: (Text -> IPAException)
             -> Text
             -> IResult Text b
             -> Either IPAException b
handleResult ty t i = case i of
    Done _ r    -> Right r
    Fail ch _ _ -> Left . ty $ mconcat [ t, " ", "'", ch, "'" ]
    Partial p   -> handleResult ty t (p mempty)

-------------------------------------------------------------------------------
--                               X-SAMPA Parser                              --
-------------------------------------------------------------------------------
-- | As 'parseSegment', but in X-SAMPA notation
parseSegmentXSampa :: Text -> Either IPAException Segment
parseSegmentXSampa t =
    handleResult InvalidXSampa
                 msg
                 (P.parse (segmentXSampaP <* P.endOfInput) t)
  where
    msg = "Failed to parse character(s):"

-- | As 'parseSyllable', but in X-SAMPA notation
parseSyllableXSampa
    :: MultiSegment t => Text -> Either IPAException (Syllable t)
parseSyllableXSampa t =
    handleResult InvalidXSampa
                 msg
                 (P.parse (syllableXSampaP <* P.endOfInput) t)
  where
    msg = "Failed to parse segment(s):"

-- | As 'parseSyllables', but in X-SAMPA notation
parseSyllablesXSampa :: (MultiSegment t, Monoid (t (Syllable t)))
                     => Text
                     -> Either IPAException (t (Syllable t))
parseSyllablesXSampa t =
    handleResult InvalidXSampa
                 msg
                 (P.parse (syllablesXSampaP <* P.endOfInput) t)
  where
    msg = "Failed to parse syllable:"

-- | As 'segmentP', but in X-SAMPA notation
segmentXSampaP :: Parser Segment
segmentXSampaP = withFeatureP <|> pureSegmentP <|> optionalP
  where
    pureSegmentP = vowelXSampaP <|> Consonant <$> consonantXSampaP

    withFeatureP = foldr WithSegmentalFeature <$> pureSegmentP
        <*> some segmentalFeatureXSampaP

    optionalP    = Optional
        <$> ("(" *> (withFeatureP <|> pureSegmentP) <* ")")

-- | As 'syllableP', but in X-SAMPA notation
syllableXSampaP :: MultiSegment t => Parser (Syllable t)
syllableXSampaP = withStressP <|> withFeaturesP <|> Syllable <$> justSegments
  where
    justSegments = someT segmentXSampaP

    withFeaturesP = foldr WithSuprasegmentalFeature
        <$> (Syllable <$> justSegments)
        <*> some suprasegmentalFeatureXSampaP

    withStressP = foldr WithSuprasegmentalFeature <$> withStress
        <*> many suprasegmentalFeatureXSampaP
      where
        withStress = WithSuprasegmentalFeature <$> stressXSampaP
            <*> (Syllable <$> justSegments)

syllablesXSampaP :: (MultiSegment t, Monoid (t (Syllable t)))
                 => Parser (t (Syllable t))
syllablesXSampaP = someT withWS
  where
    withWS = syllableXSampaP <* many P.space

consonantXSampaP :: Parser Consonant
consonantXSampaP = clickXSampaP
    <|> implosiveXSampaP
    <|> ejectiveXSampaP
    <|> doublyArticulatedXSampaP
    <|> pulmonicXSampaP

vowelXSampaP :: Parser Segment
vowelXSampaP = triphthongP <|> diphthongP <|> Vowel <$> pureP
  where
    diphthongP  = Diphthong <$> pureP <*> pureP

    triphthongP = Triphthong <$> pureP <*> pureP <*> pureP

    pv          = Pure

    lowered     = "_o"

    pureP       =
        asum [ "e" <* lowered $> pv Mid Front Unrounded
             , "2" <* lowered $> pv Mid Front Rounded
             , "7" <* lowered $> pv Mid Back Unrounded
             , "o" <* lowered $> pv Mid Back Rounded
             , "i" $> pv Close Front Unrounded
             , "y" $> pv Close Front Rounded
             , "1" $> pv Close Central Unrounded
             , "}" $> pv Close Central Rounded
             , "M" $> pv Close Back Unrounded
             , "u" $> pv Close Back Rounded
             , "I" $> pv NearClose Front Unrounded
             , "Y" $> pv NearClose Front Rounded
             , "U" $> pv NearClose Back Rounded
             , "e" $> pv CloseMid Front Unrounded
             , "2" $> pv CloseMid Front Rounded
             , "@" *> slash $> pv CloseMid Central Unrounded
             , "8" $> pv CloseMid Central Rounded
             , "7" $> pv CloseMid Back Unrounded
             , "o" $> pv CloseMid Back Rounded
             , "@" $> pv Mid Central Unrounded
             , "E" $> pv OpenMid Front Unrounded
             , "9" $> pv OpenMid Front Rounded
             , "3" *> slash $> pv OpenMid Central Unrounded
             , "3" *> slash $> pv OpenMid Central Rounded
             , "V" $> pv OpenMid Back Unrounded
             , "O" $> pv OpenMid Back Rounded
             , "{" $> pv NearOpen Front Unrounded
             , "6" $> pv NearOpen Central Unrounded
             , "a" $> pv Open Front Unrounded
             , "&" $> pv Open Front Rounded
             , "A" $> pv Open Back Unrounded
             , "Q" $> pv Open Back Rounded
             ]

pulmonicXSampaP :: Parser Consonant
pulmonicXSampaP =
    asum [ biliabialP
         , labioDentalP
         , dentalP
         , postAlveolarP
         , retroflexP
         , palatalP
         , velarP
         , uvularP
         , pharyngealP
         , glottalP
         , alveolarP
         ]
  where
    biliabialP = asum cs
      where
        bc v = Pulmonic v Bilabial

        cs   = [ "m" $> bc Voiced Nasal
               , doubleArticulatedXSampa "p" "p" *> slash
                 $> bc Voiceless (Affricate NonSibilant)
               , doubleArticulatedXSampa "b" "B"
                 $> bc Voiced (Affricate NonSibilant)
               , "B" $> bc Voiced (Fricative NonSibilant)
               , "p" *> slash $> bc Voiceless (Fricative NonSibilant)
               , "B" *> slash $> bc Voiced Trill
               , "b" $> bc Voiced Plosive
               , "p" $> bc Voiceless Plosive
               ]

    labioDentalP = asum cs
      where
        ldc v = Pulmonic v LabioDental

        cs    = [ "F" $> ldc Voiced Nasal
                , "f" $> ldc Voiceless (Fricative NonSibilant)
                , "v" $> ldc Voiced (Fricative NonSibilant)
                , "P" $> ldc Voiced Approximant
                ]

    dentalP = "T" $> dc Voiceless (Fricative NonSibilant)
        <|> "D" $> dc Voiced (Fricative NonSibilant)
      where
        dc v = Pulmonic v Dental

    alveolarP = asum cs
      where
        ac v = Pulmonic v Alveolar

        cs   = [ "n" $> ac Voiced Nasal
               , doubleArticulatedXSampa "t" "s"
                 $> ac Voiced (Affricate Sibilant)
               , doubleArticulatedXSampa "d" "z"
                 $> ac Voiceless (Affricate Sibilant)
               , "s" $> ac Voiceless (Fricative Sibilant)
               , "z" $> ac Voiced (Fricative Sibilant)
               , "r" *> slash $> ac Voiced Approximant
               , "4" $> ac Voiced Flap
               , "r" $> ac Voiced Trill
               , "K" $> ac Voiceless LateralFricative
               , "K" *> slash $> ac Voiced LateralFricative
               , "l" $> ac Voiced LateralApproximant
               , "t" $> ac Voiceless Plosive
               , "d" $> ac Voiced Plosive
               ]

    postAlveolarP = doubleArticulatedXSampa "t" "S"
        $> pc Voiceless (Affricate Sibilant)
        <|> doubleArticulatedXSampa "d" "Z" $> pc Voiced (Affricate Sibilant)
        <|> "S" $> pc Voiceless (Fricative Sibilant)
        <|> "Z" $> pc Voiced (Fricative Sibilant)
      where
        pc v = Pulmonic v PostAlveolar

    retroflexP = asum cs
      where
        rc v = Pulmonic v Retroflex

        retroflexDiacritic = diacriticP '`'

        cs                 =
            [ "n" *> retroflexDiacritic $> rc Voiced Nasal
            , doubleArticulatedXSampa "t`" "s" *> retroflexDiacritic
              $> rc Voiceless (Affricate Sibilant)
            , doubleArticulatedXSampa "d`" "s" *> retroflexDiacritic
              $> rc Voiced (Affricate Sibilant)
            , "s" *> retroflexDiacritic $> rc Voiceless (Fricative Sibilant)
            , "z" *> retroflexDiacritic $> rc Voiced (Fricative Sibilant)
            , "r" *> slash *> retroflexDiacritic $> rc Voiced Approximant
            , "r" *> retroflexDiacritic $> rc Voiced Flap
            , "l" *> retroflexDiacritic $> rc Voiced LateralApproximant
            , "t" *> retroflexDiacritic $> rc Voiceless Plosive
            , "d" *> retroflexDiacritic $> rc Voiced Plosive
            ]

    palatalP = asum cs
      where
        pc v = Pulmonic v Palatal

        cs   = [ "J" $> pc Voiced Nasal
               , doubleArticulatedXSampa "t" "s" *> slash
                 $> pc Voiceless (Affricate Sibilant)
               , doubleArticulatedXSampa "d" "z" *> slash
                 $> pc Voiced (Affricate Sibilant)
               , "s" *> slash $> pc Voiceless (Fricative Sibilant)
               , "z" *> slash $> pc Voiced (Fricative Sibilant)
               , "C" $> pc Voiceless (Fricative NonSibilant)
               , "j" *> slash $> pc Voiced (Fricative NonSibilant)
               , "j" $> pc Voiced Approximant
               , "L" $> pc Voiced LateralApproximant
               , "c" $> pc Voiceless Plosive
               , "J" *> slash $> pc Voiced Plosive
               ]

    velarP = asum cs
      where
        vc v = Pulmonic v Velar

        cs   = [ "N" $> vc Voiced Nasal
               , doubleArticulatedXSampa "k" "x"
                 $> vc Voiceless (Affricate NonSibilant)
               , doubleArticulatedXSampa "g" "G"
                 $> vc Voiced (Affricate NonSibilant)
               , "x" $> vc Voiceless (Fricative NonSibilant)
               , "G" $> vc Voiced (Fricative NonSibilant)
               , "m" *> slash $> vc Voiced Approximant
               , "L" *> slash $> vc Voiced LateralApproximant
               , "k" $> vc Voiceless Plosive
               , "g" $> vc Voiced Plosive
               ]

    uvularP = asum cs
      where
        uc v = Pulmonic v Uvular

        cs   = [ "N" *> slash $> uc Voiced Nasal
               , doubleArticulatedXSampa "q" "X"
                 $> uc Voiceless (Fricative NonSibilant)
               , doubleArticulatedXSampa "G\\" "R"
                 $> uc Voiced (Fricative NonSibilant)
               , "X" $> uc Voiceless (Fricative NonSibilant)
               , "R" $> uc Voiced (Fricative NonSibilant)
               , "R" *> slash $> uc Voiced Trill
               , "q" $> uc Voiceless Plosive
               , "G" *> slash $> uc Voiced Plosive
               ]

    pharyngealP = "X" *> slash $> pc Voiceless (Fricative NonSibilant)
        <|> "?" *> slash $> pc Voiced (Fricative NonSibilant)
      where
        pc v = Pulmonic v Pharyngeal

    glottalP = doubleArticulatedXSampa "?" "h" *> slash
        $> gc Voiceless (Affricate NonSibilant)
        <|> "?" $> gc Voiceless Plosive
        <|> "h" $> gc Voiceless (Fricative NonSibilant)
        <|> "h" *> slash $> gc Voiced (Fricative NonSibilant)
      where
        gc v = Pulmonic v Glottal

ejectiveXSampaP :: Parser Consonant
ejectiveXSampaP = asum cs <* ejective
  where
    ejective = "_>"

    ej       = Ejective

    cs       =
        [ doubleArticulatedXSampa "q" "X" $> ej Uvular (Affricate NonSibilant)
        , doubleArticulatedXSampa "t" "s" $> ej Alveolar (Affricate Sibilant)
        , doubleArticulatedXSampa "t" "S"
          $> ej PostAlveolar (Affricate Sibilant)
        , doubleArticulatedXSampa "t`" "s`"
          $> ej Retroflex (Affricate Sibilant)
        , doubleArticulatedXSampa "t" "s" *> slash
          $> ej Palatal (Affricate Sibilant)
        , doubleArticulatedXSampa "k" "x" $> ej Velar (Affricate NonSibilant)
        , "p" $> ej Bilabial Plosive
        , "p" *> slash $> ej Bilabial (Fricative NonSibilant)
        , "f" $> ej LabioDental (Fricative NonSibilant)
        , "T" $> ej Dental (Fricative NonSibilant)
        , "t" $> ej Alveolar Plosive
        , "s" $> ej Alveolar (Fricative Sibilant)
        , "S" $> ej PostAlveolar (Fricative Sibilant)
        , "t`" $> ej Retroflex Plosive
        , "s`" $> ej Retroflex (Fricative Sibilant)
        , "c" $> ej Palatal Plosive
        , "s" *> slash $> ej Palatal (Fricative Sibilant)
        , "k" $> ej Velar Plosive
        , "x" $> ej Velar (Fricative NonSibilant)
        , "q" $> ej Uvular Plosive
        , "X" $> ej Uvular (Fricative NonSibilant)
        ]

implosiveXSampaP :: Parser Consonant
implosiveXSampaP = Implosive Voiceless <$> asum cs <* implosive <* voiceless
    <|> Implosive Voiced <$> asum cs <* implosive
  where
    voiceless = "_0"

    implosive = "_<"

    cs        = [ "b" $> Bilabial
                , "d" $> Alveolar
                , "d`" $> Retroflex
                , "f" $> Palatal
                , "g" $> Velar
                , "G" $> Uvular
                ]

clickXSampaP :: Parser Consonant
clickXSampaP = Click
    <$> asum [ "O" $> Bilabial
             , "|" $> Dental
             , "!" $> Alveolar
             , "|" *> slash *> "|" $> PostAlveolar
             , "=" $> Palatal
             ]
    <* slash

doublyArticulatedXSampaP :: Parser Consonant
doublyArticulatedXSampaP = asum cs
  where
    da = DoublyArticulated

    cs =
        [ doubleArticulatedXSampa "n" "m" $> da Voiced Bilabial Alveolar Nasal
        , doubleArticulatedXSampa "t" "p"
          $> da Voiceless Bilabial Alveolar Plosive
        , doubleArticulatedXSampa "d" "b"
          $> da Voiced Bilabial Alveolar Plosive
        , doubleArticulatedXSampa "N" "m" $> da Voiced Bilabial Velar Nasal
        , doubleArticulatedXSampa "k" "p"
          $> da Voiceless Bilabial Velar Plosive
        , doubleArticulatedXSampa "g" "b" $> da Voiced Bilabial Velar Plosive
        , "H_0" $> da Voiceless Bilabial Palatal (Fricative NonSibilant)
        , "H" $> da Voiced Bilabial Palatal Approximant
        , "W" $> da Voiceless Bilabial Velar (Fricative NonSibilant)
        , "w" $> da Voiced Bilabial Velar Approximant
        , "x" *> slash $> da Voiceless PostAlveolar Velar (Fricative Sibilant)
        ]

segmentalFeatureXSampaP :: Parser SegmentalFeature
segmentalFeatureXSampaP = secondaryArticulationP <|> asum fs
  where
    fs = [ "_0" $> Voicing Voiceless
         , "_v" $> Voicing Voiced
         , "::" $> Length OverLong
         , ":" *> slash $> Length HalfLong
         , ":" $> Length Long
         , "_X" $> Length ExtraShort
         , "_h" $> Aspirated
         , "_O" $> MoreRounded
         , "_c" $> LessRounded
         , "_+" $> Advanced
         , "_-" $> Retracted
         , "_\"" $> Centralized
         , "_x" $> MidCentralized
         , "=" $> Syllabic
         , "_^" $> NonSyllabic
         , "`" $> Rhotacized
         , "_t" $> BreathyVoice
         , "_k" $> CreakyVoice
         , "_w" $> Labialized
         , "'" $> Palatalized
         , "_G" $> Velarized
         , "_?" *> slash $> Pharyngealized
         , "_r" $> Raised
         , "_o" $> Lowered
         , "_A" $> AdvancedTongueRoot
         , "_q" $> RetractedTongueRoot
         , "_d" $> Dentalized
         , "_a" $> Apical
         , "_m" $> Laminal
         , "~" $> Nasalized
         , "_l" $> LateralRelease
         , "_}" $> NoAudibleRelease
         ]

suprasegmentalFeatureXSampaP :: Parser SuprasegmentalFeature
suprasegmentalFeatureXSampaP = asum fs
  where
    fs           =
        [ toneContourP, levelToneP, "." $> Break, "-" *> slash $> Linking ]

    levelToneP   = LevelLexicalTone
        <$> asum [ "_T" $> ExtraHighTone
                 , "_H" $> HighTone
                 , "_M" $> MidTone
                 , "_L" $> LowTone
                 , "_B" $> ExtraLowTone
                 , "!" $> DownStep
                 , "^" $> UpStep
                 ]

    toneContourP = LexicalToneContour
        <$> asum [ "_R" $> Rising
                 , "_F" $> Falling
                 , "_H_T" $> HighRising
                 , "_B_L" $> LowRising
                 , "_H_F" $> HighFalling
                 , "_L_B" $> LowFalling
                 , "_R_F" $> RisingFalling
                 , "_F_R" $> FallingRising
                 , "<R>" $> GlobalRise
                 , "<F>" $> GlobalFall
                 ]

slash :: Parser Char
slash = P.char '\\'

doubleArticulatedXSampa :: Text -> Text -> Parser Text
doubleArticulatedXSampa x y = P.string $ x <> "_" <> y

stressXSampaP :: Parser SuprasegmentalFeature
stressXSampaP = ("\"" <|> "\'") $> Stress Primary <|> "%" $> Stress Secondary