module Sound.SC3.Lang.Data.CMUdict where
import Data.Char
import Data.Maybe
import Data.List
import Data.List.Split
import qualified Data.Map as M
data Stress = No_stress | Primary_stress | Secondary_stress
deriving (Eq,Ord,Enum,Bounded,Read,Show)
data Phoneme
= AO | AA | IY | UW | EH | IH | UH | AH | AX | AE
| EY | AY | OW | AW | OY
| ER | AXR
| Y | W | Q
| P | B | T | D | K | G
| CH | JH
| F | V | TH | DH | S | Z | SH | ZH
| HH
| M | EM | N | EN | NG | ENG
| L | EL | R | DX | NX
deriving (Eq,Ord,Enum,Bounded,Read,Show)
type Phoneme_str = (Phoneme,Maybe Stress)
type SYLLABLE = [Phoneme_str]
type ARPABET = [Phoneme_str]
type ARPABET_syl = [SYLLABLE]
type CMU_Dict_ty a = M.Map String a
type CMU_Dict = CMU_Dict_ty ARPABET
type CMU_Dict_syl = CMU_Dict_ty ARPABET_syl
parse_phoneme_str :: String -> Phoneme_str
parse_phoneme_str w =
case reverse w of
'0':w' -> (read (reverse w'),Just No_stress)
'1':w' -> (read (reverse w'),Just Primary_stress)
'2':w' -> (read (reverse w'),Just Secondary_stress)
_ -> (read w,Nothing)
parse_arpabet :: String -> (String,ARPABET)
parse_arpabet e =
case words e of
w:p -> (w,map parse_phoneme_str p)
_ -> error "parse_arpabet"
parse_arpabet_syl :: String -> (String,ARPABET_syl)
parse_arpabet_syl e =
case words e of
w:p -> let p' = wordsBy (== "-") p
in (w,map (map parse_phoneme_str) p')
_ -> error "parse_arpabet_syl"
data Phoneme_Class = Monophthong | Diphthong | R_Coloured
| Semivowel
| Stop | Affricate | Fricative | Aspirate
| Nasal
| Liquid
deriving (Eq,Ord,Enum,Bounded,Read,Show)
arpabet_classification_table :: [(Phoneme_Class,[Phoneme])]
arpabet_classification_table =
[(Monophthong,[AO,AA,IY,UW,EH,IH,UH,AH,AX,AE])
,(Diphthong,[EY,AY,OW,AW,OY])
,(R_Coloured,[ER,AXR])
,(Semivowel,[Y,W,Q])
,(Stop,[P,B,T,D,K,G])
,(Affricate,[CH,JH])
,(Fricative,[F,V,TH,DH,S,Z,SH,ZH])
,(Aspirate,[HH])
,(Nasal,[M,EM,N,EN,NG,ENG])
,(Liquid,[L,EL,R,DX,NX])]
arpabet_classification :: Phoneme -> Phoneme_Class
arpabet_classification p =
let f (_,l) = p `elem` l
in fromMaybe (error "arpabet_classification") $
fmap fst $
find f arpabet_classification_table
cmudict_load_ty :: (String -> (String,a)) -> FilePath -> IO (CMU_Dict_ty a)
cmudict_load_ty pf fn = do
s <- readFile fn
let is_comment w = case w of {';':_ -> True;_ -> False}
l = filter (not . is_comment) (lines s)
return (M.fromList (map pf l))
cmudict_load :: FilePath -> IO CMU_Dict
cmudict_load = cmudict_load_ty parse_arpabet
cmudict_syl_load :: FilePath -> IO CMU_Dict_syl
cmudict_syl_load = cmudict_load_ty parse_arpabet_syl
d_lookup :: CMU_Dict_ty a -> String -> Maybe a
d_lookup d w = M.lookup (map toUpper w) d
d_lookup' :: CMU_Dict_ty a -> String -> Either String a
d_lookup' d w = maybe (Left w) Right (d_lookup d w)
arpabet_ipa_table :: [(Phoneme,Either String [(Stress,String)])]
arpabet_ipa_table =
[(AO,Left "ɔ")
,(AA,Left "ɑ")
,(IY,Left "i")
,(UW,Left "u")
,(EH,Left "ɛ")
,(IH,Left "ɪ")
,(UH,Left "ʊ")
,(AH,Right [(Primary_stress,"ʌ"),(No_stress,"ə")])
,(AX,Left "ə")
,(AE,Left "æ")
,(EY,Left "eɪ")
,(AY,Left "aɪ")
,(OW,Left "oʊ")
,(AW,Left "aʊ")
,(OY,Left "ɔɪ")
,(ER,Left "ɝ")
,(AXR,Left "ɚ")
,(Y,Left "j")
,(W,Left "w")
,(Q,Left "ʔ")
,(P,Left "p")
,(B,Left "b")
,(T,Left "t")
,(D,Left "d")
,(K,Left "k")
,(G,Left "ɡ")
,(CH,Left "tʃ")
,(JH,Left "dʒ")
,(F,Left "f")
,(V,Left "v")
,(TH,Left "θ")
,(DH,Left "ð")
,(S,Left "s")
,(Z,Left "z")
,(SH,Left "ʃ")
,(ZH,Left "ʒ")
,(HH,Left "h")
,(M,Left "m")
,(EM,Left "m̩")
,(N,Left "n")
,(EN,Left "n̩")
,(NG,Left "ŋ")
,(ENG,Left "ŋ̍")
,(L,Left "ɫ")
,(EL,Left "ɫ̩")
,(R,Left "ɹ")
,(DX,Left "ɾ")
,(NX,Left "ɾ̃")
]
phoneme_ipa :: Maybe Stress -> Phoneme -> String
phoneme_ipa s =
either id (fromMaybe (error (show ("phoneme_ipa: no stressed phoneme",s))) .
lookup (fromMaybe (error "phoneme_ipa: no stress") s)) .
fromMaybe (error "phoneme_ipa: no phoneme") .
flip lookup arpabet_ipa_table
arpabet_ipa :: ARPABET -> String
arpabet_ipa = concatMap (\(p,s) -> phoneme_ipa s p)