----------------------------------------------------------------------
-- |
-- Module      : GF.Speech.PGFToCFG
--
-- Approximates PGF grammars with context-free grammars.
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where

import PGF(showCId)
import PGF.Internal as PGF
--import GF.Infra.Ident
import GF.Grammar.CFG hiding (Symbol)

import Data.Array.IArray as Array
--import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
--import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set

bnfPrinter :: PGF -> CId -> String
bnfPrinter :: PGF -> CId -> String
bnfPrinter = (CFG -> CFG) -> PGF -> CId -> String
toBNF CFG -> CFG
forall a. a -> a
id

toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF CFG -> CFG
f PGF
pgf CId
cnc = CFG -> String
prCFG (CFG -> String) -> CFG -> String
forall a b. (a -> b) -> a -> b
$ CFG -> CFG
f (CFG -> CFG) -> CFG -> CFG
forall a b. (a -> b) -> a -> b
$ PGF -> CId -> CFG
pgfToCFG PGF
pgf CId
cnc

type Profile = [Int]

pgfToCFG :: PGF
          -> CId   -- ^ Concrete syntax name
          -> CFG
pgfToCFG :: PGF -> CId -> CFG
pgfToCFG PGF
pgf CId
lang = String -> Set String -> [Rule String String] -> CFG
forall c t.
(Ord c, Ord t) =>
c -> Set c -> [Rule c t] -> Grammar c t
mkCFG (CId -> String
showCId (PGF -> CId
lookStartCat PGF
pgf)) Set String
extCats ([Rule String String]
startRules [Rule String String]
-> [Rule String String] -> [Rule String String]
forall a. [a] -> [a] -> [a]
++ ((FId, Production) -> [Rule String String])
-> [(FId, Production)] -> [Rule String String]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (FId, Production) -> [Rule String String]
ruleToCFRule [(FId, Production)]
rules)
  where
    cnc :: Concr
cnc = PGF -> CId -> Concr
lookConcr PGF
pgf CId
lang

    rules :: [(FId,Production)]
    rules :: [(FId, Production)]
rules = [(FId
fcat,Production
prod) | (FId
fcat,Set Production
set) <- IntMap (Set Production) -> [(FId, Set Production)]
forall a. IntMap a -> [(FId, a)]
IntMap.toList (Concr -> IntMap (Set Production)
PGF.productions Concr
cnc)
                         , Production
prod <- Set Production -> [Production]
forall a. Set a -> [a]
Set.toList Set Production
set]

    fcatCats :: Map FId Cat
    fcatCats :: Map FId String
fcatCats = [(FId, String)] -> Map FId String
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList [(FId
fc, CId -> String
showCId CId
c String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"_" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Integer -> String
forall a. Show a => a -> String
show Integer
i)
                                 | (CId
c,CncCat FId
s FId
e Array FId String
lbls) <- Map CId CncCat -> [(CId, CncCat)]
forall k a. Map k a -> [(k, a)]
Map.toList (Concr -> Map CId CncCat
cnccats Concr
cnc),
                                   (FId
fc,Integer
i) <- [FId] -> [Integer] -> [(FId, Integer)]
forall a b. [a] -> [b] -> [(a, b)]
zip ((FId, FId) -> [FId]
forall a. Ix a => (a, a) -> [a]
range (FId
s,FId
e)) [Integer
1..]]

    fcatCat :: FId -> Cat
    fcatCat :: FId -> String
fcatCat FId
c = String -> FId -> Map FId String -> String
forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault (String
"Unknown_" String -> String -> String
forall a. [a] -> [a] -> [a]
++ FId -> String
forall a. Show a => a -> String
show FId
c) FId
c Map FId String
fcatCats

    fcatToCat :: FId -> LIndex -> Cat
    fcatToCat :: FId -> FId -> String
fcatToCat FId
c FId
l = FId -> String
fcatCat FId
c String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
row
      where row :: String
row = if FId -> FId
catLinArity FId
c FId -> FId -> Bool
forall a. Eq a => a -> a -> Bool
== FId
1 then String
"" else String
"_" String -> String -> String
forall a. [a] -> [a] -> [a]
++ FId -> String
forall a. Show a => a -> String
show FId
l

    -- gets the number of fields in the lincat for the given category
    catLinArity :: FId -> Int
    catLinArity :: FId -> FId
catLinArity FId
c = [FId] -> FId
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum (FId
1FId -> [FId] -> [FId]
forall a. a -> [a] -> [a]
:[(FId, FId) -> FId
forall a. Ix a => (a, a) -> FId
rangeSize (UArray FId FId -> (FId, FId)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
bounds UArray FId FId
rhs) | (CncFun CId
_ UArray FId FId
rhs, [PArg]
_) <- FId -> [(CncFun, [PArg])]
topdownRules FId
c])

    topdownRules :: FId -> [(CncFun, [PArg])]
topdownRules FId
cat = FId -> [(CncFun, [PArg])] -> [(CncFun, [PArg])]
f FId
cat []
      where
        f :: FId -> [(CncFun, [PArg])] -> [(CncFun, [PArg])]
f FId
cat [(CncFun, [PArg])]
rules = [(CncFun, [PArg])]
-> (Set Production -> [(CncFun, [PArg])])
-> Maybe (Set Production)
-> [(CncFun, [PArg])]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [(CncFun, [PArg])]
rules ((Production -> [(CncFun, [PArg])] -> [(CncFun, [PArg])])
-> [(CncFun, [PArg])] -> Set Production -> [(CncFun, [PArg])]
forall a b. (a -> b -> b) -> b -> Set a -> b
Set.foldr Production -> [(CncFun, [PArg])] -> [(CncFun, [PArg])]
g [(CncFun, [PArg])]
rules) (FId -> IntMap (Set Production) -> Maybe (Set Production)
forall a. FId -> IntMap a -> Maybe a
IntMap.lookup FId
cat (Concr -> IntMap (Set Production)
productions Concr
cnc))

        g :: Production -> [(CncFun, [PArg])] -> [(CncFun, [PArg])]
g (PApply FId
funid [PArg]
args) [(CncFun, [PArg])]
rules = (Concr -> Array FId CncFun
cncfuns Concr
cnc Array FId CncFun -> FId -> CncFun
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! FId
funid,[PArg]
args) (CncFun, [PArg]) -> [(CncFun, [PArg])] -> [(CncFun, [PArg])]
forall a. a -> [a] -> [a]
: [(CncFun, [PArg])]
rules
        g (PCoerce FId
cat)       [(CncFun, [PArg])]
rules = FId -> [(CncFun, [PArg])] -> [(CncFun, [PArg])]
f FId
cat [(CncFun, [PArg])]
rules


    extCats :: Set Cat
    extCats :: Set String
extCats = [String] -> Set String
forall a. Ord a => [a] -> Set a
Set.fromList ([String] -> Set String) -> [String] -> Set String
forall a b. (a -> b) -> a -> b
$ (Rule String String -> String) -> [Rule String String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Rule String String -> String
forall c t. Rule c t -> c
ruleLhs [Rule String String]
startRules

    startRules :: [CFRule]
    startRules :: [Rule String String]
startRules = [String -> [Symbol String String] -> CFTerm -> Rule String String
forall c t. c -> [Symbol c t] -> CFTerm -> Rule c t
Rule (CId -> String
showCId CId
c) [String -> Symbol String String
forall c t. c -> Symbol c t
NonTerminal (FId -> FId -> String
fcatToCat FId
fc FId
r)] (FId -> CFTerm
CFRes FId
0)
                      | (CId
c,CncCat FId
s FId
e Array FId String
lbls) <- Map CId CncCat -> [(CId, CncCat)]
forall k a. Map k a -> [(k, a)]
Map.toList (Concr -> Map CId CncCat
cnccats Concr
cnc),
                        FId
fc <- (FId, FId) -> [FId]
forall a. Ix a => (a, a) -> [a]
range (FId
s,FId
e), Bool -> Bool
not (FId -> Bool
isPredefFId FId
fc),
                        FId
r <- [FId
0..FId -> FId
catLinArity FId
fcFId -> FId -> FId
forall a. Num a => a -> a -> a
-FId
1]]

    ruleToCFRule :: (FId,Production) -> [CFRule]
    ruleToCFRule :: (FId, Production) -> [Rule String String]
ruleToCFRule (FId
c,PApply FId
funid [PArg]
args) =
        [String -> [Symbol String String] -> CFTerm -> Rule String String
forall c t. c -> [Symbol c t] -> CFTerm -> Rule c t
Rule (FId -> FId -> String
fcatToCat FId
c FId
l) (Array FId Symbol -> [Symbol String String]
mkRhs Array FId Symbol
row) ([[FId]] -> CFTerm
profilesToTerm [Array FId Symbol -> FId -> [FId]
fixProfile Array FId Symbol
row FId
n | FId
n <- [FId
0..[PArg] -> FId
forall (t :: * -> *) a. Foldable t => t a -> FId
length [PArg]
argsFId -> FId -> FId
forall a. Num a => a -> a -> a
-FId
1]])
           | (FId
l,FId
seqid) <- UArray FId FId -> [(FId, FId)]
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> [(i, e)]
Array.assocs UArray FId FId
rhs
           , let row :: Array FId Symbol
row = Concr -> Array FId (Array FId Symbol)
sequences Concr
cnc Array FId (Array FId Symbol) -> FId -> Array FId Symbol
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! FId
seqid
           , Bool -> Bool
not (Array FId Symbol -> Bool
containsLiterals Array FId Symbol
row)]
      where
        CncFun CId
f UArray FId FId
rhs = Concr -> Array FId CncFun
cncfuns Concr
cnc Array FId CncFun -> FId -> CncFun
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! FId
funid

        mkRhs :: Array DotPos Symbol -> [CFSymbol]
        mkRhs :: Array FId Symbol -> [Symbol String String]
mkRhs = (Symbol -> [Symbol String String])
-> [Symbol] -> [Symbol String String]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Symbol -> [Symbol String String]
symbolToCFSymbol ([Symbol] -> [Symbol String String])
-> (Array FId Symbol -> [Symbol])
-> Array FId Symbol
-> [Symbol String String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Array FId Symbol -> [Symbol]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
Array.elems

        containsLiterals :: Array DotPos Symbol -> Bool
        containsLiterals :: Array FId Symbol -> Bool
containsLiterals Array FId Symbol
row = Bool -> Bool
not ([FId] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([FId
n | SymLit FId
n FId
_ <- Array FId Symbol -> [Symbol]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
Array.elems Array FId Symbol
row] [FId] -> [FId] -> [FId]
forall a. [a] -> [a] -> [a]
++
                                          [FId
n | SymVar FId
n FId
_ <- Array FId Symbol -> [Symbol]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
Array.elems Array FId Symbol
row]))

        symbolToCFSymbol :: Symbol -> [CFSymbol]
        symbolToCFSymbol :: Symbol -> [Symbol String String]
symbolToCFSymbol (SymCat FId
n FId
l)    = [let PArg [(FId, FId)]
_ FId
fid = [PArg]
args[PArg] -> FId -> PArg
forall a. [a] -> FId -> a
!!FId
n in String -> Symbol String String
forall c t. c -> Symbol c t
NonTerminal (FId -> FId -> String
fcatToCat FId
fid FId
l)]
        symbolToCFSymbol (SymKS String
t)       = [String -> Symbol String String
forall c t. t -> Symbol c t
Terminal String
t]
        symbolToCFSymbol (SymKP [Symbol]
syms [([Symbol], [String])]
as) = (Symbol -> [Symbol String String])
-> [Symbol] -> [Symbol String String]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Symbol -> [Symbol String String]
symbolToCFSymbol [Symbol]
syms
                                           ---- ++ [t | Alt ss _ <- as, t <- ss]
                                           ---- should be alternatives in [[CFSymbol]]
                                           ---- AR 3/6/2010
        symbolToCFSymbol Symbol
SymBIND         = [String -> Symbol String String
forall c t. t -> Symbol c t
Terminal String
"&+"]
        symbolToCFSymbol Symbol
SymSOFT_BIND    = []
        symbolToCFSymbol Symbol
SymSOFT_SPACE   = []
        symbolToCFSymbol Symbol
SymCAPIT        = [String -> Symbol String String
forall c t. t -> Symbol c t
Terminal String
"&|"]
        symbolToCFSymbol Symbol
SymALL_CAPIT    = [String -> Symbol String String
forall c t. t -> Symbol c t
Terminal String
"&|"]
        symbolToCFSymbol Symbol
SymNE           = []

        fixProfile :: Array DotPos Symbol -> Int -> Profile
        fixProfile :: Array FId Symbol -> FId -> [FId]
fixProfile Array FId Symbol
row FId
i = [FId
k | (FId
k,FId
j) <- [(FId, FId)]
nts, FId
j FId -> FId -> Bool
forall a. Eq a => a -> a -> Bool
== FId
i]
            where
              nts :: [(FId, FId)]
nts = [FId] -> [FId] -> [(FId, FId)]
forall a b. [a] -> [b] -> [(a, b)]
zip [FId
0..] [FId
j | Symbol
nt <- Array FId Symbol -> [Symbol]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
Array.elems Array FId Symbol
row, FId
j <- Symbol -> [FId]
getPos Symbol
nt]

              getPos :: Symbol -> [FId]
getPos (SymCat FId
j FId
_) = [FId
j]
              getPos (SymLit FId
j FId
_) = [FId
j]
              getPos Symbol
_            = []

        profilesToTerm :: [Profile] -> CFTerm
        profilesToTerm :: [[FId]] -> CFTerm
profilesToTerm [[FId]]
ps = CId -> [CFTerm] -> CFTerm
CFObj CId
f ((CId -> [FId] -> CFTerm) -> [CId] -> [[FId]] -> [CFTerm]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith CId -> [FId] -> CFTerm
profileToTerm [CId]
argTypes [[FId]]
ps)
            where ([CId]
argTypes,CId
_) = Type -> ([CId], CId)
catSkeleton (Type -> ([CId], CId)) -> Type -> ([CId], CId)
forall a b. (a -> b) -> a -> b
$ Abstr -> CId -> Type
lookType (PGF -> Abstr
abstract PGF
pgf) CId
f

        profileToTerm :: CId -> Profile -> CFTerm
        profileToTerm :: CId -> [FId] -> CFTerm
profileToTerm CId
t [] = CId -> CFTerm
CFMeta CId
t
        profileToTerm CId
_ [FId]
xs = FId -> CFTerm
CFRes ([FId] -> FId
forall a. [a] -> a
last [FId]
xs) -- FIXME: unify
    ruleToCFRule (FId
c,PCoerce FId
c') =
        [String -> [Symbol String String] -> CFTerm -> Rule String String
forall c t. c -> [Symbol c t] -> CFTerm -> Rule c t
Rule (FId -> FId -> String
fcatToCat FId
c FId
l) [String -> Symbol String String
forall c t. c -> Symbol c t
NonTerminal (FId -> FId -> String
fcatToCat FId
c' FId
l)] (FId -> CFTerm
CFRes FId
0)
           | FId
l <- [FId
0..FId -> FId
catLinArity FId
cFId -> FId -> FId
forall a. Num a => a -> a -> a
-FId
1]]