{-
    BNF Converter: ocamlyacc Generator
    Copyright (C) 2005  Author:  Kristofer Johannisson

-}

-- based on BNFC Haskell backend

{-# LANGUAGE LambdaCase #-}

module BNFC.Backend.OCaml.CFtoOCamlYacc
       (
       cf2ocamlyacc, terminal, epName
       )
        where

import Data.Char
import Data.Foldable (toList)

import BNFC.CF
import BNFC.Utils ((+++))
import BNFC.Backend.Common
import BNFC.Backend.OCaml.OCamlUtil

-- Type declarations

-- | Right-hand side of an ocamlyacc production, as produced by
-- 'generatePatterns' (space-separated symbol names, or an
-- @/* empty */@ placeholder).
type Pattern     = String
-- | Text of the semantic action attached to a production, as produced by
-- 'generateAction'.
type Action      = String
-- | Positional reference to a right-hand-side item, e.g. @$1@.
type MetaVar     = String

-- | The main function: given a CF, generate the text of an ocamlyacc file.
--
-- The output consists of the header, the declarations, the @%%@ separator
-- and the grammar rules, in that order.
--
-- The first argument (@name@) and the third (@lexName@) are only passed on
-- to 'header'; the second (@absName@) is the name of the abstract-syntax
-- module that the generated parser opens and qualifies types with.
cf2ocamlyacc :: String -> String -> String -> CF -> String
cf2ocamlyacc name absName lexName cf = unlines
  [ header name absName lexName cf
  , declarations absName cf
  , "%%"
  , rules cf
  ]


-- | The @%{ ... %}@ prologue of the generated file.
--
-- Only the second argument, the name of the abstract-syntax module, is
-- used: the prologue opens that module and @Lexing@.  The remaining
-- arguments are accepted for interface compatibility and ignored.
header :: String -> String -> String -> CF -> String
header _ absName _ _ = unlines
  [ "/* This ocamlyacc file was machine-generated by the BNF converter */"
  , "%{"
  , "open " ++ absName
  , "open Lexing"
  , ""
  , "%}"
  ]

-- | The declaration section of the generated file: keyword and symbol
-- tokens, then the special tokens, then the @%start@ / @%type@ lines for
-- the entry points.
--
-- @absName@ is the abstract-syntax module name used to qualify the types
-- of the entry points.
declarations :: String -> CF -> String
declarations absName cf = unlines
  [ tokens (unicodeAndSymbols cf) (asciiKeywords cf)
  , specialTokens cf
  , entryPoints absName cf
  ]

-- | Declare keyword and symbol tokens.
--
-- If there are reserved words, they are all declared on a single
-- @%token@ line as @KW_\<word\>@, followed by an empty line.  Each symbol
-- then gets its own line @%token SYMB\<n\> /* \<symbol\> */@, numbered from 1
-- in list order.  'terminal' numbers symbols the same way.
tokens :: [String] -> [String] -> String
tokens symbols reswords = unlines $ concat
  [ [ unwords ("%token" : map ("KW_" ++) reswords) | hasReserved ]
  , [ "" | hasReserved ]
  , [ "%token SYMB" ++ show n +++ "/*" +++ s +++ "*/"
    | (s, n) <- zip symbols [1 :: Integer ..]
    ]
  ]
  where
    -- Emit the keyword line (and its trailing blank line) only when there
    -- is at least one keyword; a bare @%token@ would declare nothing.
    hasReserved :: Bool
    hasReserved = not (null reswords)

-- | Map a CF terminal to its ocamlyacc token name.
--
-- A terminal listed in @asciiKeywords cf@ becomes @KW_\<terminal\>@.  Any
-- other terminal is looked up in @unicodeAndSymbols cf@ and becomes
-- @SYMB\<n\>@, where @n@ is its 1-based position in that list.
--
-- Calls 'error' if the terminal is found in neither list.
terminal :: CF -> String -> String
terminal cf = \ s ->
    -- Use a lambda here to make sure that the tables in the where-clause
    -- are computed before the second argument is applied.
    -- The GHC manual says that let-floating is not consistently applied
    -- so just writing @terminal cf s = ...@ could result in computing
    -- them for every @s@ anew.
    if s `elem` kws
      then "KW_" ++ s
      else case lookup s symbs of
        Just i  -> "SYMB" ++ show i
        Nothing -> error $
          "CFtoOCamlYacc: terminal " ++ show s ++ " not defined in CF."
  where
    kws :: [String]
    kws = asciiKeywords cf

    -- Symbol numbering table, shared across calls of the partial
    -- application @terminal cf@ for the same reason as @kws@.
    symbs :: [(String, Integer)]
    symbs = zip (unicodeAndSymbols cf) [1 ..]

-- | Map a CF nonterminal to an ocamlyacc symbol: the result of 'fixType'
-- on the category with every space replaced by an underscore.
nonterminal :: Cat -> String
nonterminal c = map spaceToUnderscore (fixType c)
  where
    spaceToUnderscore :: Char -> Char
    spaceToUnderscore ' ' = '_'
    spaceToUnderscore x   = x

-- | Declare the tokens that carry a value, plus the end-of-file token.
--
-- Emits @%token TOK_EOF@, then one typed @%token \<ty\> TOK_\<name\>@ line for
-- every category in 'specialCatsP', then one for every 'TokenReg' pragma
-- of the grammar.
specialTokens :: CF -> String
specialTokens cf = unlines $ concat
  [ [ "%token TOK_EOF" ]
  , [ prToken (ty n) n | n <- specialCatsP ]
  , [ prToken (posTy pos) (wpThing n0) | TokenReg n0 pos _ <- cfgPragmas cf ]
  ]
  where
    -- One @%token@ declaration with payload type @t@ for token category @n@.
    prToken :: String -> String -> String
    prToken t n = "%token" +++ t +++ "TOK_" ++ n

    -- OCaml payload type of a built-in token category.
    ty :: String -> String
    ty "Ident"   = "<string>"
    ty "String"  = "<string>"
    ty "Integer" = "<int>"
    ty "Double"  = "<float>"
    ty "Char"    = "<char>"
    ty other     = error $
      "CFtoOCamlYacc: no token type for built-in category " ++ show other

    -- Payload type of a user-defined token; the flag is the second field
    -- of the 'TokenReg' pragma and selects the variant with a position pair.
    posTy :: Bool -> String
    posTy withPos
      | withPos   = "<(int * int) * string>"
      | otherwise = "<string>"


-- | Declare the parser entry points.
--
-- Emits one @%start@ line naming the entry point ('epName') of every
-- category in @allEntryPoints cf@, followed by one
-- @%type \<type\> \<entry point\>@ line per category.
--
-- @absName@ is the abstract-syntax module name; it is prefixed to every
-- result type except the built-in ones listed in @builtins@.
entryPoints :: String -> CF -> String
entryPoints absName cf =
  unlines $ ("%start" +++ unwords (map epName eps)) : map typing eps
  where
    eps :: [Cat]
    eps = toList (allEntryPoints cf)

    typing :: Cat -> String
    typing c = "%type" +++ "<" ++ qualify (normCat c) ++ ">" +++ epName c

    -- Built-in types are left unqualified; everything else is taken from
    -- the abstract-syntax module.
    qualify :: Cat -> String
    qualify c
      | c `elem` builtins = fixType c
      | otherwise         = absName ++ "." ++ fixType c

    -- The four basic token categories and lists of them.
    builtins :: [Cat]
    builtins = baseCats ++ map ListCat baseCats
      where
        baseCats = map TokenCat ["Integer", "Double", "Char", "String"]

-- | Name of the parser entry point for a category: the letter @p@
-- followed by the category's 'nonterminal' name with its first character
-- upper-cased.
epName :: Cat -> String
epName c = "p" ++ capitalize (nonterminal c)
  where
    capitalize :: String -> String
    capitalize []         = []
    capitalize (x : rest) = toUpper x : rest

-- | One wrapper production per entry point.
--
-- For each category in @allEntryPoints cf@ the production accepts the
-- category's nonterminal followed by @TOK_EOF@ and returns its value; the
-- @error@ alternative raises @BNFC_Util.Parse_error@ with the start and
-- end positions of the current symbol.
entryPointRules :: CF -> String
entryPointRules cf = unlines (map mkRule (toList (allEntryPoints cf)))
  where
    mkRule :: Cat -> String
    mkRule s = unlines
      [ epName s ++ " : " ++ nonterminal s ++ " TOK_EOF { $1 }"
      , "  | error { raise (BNFC_Util.Parse_error (Parsing.symbol_start_pos (), Parsing.symbol_end_pos ())) };"
      ]

-- | The rules section of the generated file: the entry-point wrapper
-- productions, then one production per group of @ruleGroups cf@, then the
-- productions for the literal token categories.
rules :: CF -> String
rules cf = unlines
  [ entryPointRules cf
  , unlines [ prOne cat (constructRule term rs cat) | (cat, rs) <- ruleGroups cf ]
  , specialRules cf
  ]
  where
    -- Bound once so that every rule group uses the same partial
    -- application of 'terminal'.
    term :: String -> String
    term = terminal cf

    -- Print one nonterminal with all its alternatives.  The first
    -- alternative follows the colon; the others are printed by @alt@.
    prOne :: Cat -> [(Pattern, Action)] -> String
    prOne _  []              = []  -- nt has only internal use
    prOne nt ((p, a) : rest) =
      unwords [ nonterminal nt, ":", p, "{", a, "}", "\n" ++ concatMap alt rest ]
        ++ ";\n"

    -- A further alternative, on a line of its own.
    alt :: (Pattern, Action) -> String
    alt (p, a) = unlines [ unwords [ "  |", p, "{", a, "}" ] ]



-- | For every non-terminal, we construct a set of rules.  A rule is a
-- sequence of terminals and non-terminals, and an action to be performed.
--
-- The first argument translates a terminal into its token name; the
-- result holds one (pattern, action) pair per input rule, in input order.
constructRule :: (String -> String) -> [Rule] -> NonTerminal -> [(Pattern,Action)]
constructRule term rs nt =
  [ (pat, generateAction nt (funRule r) metas)
  | r <- rs
  , let (pat, metas) = generatePatterns term r
  ]



-- | Generate the text of a semantic action.
--
-- An action can for example be @Sum $1 $2@-like, that is, construct an
-- AST with a constructor applied to the metavariables; here the
-- metavariables are passed through 'mkTuple'.
--
-- The non-terminal argument is ignored.  For a coercion ('isCoercion') no
-- constructor is emitted; the two list constructors are replaced by OCaml
-- lambdas, and any other function name goes through 'sanitizeOcaml'.
generateAction :: IsFun a => NonTerminal -> a -> [MetaVar] -> Action
generateAction _ f ms = (if isCoercion f then "" else f') +++ mkTuple ms
  where
    f' :: String
    f' = case funName f of -- ocaml cons is somehow not a standard infix oper, right?
      "(:[])" -> "(fun x -> [x])"
      "(:)"   -> "(fun (x,xs) -> x::xs)"
      x       -> sanitizeOcaml x


-- | Generate the right-hand side of a production together with the
-- metavariables to hand to its action.
--
-- An empty right-hand side yields the pattern @/* empty */@ and no
-- metavariables.  Otherwise categories are rendered with 'nonterminal'
-- and terminals with the supplied function, separated by spaces.  Only
-- category items produce a metavariable, but the @$n@ index counts every
-- item of the right-hand side, starting at 1.
generatePatterns :: (String -> String) -> Rule -> (Pattern,[MetaVar])
generatePatterns term r = case rhsRule r of
  []  -> ("/* empty */", [])
  its -> (unwords (map (either nonterminal term) its), metas its)
  where
    metas :: [Either a b] -> [MetaVar]
    metas its = [ '$' : show i | (i, Left _) <- zip [1 :: Int ..] its ]

-- | Productions for the literal token categories of the grammar, one per
-- element of @literals cf@.
--
-- The five built-in categories have fixed productions.  Any other
-- (user-defined) token category gets a production that wraps the token
-- value in a constructor named after the category.
specialRules :: CF -> String
specialRules cf = unlines (map literalRule (literals cf))
  where
    literalRule :: String -> String
    literalRule "Ident"   = "ident : TOK_Ident  { Ident $1 };"
    literalRule "String"  = "string : TOK_String { $1 };"
    literalRule "Integer" = "int :  TOK_Integer  { $1 };"
    literalRule "Double"  = "float : TOK_Double  { $1 };"
    literalRule "Char"    = "char : TOK_Char { $1 };"
    literalRule own       = concat
      [ fixType (TokenCat own), " : TOK_", own, " { ", own, " (", posn, "$1)};" ]

    -- ignore position categories for now
    posn :: String
    posn = "" -- if isPositionCat cf own then "mkPosToken " else ""