{-# LANGUAGE OverloadedStrings #-}
module Skylighting.Parser ( parseSyntaxDefinition
                          , parseSyntaxDefinitionFromText
                          , addSyntaxDefinition
                          , missingIncludes
                          ) where

import qualified Data.String as String
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Char (isAlphaNum, toUpper)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import qualified Data.Text.Read as TR
import qualified Data.Text.Encoding as TE
import Safe
import Skylighting.Regex
import Skylighting.Types
import System.FilePath
import Text.XML
import qualified Control.Exception as E
import Control.Monad.Except

-- | Adds a syntax definition to a syntax map,
-- replacing any existing definition with the same name.
addSyntaxDefinition :: Syntax -> SyntaxMap -> SyntaxMap
addSyntaxDefinition :: Syntax -> SyntaxMap -> SyntaxMap
addSyntaxDefinition Syntax
s = Text -> Syntax -> SyntaxMap -> SyntaxMap
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert (Syntax -> Text
sName Syntax
s) Syntax
s

-- | Scan a list of 'Syntax's and make sure that
-- `IncludeRules` never asks for a syntax not in this
-- list.  Produces a list of pairs where the first
-- element is the including syntax name and the second
-- element is the (missing) included syntax name.
-- This is intended for sanity checks to avoid run-time
-- errors.
missingIncludes :: [Syntax] -> [(Text, Text)]
missingIncludes :: [Syntax] -> [(Text, Text)]
missingIncludes [Syntax]
syns = [(Text, Text)] -> [(Text, Text)]
forall a. Ord a => [a] -> [a]
ordNub
  [(Syntax -> Text
sName Syntax
s, Text
lang)
     | Syntax
s <- [Syntax]
syns
     , Context
c <- Map Text Context -> [Context]
forall k a. Map k a -> [a]
M.elems (Syntax -> Map Text Context
sContexts Syntax
s)
     , IncludeRules (Text
lang, Text
_) <- (Rule -> Matcher) -> [Rule] -> [Matcher]
forall a b. (a -> b) -> [a] -> [b]
map Rule -> Matcher
rMatcher (Context -> [Rule]
cRules Context
c)
     , Bool -> Bool
not (Text
lang Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
syntaxNames)]
   where syntaxNames :: Set Text
syntaxNames = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
Set.fromList ([Text] -> Set Text) -> [Text] -> Set Text
forall a b. (a -> b) -> a -> b
$ (Syntax -> Text) -> [Syntax] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Syntax -> Text
sName [Syntax]
syns

ordNub :: (Ord a) => [a] -> [a]
ordNub :: [a] -> [a]
ordNub [a]
l = Set a -> [a] -> [a]
forall a. Ord a => Set a -> [a] -> [a]
go Set a
forall a. Set a
Set.empty [a]
l
  where
    go :: Set a -> [a] -> [a]
go Set a
_ [] = []
    go Set a
s (a
x:[a]
xs) = if a
x a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set a
s then Set a -> [a] -> [a]
go Set a
s [a]
xs
                                      else a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: Set a -> [a] -> [a]
go (a -> Set a -> Set a
forall a. Ord a => a -> Set a -> Set a
Set.insert a
x Set a
s) [a]
xs

standardDelims :: Set.Set Char
standardDelims :: Set Char
standardDelims = [Char] -> Set Char
forall a. Ord a => [a] -> Set a
Set.fromList [Char]
" \n\t.():!+,-<=>%&*/;?[]^{|}~\\"

defaultKeywordAttr :: KeywordAttr
defaultKeywordAttr :: KeywordAttr
defaultKeywordAttr = KeywordAttr :: Bool -> Set Char -> KeywordAttr
KeywordAttr { keywordCaseSensitive :: Bool
keywordCaseSensitive = Bool
True
                                 , keywordDelims :: Set Char
keywordDelims = Set Char
standardDelims }

vBool :: Bool -> Text -> Bool
vBool :: Bool -> Text -> Bool
vBool Bool
defaultVal Text
value = case Text
value of
                           Text
"true"  -> Bool
True
                           Text
"yes"   -> Bool
True
                           Text
"1"     -> Bool
True
                           Text
"false" -> Bool
False
                           Text
"no"    -> Bool
False
                           Text
"0"     -> Bool
False
                           Text
_       -> Bool
defaultVal

-- | Parses a file containing a Kate XML syntax definition
-- into a 'Syntax' description.
parseSyntaxDefinition :: FilePath -> IO (Either String Syntax)
parseSyntaxDefinition :: [Char] -> IO (Either [Char] Syntax)
parseSyntaxDefinition [Char]
fp = do
  ByteString
bs <- [Char] -> IO ByteString
BL.readFile [Char]
fp
  Either [Char] Syntax -> IO (Either [Char] Syntax)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either [Char] Syntax -> IO (Either [Char] Syntax))
-> Either [Char] Syntax -> IO (Either [Char] Syntax)
forall a b. (a -> b) -> a -> b
$ [Char] -> Text -> Either [Char] Syntax
parseSyntaxDefinitionFromText [Char]
fp (ByteString -> Text
toTextLazy ByteString
bs)
 where
  toTextLazy :: ByteString -> Text
toTextLazy = ByteString -> Text
TLE.decodeUtf8 (ByteString -> Text)
-> (ByteString -> ByteString) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
filterCRs (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
dropBOM
  dropBOM :: ByteString -> ByteString
dropBOM ByteString
bs =
         if ByteString
"\xEF\xBB\xBF" ByteString -> ByteString -> Bool
`BL.isPrefixOf` ByteString
bs
            then Int64 -> ByteString -> ByteString
BL.drop Int64
3 ByteString
bs
            else ByteString
bs
  filterCRs :: ByteString -> ByteString
filterCRs = (Char -> Bool) -> ByteString -> ByteString
BL.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r')

parseSyntaxDefinitionFromText ::
  FilePath -> TL.Text -> Either String Syntax
parseSyntaxDefinitionFromText :: [Char] -> Text -> Either [Char] Syntax
parseSyntaxDefinitionFromText [Char]
fp Text
xml =
    case ParseSettings -> Text -> Either SomeException Document
parseText ParseSettings
forall a. Default a => a
def Text
xml of
      Left SomeException
e    -> [Char] -> Either [Char] Syntax
forall a b. a -> Either a b
Left ([Char] -> Either [Char] Syntax) -> [Char] -> Either [Char] Syntax
forall a b. (a -> b) -> a -> b
$ SomeException -> [Char]
forall e. Exception e => e -> [Char]
E.displayException SomeException
e
      Right Document
doc -> Except [Char] Syntax -> Either [Char] Syntax
forall e a. Except e a -> Either e a
runExcept (Except [Char] Syntax -> Either [Char] Syntax)
-> Except [Char] Syntax -> Either [Char] Syntax
forall a b. (a -> b) -> a -> b
$ [Char] -> Document -> Except [Char] Syntax
documentToSyntax [Char]
fp Document
doc

-- | Parses an XML 'Document' as a 'Syntax' description.
documentToSyntax :: FilePath -- ^ used for short name
                 -> Document
                 -> Except String Syntax
documentToSyntax :: [Char] -> Document -> Except [Char] Syntax
documentToSyntax [Char]
fp Document{ documentRoot :: Document -> Element
documentRoot = Element
rootEl } = do
  Bool -> ExceptT [Char] Identity () -> ExceptT [Char] Identity ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Element -> Name
elementName Element
rootEl Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== Name
"language") (ExceptT [Char] Identity () -> ExceptT [Char] Identity ())
-> ExceptT [Char] Identity () -> ExceptT [Char] Identity ()
forall a b. (a -> b) -> a -> b
$
    [Char] -> ExceptT [Char] Identity ()
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"Root element is not language"
  let filename :: [Char]
filename = [Char] -> [Char]
takeFileName [Char]
fp
  let casesensitive :: Bool
casesensitive = Bool -> Text -> Bool
vBool Bool
True (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"casesensitive" Element
rootEl

  Element
hlEl <- case [Char] -> Element -> [Element]
getElementsNamed [Char]
"highlighting" Element
rootEl of
            []      -> [Char] -> ExceptT [Char] Identity Element
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"No highlighting element"
            (Element
hl:[Element]
_)  -> Element -> ExceptT [Char] Identity Element
forall (m :: * -> *) a. Monad m => a -> m a
return Element
hl

  Map Text [Text]
lists <- [(Text, [Text])] -> Map Text [Text]
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, [Text])] -> Map Text [Text])
-> ExceptT [Char] Identity [(Text, [Text])]
-> ExceptT [Char] Identity (Map Text [Text])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Element -> ExceptT [Char] Identity (Text, [Text]))
-> [Element] -> ExceptT [Char] Identity [(Text, [Text])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Element -> ExceptT [Char] Identity (Text, [Text])
getList ([Char] -> Element -> [Element]
getElementsNamed [Char]
"list" Element
hlEl)

  let itemDatas :: ItemData
itemDatas = Element -> ItemData
getItemData Element
hlEl

  let defKeywordAttr :: KeywordAttr
defKeywordAttr = Element -> KeywordAttr
getKeywordAttrs Element
rootEl

  let contextEls :: [Element]
contextEls = [Char] -> Element -> [Element]
getElementsNamed [Char]
"contexts" Element
hlEl [Element] -> (Element -> [Element]) -> [Element]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                   [Char] -> Element -> [Element]
getElementsNamed [Char]
"context"

  let syntaxname :: Text
syntaxname = [Char] -> Element -> Text
getAttrValue [Char]
"name" Element
rootEl

  [Context]
contexts <- (Element -> ExceptT [Char] Identity Context)
-> [Element] -> ExceptT [Char] Identity [Context]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM
    (Bool
-> Text
-> ItemData
-> Map Text [Text]
-> KeywordAttr
-> Element
-> ExceptT [Char] Identity Context
getContext Bool
casesensitive Text
syntaxname ItemData
itemDatas Map Text [Text]
lists KeywordAttr
defKeywordAttr)
    [Element]
contextEls

  Text
startingContext <- case [Context]
contexts of
                       (Context
c:[Context]
_) -> Text -> ExceptT [Char] Identity Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ExceptT [Char] Identity Text)
-> Text -> ExceptT [Char] Identity Text
forall a b. (a -> b) -> a -> b
$ Context -> Text
cName Context
c
                       []    -> [Char] -> ExceptT [Char] Identity Text
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"No contexts"

  Syntax -> Except [Char] Syntax
forall (m :: * -> *) a. Monad m => a -> m a
return Syntax :: Text
-> [Char]
-> Text
-> Map Text Context
-> Text
-> Text
-> Text
-> [[Char]]
-> Text
-> Syntax
Syntax{
             sName :: Text
sName       = [Char] -> Element -> Text
getAttrValue [Char]
"name" Element
rootEl
           , sFilename :: [Char]
sFilename   = [Char]
filename
           , sShortname :: Text
sShortname  = [Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char]
pathToLangName [Char]
filename
           , sAuthor :: Text
sAuthor     = [Char] -> Element -> Text
getAttrValue [Char]
"author" Element
rootEl
           , sVersion :: Text
sVersion    = [Char] -> Element -> Text
getAttrValue [Char]
"version" Element
rootEl
           , sLicense :: Text
sLicense    = [Char] -> Element -> Text
getAttrValue [Char]
"license" Element
rootEl
           , sExtensions :: [[Char]]
sExtensions = [Char] -> [[Char]]
words ([Char] -> [[Char]]) -> [Char] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> [Char] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (\Char
c -> if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
';' then Char
' ' else Char
c)
                                 ([Char] -> [Char]) -> [Char] -> [Char]
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack
                                 (Text -> [Char]) -> Text -> [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"extensions" Element
rootEl
           , sContexts :: Map Text Context
sContexts   = [(Text, Context)] -> Map Text Context
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList
                  [(Context -> Text
cName Context
c, Context
c) | Context
c <- [Context]
contexts]
           , sStartingContext :: Text
sStartingContext = Text
startingContext
           }

elementNamed :: String -> Node -> Bool
elementNamed :: [Char] -> Node -> Bool
elementNamed [Char]
name (NodeElement Element
el) = Element -> Name
elementName Element
el Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== [Char] -> Name
forall a. IsString a => [Char] -> a
String.fromString [Char]
name
elementNamed [Char]
_ Node
_ = Bool
False

getElementsNamed :: String -> Element -> [Element]
getElementsNamed :: [Char] -> Element -> [Element]
getElementsNamed [Char]
name Element
node =
  [Element
el | NodeElement Element
el <- (Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
filter ([Char] -> Node -> Bool
elementNamed [Char]
name) (Element -> [Node]
elementNodes Element
node)]

getAttrValue :: String -> Element -> Text
getAttrValue :: [Char] -> Element -> Text
getAttrValue [Char]
key Element
el = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
forall a. Monoid a => a
mempty (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Name -> Map Name Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup ([Char] -> Name
forall a. IsString a => [Char] -> a
String.fromString [Char]
key)
                                       (Map Name Text -> Maybe Text) -> Map Name Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Element -> Map Name Text
elementAttributes Element
el

getTextContent :: Element -> Text
getTextContent :: Element -> Text
getTextContent Element
el =
  [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat [Text
t | NodeContent Text
t <- Element -> [Node]
elementNodes Element
el]

getList :: Element -> Except String (Text, [Text])
getList :: Element -> ExceptT [Char] Identity (Text, [Text])
getList Element
el = do
  case Name -> Map Name Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"name" (Element -> Map Name Text
elementAttributes Element
el) of
    Maybe Text
Nothing   -> [Char] -> ExceptT [Char] Identity (Text, [Text])
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"No name attribute on list"
    Just Text
name ->
      (Text, [Text]) -> ExceptT [Char] Identity (Text, [Text])
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
name, (Element -> Text) -> [Element] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text -> Text
T.strip (Text -> Text) -> (Element -> Text) -> Element -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Text
getTextContent)
                        ([Char] -> Element -> [Element]
getElementsNamed [Char]
"item" Element
el))

getParser :: Bool -> Text -> ItemData -> M.Map Text [Text] -> KeywordAttr
          -> Text -> Element -> Except String Rule
getParser :: Bool
-> Text
-> ItemData
-> Map Text [Text]
-> KeywordAttr
-> Text
-> Element
-> Except [Char] Rule
getParser Bool
casesensitive Text
syntaxname ItemData
itemdatas Map Text [Text]
lists KeywordAttr
kwattr Text
cattr Element
el = do
  let name :: Text
name = Name -> Text
nameLocalName (Name -> Text) -> (Element -> Name) -> Element -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Name
elementName (Element -> Text) -> Element -> Text
forall a b. (a -> b) -> a -> b
$ Element
el
  let attribute :: Text
attribute = [Char] -> Element -> Text
getAttrValue [Char]
"attribute" Element
el
  let context :: Text
context = [Char] -> Element -> Text
getAttrValue [Char]
"context" Element
el
  let char0 :: Char
char0 = Text -> Char
readChar (Text -> Char) -> Text -> Char
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"char" Element
el
  let char1 :: Char
char1 = Text -> Char
readChar (Text -> Char) -> Text -> Char
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"char1" Element
el
  let str' :: Text
str' = [Char] -> Element -> Text
getAttrValue [Char]
"String" Element
el
  let insensitive :: Bool
insensitive = Bool -> Text -> Bool
vBool (Bool -> Bool
not Bool
casesensitive) (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"insensitive" Element
el
  let includeAttrib :: Bool
includeAttrib = Bool -> Text -> Bool
vBool Bool
False (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"includeAttrib" Element
el
  let lookahead :: Bool
lookahead = Bool -> Text -> Bool
vBool Bool
False (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"lookAhead" Element
el
  let firstNonSpace :: Bool
firstNonSpace = Bool -> Text -> Bool
vBool Bool
False (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"firstNonSpace" Element
el
  let column' :: Text
column' = [Char] -> Element -> Text
getAttrValue [Char]
"column" Element
el
  let dynamic :: Bool
dynamic = Bool -> Text -> Bool
vBool Bool
False (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"dynamic" Element
el
  [Rule]
children <- (Element -> Except [Char] Rule)
-> [Element] -> ExceptT [Char] Identity [Rule]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Bool
-> Text
-> ItemData
-> Map Text [Text]
-> KeywordAttr
-> Text
-> Element
-> Except [Char] Rule
getParser Bool
casesensitive
                    Text
syntaxname ItemData
itemdatas Map Text [Text]
lists KeywordAttr
kwattr Text
attribute)
                  [Element
e | NodeElement Element
e <- Element -> [Node]
elementNodes Element
el ]
  let tildeRegex :: Bool
tildeRegex = Text
name Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"RegExpr" Bool -> Bool -> Bool
&& Int -> Text -> Text
T.take Int
1 Text
str' Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"^"
  let str :: Text
str = if Bool
tildeRegex then Int -> Text -> Text
T.drop Int
1 Text
str' else Text
str'
  let column :: Maybe Int
column = if Bool
tildeRegex
                  then Int -> Maybe Int
forall a. a -> Maybe a
Just (Int
0 :: Int)
                  else ([Char] -> Maybe Int)
-> ((Int, Text) -> Maybe Int)
-> Either [Char] (Int, Text)
-> Maybe Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\[Char]
_ -> Maybe Int
forall a. Maybe a
Nothing) (Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int)
-> ((Int, Text) -> Int) -> (Int, Text) -> Maybe Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Text) -> Int
forall a b. (a, b) -> a
fst) (Either [Char] (Int, Text) -> Maybe Int)
-> Either [Char] (Int, Text) -> Maybe Int
forall a b. (a -> b) -> a -> b
$
                         Reader Int
forall a. Integral a => Reader a
TR.decimal Text
column'
  let re :: Matcher
re = RE -> Matcher
RegExpr RE :: ByteString -> Bool -> RE
RE{ reString :: ByteString
reString = Text -> ByteString
TE.encodeUtf8 Text
str
                     , reCaseSensitive :: Bool
reCaseSensitive = Bool -> Bool
not Bool
insensitive }
  let (Text
incsyntax, Text
inccontext) =
          case Text -> Text -> (Text, Text)
T.breakOn Text
"##" Text
context of
                (Text
_,Text
x) | Text -> Bool
T.null Text
x -> (Text
syntaxname, Text
context)
                (Text
cont, Text
lang)     -> (Int -> Text -> Text
T.drop Int
2 Text
lang, Text
cont)
  Matcher
matcher <- case Text
name of
                 Text
"DetectChar" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Char -> Matcher
DetectChar Char
char0
                 Text
"Detect2Chars" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Matcher
Detect2Chars Char
char0 Char
char1
                 Text
"AnyChar" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Set Char -> Matcher
AnyChar (Set Char -> Matcher) -> Set Char -> Matcher
forall a b. (a -> b) -> a -> b
$ [Char] -> Set Char
forall a. Ord a => [a] -> Set a
Set.fromList ([Char] -> Set Char) -> [Char] -> Set Char
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
str
                 Text
"RangeDetect" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Matcher
RangeDetect Char
char0 Char
char1
                 Text
"StringDetect" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Text -> Matcher
StringDetect Text
str
                 Text
"WordDetect" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Text -> Matcher
WordDetect Text
str
                 Text
"RegExpr" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
re
                 Text
"keyword" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ KeywordAttr -> WordSet Text -> Matcher
Keyword KeywordAttr
kwattr (WordSet Text -> Matcher) -> WordSet Text -> Matcher
forall a b. (a -> b) -> a -> b
$
                    WordSet Text
-> ([Text] -> WordSet Text) -> Maybe [Text] -> WordSet Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Bool -> [Text] -> WordSet Text
forall a. (FoldCase a, Ord a) => Bool -> [a] -> WordSet a
makeWordSet Bool
True [])
                      (Bool -> [Text] -> WordSet Text
forall a. (FoldCase a, Ord a) => Bool -> [a] -> WordSet a
makeWordSet (KeywordAttr -> Bool
keywordCaseSensitive KeywordAttr
kwattr))
                      (Text -> Map Text [Text] -> Maybe [Text]
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
str Map Text [Text]
lists)
                 Text
"Int" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
Int
                 Text
"Float" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
Float
                 Text
"HlCOct" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
HlCOct
                 Text
"HlCHex" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
HlCHex
                 Text
"HlCStringChar" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
HlCStringChar
                 Text
"HlCChar" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
HlCChar
                 Text
"LineContinue" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
LineContinue
                 Text
"IncludeRules" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$
                   (Text, Text) -> Matcher
IncludeRules (Text
incsyntax, Text
inccontext)
                 Text
"DetectSpaces" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
DetectSpaces
                 Text
"DetectIdentifier" -> Matcher -> ExceptT [Char] Identity Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return (Matcher -> ExceptT [Char] Identity Matcher)
-> Matcher -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ Matcher
DetectIdentifier
                 Text
_ -> [Char] -> ExceptT [Char] Identity Matcher
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError ([Char] -> ExceptT [Char] Identity Matcher)
-> [Char] -> ExceptT [Char] Identity Matcher
forall a b. (a -> b) -> a -> b
$ [Char]
"Unknown element " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name

  let contextSwitch :: [ContextSwitch]
contextSwitch = if Text
name Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"IncludeRules"
                         then []  -- is this right?
                         else Text -> Text -> [ContextSwitch]
parseContextSwitch Text
incsyntax Text
inccontext
  Rule -> Except [Char] Rule
forall (m :: * -> *) a. Monad m => a -> m a
return (Rule -> Except [Char] Rule) -> Rule -> Except [Char] Rule
forall a b. (a -> b) -> a -> b
$ Rule :: Matcher
-> TokenType
-> Bool
-> Bool
-> Bool
-> [Rule]
-> Bool
-> Bool
-> Maybe Int
-> [ContextSwitch]
-> Rule
Rule{ rMatcher :: Matcher
rMatcher = Matcher
matcher
               , rAttribute :: TokenType
rAttribute = TokenType -> Maybe TokenType -> TokenType
forall a. a -> Maybe a -> a
fromMaybe TokenType
NormalTok (Maybe TokenType -> TokenType) -> Maybe TokenType -> TokenType
forall a b. (a -> b) -> a -> b
$
                    if Text -> Bool
T.null Text
attribute
                       then Text -> ItemData -> Maybe TokenType
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
cattr ItemData
itemdatas
                       else Text -> ItemData -> Maybe TokenType
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
attribute ItemData
itemdatas
               , rIncludeAttribute :: Bool
rIncludeAttribute = Bool
includeAttrib
               , rDynamic :: Bool
rDynamic = Bool
dynamic
               , rCaseSensitive :: Bool
rCaseSensitive = Bool -> Bool
not Bool
insensitive
               , rChildren :: [Rule]
rChildren = [Rule]
children
               , rContextSwitch :: [ContextSwitch]
rContextSwitch = [ContextSwitch]
contextSwitch
               , rLookahead :: Bool
rLookahead = Bool
lookahead
               , rFirstNonspace :: Bool
rFirstNonspace = Bool
firstNonSpace
               , rColumn :: Maybe Int
rColumn = Maybe Int
column
               }


getContext :: Bool
           -> Text
           -> ItemData
           -> M.Map Text [Text]
           -> KeywordAttr
           -> Element
           -> Except String Context
getContext :: Bool
-> Text
-> ItemData
-> Map Text [Text]
-> KeywordAttr
-> Element
-> ExceptT [Char] Identity Context
getContext Bool
casesensitive Text
syntaxname ItemData
itemDatas Map Text [Text]
lists KeywordAttr
kwattr Element
el = do
  let name :: Text
name = [Char] -> Element -> Text
getAttrValue [Char]
"name" Element
el
  let attribute :: Text
attribute = [Char] -> Element -> Text
getAttrValue [Char]
"attribute" Element
el
  let lineEmptyContext :: Text
lineEmptyContext = [Char] -> Element -> Text
getAttrValue [Char]
"lineEmptyContext" Element
el
  let lineEndContext :: Text
lineEndContext = [Char] -> Element -> Text
getAttrValue [Char]
"lineEndContext" Element
el
  let lineBeginContext :: Text
lineBeginContext = [Char] -> Element -> Text
getAttrValue [Char]
"lineBeginContext" Element
el
  let fallthrough :: Bool
fallthrough = Bool -> Text -> Bool
vBool Bool
False (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"fallthrough" Element
el
  let fallthroughContext :: Text
fallthroughContext = [Char] -> Element -> Text
getAttrValue [Char]
"fallthroughContext" Element
el
  let dynamic :: Bool
dynamic = Bool -> Text -> Bool
vBool Bool
False (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"dynamic" Element
el

  [Rule]
parsers <- (Element -> Except [Char] Rule)
-> [Element] -> ExceptT [Char] Identity [Rule]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Bool
-> Text
-> ItemData
-> Map Text [Text]
-> KeywordAttr
-> Text
-> Element
-> Except [Char] Rule
getParser Bool
casesensitive
                    Text
syntaxname ItemData
itemDatas Map Text [Text]
lists KeywordAttr
kwattr Text
attribute)
                  [Element
e | NodeElement Element
e <- Element -> [Node]
elementNodes Element
el ]

  Context -> ExceptT [Char] Identity Context
forall (m :: * -> *) a. Monad m => a -> m a
return (Context -> ExceptT [Char] Identity Context)
-> Context -> ExceptT [Char] Identity Context
forall a b. (a -> b) -> a -> b
$ Context :: Text
-> Text
-> [Rule]
-> TokenType
-> [ContextSwitch]
-> [ContextSwitch]
-> [ContextSwitch]
-> Bool
-> [ContextSwitch]
-> Bool
-> Context
Context {
            cName :: Text
cName = Text
name
          , cSyntax :: Text
cSyntax = Text
syntaxname
          , cRules :: [Rule]
cRules = [Rule]
parsers
          , cAttribute :: TokenType
cAttribute = TokenType -> Maybe TokenType -> TokenType
forall a. a -> Maybe a -> a
fromMaybe TokenType
NormalTok (Maybe TokenType -> TokenType) -> Maybe TokenType -> TokenType
forall a b. (a -> b) -> a -> b
$ Text -> ItemData -> Maybe TokenType
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
attribute ItemData
itemDatas
          , cLineEmptyContext :: [ContextSwitch]
cLineEmptyContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
lineEmptyContext
          , cLineEndContext :: [ContextSwitch]
cLineEndContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
lineEndContext
          , cLineBeginContext :: [ContextSwitch]
cLineBeginContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
lineBeginContext
          , cFallthrough :: Bool
cFallthrough = Bool
fallthrough
          , cFallthroughContext :: [ContextSwitch]
cFallthroughContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
fallthroughContext
          , cDynamic :: Bool
cDynamic = Bool
dynamic
          }

getItemData :: Element -> ItemData
getItemData :: Element -> ItemData
getItemData Element
el = [(Text, Text)] -> ItemData
toItemDataTable ([(Text, Text)] -> ItemData) -> [(Text, Text)] -> ItemData
forall a b. (a -> b) -> a -> b
$
  [([Char] -> Element -> Text
getAttrValue [Char]
"name" Element
e, [Char] -> Element -> Text
getAttrValue [Char]
"defStyleNum" Element
e)
    | Element
e <- ([Char] -> Element -> [Element]
getElementsNamed [Char]
"itemDatas" Element
el [Element] -> (Element -> [Element]) -> [Element]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Char] -> Element -> [Element]
getElementsNamed [Char]
"itemData")
  ]

getKeywordAttrs :: Element -> KeywordAttr
getKeywordAttrs :: Element -> KeywordAttr
getKeywordAttrs Element
el =
  case ([Char] -> Element -> [Element]
getElementsNamed [Char]
"general" Element
el [Element] -> (Element -> [Element]) -> [Element]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Char] -> Element -> [Element]
getElementsNamed [Char]
"keywords") of
     []    -> KeywordAttr
defaultKeywordAttr
     (Element
x:[Element]
_) ->
       let weakDelim :: [Char]
weakDelim = Text -> [Char]
T.unpack (Text -> [Char]) -> Text -> [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"weakDeliminator" Element
x
           additionalDelim :: [Char]
additionalDelim = Text -> [Char]
T.unpack (Text -> [Char]) -> Text -> [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"additionalDeliminator" Element
x
        in KeywordAttr :: Bool -> Set Char -> KeywordAttr
KeywordAttr { keywordCaseSensitive :: Bool
keywordCaseSensitive =
                             Bool -> Text -> Bool
vBool Bool
True (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"casesensitive" Element
x
                       , keywordDelims :: Set Char
keywordDelims = Set Char -> Set Char -> Set Char
forall a. Ord a => Set a -> Set a -> Set a
Set.union Set Char
standardDelims
                           ([Char] -> Set Char
forall a. Ord a => [a] -> Set a
Set.fromList [Char]
additionalDelim) Set Char -> Set Char -> Set Char
forall a. Ord a => Set a -> Set a -> Set a
Set.\\
                             [Char] -> Set Char
forall a. Ord a => [a] -> Set a
Set.fromList [Char]
weakDelim }

parseContextSwitch :: Text -> Text -> [ContextSwitch]
parseContextSwitch :: Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
t =
  if Text -> Bool
T.null Text
t Bool -> Bool -> Bool
|| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"#stay"
     then []
     else
       case Text -> Text -> Maybe Text
T.stripPrefix Text
"#pop" Text
t of
         Just Text
rest -> ContextSwitch
Pop ContextSwitch -> [ContextSwitch] -> [ContextSwitch]
forall a. a -> [a] -> [a]
: Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
rest
         Maybe Text
Nothing   -> [(Text, Text) -> ContextSwitch
Push (Text
syntaxname, (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'!') Text
t)]

type ItemData = M.Map Text TokenType

toItemDataTable :: [(Text, Text)] -> ItemData
toItemDataTable :: [(Text, Text)] -> ItemData
toItemDataTable = [(Text, TokenType)] -> ItemData
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, TokenType)] -> ItemData)
-> ([(Text, Text)] -> [(Text, TokenType)])
-> [(Text, Text)]
-> ItemData
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Text) -> (Text, TokenType))
-> [(Text, Text)] -> [(Text, TokenType)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
s,Text
t) -> (Text
s, Text -> TokenType
toTokenType Text
t))

toTokenType :: Text -> TokenType
toTokenType :: Text -> TokenType
toTokenType Text
t =
  case Text
t of
    Text
"dsNormal"         -> TokenType
NormalTok
    Text
"dsKeyword"        -> TokenType
KeywordTok
    Text
"dsDataType"       -> TokenType
DataTypeTok
    Text
"dsDecVal"         -> TokenType
DecValTok
    Text
"dsBaseN"          -> TokenType
BaseNTok
    Text
"dsFloat"          -> TokenType
FloatTok
    Text
"dsConstant"       -> TokenType
ConstantTok
    Text
"dsChar"           -> TokenType
CharTok
    Text
"dsSpecialChar"    -> TokenType
SpecialCharTok
    Text
"dsString"         -> TokenType
StringTok
    Text
"dsVerbatimString" -> TokenType
VerbatimStringTok
    Text
"dsSpecialString"  -> TokenType
SpecialStringTok
    Text
"dsImport"         -> TokenType
ImportTok
    Text
"dsComment"        -> TokenType
CommentTok
    Text
"dsDocumentation"  -> TokenType
DocumentationTok
    Text
"dsAnnotation"     -> TokenType
AnnotationTok
    Text
"dsCommentVar"     -> TokenType
CommentVarTok
    Text
"dsOthers"         -> TokenType
OtherTok
    Text
"dsFunction"       -> TokenType
FunctionTok
    Text
"dsVariable"       -> TokenType
VariableTok
    Text
"dsControlFlow"    -> TokenType
ControlFlowTok
    Text
"dsOperator"       -> TokenType
OperatorTok
    Text
"dsBuiltIn"        -> TokenType
BuiltInTok
    Text
"dsExtension"      -> TokenType
ExtensionTok
    Text
"dsPreprocessor"   -> TokenType
PreprocessorTok
    Text
"dsAttribute"      -> TokenType
AttributeTok
    Text
"dsRegionMarker"   -> TokenType
RegionMarkerTok
    Text
"dsInformation"    -> TokenType
InformationTok
    Text
"dsWarning"        -> TokenType
WarningTok
    Text
"dsAlert"          -> TokenType
AlertTok
    Text
"dsError"          -> TokenType
ErrorTok
    Text
_                  -> TokenType
NormalTok

-- Note, some xml files have "\\" for a backslash,
-- others have "\".  Not sure what the rules are, but
-- this covers both bases:
readChar :: Text -> Char
readChar :: Text -> Char
readChar Text
t = case Text -> [Char]
T.unpack Text
t of
                  [Char
c] -> Char
c
                  [Char]
s   -> Char -> [Char] -> Char
forall a. Read a => a -> [Char] -> a
readDef Char
'\xffff' ([Char] -> Char) -> [Char] -> Char
forall a b. (a -> b) -> a -> b
$ [Char]
"'" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
s [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"'"

pathToLangName :: String -> String
pathToLangName :: [Char] -> [Char]
pathToLangName [Char]
s = [Char] -> [Char]
capitalize ([Char] -> [Char]
camelize ([Char] -> [Char]
takeBaseName [Char]
s))

camelize :: String -> String
camelize :: [Char] -> [Char]
camelize (Char
d:Char
c:[Char]
cs) | Bool -> Bool
not (Char -> Bool
isAlphaNum Char
d) = Char -> Char
toUpper Char
c Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char] -> [Char]
camelize [Char]
cs
camelize (Char
c:[Char]
cs)   = Char
c Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char] -> [Char]
camelize [Char]
cs
camelize []       = []

capitalize :: String -> String
capitalize :: [Char] -> [Char]
capitalize (Char
c:[Char]
cs) = Char -> Char
toUpper Char
c Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char]
cs
capitalize []     = []