{-# LANGUAGE OverloadedStrings #-} ----------------------------------------------------------------------------- -- | -- Module : Hasmin.Parser.Internal -- Copyright : (c) 2017 Cristian Adrián Ontivero -- License : BSD3 -- Stability : experimental -- Portability : unknown -- ----------------------------------------------------------------------------- module Hasmin.Parser.Internal ( stylesheet , atRule , atMedia , styleRule , rule , rules , declaration , declarations , selector , supportsCondition ) where import Control.Applicative ((<|>), many, some, empty, optional) import Data.Functor (($>)) import Data.Attoparsec.Combinator (lookAhead, sepBy, endOfInput) import Data.Attoparsec.Text (asciiCI, char, many1, manyTill, option, Parser, satisfy, string) import Data.List.NonEmpty (NonEmpty( (:|) )) import Data.Monoid ((<>)) import Data.Maybe (fromMaybe) import Data.Text (Text) import Data.Text.Lazy.Builder as LB import Data.Map.Strict (Map) import qualified Data.List.NonEmpty as NE import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Data.Attoparsec.Text as A import qualified Data.Char as C import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Hasmin.Parser.Utils import Hasmin.Parser.Value import Hasmin.Types.Selector import Hasmin.Types.Stylesheet import Hasmin.Types.Declaration import Hasmin.Types.String -- | Parser for CSS complex selectors (see 'Selector' for more details). selector :: Parser Selector selector = Selector <$> compoundSelector <*> combinatorsAndSelectors where combinatorsAndSelectors = many ((,) <$> combinator <* skipComments <*> compoundSelector) -- First tries with '>>' (descendant), '>' (child), '+' (adjacent sibling), and -- '~' (general sibling) combinators. If those fail, it tries with the -- descendant (whitespace) combinator. This is done to allow comments in-between. -- -- | Parser for selector combinators, i.e. ">>" (descendant), '>' (child), '+' -- (adjacent sibling), '~' (general sibling), and ' ' (descendant) combinators. combinator :: Parser Combinator combinator = (skipComments *> ((string ">>" $> DescendantBrackets) <|> (char '>' $> Child) <|> (char '+' $> AdjacentSibling) <|> (char '~' $> GeneralSibling))) <|> (satisfy ws $> DescendantSpace) where ws c = c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' compoundSelector :: Parser CompoundSelector compoundSelector = (:|) <$> (typeSelector <|> universal) <*> many p <|> ((Universal mempty :|) <$> many1 p) where p = idSel <|> classSel <|> attributeSel <|> pseudo -- | Parses a \"number sign\" (U+0023, \#) immediately followed by the ID value, -- which must be a CSS identifier. idSel :: Parser SimpleSelector idSel = do _ <- char '#' name <- mconcat <$> many1 nmchar pure . IdSel . TL.toStrict $ toLazyText name -- class: '.' IDENT classSel :: Parser SimpleSelector classSel = char '.' *> (ClassSel <$> ident) {- attrib : '[' S* [ namespace_prefix ]? IDENT S* [ [ PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | '=' | INCLUDES | DASHMATCH ] S* [ IDENT | STRING ] S* ]? ']' ; -} -- FIXME namespace prefixes aren't allowed inside attribute selectors, -- but they should be. attributeSel :: Parser SimpleSelector attributeSel = do _ <- char '[' attId <- lexeme ident g <- option Attribute attValue _ <- char ']' pure $ AttributeSel (g attId) where attValue = do f <- ((string "^=" $> (:^=:)) <|> (string "$=" $> (:$=:)) <|> (string "*=" $> (:*=:)) <|> (string "=" $> (:=:)) <|> (string "~=" $> (:~=:)) <|> (string "|=" $> (:|=:))) <* skipComments attval <- identOrString <* skipComments pure (`f` attval) identOrString :: Parser (Either Text StringType) identOrString = (Left <$> ident) <|> (Right <$> stringtype) -- type_selector: [namespace_prefix]? element_name typeSelector :: Parser SimpleSelector typeSelector = Type <$> opt namespacePrefix <*> ident -- universal: [ namespace_prefix ]? '*' universal :: Parser SimpleSelector universal = Universal <$> opt namespacePrefix <* char '*' -- namespace_prefix: [ IDENT | '*' ]? '|' namespacePrefix :: Parser Text namespacePrefix = opt (ident <|> string "*") <* char '|' {- '::' starts a pseudo-element, ':' a pseudo-class Exceptions: :first-line, :first-letter, :before and :after. Note that pseudo-elements are restricted to one per selector and occur only in the last simple_selector_sequence. = ':' | ':' ')' = ':' -} -- pseudo: ':' ':'? [ IDENT | functional_pseudo ] pseudo :: Parser SimpleSelector pseudo = char ':' *> (pseudoElementSelector <|> pseudoClassSelector) where pseudoClassSelector = do i <- ident c <- A.peekChar case c of Just '(' -> char '(' *> case Map.lookup (T.toCaseFold i) fpcMap of Just p -> functionParser p Nothing -> functionParser (FunctionalPseudoClass i <$> A.takeWhile (/= ')')) _ -> pure $ PseudoClass i pseudoElementSelector = (char ':' *> (PseudoElem <$> ident)) <|> (ident >>= handleSpecialCase) where handleSpecialCase :: Text -> Parser SimpleSelector handleSpecialCase t | isSpecialPseudoElement = pure $ PseudoElem t | otherwise = empty where isSpecialPseudoElement = T.toCaseFold t `elem` specialPseudoElements -- \ microsyntax parser. anplusb :: Parser AnPlusB anplusb = (asciiCI "even" $> Even) <|> (asciiCI "odd" $> Odd) <|> do s <- optional parseSign dgts <- option mempty digits case dgts of [] -> ciN *> skipComments *> option (A s Nothing) (AB s Nothing <$> bValue) _ -> let n = read dgts :: Int in (ciN *> skipComments *> option (A s $ Just n) (AB s (Just n) <$> bValue)) <|> (pure . B $ getSign s * n) where ciN = satisfy (\c -> c == 'N' || c == 'n') parseSign = (char '-' $> Minus) <|> (char '+' $> Plus) getSign (Just Minus) = -1 getSign _ = 1 bValue = do readPlus <- (char '-' $> False) <|> (char '+' $> True) d <- skipComments *> digits if readPlus then pure $ read d else pure $ read ('-':d) -- Functional pseudo classes parsers map fpcMap :: Map Text (Parser SimpleSelector) fpcMap = Map.fromList [buildTuple "nth-of-type" (\x -> FunctionalPseudoClass2 x <$> anplusb) ,buildTuple "nth-last-of-type" (\x -> FunctionalPseudoClass2 x <$> anplusb) ,buildTuple "nth-column" (\x -> FunctionalPseudoClass2 x <$> anplusb) ,buildTuple "nth-last-column" (\x -> FunctionalPseudoClass2 x <$> anplusb) ,buildTuple "not" (\x -> FunctionalPseudoClass1 x <$> compoundSelectorList) ,buildTuple "matches" (\x -> FunctionalPseudoClass1 x <$> compoundSelectorList) ,buildTuple "nth-child" (anbAndSelectors . FunctionalPseudoClass3) ,buildTuple "nth-last-child" (anbAndSelectors . FunctionalPseudoClass3) ,buildTuple "lang" (const (Lang <$> identOrString)) -- -- :drop( [ active || valid || invalid ]? ) -- The :drop() functional pseudo-class is identical to :drop -- ,("drop", anplusb) -- -- It accepts a comma-separated list of one or more language ranges as its -- argument. Each language range in :lang() must be a valid CSS or -- . -- ,("lang", anplusb) -- -- ,("dir", text) -- ,("has", relative selectors) ] where buildTuple t c = (t, c t) compoundSelectorList = (:) <$> compoundSelector <*> many (comma *> compoundSelector) anbAndSelectors constructor = do a <- anplusb <* skipComments o <- option [] (asciiCI "of" *> skipComments *> compoundSelectorList) pure $ constructor a o -- | Parse a list of comma-separated selectors, ignoring whitespace and -- comments. selectors :: Parser [Selector] selectors = lexeme selector `sepBy` char ',' -- | Parser for a declaration, starting by the property name. declaration :: Parser Declaration declaration = do p <- property <* colon v <- valuesFor p <|> valuesFallback i <- important ie <- lexeme iehack pure $ Declaration p v i ie -- | Parser for property names. Usually, 'ident' would be enough, but this -- parser adds support for IE hacks (e.g. *width), which deviate from the CSS -- grammar. property :: Parser Text property = mappend <$> opt ie7orLessHack <*> ident where ie7orLessHack = T.singleton <$> satisfy (`Set.member` ie7orLessHacks) ie7orLessHacks = Set.fromList ("!$&*()=%+@,./`[]#~?:<>|" :: String) -- | Used to parse the "!important" at the end of declarations, ignoring spaces -- and comments after the '!'. important :: Parser Bool important = option False (char '!' *> skipComments *> asciiCI "important" $> True) iehack :: Parser Bool iehack = option False (string "\\9" $> True) -- Note: The handleSemicolons outside is needed to handle parsing "h1 { ; }". -- -- | Parser for a list of declarations, ignoring spaces, comments, and empty -- declarations (e.g. ; ;) declarations :: Parser [Declaration] declarations = many (declaration <* handleSemicolons) <* handleSemicolons where handleSemicolons = many (string ";" *> skipComments) -- | Parser for CSS at-rules (e.g. \@keyframes, \@media) atRule :: Parser Rule atRule = do _ <- char '@' ruleType <- ident fromMaybe (atBlock ruleType) (Map.lookup ruleType m) where m = Map.fromList [("charset", atCharset) ,("import", atImport) ,("namespace", atNamespace) ,("media", atMedia) ,("supports", atSupports) -- ,("document", atDocument) -- ,("page", atPage) ,("font-face", skipComments *> atBlock "font-face") ,("keyframes", atKeyframe mempty ) ,("-webkit-keyframes", atKeyframe "-webkit-") ,("-moz-keyframes", atKeyframe "-moz-") ,("-o-keyframes", atKeyframe "-o-") -- ,("viewport", atViewport) -- ,("counter-style", atCounterStyle) -- ,("font-feature-value", atFontFeatureValue) ] -- @import [ | ] []?; atImport :: Parser Rule atImport = do esu <- skipComments *> stringOrUrl mql <- option [] mediaQueryList _ <- skipComments <* char ';' pure $ AtImport esu mql atCharset :: Parser Rule atCharset = AtCharset <$> (lexeme stringtype <* char ';') -- @namespace ? [ | ]; -- where -- = IDENT atNamespace :: Parser Rule atNamespace = do i <- skipComments *> option mempty ident ret <- if T.null i then (AtNamespace i . Left) <$> stringtype else decideBasedOn i _ <- skipComments <* char ';' pure ret where decideBasedOn x | T.toCaseFold x == "url" = do c <- A.peekChar case c of Just '(' -> AtNamespace mempty <$> (char '(' *> (Right <$> url)) _ -> AtNamespace x <$> (skipComments *> stringOrUrl) | otherwise = AtNamespace x <$> (skipComments *> stringOrUrl) atKeyframe :: Text -> Parser Rule atKeyframe t = do name <- lexeme ident <* char '{' bs <- many (keyframeBlock <* skipComments) _ <- char '}' pure $ AtKeyframes t name bs keyframeBlock :: Parser KeyframeBlock keyframeBlock = do sel <- lexeme kfsList ds <- char '{' *> skipComments *> declarations <* char '}' pure $ KeyframeBlock sel ds where from = asciiCI "from" $> From to = asciiCI "to" $> To keyframeSelector = from <|> to <|> (KFPercentage <$> percentage) kfsList = (:) <$> keyframeSelector <*> many (comma *> keyframeSelector) atMedia :: Parser Rule atMedia = do m <- satisfy C.isSpace *> mediaQueryList _ <- char '{' <* skipComments r <- manyTill (rule <* skipComments) (lookAhead (char '}')) _ <- char '}' pure $ AtMedia m r atSupports :: Parser Rule atSupports = do sc <- satisfy C.isSpace *> supportsCondition _ <- lexeme (char '{') r <- manyTill (rule <* skipComments) (lookAhead (char '}')) _ <- char '}' pure $ AtSupports sc r -- | Parser for a , -- needed by @\@supports@ rules. supportsCondition :: Parser SupportsCondition supportsCondition = asciiCI "not" *> skipComments *> (Not <$> supportsCondInParens) <|> supportsConjunction <|> supportsDisjunction <|> (Parens <$> supportsCondInParens) where supportsDisjunction :: Parser SupportsCondition supportsDisjunction = supportsHelper Or "or" supportsConjunction :: Parser SupportsCondition supportsConjunction = supportsHelper And "and" supportsCondInParens :: Parser SupportsCondInParens supportsCondInParens = do _ <- char '(' x <- lexeme $ (ParensCond <$> supportsCondition) <|> (ParensDec <$> atSupportsDeclaration) _ <- char ')' pure x atSupportsDeclaration :: Parser Declaration atSupportsDeclaration = do p <- property <* colon v <- valuesFor p <|> valuesInParens pure $ Declaration p v False False -- customPropertyIdent :: Parser Text -- customPropertyIdent = (<>) <$> string "-" <*> ident supportsHelper :: (SupportsCondInParens -> NonEmpty SupportsCondInParens -> SupportsCondition) -> Text -> Parser SupportsCondition supportsHelper c t = do x <- supportsCondInParens <* skipComments xs <- some (asciiCI t *> lexeme supportsCondInParens) pure $ c x (NE.fromList xs) -- TODO clean code -- the "manyTill .. lookAhead" was added because if we only used "rules", it -- doesn't know when to stop, and breaks the parser atBlock :: Text -> Parser Rule atBlock i = do t <- mappend i <$> A.takeWhile (/= '{') <* char '{' r <- skipComments *> ((AtBlockWithDec t <$> declarations) <|> (AtBlockWithRules t <$> manyTill (rule <* skipComments) (lookAhead (char '}')))) _ <- char '}' pure r -- | Parses a CSS style rule, e.g. @body { padding: 0; }@ styleRule :: Parser Rule styleRule = do sels <- selectors <* char '{' <* skipComments decs <- declarations <* char '}' pure $ StyleRule sels decs -- | Parser for a CSS rule, which can be either an at-rule (e.g. \@charset), or a style -- rule. rule :: Parser Rule rule = atRule <|> styleRule -- | Parser for CSS rules (both style rules, and at-rules), which can be -- separated by whitespace or comments. rules :: Parser [Rule] rules = manyTill (rule <* skipComments) endOfInput -- | Parse a stylesheet, starting by the \@charset, \@import and \@namespace -- rules, followed by the list of rules, and ignoring any unneeded whitespace -- and comments. stylesheet :: Parser [Rule] stylesheet = do charset <- option [] ((:[]) <$> atCharset <* skipComments) imports <- many (atImport <* skipComments) namespaces <- many (atNamespace <* skipComments) _ <- skipComments -- if there is no charset, import, or namespace at rule we need this here. rest <- rules pure $ charset <> imports <> namespaces <> rest -- | Media Feature values. Per the -- , -- \ only accepts dpi and dpcm units, dppx was added later. -- However, the w3c validator considers it valid, so we make no exceptions. {- data MediaFeatureValue = MFV_Length Length | MFV_Ratio Ratio | MFV_Number Number | MFV_Resolution Resolution | MFV_Other Text -} -- | Specs: -- -- https://drafts.csswg.org/mediaqueries-3/#syntax -- https://www.w3.org/TR/css3-mediaqueries/ -- https://www.w3.org/TR/CSS21/grammar.html -- -- Implementation based on mozilla's pseudo BNF: -- https://developer.mozilla.org/en-US/docs/Web/CSS/Media_Queries/Using_media_queries mediaQueryList :: Parser [MediaQuery] mediaQueryList = lexeme ((:) <$> mediaQuery <*> many (char ',' *> skipComments *> mediaQuery)) mediaQuery :: Parser MediaQuery mediaQuery = mediaQuery1 <|> mediaQuery2 where mediaQuery1 = MediaQuery1 <$> optionalNotOrOnly <*> mediaType <*> andExpressions mediaQuery2 = MediaQuery2 <$> ((:) <$> expression <*> andExpressions) mediaType = lexeme ident andExpressions = many (h *> expression) h = lexeme (asciiCI "and" *> satisfy C.isSpace) optionalNotOrOnly = option mempty (asciiCI "not" <|> asciiCI "only") -- https://www.w3.org/TR/mediaqueries-4/#typedef-media-condition-without-or expression :: Parser Expression expression = char '(' *> skipComments *> (expr <|> expFallback) where expr = do e <- ident <* skipComments v <- optional (char ':' *> lexeme value) _ <- char ')' -- Needed here for expFallback to trigger pure $ Expression e v expFallback = InvalidExpression <$> A.takeWhile (/= ')') <* char ')' -- TODO implement the whole spec 4, or at least 3. -- Note: The code below pertains to CSS Media Queries Level 4. -- Since it is still too new and nobody implements it (afaik), -- I leave it here for future reference, when the need to cater for it -- arrives. {- mediaCondition = asciiCI "not" <|> asciiCI "and" <|> asciiCI "or" <|> mediaInParens mediaInParens = char '(' *> skipComments *> mediaCondition <* skipComments <* char ')' mediaFeature :: Parser MediaFeature mediaFeature = mfPlain <|> mfRange <|> mfBoolean where mfBoolean = ident mfRange = ident data MediaFeature = MFPlain Text Value | MFBoolean Text | MFRange Range data Range = Range1 Text RangeOp Value | Range2 Value RangeOp Text | Range3 Value RangeOp Text RangeOp Value data RangeOp = LTOP | GTOP | EQOP | GEQOP | LEQOP -- = [ '<' | '>' ]? '='? -- | [ '<' | '>' ]? '='? -- | '<' '='? '<' '='? -- | '>' '='? '>' '='? mfPlain = ident *> skipComments *> char ':' *> skipComments *> mfValue mfValue :: Parser Value mfValue = number <|> dimension <|> ident <|> ratio -- TODO check if both integers are positive (required by the spec) ratio :: Parser Value ratio = do n <- digits _ <- skipComments *> char '/' <* skipComments m <- digits pure $ Other (mconcat [n, "/", m]) -- expr -- : term [ operator? term ]* -- ; -- --term -- : unary_operator? -- [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* | -- TIME S* | FREQ S* ] -- | STRING S* | IDENT S* | URI S* | hexcolor | function -- ; -- data MediaFeatureType = Range | Discrete t = [("width", Range) ,("height", Range) ,("aspect-ratio", Range) ,("orientation", Discrete) ,("resolution", Range) ,("scan", Discrete) ,("grid", Discrete) ,("update", Discrete) ,("overflow-block", Discrete) ,("overflow-inline", Discrete) ,("color", Range) ,("color-index", Range) ,("monochrome", Range) ,("color-gamut", Discrete) ,("pointer", Discrete) ,("hover", Discrete) ,("any-pointer", Discrete) ,("any-hover", Discrete) ,("scripting", Discrete) ,("device-width", Range) ,("device-height", Range) ,("device-aspect-ratio", Range) ] -}