module Data.GraphViz.Attributes.HTML
( Label(..)
, Text
, TextItem(..)
, Format(..)
, Table(..)
, Row(..)
, Cell(..)
, Img(..)
, Attributes
, Attribute(..)
, Align(..)
, VAlign(..)
, Scale(..)
) where
import Data.GraphViz.Parsing
import Data.GraphViz.Printing
import Data.GraphViz.Attributes.Colors
import Data.GraphViz.Attributes.Internal
import Data.GraphViz.Util(bool)
import Numeric(readHex)
import Data.Char(chr, ord, isSpace)
import Data.Function(on)
import Data.List(delete)
import Data.Maybe(catMaybes, listToMaybe)
import Data.Word(Word8, Word16)
import qualified Data.Map as Map
import qualified Data.Text.Lazy as T
data Label = Text Text
| Table Table
deriving (Eq, Ord, Show, Read)
instance PrintDot Label where
unqtDot (Text txt) = unqtDot txt
unqtDot (Table tbl) = unqtDot tbl
instance ParseDot Label where
parseUnqt = fmap Table parseUnqt
`onFail`
fmap Text parseUnqt
`adjustErr`
("Can't parse Html.Label\n\t"++)
parse = parseUnqt
type Text = [TextItem]
data TextItem = Str T.Text
| Newline Attributes
| Font Attributes Text
| Format Format Text
deriving (Eq, Ord, Show, Read)
instance PrintDot TextItem where
unqtDot (Str str) = escapeValue str
unqtDot (Newline as) = printEmptyTag (text "BR") as
unqtDot (Font as txt) = printFontTag as $ unqtDot txt
unqtDot (Format fmt txt) = printTag (unqtDot fmt) [] $ unqtDot txt
unqtListToDot = hcat . mapM unqtDot
listToDot = unqtListToDot
instance ParseDot TextItem where
parseUnqt = oneOf [ fmap Str unescapeValue
, parseEmptyTag Newline "BR"
, parseFontTag Font parseUnqt
, parseTagRep Format parseUnqt parseUnqt
]
`adjustErr`
("Can't parse Html.TextItem\n\t"++)
parse = parseUnqt
parseUnqtList = many parseUnqt
parseList = parseUnqtList
data Format = Italics
| Bold
| Underline
| Subscript
| Superscript
deriving (Eq, Ord, Bounded, Enum, Show, Read)
instance PrintDot Format where
unqtDot Italics = text "I"
unqtDot Bold = text "B"
unqtDot Underline = text "U"
unqtDot Subscript = text "SUB"
unqtDot Superscript = text "SUP"
instance ParseDot Format where
parseUnqt = stringValue [ ("I", Italics)
, ("B", Bold)
, ("U", Underline)
, ("SUB", Subscript)
, ("SUP", Superscript)
]
data Table = HTable {
tableFontAttrs :: Maybe Attributes
, tableAttrs :: Attributes
, tableRows :: [Row]
}
deriving (Eq, Ord, Show, Read)
instance PrintDot Table where
unqtDot tbl = case tableFontAttrs tbl of
(Just as) -> printFontTag as tbl'
Nothing -> tbl'
where
tbl' = printTag (text "TABLE")
(tableAttrs tbl)
(toDot $ tableRows tbl)
instance ParseDot Table where
parseUnqt = wrapWhitespace (parseFontTag addFontAttrs pTbl)
`onFail`
pTbl
`adjustErr`
("Can't parse Html.Table\n\t"++)
where
pTbl = wrapWhitespace $ parseTag (HTable Nothing)
"TABLE"
(wrapWhitespace parseUnqt)
addFontAttrs fas tbl = tbl { tableFontAttrs = Just fas }
parse = parseUnqt
data Row = Cells [Cell]
| HorizontalRule
deriving (Eq, Ord, Show, Read)
instance PrintDot Row where
unqtDot (Cells cs) = printTag (text "TR") [] $ unqtDot cs
unqtDot HorizontalRule = printEmptyTag (text "HR") []
unqtListToDot = align . cat . mapM unqtDot
listToDot = unqtListToDot
instance ParseDot Row where
parseUnqt = wrapWhitespace $ parseTag (const Cells) "TR" parseUnqt
`onFail`
parseEmptyTag (const HorizontalRule) "HR"
`adjustErr`
("Can't parse Html.Row\n\t"++)
parse = parseUnqt
parseUnqtList = wrapWhitespace $ sepBy1 parseUnqt whitespace
parseList = parseUnqtList
data Cell = LabelCell Attributes Label
| ImgCell Attributes Img
| VerticalRule
deriving (Eq, Ord, Show, Read)
instance PrintDot Cell where
unqtDot (LabelCell as l) = printCell as $ unqtDot l
unqtDot (ImgCell as img) = printCell as $ unqtDot img
unqtDot VerticalRule = printEmptyTag (text "VR") []
unqtListToDot = hsep . mapM unqtDot
listToDot = unqtListToDot
printCell :: Attributes -> DotCode -> DotCode
printCell = printTag (text "TD")
instance ParseDot Cell where
parseUnqt = oneOf [ parseCell LabelCell parse
, parseCell ImgCell $ wrapWhitespace parseUnqt
, parseEmptyTag (const VerticalRule) "VR"
]
`adjustErr`
("Can't parse Html.Cell\n\t"++)
where
parseCell = (`parseTag` "TD")
parse = parseUnqt
parseUnqtList = wrapWhitespace $ sepBy1 parseUnqt whitespace
parseList = parseUnqtList
newtype Img = Img Attributes
deriving (Eq, Ord, Show, Read)
instance PrintDot Img where
unqtDot (Img as) = printEmptyTag (text "IMG") as
instance ParseDot Img where
parseUnqt = wrapWhitespace (parseEmptyTag Img "IMG")
`adjustErr`
("Can't parse Html.Img\n\t"++)
parse = parseUnqt
type Attributes = [Attribute]
data Attribute = Align Align
| BAlign Align
| BGColor Color
| Border Word8
| CellBorder Word8
| CellPadding Word8
| CellSpacing Word8
| Color Color
| ColSpan Word16
| Face T.Text
| FixedSize Bool
| Height Word16
| HRef T.Text
| ID T.Text
| PointSize Double
| Port PortName
| RowSpan Word16
| Scale Scale
| Src FilePath
| Target T.Text
| Title T.Text
| VAlign VAlign
| Width Word16
deriving (Eq, Ord, Show, Read)
instance PrintDot Attribute where
unqtDot (Align v) = printHtmlField "ALIGN" v
unqtDot (BAlign v) = printHtmlField "BALIGN" v
unqtDot (BGColor v) = printHtmlField "BGCOLOR" v
unqtDot (Border v) = printHtmlField "BORDER" v
unqtDot (CellBorder v) = printHtmlField "CELLBORDER" v
unqtDot (CellPadding v) = printHtmlField "CELLPADDING" v
unqtDot (CellSpacing v) = printHtmlField "CELLSPACING" v
unqtDot (Color v) = printHtmlField "COLOR" v
unqtDot (ColSpan v) = printHtmlField "COLSPAN" v
unqtDot (Face v) = printHtmlField' "FACE" $ escapeAttribute v
unqtDot (FixedSize v) = printHtmlField' "FIXEDSIZE" $ printBoolHtml v
unqtDot (Height v) = printHtmlField "HEIGHT" v
unqtDot (HRef v) = printHtmlField' "HREF" $ escapeAttribute v
unqtDot (ID v) = printHtmlField' "ID" $ escapeAttribute v
unqtDot (PointSize v) = printHtmlField "POINT-SIZE" v
unqtDot (Port v) = printHtmlField' "PORT" . escapeAttribute $ portName v
unqtDot (RowSpan v) = printHtmlField "ROWSPAN" v
unqtDot (Scale v) = printHtmlField "SCALE" v
unqtDot (Src v) = printHtmlField' "SRC" . escapeAttribute $ T.pack v
unqtDot (Target v) = printHtmlField' "TARGET" $ escapeAttribute v
unqtDot (Title v) = printHtmlField' "TITLE" $ escapeAttribute v
unqtDot (VAlign v) = printHtmlField "VALIGN" v
unqtDot (Width v) = printHtmlField "WIDTH" v
unqtListToDot = hsep . mapM unqtDot
listToDot = unqtListToDot
printHtmlField :: (PrintDot a) => T.Text -> a -> DotCode
printHtmlField f = printHtmlField' f . unqtDot
printHtmlField' :: T.Text -> DotCode -> DotCode
printHtmlField' f v = text f <> equals <> dquotes v
instance ParseDot Attribute where
parseUnqt = oneOf [ parseHtmlField Align "ALIGN"
, parseHtmlField BAlign "BALIGN"
, parseHtmlField BGColor "BGCOLOR"
, parseHtmlField Border "BORDER"
, parseHtmlField CellBorder "CELLBORDER"
, parseHtmlField CellPadding "CELLPADDING"
, parseHtmlField CellSpacing "CELLSPACING"
, parseHtmlField Color "COLOR"
, parseHtmlField ColSpan "COLSPAN"
, parseHtmlField' Face "FACE" unescapeAttribute
, parseHtmlField' FixedSize "FIXEDSIZE" parseBoolHtml
, parseHtmlField Height "HEIGHT"
, parseHtmlField' HRef "HREF" unescapeAttribute
, parseHtmlField' ID "ID" unescapeAttribute
, parseHtmlField PointSize "POINT-SIZE"
, parseHtmlField' (Port . PN) "PORT" unescapeAttribute
, parseHtmlField RowSpan "ROWSPAN"
, parseHtmlField Scale "SCALE"
, parseHtmlField' Src "SRC" $ fmap T.unpack unescapeAttribute
, parseHtmlField' Target "TARGET" unescapeAttribute
, parseHtmlField' Title "TITLE" unescapeAttribute
`onFail`
parseHtmlField' Title "TOOLTIP" unescapeAttribute
, parseHtmlField VAlign "VALIGN"
, parseHtmlField Width "WIDTH"
]
parse = parseUnqt
parseUnqtList = sepBy parseUnqt whitespace1
parseList = parseUnqtList
parseHtmlField :: (ParseDot a) => (a -> Attribute) -> String
-> Parse Attribute
parseHtmlField c f = parseHtmlField' c f parseUnqt
parseHtmlField' :: (a -> Attribute) -> String -> Parse a
-> Parse Attribute
parseHtmlField' c f p = string f
*> parseEq
*> ( c <$> ( quotedParse p
`adjustErr`
(("Can't parse HTML.Attribute." ++ f ++ "\n\t")++)
)
)
data Align = HLeft
| HCenter
| HRight
| HText
deriving (Eq, Ord, Bounded, Enum, Show, Read)
instance PrintDot Align where
unqtDot HLeft = text "LEFT"
unqtDot HCenter = text "CENTER"
unqtDot HRight = text "RIGHT"
unqtDot HText = text "TEXT"
instance ParseDot Align where
parseUnqt = oneOf [ stringRep HLeft "LEFT"
, stringRep HCenter "CENTER"
, stringRep HRight "RIGHT"
, stringRep HText "TEXT"
]
parse = parseUnqt
data VAlign = HTop
| HMiddle
| HBottom
deriving (Eq, Ord, Bounded, Enum, Show, Read)
instance PrintDot VAlign where
unqtDot HTop = text "TOP"
unqtDot HMiddle = text "MIDDLE"
unqtDot HBottom = text "BOTTOM"
instance ParseDot VAlign where
parseUnqt = oneOf [ stringRep HTop "TOP"
, stringRep HMiddle "MIDDLE"
, stringRep HBottom "BOTTOM"
]
parse = parseUnqt
data Scale = NaturalSize
| ScaleUniformly
| ExpandWidth
| ExpandHeight
| ExpandBoth
deriving (Eq, Ord, Bounded, Enum, Show, Read)
instance PrintDot Scale where
unqtDot NaturalSize = text "FALSE"
unqtDot ScaleUniformly = text "TRUE"
unqtDot ExpandWidth = text "WIDTH"
unqtDot ExpandHeight = text "HEIGHT"
unqtDot ExpandBoth = text "BOTH"
instance ParseDot Scale where
parseUnqt = oneOf [ stringRep NaturalSize "FALSE"
, stringRep ScaleUniformly "TRUE"
, stringRep ExpandWidth "WIDTH"
, stringRep ExpandHeight "HEIGHT"
, stringRep ExpandBoth "BOTH"
]
parse = parseUnqt
escapeAttribute :: T.Text -> DotCode
escapeAttribute = escapeHtml False
escapeValue :: T.Text -> DotCode
escapeValue = escapeHtml True
escapeHtml :: Bool -> T.Text -> DotCode
escapeHtml quotesAllowed = hcat . fmap concat
. mapM (escapeSegment . T.unpack)
. T.groupBy ((==) `on` isSpace)
where
escapeSegment (s:sps) | isSpace s = liftA2 (:) (char s) $ mapM numEscape sps
escapeSegment txt = mapM xmlChar txt
allowQuotes = if quotesAllowed
then Map.delete '"'
else id
escs = allowQuotes $ Map.fromList htmlEscapes
xmlChar c = maybe (char c) escape $ c `Map.lookup` escs
numEscape = escape' . (<>) (char '#') . int . ord
escape' e = char '&' <> e <> char ';'
escape = escape' . text
unescapeAttribute :: Parse T.Text
unescapeAttribute = unescapeHtml False
unescapeValue :: Parse T.Text
unescapeValue = unescapeHtml True
unescapeHtml :: Bool -> Parse T.Text
unescapeHtml quotesAllowed = fmap (T.pack . catMaybes)
. many1 . oneOf $ [ parseEscpd
, validChars
]
where
parseEscpd :: Parse (Maybe Char)
parseEscpd = do character '&'
esc <- many1Satisfy (';' /=)
character ';'
let c = case T.uncons $ T.toLower esc of
Just ('#',dec) | Just ('x',hex) <- T.uncons dec
-> readMaybe readHex $ T.unpack hex
| otherwise
-> readMaybe readInt $ T.unpack dec
_ -> esc `Map.lookup` escMap
return c
readMaybe f str = do (n, []) <- listToMaybe $ f str
return $ chr n
readInt :: ReadS Int
readInt = reads
allowQuotes = if quotesAllowed
then delete '"'
else id
escMap = Map.fromList htmlUnescapes
validChars = fmap Just $ satisfy (`notElem` needEscaping)
needEscaping = allowQuotes $ map fst htmlEscapes
htmlEscapes :: [(Char, T.Text)]
htmlEscapes = [ ('"', "quot")
, ('<', "lt")
, ('>', "gt")
, ('&', "amp")
]
++ map numEscape ['-', '\'']
where
numEscape c = (c, T.pack $ '#' : show (ord c))
htmlUnescapes :: [(T.Text, Char)]
htmlUnescapes = maybeEscaped
++
map (uncurry $ flip (,)) htmlEscapes
where
maybeEscaped = [("nbsp", ' '), ("apos", '\'')]
printBoolHtml :: Bool -> DotCode
printBoolHtml = text . bool "FALSE" "TRUE"
parseBoolHtml :: Parse Bool
parseBoolHtml = stringRep True "TRUE"
`onFail`
stringRep False "FALSE"
printTag :: DotCode -> Attributes -> DotCode -> DotCode
printTag t as v = angled (t <+> toDot as)
<> v
<> angled (fslash <> t)
printFontTag :: Attributes -> DotCode -> DotCode
printFontTag = printTag (text "FONT")
printEmptyTag :: DotCode -> Attributes -> DotCode
printEmptyTag t as = angled $ t <+> toDot as <> fslash
parseTag :: (Attributes -> val -> tag) -> String
-> Parse val -> Parse tag
parseTag c t pv = c <$> parseAngled openingTag
<*> pv
<* parseAngled (character '/' *> t' *> whitespace)
`adjustErr`
(("Can't parse Html tag: " ++ t ++ "\n\t")++)
where
t' = string t
openingTag :: Parse Attributes
openingTag = t'
*> tryParseList' (whitespace1 >> parse)
<* whitespace
parseFontTag :: (Attributes -> val -> tag) -> Parse val -> Parse tag
parseFontTag = (`parseTag` "FONT")
parseTagRep :: (tagName -> val -> tag) -> Parse tagName -> Parse val -> Parse tag
parseTagRep c pt pv = c <$> parseAngled (pt `discard` whitespace)
<*> pv
<* parseAngled (character '/' *> pt *> whitespace)
`adjustErr`
("Can't parse attribute-less Html tag\n\t"++)
parseEmptyTag :: (Attributes -> tag) -> String -> Parse tag
parseEmptyTag c t = c <$> parseAngled
( string t
*> tryParseList' (whitespace1 *> parse)
<* whitespace
<* character '/'
)
`adjustErr`
(("Can't parse empty Html tag: " ++ t ++ "\n\t")++)