{-# OPTIONS_GHC -Wall #-}
-- | Content-defined diffing and patching of HTML documents.
--
-- HTML is tokenized, normalized to match browser DOM construction, and built
-- into a tree of MD5-hashed chunks ('ChunkTree').  Diffing two trees produces
-- a compact stream of 'DiffToken's in which unchanged subtrees are replaced by
-- hash references into the source document.
module ALON.Diff.HTML
  ( ChunkTree(..)
  , DiffToken(..)
  , DiffWithPatchRefs(..)
  , buildChunkTreeFromHTML
  , buildChunkTreeFromHTMLLazy
  , buildChunkTree
  , mapChunkTree
  , diffHTML
  , diffHTMLLazy
  , diffChunkTree
  , patchHTML
  , patchHTMLLazy
  , patchTreeToTokens
  , getTreeTokens
  , serializeDiffTokens
  , normalizeLineEndings
  , normalizeHTMLTokens
  , getPatchRefs
  , getPatchRefsAsDiffTokens
  , diffHTMLWithPatchRefs
  , diffHTMLWithPatchRefsLazy
  ) where

import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Crypto.Hash.MD5 as MD5
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as B (length)
import qualified Data.List as L
import qualified Data.Map.Strict as Map
import Text.HTML.Parser
import Data.Maybe (maybeToList)
import Data.Aeson
import Data.ByteString.Base64 (encodeBase64')
import qualified Data.Bytes.Put as P
import qualified Data.Bytes.Serial as S
import qualified Data.Bytes.VarInt as V
import Data.Bits
import Data.Word8
import HTMLEntities.Decoder (htmlEncodedText)
import Data.Text.Lazy.Builder (toLazyText)

-- One node of the content-defined chunk tree built from HTML tokens.
data ChunkTree = ChunkTree
  { fullNodeHash :: !(MD5.Ctx)      -- running MD5 context covering this node and all children
  , nodeToken :: (Maybe Token)      -- the HTML token for this node; Nothing for synthetic aggregate nodes
  , fullNodeLength :: !Int          -- estimated serialized size of this subtree, used for break-even checks
  , branches :: [ChunkTree]         -- child nodes, in document order
  }

-- One instruction of a diff patch stream.
data DiffToken = DiffChunkReference !ByteString  -- reuse the source subtree with this MD5 digest
               | DiffContent !T.Text             -- literal text node
               | DiffOpenTag !T.Text             -- open an element (attributes follow as DiffAttribute)
               | DiffAttribute !T.Text !T.Text   -- attribute for the most recently opened tag
               | DiffCloseTag                    -- close the innermost open element
               | DiffSoloTag !T.Text             -- self-closing element
               | DiffDoctype !T.Text             -- doctype declaration
  deriving (Show)

-- A diff plus, for each chunk reference it contains, the fully expanded
-- token stream of that chunk (Nothing if the reference could not be resolved).
data DiffWithPatchRefs = DiffWithPatchRefs [DiffToken] [(ByteString, Maybe [DiffToken])]

-- Compact JSON serialization of a diff token
instance ToJSON DiffToken where
  toJSON (DiffChunkReference refValue) = object [ T.pack "r" .= (decodeUtf8 $ encodeBase64' refValue) ]
  toJSON (DiffContent text) = String $ descapeHTMLText text
  toJSON (DiffOpenTag text) = toJSON [ String text ]
  toJSON (DiffAttribute name value) = toJSON [ String name, String value ]
  toJSON (DiffCloseTag) = toJSON ([]::[Value])
  toJSON (DiffSoloTag text) = toJSON [ String text, toJSON ([]::[Value])]
  toJSON (DiffDoctype text) = object [ T.pack "d" .= text ]

-- Debug rendering: shows the finalized digest (base64), token, length and children.
instance Show ChunkTree where
  show tree = "(ChunkTree " ++ show (decodeUtf8 $ encodeBase64' (treeDigest tree))
    ++ " (" ++ show (nodeToken tree) ++ ") "
    ++ show (fullNodeLength tree) ++ " "
    ++ show (branches tree) ++ ")"

-- Break-even size: subtrees at or below this size are cheaper to inline than
-- to reference by hash.
-- Note, this should be calculated based off the hash in future.
refMaxSize :: Int
--refMaxSize = 17 -- comment this in for binary based deltas
refMaxSize = 34 -- comment this in for JSON deltas

-- Per-attribute serialization overhead used when estimating node sizes.
attributeOverhead :: Int
-- attributeOverhead = 2 -- comment this in for binary based deltas
attributeOverhead = 8 -- comment this in for JSON deltas

-- Transform HTML escapes/entities in text node text into their decoded
-- character equivalents.
descapeHTMLText :: T.Text -> T.Text
descapeHTMLText = TL.toStrict . toLazyText . htmlEncodedText

-- Start a (potentially sub tree) state from a token.
-- The hash covers the prefixed node text plus the attributes sorted by name,
-- so attribute order in the markup does not affect the digest.
initialTreeState :: String -> T.Text -> [Attr] -> Token -> ChunkTree
initialTreeState hashPrefix nodeText attributes inputToken =
  ChunkTree nodeHashContextWithAttributes ( Just inputToken ) ( tokenSizeOverhead + ( T.length nodeText ) + attributesLength ) []
  where
    -- Fixed serialization cost per token kind; two sets are kept because the
    -- overhead differs between the binary and JSON delta encodings.
    tokenSizeOverhead = case inputToken of
      {- (TagOpen _ _) -> 2 -- Comment this set in for more optimal binary based deltas.
      (TagSelfClose _ _) -> 1
      (TagClose _ ) -> 0
      (ContentText _ ) -> 1
      (ContentChar _ ) -> 1
      (Doctype _) -> 1
      (Comment _) -> 0 --}
      (TagOpen _ _) -> 8 -- Comment this set in for more optimal JSON based deltas
      (TagSelfClose _ _) -> 8
      (TagClose _ ) -> 0
      (ContentText _ ) -> 3
      (ContentChar _ ) -> 3
      (Doctype _) -> 8
      (Comment _) -> 0
    attributeOrder (Attr name1 _) (Attr name2 _ ) = compare name1 name2
    nodeHashContext = MD5.update (MD5.init) $ encodeUtf8 $ ( T.pack hashPrefix ) `mappend` nodeText
    -- Attributes are hashed in sorted order for a canonical digest.
    sortedAttributes = L.sortBy attributeOrder attributes
    nodeHashContextWithAttributes = L.foldl' hashAttribute nodeHashContext sortedAttributes
    hashAttribute context (Attr name value) = MD5.updates context $ fmap encodeUtf8 [T.pack " attr-", name, T.pack "=", value]
    -- Length estimation sums over the attributes in original order (sum is
    -- order-independent, so no sorting is needed here).
    attributeLength previousLength (Attr name value) = previousLength + attributeOverhead + T.length name + T.length value
    attributesLength = L.foldl' attributeLength 0 attributes

-- Add a branch chunk to a previous chunk tree.  The parent's running hash
-- absorbs the child's finalized digest, and lengths accumulate.
addBranchChunk :: ChunkTree -> ChunkTree -> ChunkTree
addBranchChunk oldParentState branch = ChunkTree
  { fullNodeHash = MD5.update (fullNodeHash oldParentState) (treeDigest branch)
  , nodeToken = nodeToken oldParentState
  , fullNodeLength = fullNodeLength oldParentState + fullNodeLength branch
  , branches = branches oldParentState ++ [branch]
  }

-- HTML void elements: never have closing tags, normalized to self-closing.
voidTokens :: [T.Text]
voidTokens = map T.pack ["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"]

-- Elements whose open (or close) implicitly terminates an open <p> element,
-- per the HTML parsing rules for paragraph auto-closing.
paragraphEndTokens :: [T.Text]
paragraphEndTokens = map T.pack ["p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "pre", "address", "blockquote", "dl", "div", "fieldset", "form", "hr", "noscript", "table", "body"]

-- Given a token, and the current chunk tree state state, output a modified state stack.
-- The stack holds one ChunkTree per currently-open element; Nothing signals
-- malformed HTML (mismatched close tag or underflowed stack).
buildChunkNode :: Token -> [ChunkTree] -> Maybe [ChunkTree]
buildChunkNode ( TagOpen tagName attributes ) stateStack =
  Just ( ( initialTreeState "tag-" (T.toLower tagName) attributes $ TagOpen tagName attributes ) : stateStack )
buildChunkNode ( TagSelfClose tagName attributes ) (state:deepState) =
  Just ( addBranchChunk state localState : deepState )
  where localState = initialTreeState "tag-" (T.toLower tagName) attributes $ TagSelfClose tagName attributes
buildChunkNode ( TagClose tagName ) (state:deepState) = do
  initialTag <- nodeToken state
  tagValue <- openedTag initialTag
  if ( tagValue == tagName ) then addChild deepState else Nothing -- Check the HTML is well formed; note in normalization void tokens are changed to self-closing.
  where
    -- Note, add child also does a content defined aggregatation of the node.
    addChild (parentState:deeperState) = Just ( ( addBranchChunk parentState $ contentAggregateChunkTreeNode state ) : deeperState )
    addChild [] = Nothing
    openedTag (TagOpen openTagName _) = Just openTagName
    openedTag _ = Nothing
buildChunkNode ( ContentText value ) (state:deepState) =
  Just ( addBranchChunk state localState : deepState )
  -- Text is entity-decoded before hashing so equivalent encodings match.
  where localState = initialTreeState "content-" (descapeHTMLText value) [] ( ContentText value )
buildChunkNode ( ContentChar value ) (state:deepState) =
  Just ( addBranchChunk state localState : deepState )
  where localState = initialTreeState "content-" (T.singleton value) [] ( ContentChar value )
buildChunkNode ( Doctype value ) (state:deepState) =
  Just ( addBranchChunk state localState : deepState )
  -- Doctype is case/whitespace-normalized so cosmetic variants hash alike.
  where localState = initialTreeState "doctype-" (T.strip $ T.toLower value) [] ( Doctype value )
buildChunkNode ( Comment _ ) stateStack = Just stateStack -- Comments aren't included in the output
buildChunkNode _ [] = Nothing

-- Succeeds only for a singleton list; used to check the token fold left
-- exactly the root state on the stack.
oneItemOnly :: [a] -> Maybe a
oneItemOnly (item:[]) = Just item
oneItemOnly _ = Nothing

-- Normalize line endings of HTML to remove carriage returns.
normalizeLineEndings :: T.Text -> T.Text
-- CRLF is collapsed first, then any remaining lone CR, so "\r\n" becomes a
-- single "\n" rather than two.
normalizeLineEndings inputText = T.replace (T.pack "\r") (T.pack "\n") $ T.replace (T.pack "\r\n") (T.pack "\n") inputText

-- Note, we only handle paragraph elements at the moment, but should probably add support for other
-- elements that don't need to be closed correctly.
-- Inserts the implicit </p> closes a browser would infer, and drops stray
-- </p> tags seen while no paragraph is open.  State is
-- (reversed output, is a <p> open, nesting depth inside that <p>).
normalizeParagraphElements :: [Token] -> [Token]
normalizeParagraphElements tokens =
  let
    processToken :: ([Token], Bool, Int) -> Token -> ([Token], Bool, Int)
    -- No paragraph currently open.
    processToken (previousTokens, False, _) inputToken =
      case inputToken of
        (TagOpen tagName _) | T.toLower tagName == T.pack "p" -> (inputToken:previousTokens, True, 1)
        -- A close with no matching open is dropped.
        (TagClose tagName) | T.toLower tagName == T.pack "p" -> (previousTokens, False, 0)
        _ -> (inputToken:previousTokens, False, 0)
    -- Inside an open paragraph, tracking depth of elements opened within it.
    processToken (previousTokens, True, pDepth) inputToken =
      case (inputToken, pDepth) of
        -- A new <p> implicitly closes the current one.
        (TagOpen tagName _, _) | T.toLower tagName == T.pack "p" -> (inputToken:(TagClose $ T.pack "p"):previousTokens, True, 1)
        -- Block-level elements implicitly close an open paragraph.
        (TagOpen tagName _, _) | elem (T.toLower tagName) paragraphEndTokens -> (inputToken:(TagClose $ T.pack "p"):previousTokens, False, 0)
        (TagSelfClose tagName _, _) | elem (T.toLower tagName) paragraphEndTokens -> (inputToken:(TagClose $ T.pack "p"):previousTokens, False, 0)
        (TagClose tagName, _) | T.toLower tagName == T.pack "p" -> (inputToken:previousTokens, False, 0)
        (TagClose tagName, _) | elem (T.toLower tagName) paragraphEndTokens -> (inputToken:(TagClose $ T.pack "p"):previousTokens, False, 0)
        -- Closing the element that directly contains the <p> also closes it.
        (TagClose _, 1) -> (inputToken:(TagClose $ T.pack "p"):previousTokens, False, 0)
        (TagClose _, _) -> (inputToken:previousTokens, True, pDepth - 1)
        (TagOpen _ _, _) -> (inputToken:previousTokens, True, pDepth + 1)
        (_, _) -> (inputToken:previousTokens, True, pDepth)
    (reverseOutputTokens, isParagraphOpen, _) = L.foldl' processToken ([], False, 0) tokens
  -- A paragraph still open at end of input gets a final implicit close.
  in L.reverse $ (if isParagraphOpen then (TagClose $ T.pack "p") : reverseOutputTokens else reverseOutputTokens)

-- Normalize tokens in such a way that the DOM we will build in the chunk-tree
-- will match what's built in the browser.  Works backwards (foldr) so adjacent
-- text nodes can be merged right-to-left; state is
-- (tokens emitted so far, text being aggregated, stop-aggregating flag).
normalizeHTMLTokens :: [Token] -> [Token]
normalizeHTMLTokens tokens =
  let
    (outputTokens, finalText, _) = foldr processBackwardsToken ([], Nothing, False) tokens
    processBackwardsToken inputToken (nextTokens, aggregateText, stopAggregating) =
      case (inputToken, aggregateText) of
        (TagOpen tagName _, _) | T.toLower tagName == T.pack "html" -> (inputToken:nextTokens, Nothing, True)
        (TagClose tagName, _) | T.toLower tagName == T.pack "html" || T.toLower tagName == T.pack "body" -> (inputToken:nextTokens, aggregateText, False) -- Push text nodes from after the close of html/body into the end child of body.
        (ContentText _, _) | stopAggregating -> (nextTokens, Nothing, True) -- Don't collect text nodes if text nodes
        (ContentChar _, _) | stopAggregating -> (nextTokens, Nothing, True)
        (ContentText text, Nothing) -> (nextTokens, Just $ normalizeLineEndings text, False) -- Start aggregating text (normalize line endings)
        (ContentText text, Just previousText) -> (nextTokens, Just $ previousText `T.append` normalizeLineEndings text, False) -- Append text (with normalized line endings)
        (ContentChar single, Nothing) -> (nextTokens, Just $ normalizeLineEndings $ T.singleton single, False) -- Single char case
        (ContentChar single, Just previousText) -> (nextTokens, Just $ previousText `T.append` (normalizeLineEndings $ T.singleton single), False)
        (Comment _, _) -> (nextTokens, aggregateText, stopAggregating) -- Drop comments, keep the accumulated text
        (Doctype _, _) -> (inputToken:nextTokens, Nothing, True) -- Doctype, drop any text nodes that appear after it
        (TagOpen tagName attributes, Just previousText) | elem (T.toLower tagName) voidTokens -> ((TagSelfClose tagName attributes):(ContentText previousText):nextTokens, Nothing, stopAggregating) -- Convert void token to self-closing tag
        (TagOpen tagName attributes, Nothing) | elem (T.toLower tagName) voidTokens -> ((TagSelfClose tagName attributes):nextTokens, Nothing, stopAggregating) -- Convert void token to self-closing tag
        (_, Just previousText) -> (inputToken:(ContentText previousText):nextTokens, Nothing, stopAggregating) -- normal tag close, put out tag close followed by aggregate text
        (_, Nothing) -> (inputToken:nextTokens, Nothing, stopAggregating) -- no aggregate text
    -- Any text still being aggregated when the scan passes the front of the
    -- stream is emitted as the first token.
    preParagraphNormalization =
      case finalText of
        (Just text) -> ContentText text : outputTokens
        Nothing -> outputTokens
  in normalizeParagraphElements preParagraphNormalization

-- Build a Chunk tree from a set of HTML tokens (which may represent a forest)
-- Will return Nothing if the set of HTML tokens is not well formed.
buildChunkTree :: [Token] -> Maybe ChunkTree
buildChunkTree tokens = buildChunkTreeInner >>= oneItemOnly
  where
    buildChunkTreeInner :: Maybe [ChunkTree]
    -- Fold every normalized token through buildChunkNode, starting from a
    -- single sentinel root; well-formed input leaves exactly that root.
    buildChunkTreeInner = L.foldl' (\ms tkn -> buildChunkNode tkn =<< ms) (Just [ChunkTree MD5.init Nothing 0 []]) (normalizeHTMLTokens tokens)

-- Note, this will content aggregate a chunk tree node and it's children, top down, but stopping at branches with
-- two or less children.
-- Basically, creates "ghost" intermediary nodes, which don't effect the hashes of the nodes of the tree with tokens,
-- but adds aggregate "ghost" child nodes in intermediate levels that have their own hash, as a way of representing
-- a set of multiple children.
contentAggregateChunkTreeNode :: ChunkTree -> ChunkTree
-- The node itself keeps its hash/token/length; only its child list is
-- replaced by (at most) one aggregate child covering all original children.
contentAggregateChunkTreeNode unaggregated =
  ChunkTree (fullNodeHash unaggregated) (nodeToken unaggregated) (fullNodeLength unaggregated) $ maybeToList $ aggregateChildren (branches unaggregated)
  where
    -- Recursively pair children into a binary tree of ghost nodes.  The split
    -- point is chosen by the largest child digest, making the tree shape a
    -- deterministic function of content (content-defined chunking).
    aggregateChildren :: [ChunkTree] -> Maybe ChunkTree
    aggregateChildren [] = Nothing
    aggregateChildren (first:[]) = Just first
    aggregateChildren (first:second:[]) = Just $ createAggregateChild first second
    aggregateChildren children = do
      let (leftChildren, rightChildren) = splitAt (maximumDigestIndex children) children
      leftAggregate <- aggregateChildren leftChildren
      rightAggregate <- aggregateChildren rightChildren
      return $ createAggregateChild leftAggregate rightAggregate
    -- A ghost node: hash of the two child digests, no token, summed length.
    createAggregateChild :: ChunkTree -> ChunkTree -> ChunkTree
    createAggregateChild left right =
      let aggregateHash = MD5.update (MD5.update MD5.init $ treeDigest left) (treeDigest right)
          aggregateLength = fullNodeLength left + fullNodeLength right
      in ChunkTree aggregateHash Nothing aggregateLength [left, right]
    greaterDigest (maxIndex, maxDigest) (index, branchDigest) = if branchDigest > maxDigest then (index, branchDigest) else (maxIndex, maxDigest)
    -- Index (>= 1) of the child with the lexicographically largest digest,
    -- considered from the second child on so both halves are non-empty.
    maximumDigestIndex (_:second:fromList) =
      let (foundIndex, _) = L.foldl' greaterDigest (1, treeDigest second) $ zip [2..] $ map treeDigest fromList
      in foundIndex
    maximumDigestIndex _ = 0 -- should never get here

-- Given HTML, build a chunk tree.
buildChunkTreeFromHTML :: T.Text -> Maybe ChunkTree
buildChunkTreeFromHTML text = buildChunkTree $ parseTokens text

-- Given lazy text HTML, build a chunk tree.
buildChunkTreeFromHTMLLazy :: TL.Text -> Maybe ChunkTree
buildChunkTreeFromHTMLLazy text = buildChunkTree $ parseTokensLazy text

-- Get the hash digest of a ChunkTree node.
-- Note: finalizes a copy of the running context; the stored Ctx stays usable.
treeDigest :: ChunkTree -> ByteString
treeDigest = MD5.finalize . fullNodeHash

-- Produces a map of hash digests to chunk-tree nodes, as long as they are
-- above the break even size.
mapChunkTree :: ChunkTree -> Map.Map ByteString ChunkTree
mapChunkTree tree = collect tree Map.empty
  where
    -- Record the node itself (when worth referencing), then walk children.
    collect node acc = foldr collect (record acc node) (branches node)
    -- A subtree is only indexed when inlining it would cost more than a
    -- reference token (refMaxSize is the break-even point).
    record acc node
      | fullNodeLength node > refMaxSize = Map.insert (treeDigest node) node acc
      | otherwise = acc

-- Produce diff tokens for a given a chunk tree node if it can not be matched in the source.
treeTokenToDiffTokens :: ChunkTree -> [DiffToken]
treeTokenToDiffTokens chunk =
  case nodeToken chunk of
    -- Synthetic aggregate nodes carry no token and emit nothing themselves.
    Nothing -> []
    Just (TagOpen tagName attributes) -> DiffOpenTag tagName : fmap attrToDiff attributes
    Just (TagClose _) -> [DiffCloseTag]
    Just (TagSelfClose tagName attributes) -> DiffSoloTag tagName : fmap attrToDiff attributes
    Just (ContentText value) -> [DiffContent value]
    Just (ContentChar value) -> [DiffContent (T.singleton value)]
    Just (Doctype value) -> [DiffDoctype value]
    Just (Comment _) -> []
  where
    attrToDiff (Attr name value) = DiffAttribute name value

-- After the nodes have been traversed in a tree, this is used to produce a closing token.
treeTokenToCloseDiffTokens :: ChunkTree -> [DiffToken]
-- Only an open tag needs a matching close; every other node kind is complete.
treeTokenToCloseDiffTokens (ChunkTree _ (Just (TagOpen _ _ )) _ _ ) = [DiffCloseTag]
treeTokenToCloseDiffTokens _ = []

-- Given a map of a source chunk-tree and a target chunk-tree, create a diff patch
diffChunkTree :: Map.Map ByteString ChunkTree -> ChunkTree -> [DiffToken]
diffChunkTree sourceMap tree =
  let nodeDigest = treeDigest tree in
  case Map.lookup nodeDigest sourceMap of
    -- Matched in the source: the whole subtree collapses to one reference.
    Just _ -> [DiffChunkReference nodeDigest]
    -- No match: emit this node literally and recurse into its children.
    Nothing ->
      treeTokenToDiffTokens tree
        ++ concatMap (diffChunkTree sourceMap) (branches tree)
        ++ treeTokenToCloseDiffTokens tree

-- Change a chunktree node into a set of diff tokens with no references
chunkTreeToDiffTokens :: ChunkTree -> [DiffToken]
chunkTreeToDiffTokens tree =
  treeTokenToDiffTokens tree
    ++ concatMap chunkTreeToDiffTokens (branches tree)
    ++ treeTokenToCloseDiffTokens tree

-- Return all the patch refs in a delta (in the form as a set of diff tokens), with their chunk-tree tokenized to difftokens (with no references)
getPatchRefsAsDiffTokens :: [DiffToken] -> Map.Map ByteString ChunkTree -> [(ByteString, Maybe [DiffToken])]
getPatchRefsAsDiffTokens delta sourceMap =
  map resolve (getPatchRefs delta)
  where
    -- Nothing when the digest is absent from the source map (should not
    -- happen for a delta produced against the same map).
    resolve patchRef = (patchRef, chunkTreeToDiffTokens <$> Map.lookup patchRef sourceMap)

-- Perform a diff between two well formed HTML texts and return the diff on success.
diffHTML :: T.Text -> T.Text -> Maybe [DiffToken]
-- Nothing when either document fails to build a chunk tree (malformed HTML);
-- an empty diff means the two documents hash identically.
diffHTML sourceHTML targetHTML = do
  sourceTree <- buildChunkTreeFromHTML sourceHTML
  targetTree <- buildChunkTreeFromHTML targetHTML
  if treeDigest sourceTree == treeDigest targetTree
    then return []
    else return (diffChunkTree (mapChunkTree sourceTree) targetTree)

-- Perform a diff between two well formed HTML texts and return the diff and the full version of the patches the diff references.
diffHTMLWithPatchRefs :: T.Text -> T.Text -> Maybe DiffWithPatchRefs
diffHTMLWithPatchRefs sourceHTML targetHTML = do
  sourceTree <- buildChunkTreeFromHTML sourceHTML
  targetTree <- buildChunkTreeFromHTML targetHTML
  return $
    if treeDigest sourceTree == treeDigest targetTree
      then DiffWithPatchRefs [] []
      else
        let sourceMap = mapChunkTree sourceTree
            delta = diffChunkTree sourceMap targetTree
        in DiffWithPatchRefs delta (getPatchRefsAsDiffTokens delta sourceMap)

-- Same as diffHTMLWithPatchRefs but with lazy text input.
diffHTMLWithPatchRefsLazy :: TL.Text -> TL.Text -> Maybe DiffWithPatchRefs
diffHTMLWithPatchRefsLazy sourceHTML targetHTML = do
  sourceTree <- buildChunkTreeFromHTMLLazy sourceHTML
  targetTree <- buildChunkTreeFromHTMLLazy targetHTML
  return $
    if treeDigest sourceTree == treeDigest targetTree
      then DiffWithPatchRefs [] []
      else
        let sourceMap = mapChunkTree sourceTree
            delta = diffChunkTree sourceMap targetTree
        in DiffWithPatchRefs delta (getPatchRefsAsDiffTokens delta sourceMap)

-- Same as diff html but with lazy text
diffHTMLLazy :: TL.Text -> TL.Text -> Maybe [DiffToken]
diffHTMLLazy sourceHTML targetHTML = do
  sourceTree <- buildChunkTreeFromHTMLLazy sourceHTML
  targetTree <- buildChunkTreeFromHTMLLazy targetHTML
  if treeDigest sourceTree == treeDigest targetTree
    then return []
    else return (diffChunkTree (mapChunkTree sourceTree) targetTree)

-- Render the chunk tree node back to a set of tokens.
-- Walk a chunk tree, emitting its token followed by all child tokens, and a
-- matching close tag where the node's token was an open tag.
getTreeTokens :: ChunkTree -> [Token]
getTreeTokens tree = ( L.foldl' accumulateChildTokens (maybeToList treeNodeToken) (branches tree) ) ++ closeToken treeNodeToken
  where
    accumulateChildTokens previous currentTree = previous ++ getTreeTokens currentTree
    treeNodeToken = nodeToken tree
    closeToken (Just (TagOpen tagName _)) = [TagClose tagName]
    closeToken _ = []

-- Given a tree and a set of diff tokens, use them to patch into a stream of HTML tokens.
-- Runs the diff stream through a small stack machine: the first list tracks
-- currently-open tags, the second accumulates output tokens in reverse.
-- Nothing on any malformed patch (unknown reference, attribute with no open
-- tag, close with nothing open).
patchTreeToTokens :: ChunkTree -> [DiffToken] -> Maybe [Token]
patchTreeToTokens tree patch =
  let
    sourceMap = mapChunkTree tree
    -- A reference splices in the source subtree's tokens (reversed, since the
    -- output accumulator is built in reverse).
    patchDiffToken (Just (tokenStack, outputTokens)) (DiffChunkReference refValue) = do
      foundTree <- Map.lookup refValue sourceMap
      let treeTokens = reverse $ getTreeTokens foundTree
      return (tokenStack, treeTokens ++ outputTokens)
    patchDiffToken (Just (tokenStack, outputTokens)) (DiffContent value) = Just (tokenStack, ContentText value : outputTokens)
    patchDiffToken (Just (tokenStack, outputTokens)) (DiffOpenTag tagName) =
      let outputToken = TagOpen tagName [] in Just (outputToken:tokenStack, outputToken:outputTokens)
    -- Attributes attach to the most recently emitted open/self-close tag.
    -- Only the head of the output list is rebuilt; the copy on the tag stack
    -- keeps its old attributes, which is fine because only its name is used
    -- when the tag is closed.
    patchDiffToken (Just (tokenStack, (TagOpen tagName attributes):outputTokens)) (DiffAttribute name value) =
      Just (tokenStack, TagOpen tagName (attributes ++ [Attr name value]):outputTokens )
    patchDiffToken (Just (tokenStack, (TagSelfClose tagName attributes):outputTokens)) (DiffAttribute name value) =
      Just (tokenStack, TagSelfClose tagName (attributes ++ [Attr name value]):outputTokens )
    patchDiffToken _ (DiffAttribute _ _) = Nothing
    patchDiffToken (Just ((TagOpen tagName _):tokenStack, outputTokens)) (DiffCloseTag) = Just (tokenStack, TagClose tagName : outputTokens)
    patchDiffToken _ (DiffCloseTag) = Nothing
    patchDiffToken (Just (tokenStack, outputTokens)) (DiffSoloTag tagName) = Just (tokenStack, (TagSelfClose tagName []) : outputTokens)
    patchDiffToken (Just (tokenStack, outputTokens)) (DiffDoctype value) = Just (tokenStack, (Doctype value):outputTokens)
    -- Any token arriving after a previous failure keeps the failure.
    patchDiffToken _ _ = Nothing
  in do
    tokenState <- ( L.foldl' patchDiffToken (Just ([],[])) patch )
    let (_, tokenStream) = tokenState
    return (reverse tokenStream)

-- Given HTML and a set of diff tokens, patch to create a target HTML
patchHTML :: T.Text -> [DiffToken] -> Maybe TL.Text
patchHTML htmlText patch = do
  sourceTree <- buildChunkTreeFromHTML htmlText
  patchedTokens <- patchTreeToTokens sourceTree patch
  return (renderTokens patchedTokens)

-- Given HTML (lazy) and a set of diff tokens, patch to create a target HTML
patchHTMLLazy :: TL.Text -> [DiffToken] -> Maybe TL.Text
patchHTMLLazy htmlText patch = do
  sourceTree <- buildChunkTreeFromHTMLLazy htmlText
  patchedTokens <- patchTreeToTokens sourceTree patch
  return (renderTokens patchedTokens)

-- Get all the references (chunk digests) used by a diff, in order.
getPatchRefs :: [DiffToken] -> [ByteString]
getPatchRefs tokens = foldr prependChunkRefs [] tokens
  where
    prependChunkRefs (DiffChunkReference reference) currentList = reference : currentList
    prependChunkRefs _ currentList = currentList

-- Serialize to a compact format in binary.
-- Wire format: each token starts with one byte whose low 3 bits are the type
-- id (0-6); a trailing type byte of 7 terminates the stream.  For payload
-- lengths under 32 the length is packed into the upper 5 bits of that byte;
-- otherwise the upper bits are zero and a VarInt of (length - 32) follows.
-- NOTE(review): a zero-length payload encodes identically to the VarInt
-- escape (both leave the upper 5 bits zero) — presumably payloads are never
-- empty here, but worth confirming against the decoder.
serializeDiffTokens :: [DiffToken] -> BL.ByteString
serializeDiffTokens tokens = P.runPutL $ ( sequence_ (map serializeToken tokens) >> S.serialize ( 7 :: Word8 ) )
  where
    serializeToken (DiffChunkReference refValue) = encodeTypeAndBytes 0 refValue
    serializeToken (DiffContent text) = encodeTypeAndText 1 text
    -- Attribute carries two payloads: name (standard header) then the value
    -- as a plain VarInt length + bytes.
    serializeToken (DiffAttribute name value) = encodeTypeAndText 3 name >> S.serialize (V.VarInt $ B.length valueBytes ) >> P.putByteString valueBytes
      where valueBytes = encodeUtf8 value
    serializeToken (DiffOpenTag text) = encodeTypeAndText 2 text
    serializeToken (DiffCloseTag) = S.serialize (4 :: Word8)
    serializeToken (DiffSoloTag text) = encodeTypeAndText 5 text
    serializeToken (DiffDoctype text) = encodeTypeAndText 6 text
    -- Pack type id (low 3 bits) and a small length (upper 5 bits) into one byte.
    combineByte :: Int -> Int -> Word8
    combineByte typeInt contentLength = fromIntegral (typeInt .|. (contentLength `shiftL` 3))
    encodeTypeAndText typeId content = encodeTypeAndBytes typeId $ encodeUtf8 content
    encodeTypeAndBytes typeId content = ( serializeLengthAndType typeId $ B.length content ) >> P.putByteString content
    serializeLengthAndType typeId contentLength
      | contentLength < 32 = S.serialize (combineByte typeId contentLength)
      | otherwise = S.serialize ( (fromIntegral typeId ) :: Word8 ) >> S.serialize (V.VarInt ((fromIntegral $ contentLength - 32 ) :: Word))