{-# LANGUAGE BangPatterns, OverloadedStrings #-} module Text.LDIF.Preproc ( preproc , transposePos , PosTable ) where import Text.Parsec import Text.Parsec.Error (setErrorPos) import qualified Data.ByteString.Char8 as BC import Data.List (foldl', sortBy) -- | Opaque data necessary for relation between text after preprocessing and original type PosTable = [ PosOp ] data PosOp = PosOpAddLine { psLine :: Int } | PosOpWrap { psLine :: Int, psW :: Int, psWP :: Int } deriving Show data LdifLine = LdifLine { llNum :: Int, llStr :: BC.ByteString } | LdifLineCont { llNum :: Int, llStr :: BC.ByteString } | LdifComment { llNum :: Int, llStr :: BC.ByteString } -- | Convert error position to original text before preprocessing transposePos :: PosTable -> ParseError -> ParseError transposePos ptab oe = setErrorPos npos oe where opos = errorPos oe npos = setSourceColumn (setSourceLine opos nlin) ncol where opIdx a b = (psLine a) `compare` (psLine b) ocord = (sourceLine opos,sourceColumn opos) (nlin,ncol) = calcPos (sortBy opIdx ptab) ocord calcPos :: PosTable -> (Int, Int) -> (Int, Int) calcPos xs cord = foldl' updatePos cord xs where updatePos (l0,c0) (PosOpAddLine l) | l0 >= l = (l0+1,c0) updatePos (l0,c0) (PosOpWrap l _ _) | l0 >= l = (l0+1,c0) updatePos (l0,c0) (PosOpWrap l w wp) | (l0+1) == l && c0 > w && (c0-1-w) > wp = (l0+1,c0) updatePos (l0,c0) (PosOpWrap l w wp) | (l0+1) == l && c0 > w && (c0-1-w) <= wp = (l0+1,c0-w) updatePos x _ = x -- | Preprocessing of LDIF file, concat wrapped lines and remove comment lines preproc :: BC.ByteString -> (BC.ByteString, PosTable) preproc xs = (str, ptab) where str = BC.unlines $ map llStr ys (ys, ptab) = lns xs where lns zs = stripComments $ unwrap $ (tokenizeLines $ specLines zs, []) specLines :: BC.ByteString -> [BC.ByteString] specLines xs = map cleanLine $ BC.lines xs where isCR c = c == '\r' cleanLine l | BC.null l = l | isCR (BC.last l) = cleanLine $ BC.init l | otherwise = l tokenizeLines :: [BC.ByteString] -> [LdifLine] tokenizeLines xs = map tokenizeLine $ zip xs [1..] where tokenizeLine (x,i) | BC.null x = LdifLine i BC.empty | BC.head x == '#' = LdifComment i x | BC.head x == ' ' = LdifLineCont i $ BC.tail x | otherwise = LdifLine i x -- | Remove Comment Lines stripComments :: ([LdifLine],PosTable) -> ([LdifLine],PosTable) stripComments (xs,pt) = foldl' procLine ([],pt) xs where procLine (v,p) (LdifComment i _) = (v,(PosOpAddLine i):p) procLine (v,p) o = (o:v,p) -- | Unwrap lines, lines with space at begin is continue of previous line unwrap :: ([LdifLine],PosTable) -> ([LdifLine],PosTable) unwrap (xs,pt) = foldl' procLine ([],pt) xs where procLine ([],p) o = (o:[],p) procLine (v,p) (LdifLineCont i s) = let (z,r) = splitAt 1 v o = head z o' = o { llStr = (llStr o) `BC.append` s } p' = (PosOpWrap i (BC.length $ llStr o) (BC.length s)):p in (o':r,p') procLine (v,p) o = (o:v,p)