{- | Utility functions shared by modules that need to read the
   contents of XML documents.
-}
module Codec.Epub.Util
   where

import Text.Regex


{- | Discard everything that precedes the first @\<@ of the input.

   This is a workaround for documents that put characters in front
   of the XML declaration, which is not legal XML. The result
   starts at the first @\<@ character; if the input contains no
   @\<@ at all, the result is the empty string.
-}
removeIllegalStartChars :: String -> String
removeIllegalStartChars = dropWhile (/= '<')


{- | Remove the @encoding=\"UTF-8\"@ pseudo-attribute, together with
   the run of spaces in front of it, from the document.

   This is a textual workaround, not an XML-aware edit:

   * every occurrence in the input is removed, not only the one in
     the XML declaration;

   * only the double-quoted form is recognised, and only for the
     value @UTF-8@ (compared case-insensitively); any other
     encoding declaration is left in place.
-}
removeEncoding :: String -> String
removeEncoding doc = subRegex encodingAttr doc ""
   where
      -- Flags are: multi-line mode off, case-sensitive matching off.
      encodingAttr = mkRegexWithOpts " +encoding=\"UTF-8\"" False False


{- | Remove every @\<!DOCTYPE ...\>@ declaration from the document.

   This is a textual workaround, not an XML-aware edit: a
   declaration is taken to run from @\<!DOCTYPE@ followed by a space
   up to the first @\>@ that follows it. The keyword is matched
   case-sensitively.

   NOTE(review): a DOCTYPE with an internal subset contains @\>@
   characters of its own, so it would presumably be cut at the first
   of them rather than at its real end -- confirm whether such
   documents need to be supported.
-}
removeDoctype :: String -> String
removeDoctype doc = subRegex doctypeDecl doc ""
   where
      -- Flags are: multi-line mode off, case-sensitive matching on.
      doctypeDecl = mkRegexWithOpts "<!DOCTYPE [^>]*>" False True