{- | Utility functions shared by modules that need to read the
   contents of XML documents.
-}
module Codec.Epub.Util
   where

import Text.Regex ( mkRegexWithOpts, subRegex )


{- | Discard everything that precedes the first @\<@ character of the
   input.

   This is a workaround for producers that emit stray characters
   before the XML declaration, which is not legal XML.

   The result starts at the first @\<@ and is otherwise unchanged.
   If the input contains no @\<@ at all, the result is the empty
   string.
-}
removeIllegalStartChars :: String -> String
removeIllegalStartChars = dropWhile (/= '<')


{- | A workaround that strips the UTF-8 encoding attribute from the
   document text.

   Every occurrence of one or more spaces followed by
   @encoding=\"UTF-8\"@ is replaced with the empty string. Matching
   is case-insensitive (the final argument to 'mkRegexWithOpts' is
   'False'), so @encoding=\"utf-8\"@ is removed as well.

   Note that the substitution is applied to the whole input, not
   only to the XML declaration, and that other encodings or
   single-quoted attribute values are left untouched.
-}
removeEncoding :: String -> String
removeEncoding = flip (subRegex encodingAttr) ""
   where
      -- Flags: not multi-line, not case-sensitive.
      encodingAttr = mkRegexWithOpts " +encoding=\"UTF-8\"" False False


{- | A workaround that strips any @\<!DOCTYPE ...\>@ declaration from
   the document text.

   Every span that begins with @\<!DOCTYPE@ followed by a space and
   runs up to and including the next @\>@ is replaced with the empty
   string. Matching is case-sensitive (the final argument to
   'mkRegexWithOpts' is 'True').

   Because the match stops at the first @\>@, a DOCTYPE that
   contains an internal subset with its own @\>@ characters is only
   removed up to that first @\>@.
-}
removeDoctype :: String -> String
removeDoctype = flip (subRegex doctypeDecl) ""
   where
      -- Flags: not multi-line, case-sensitive.
      doctypeDecl = mkRegexWithOpts "<!DOCTYPE [^>]*>" False True