module System.FilePath.Glob.Base
( Token(..), Pattern(..)
, CompOptions(..), MatchOptions(..)
, compDefault, compPosix, matchDefault, matchPosix
, decompile
, compile
, compileWith, tryCompileWith
, tokenize
, optimize
, liftP, tokToLower
) where
import Control.Arrow (first)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Error (ErrorT, runErrorT, throwError)
import Control.Monad.Trans.Writer.Strict (Writer, runWriter, tell)
import Control.Exception (assert)
import Data.Char (isDigit, isAlpha, toLower)
import Data.List (find, sortBy)
import Data.Maybe (fromMaybe)
import Data.Monoid (Monoid, mappend, mempty, mconcat)
import System.FilePath ( pathSeparator, extSeparator
, isExtSeparator, isPathSeparator
)
import System.FilePath.Glob.Utils ( dropLeadingZeroes
, isLeft, fromLeft
, increasingSeq
, addToRange, overlap
)
#if __GLASGOW_HASKELL__
import Text.Read (readPrec, lexP, parens, prec, Lexeme(Ident))
#endif
data Token
= Literal !Char
| ExtSeparator
| PathSeparator
| NonPathSeparator
| CharRange !Bool [Either Char (Char,Char)]
| OpenRange (Maybe String) (Maybe String)
| AnyNonPathSeparator
| AnyDirectory
| LongLiteral !Int String
deriving (Eq)
tokToLower :: Token -> Token
tokToLower (Literal c) = Literal (toLower c)
tokToLower (LongLiteral n s) = LongLiteral n (map toLower s)
tokToLower tok = tok
newtype Pattern = Pattern { unPattern :: [Token] } deriving (Eq)
liftP :: ([Token] -> [Token]) -> Pattern -> Pattern
liftP f (Pattern pat) = Pattern (f pat)
instance Show Token where
show (Literal c)
| c `elem` "*?[<" || isExtSeparator c
= ['[',c,']']
| otherwise = assert (not $ isPathSeparator c) [c]
show ExtSeparator = [ extSeparator]
show PathSeparator = [pathSeparator]
show NonPathSeparator = "?"
show AnyNonPathSeparator = "*"
show AnyDirectory = "**/"
show (LongLiteral _ s) = concatMap (show . Literal) s
show (OpenRange a b) =
'<' : fromMaybe "" a ++ "-" ++
fromMaybe "" b ++ ">"
show (CharRange b r) =
let f = either (:[]) (\(x,y) -> [x,'-',y])
(caret,exclamation,fs) =
foldr (\c (ca,ex,ss) ->
case c of
Left '^' -> ("^",ex,ss)
Left '!' -> (ca,"!",ss)
_ -> (ca, ex,(f c ++) . ss)
)
("", "", id)
r
(beg,rest) = let s' = fs []
(x,y) = splitAt 1 s'
in if not b && x == "-"
then (y,x)
else (s',"")
in concat [ "["
, if b then "" else "^"
, beg, caret, exclamation, rest
, "]"
]
instance Show Pattern where
showsPrec d p = showParen (d > 10) $
showString "compile " . showsPrec (d+1) (decompile p)
instance Read Pattern where
#if __GLASGOW_HASKELL__
readPrec = parens . prec 10 $ do
Ident "compile" <- lexP
fmap compile readPrec
#else
readsPrec d = readParen (d > 10) $ \r -> do
("compile",string) <- lex r
(xs,rest) <- readsPrec (d+1) string
[(compile xs, rest)]
#endif
instance Monoid Pattern where
mempty = Pattern []
mappend (Pattern a) (Pattern b) = optimize . Pattern $ (a ++ b)
mconcat = optimize . Pattern . concatMap unPattern
data CompOptions = CompOptions
{ characterClasses :: Bool
, characterRanges :: Bool
, numberRanges :: Bool
, wildcards :: Bool
, recursiveWildcards :: Bool
, pathSepInRanges :: Bool
, errorRecovery :: Bool
} deriving (Show,Read,Eq)
compDefault :: CompOptions
compDefault = CompOptions
{ characterClasses = True
, characterRanges = True
, numberRanges = True
, wildcards = True
, recursiveWildcards = True
, pathSepInRanges = True
, errorRecovery = True
}
compPosix :: CompOptions
compPosix = CompOptions
{ characterClasses = True
, characterRanges = True
, numberRanges = False
, wildcards = True
, recursiveWildcards = False
, pathSepInRanges = False
, errorRecovery = True
}
data MatchOptions = MatchOptions
{ matchDotsImplicitly :: Bool
, ignoreCase :: Bool
, ignoreDotSlash :: Bool
}
matchDefault :: MatchOptions
matchDefault = matchPosix
matchPosix :: MatchOptions
matchPosix = MatchOptions
{ matchDotsImplicitly = False
, ignoreCase = False
, ignoreDotSlash = True
}
decompile :: Pattern -> String
decompile = concatMap show . unPattern
compile :: String -> Pattern
compile = compileWith compDefault
compileWith :: CompOptions -> String -> Pattern
compileWith opts = either error id . tryCompileWith opts
tryCompileWith :: CompOptions -> String -> Either String Pattern
tryCompileWith opts = fmap optimize . tokenize opts
tokenize :: CompOptions -> String -> Either String Pattern
tokenize opts = fmap Pattern . sequence . go
where
err _ c cs | errorRecovery opts = Right (Literal c) : go cs
err s _ _ = [Left s]
go :: String -> [Either String Token]
go [] = []
go ('?':cs) | wcs = Right NonPathSeparator : go cs
go ('*':cs) | wcs =
case cs of
'*':p:xs | rwcs && isPathSeparator p
-> Right AnyDirectory : go xs
_ -> Right AnyNonPathSeparator : go cs
go ('[':cs) | crs = let (range,rest) = charRange opts cs
in case range of
Left s -> err s '[' cs
r -> r : go rest
go ('<':cs) | ors =
let (range, rest) = break (=='>') cs
in if null rest
then err "compile :: unclosed <> in pattern" '<' cs
else case openRange range of
Left s -> err s '<' cs
r -> r : go (tail rest)
go (c:cs)
| isPathSeparator c = Right PathSeparator : go cs
| isExtSeparator c = Right ExtSeparator : go cs
| otherwise = Right (Literal c) : go cs
wcs = wildcards opts
rwcs = recursiveWildcards opts
crs = characterRanges opts
ors = numberRanges opts
openRange :: String -> Either String Token
openRange ['-'] = Right $ OpenRange Nothing Nothing
openRange ('-':s) =
case span isDigit s of
(b,"") -> Right $ OpenRange Nothing (openRangeNum b)
_ -> Left $ "compile :: bad <>, expected number, got " ++ s
openRange s =
case span isDigit s of
(a,"-") -> Right $ OpenRange (openRangeNum a) Nothing
(a,'-':s') ->
case span isDigit s' of
(b,"") -> Right $ OpenRange (openRangeNum a) (openRangeNum b)
_ -> Left $ "compile :: bad <>, expected number, got " ++ s'
_ -> Left $ "compile :: bad <>, expected number followed by - in " ++ s
openRangeNum :: String -> Maybe String
openRangeNum = Just . dropLeadingZeroes
type CharRange = [Either Char (Char,Char)]
charRange :: CompOptions -> String -> (Either String Token, String)
charRange opts zs =
case zs of
y:ys | y `elem` "^!" ->
case ys of
'-':']':xs -> (Right (CharRange False [Left '-']), xs)
'-' :_ -> first (fmap (CharRange True )) (start zs)
xs -> first (fmap (CharRange False)) (start xs)
_ -> first (fmap (CharRange True )) (start zs)
where
start :: String -> (Either String CharRange, String)
start (']':xs) = run $ char ']' xs
start ('-':xs) = run $ char '-' xs
start xs = run $ go xs
run :: ErrorT String (Writer CharRange) String
-> (Either String CharRange, String)
run m = case runWriter.runErrorT $ m of
(Left err, _) -> (Left err, [])
(Right rest, cs) -> (Right cs, rest)
go :: String -> ErrorT String (Writer CharRange) String
go ('[':':':xs) | characterClasses opts = readClass xs
go ( ']':xs) = return xs
go ( c:xs) =
if not (pathSepInRanges opts) && isPathSeparator c
then throwError "compile :: path separator within []"
else char c xs
go [] = throwError "compile :: unclosed [] in pattern"
char :: Char -> String -> ErrorT String (Writer CharRange) String
char c ('-':x:xs) =
if x == ']'
then ltell [Left c, Left '-'] >> return xs
else ltell [Right (c,x)] >> go xs
char c xs = ltell [Left c] >> go xs
readClass :: String -> ErrorT String (Writer CharRange) String
readClass xs = let (name,end) = span isAlpha xs
in case end of
':':']':rest -> charClass name >> go rest
_ -> ltell [Left '[',Left ':'] >> go xs
charClass :: String -> ErrorT String (Writer CharRange) ()
charClass name =
case name of
"alnum" -> ltell [digit,upper,lower]
"alpha" -> ltell [upper,lower]
"blank" -> ltell blanks
"cntrl" -> ltell [Right ('\0','\x1f'), Left '\x7f']
"digit" -> ltell [digit]
"graph" -> ltell [Right ('!','~')]
"lower" -> ltell [lower]
"print" -> ltell [Right (' ','~')]
"punct" -> ltell punct
"space" -> ltell spaces
"upper" -> ltell [upper]
"xdigit" -> ltell [digit, Right ('A','F'), Right ('a','f')]
_ ->
throwError ("compile :: unknown character class '" ++name++ "'")
digit = Right ('0','9')
upper = Right ('A','Z')
lower = Right ('a','z')
punct = map Right [('!','/'), (':','@'), ('[','`'), ('{','~')]
blanks = [Left '\t', Left ' ']
spaces = [Right ('\t','\r'), Left ' ']
ltell = lift . tell
optimize :: Pattern -> Pattern
optimize = liftP (fin . go)
where
fin [] = []
fin (x:y:xs) | isLiteral x && isLiteral y =
let (ls,rest) = span isLiteral xs
in fin $ LongLiteral (length ls + 2)
(foldr (\(Literal a) -> (a:)) [] (x:y:ls))
: rest
fin (LongLiteral l1 s1 : LongLiteral l2 s2 : xs) =
fin $ LongLiteral (l1+l2) (s1++s2) : xs
fin (LongLiteral l s : Literal c : xs) =
fin $ LongLiteral (l+1) (s++[c]) : xs
fin (LongLiteral 1 s : xs) = Literal (head s) : fin xs
fin (Literal c : LongLiteral l s : xs) =
fin $ LongLiteral (l+1) (c:s) : xs
fin (x:xs) = x : fin xs
go [] = []
go (x@(CharRange _ _) : xs) =
case optimizeCharRange x of
x'@(CharRange _ _) -> x' : go xs
x' -> go (x':xs)
go (OpenRange (Just a) (Just b):xs)
| a == b = LongLiteral (length a) a : go xs
go (OpenRange (Just [a]) (Just [b]):xs)
| b > a = go $ CharRange True [Right (a,b)] : xs
go (x:xs) =
case find ($ x) compressors of
Just c -> let (compressed,ys) = span c xs
in if null compressed
then x : go ys
else go (x : ys)
Nothing -> x : go xs
compressors = [isStar, isStarSlash, isAnyNumber]
isLiteral (Literal _) = True
isLiteral _ = False
isStar AnyNonPathSeparator = True
isStar _ = False
isStarSlash AnyDirectory = True
isStarSlash _ = False
isAnyNumber (OpenRange Nothing Nothing) = True
isAnyNumber _ = False
optimizeCharRange :: Token -> Token
optimizeCharRange (CharRange b_ rs) = fin b_ . go . sortCharRange $ rs
where
fin True [Left c] | not (isPathSeparator c) = Literal c
fin True [Right r] | r == (minBound,maxBound) = NonPathSeparator
fin b x = CharRange b x
go [] = []
go (x@(Left c) : xs) =
case xs of
[] -> [x]
y@(Left d) : ys
| c == d -> go$ Left c : ys
| d == succ c ->
let (ls,rest) = span isLeft xs
(catable,others) = increasingSeq (map fromLeft ls)
range = (c, head catable)
in
if null catable || null (tail catable)
then x : y : go ys
else go$ Right range : map Left others ++ rest
| otherwise -> x : go xs
Right r : ys ->
case addToRange r c of
Just r' -> go$ Right r' : ys
Nothing -> x : go xs
go (x@(Right r) : xs) =
case xs of
[] -> [x]
Left c : ys ->
case addToRange r c of
Just r' -> go$ Right r' : ys
Nothing -> x : go xs
Right r' : ys ->
case overlap r r' of
Just o -> go$ Right o : ys
Nothing -> x : go xs
optimizeCharRange _ = error "Glob.optimizeCharRange :: internal error"
sortCharRange :: [Either Char (Char,Char)] -> [Either Char (Char,Char)]
sortCharRange = sortBy cmp
where
cmp (Left a) (Left b) = compare a b
cmp (Left a) (Right (b,_)) = compare a b
cmp (Right (a,_)) (Left b) = compare a b
cmp (Right (a,_)) (Right (b,_)) = compare a b