{-# LANGUAGE CPP #-}
module Text.Hyphenation.Pattern
(
Patterns
, insertPattern
, lookupPattern
, scorePattern
, parsePatterns
) where
import qualified Data.IntMap as IM
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Prelude hiding (lookup)
import Data.Char (digitToInt, isDigit)
data Patterns = Patterns [Int] (IM.IntMap Patterns)
deriving Show
instance Semigroup Patterns where
Patterns ps m <> Patterns qs n = Patterns (zipMax ps qs) (IM.unionWith mappend m n)
instance Monoid Patterns where
mempty = Patterns [] IM.empty
#if !(MIN_VERSION_base(4,11,0))
mappend = (<>)
#endif
lookupPattern :: String -> Patterns -> [Int]
lookupPattern xs0 = init . tail . go ('.' : xs0 ++ ".") where
go [] (Patterns ys _) = ys
go xxs@(_:xs) t = zipMax (go1 xxs t) (0:go xs t)
go1 [] (Patterns ys _) = ys
go1 (x:xs) (Patterns ys m) = case IM.lookup (fromEnum x) m of
Just t' -> zipMax ys (go1 xs t')
Nothing -> ys
insertPattern :: String -> Patterns -> Patterns
insertPattern s0 = go (chars s0) where
pts = scorePattern s0
go [] (Patterns _ m) = Patterns pts m
go (x:xs) (Patterns n m) = Patterns n (IM.insertWith (\_ -> go xs) (fromEnum x) (mk xs) m)
mk [] = Patterns pts IM.empty
mk (x:xs) = Patterns [] (IM.singleton (fromEnum x) (mk xs))
parsePatterns :: String -> Patterns
parsePatterns = foldr insertPattern mempty . lines
chars :: String -> String
chars = filter (\x -> x < '0' || x > '9')
scorePattern :: String -> [Int]
scorePattern [] = [0]
scorePattern (x:ys)
| isDigit x = digitToInt x : if null ys then [] else scorePattern (tail ys)
| otherwise = 0 : scorePattern ys
zipMax :: [Int] -> [Int] -> [Int]
zipMax (x:xs) (y:ys) = max x y : zipMax xs ys
zipMax [] ys = ys
zipMax xs [] = xs