module Data.Text.ParagraphLayout.Internal.BiDiLevels ( Level , TextLevels (TextLevels) , WithLevel , baseLevel , directionLevel , dropLevels , firstStrongDirection , headLevel , level , levelDirectionH , tailLevels , textLevels ) where import Data.Maybe (catMaybes, listToMaybe) import Data.Text (Text, uncons, unpack) import Data.Text.Glyphize (Direction (..)) import qualified Data.Text.ICU.Char as ICUChar import Data.Word (Word8) -- | BiDi level, between 0 and 125 inclusive. -- Even values mean left-to-right text. -- Odd values mean right-to-left text. type Level = Word8 -- | Typeclass for any data structure with an associated BiDi level. class WithLevel a where level :: a -> Level -- | BiDi levels for each character from a given input text, -- plus the base level (for safe handling of empty text runs). -- -- This wrapper is meant to ease the transition to a different internal -- representation, if required when integrating with the ICU. data TextLevels = TextLevels [Level] Level deriving (Eq, Show) headLevel :: TextLevels -> Level headLevel (TextLevels xs base) = case xs of [] -> base (x : _) -> x tailLevels :: TextLevels -> TextLevels tailLevels (TextLevels xs base) = TextLevels (tail xs) base dropLevels :: Int -> TextLevels -> TextLevels dropLevels n (TextLevels xs base) = TextLevels (drop n xs) base baseLevel :: TextLevels -> Level baseLevel (TextLevels _ base) = base -- | Determine the BiDi level of each character in the input text -- using a simplified algorithm with the following main limitations: -- -- - Explicit directional formatting characters are ignored. -- This, by extension, means that levels cannot be controlled manually. -- -- - Arabic and European numbers are treated equally and do not absorb -- separators, terminators, or nonspacing marks. -- -- - Paired brackets have no effect on text direction. -- -- TODO: Use Haskell bindings to the ICU BiDi implementation once available. textLevels :: Direction -> Text -> TextLevels textLevels baseDir txt = TextLevels (textLevels' base base 0 txt) base where base = directionLevel 0 baseDir -- | Determine the BiDi level of each character in the input text -- using a simplified algorithm. textLevels' :: Level -> Level -> Int -> Text -> [Level] textLevels' base previousLevel neutrals txt = case uncons txt of Just (char, rest) -> case charLevel base char of Just currentLevel -> replicate neutrals (mergeLevel base previousLevel currentLevel) ++ currentLevel : textLevels' base currentLevel 0 rest Nothing -> textLevels' base previousLevel (neutrals + 1) rest Nothing -> replicate neutrals base -- | Determine the level of a neutral character based on its surrounding levels. mergeLevel :: Level -> Level -> Level -> Level mergeLevel base prev cur | prev == cur = prev | low > base && even high -- Avoid breaking a RTL run by numbers. = low | otherwise = base where low = min prev cur high = max prev cur data SimpleType = StrongLTR | StrongRTL | Number | Neutral | Reset -- | Determine the BiDi level of one character -- using a simplified algorithm. charLevel :: Level -> Char -> Maybe Level charLevel base c = case simpleType c of StrongLTR -> Just $ directionLevel base DirLTR StrongRTL -> Just $ directionLevel base DirRTL Number -> Just $ directionLevel (base + 1) DirLTR Neutral -> Nothing Reset -> Just base -- | Reduce the character's BiDi class into a simpler category -- for the purposes of the simplified algorithm. simpleType :: Char -> SimpleType simpleType c = case ICUChar.direction c of -- Strong characters have a definitive type. ICUChar.LeftToRight -> StrongLTR ICUChar.RightToLeft -> StrongRTL ICUChar.RightToLeftArabic -> StrongRTL -- Ignoring separators and terminators, all numbers behave the same. ICUChar.EuropeanNumber -> Number ICUChar.ArabicNumber -> Number -- Treating the following weak characters as neutral for simplicity. ICUChar.EuropeanNumberSeparator -> Neutral ICUChar.EuropeanNumberTerminator -> Neutral ICUChar.CommonNumberSeparator -> Neutral ICUChar.DirNonSpacingMark -> Neutral ICUChar.BoundaryNeutral -> Neutral -- The following characters are always neutral. ICUChar.WhiteSpaceNeutral -> Neutral ICUChar.OtherNeutral -> Neutral -- The following characters get their level reset. ICUChar.BlockSeparator -> Reset ICUChar.SegmentSeparator -> Reset -- Explicit formatting is not handled by this algorithm. ICUChar.LeftToRightEmbedding -> Neutral ICUChar.LeftToRightOverride -> Neutral ICUChar.RightToLeftEmbedding -> Neutral ICUChar.RightToLeftOverride -> Neutral ICUChar.PopDirectionalFormat -> Neutral ICUChar.FirstStrongIsolate -> Neutral ICUChar.LeftToRightIsolate -> Neutral ICUChar.RightToLeftIsolate -> Neutral ICUChar.PopDirectionalIsolate -> Neutral -- | `Just` the direction of the first strongly directional character, -- or `Nothing` if there is no strongly directional character. firstStrongDirection :: Text -> Maybe Direction firstStrongDirection = firstJust . map strongDirection . unpack -- | `Just` the direction of a strongly directional character, -- or `Nothing` if the character is not strongly directional. strongDirection :: Char -> Maybe Direction strongDirection c = case ICUChar.direction c of ICUChar.LeftToRight -> Just DirLTR ICUChar.RightToLeft -> Just DirRTL ICUChar.RightToLeftArabic -> Just DirRTL _ -> Nothing -- | The first `Just` value found in the input list, if there is one, -- otherwise `Nothing`. firstJust :: [Maybe a] -> Maybe a firstJust = listToMaybe . catMaybes -- | Convert embedding level to horizontal text direction. levelDirectionH :: Level -> Direction levelDirectionH lvl | even lvl = DirLTR | otherwise = DirRTL -- | Convert text direction to the smallest corresponding embedding level, -- but no smaller than the given minimum. directionLevel :: Level -> Direction -> Level directionLevel low DirLTR = smallestEvenAtLeast low directionLevel low DirRTL = smallestOddAtLeast low directionLevel low DirTTB = smallestEvenAtLeast low directionLevel low DirBTT = smallestOddAtLeast low -- | Smallest even integer greater than or equal to @x@. smallestEvenAtLeast :: Integral a => a -> a smallestEvenAtLeast x = if even x then x else x + 1 -- | Smallest odd integer greater than or equal to @x@. smallestOddAtLeast :: Integral a => a -> a smallestOddAtLeast x = if odd x then x else x + 1