module Data.Text.ParagraphLayout.Internal.BreakSpec (spec) where import Control.Monad (forM_) import Data.Text (empty, pack, singleton) import Data.Text.ICU ( LocaleName (Locale) , breakCharacter , breakLine , breakSentence , breakWord ) import qualified Data.Text.ICU as BreakStatus (Line (..), Word (..)) import Test.Hspec import Data.Text.ParagraphLayout.Internal.Break spec :: Spec spec = do describe "breaksDesc" $ do -- One of the crucial building blocks of a text layout engine. describe "breakLine" $ do let b lang = breaksDesc $ breakLine (Locale lang) it "finds no breaks in empty input" $ b "en" empty `shouldBe` [] it "finds break at offset 0 in non-empty input" $ b "en" (singleton 'a') `shouldBe` [ (0, BreakStatus.Soft)] it "finds hard break after newline" $ b "en" (pack "hello\nworld") `shouldBe` [ (6, BreakStatus.Hard) , (0, BreakStatus.Soft) ] it "finds hard break after each of newlines" $ b "en" (pack "hello\n\nworld") `shouldBe` [ (7, BreakStatus.Hard) , (6, BreakStatus.Hard) , (0, BreakStatus.Soft) ] it "finds soft breaks after spaces and tabs" $ b "en" (pack "a few\twords") `shouldBe` [ (6, BreakStatus.Soft) , (2, BreakStatus.Soft) , (0, BreakStatus.Soft) ] it "finds soft breaks after each run of whitespace" $ b "en" (pack " a few\t more \n words\n") `shouldBe` [ (16, BreakStatus.Soft) , (15, BreakStatus.Hard) , (9, BreakStatus.Soft) , (4, BreakStatus.Soft) , (1, BreakStatus.Soft) , (0, BreakStatus.Soft) ] it "finds soft breaks after spaces and hyphens" $ b "cs" (pack "následuje stanice Frýdek-Místek") `shouldBe` [ (27, BreakStatus.Soft) , (19, BreakStatus.Soft) , (11, BreakStatus.Soft) , (0, BreakStatus.Soft) ] it "finds soft breaks in Japanese kana" $ b "ja" (pack "トイレはどこですか?") `shouldBe` [ (24, BreakStatus.Soft) , (21, BreakStatus.Soft) , (18, BreakStatus.Soft) , (15, BreakStatus.Soft) , (12, BreakStatus.Soft) , (9, BreakStatus.Soft) , (6, BreakStatus.Soft) , (3, BreakStatus.Soft) , (0, BreakStatus.Soft) ] let jaText = pack "五ヶ月‡コード" let jaBreaksStrict = [ (18, BreakStatus.Soft) , (12, BreakStatus.Soft) , (9, BreakStatus.Soft) , (6, BreakStatus.Soft) , (0, BreakStatus.Soft) ] let jaBreaksLoose = [ (18, BreakStatus.Soft) , (15, BreakStatus.Soft) , (12, BreakStatus.Soft) , (9, BreakStatus.Soft) , (6, BreakStatus.Soft) , (3, BreakStatus.Soft) , (0, BreakStatus.Soft) ] -- Observed behaviour. -- Not sure why Chinese rules are stricter for Japanese text. -- This behaviour may change with future versions of ICU. let expectedStrictLocales = [ "" , "en" , "ja@lb=strict" , "zh" , "zh_Hans" , "zh_Hant" , "zxx" , "zxx-any-invalid-suffix" ] let expectedLooseLocales = [ "@lb=loose" , "en@lb=loose" , "ja" , "ja_JP" , "ja-JP" , "ja-any-invalid-suffix" , "zh@lb=loose" , "zxx-any-invalid-suffix@lb=loose" ] expectedStrictLocales `forM_` \ l -> it ("uses strict line breaks for " ++ l ++ " locale") $ b l jaText `shouldBe` jaBreaksStrict expectedLooseLocales `forM_` \ l -> it ("uses loose line breaks for " ++ l ++ " locale") $ b l jaText `shouldBe` jaBreaksLoose -- Probably not useful for a web browser rendering engine. describe "breakSentence" $ do let b lang = breaksDesc $ breakSentence (Locale lang) it "finds no breaks in empty input" $ b "en" empty `shouldBe` [] it "finds break at offset 0 in non-empty input" $ b "en" (singleton 'a') `shouldBe` [(0, ())] -- Probably not useful for a web browser rendering engine, -- but may be used for text search and selection. describe "breakWord" $ do let b lang = breaksDesc $ breakWord (Locale lang) it "finds no breaks in empty input" $ b "en" empty `shouldBe` [] it "finds break at offset 0 in non-empty input" $ b "en" (singleton 'a') `shouldBe` [ (0, BreakStatus.Uncategorized) ] it "finds breaks after runs of letters and spaces" $ b "en" (pack "a few words") `shouldBe` [ (8, BreakStatus.Uncategorized) , (5, BreakStatus.Letter) , (2, BreakStatus.Uncategorized) , (1, BreakStatus.Letter) , (0, BreakStatus.Uncategorized) ] -- Useful for breaking inside words for narrow output. -- This can result in breaking ligatures. describe "breakCharacter" $ do let b lang = breaksDesc $ breakCharacter (Locale lang) it "finds no breaks in empty input" $ b "en" empty `shouldBe` [] it "finds break at offset 0 in non-empty input" $ b "en" (singleton 'a') `shouldBe` [(0, ())] describe "subOffsetsDesc" $ do let result = subOffsetsDesc 5 [(11, 'a'), (8, 'b'), (5, 'c'), (2, 'd')] it "should reduce offsets" $ map fst result `shouldBe` [6, 3, 0] it "should preserve payload" $ map snd result `shouldBe` ['a', 'b', 'c']