{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_HADDOCK hide #-}

-- This is an Internal module, hidden from Haddock
module Core.Text.Breaking (
    breakWords,
    breakLines,
    breakPieces,
    intoPieces,
    intoChunks,
    isNewline,
) where

import Core.Text.Rope
import Data.Char (isSpace)
import Data.List (uncons)
import qualified Data.Text.Short as S (ShortText, break, empty, null, uncons)

{- |
Split a passage of text into a list of words. A line is broken wherever there
is one or more whitespace characters, as defined by "Data.Char"'s
'Data.Char.isSpace'.

Examples:

@
λ> __breakWords \"This is a test\"__
[\"This\",\"is\",\"a\",\"test\"]
λ> __breakWords (\"St\" <> \"op and \" <> \"go left\")__
[\"Stop\",\"and\",\"go\",\"left\"]
λ> __breakWords emptyRope__
[]
@
-}
breakWords :: Rope -> [Rope]
breakWords :: Rope -> [Rope]
breakWords = (Rope -> Bool) -> [Rope] -> [Rope]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Rope -> Bool) -> Rope -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Rope -> Bool
nullRope) ([Rope] -> [Rope]) -> (Rope -> [Rope]) -> Rope -> [Rope]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
isSpace

{- |
Split a paragraph of text into a list of its individual lines. The paragraph
will be broken wherever there is a @'\n'@ character.

Blank lines will be preserved. Note that as a special case you do /not/ get a
blank entry at the end of the a list of newline terminated strings.

@
λ> __breakLines \"Hello\\n\\nWorld\\n\"__
[\"Hello\",\"\",\"World\"]
@
-}
breakLines :: Rope -> [Rope]
breakLines :: Rope -> [Rope]
breakLines Rope
text =
    let result :: [Rope]
result = (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
isNewline Rope
text
        n :: Int
n = [Rope] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Rope]
result Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
        ([Rope]
fore, [Rope]
aft) = Int -> [Rope] -> ([Rope], [Rope])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [Rope]
result
     in case [Rope]
result of
            [] -> []
            [Rope
p] -> [Rope
p]
            [Rope]
_ ->
                if [Rope]
aft [Rope] -> [Rope] -> Bool
forall a. Eq a => a -> a -> Bool
== [Rope
""]
                    then [Rope]
fore
                    else [Rope]
result

{- |
Predicate testing whether a character is a newline. After
'Data.Char.isSpace' et al in "Data.Char".
-}
isNewline :: Char -> Bool
isNewline :: Char -> Bool
isNewline Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n'
{-# INLINEABLE isNewline #-}

{- |
Break a Rope into pieces whereever the given predicate function returns
@True@. If found, that character will not be included on either side. Empty
runs, however, *will* be preserved.
-}
breakPieces :: (Char -> Bool) -> Rope -> [Rope]
breakPieces :: (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
predicate Rope
text =
    let x :: FingerTree Width ShortText
x = Rope -> FingerTree Width ShortText
unRope Rope
text
        (Maybe ShortText
final, [Rope]
result) = (ShortText
 -> (Maybe ShortText, [Rope]) -> (Maybe ShortText, [Rope]))
-> (Maybe ShortText, [Rope])
-> FingerTree Width ShortText
-> (Maybe ShortText, [Rope])
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((Char -> Bool)
-> ShortText
-> (Maybe ShortText, [Rope])
-> (Maybe ShortText, [Rope])
intoPieces Char -> Bool
predicate) (Maybe ShortText
forall a. Maybe a
Nothing, []) FingerTree Width ShortText
x
     in case Maybe ShortText
final of
            Maybe ShortText
Nothing -> [Rope]
result
            Just ShortText
piece -> ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
piece Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: [Rope]
result

{-
Was the previous piece a match, or are we in the middle of a run of
characters? If we were, then join the previous run to the current piece
before processing into chunks.
-}
-- now for right fold
intoPieces :: (Char -> Bool) -> S.ShortText -> (Maybe S.ShortText, [Rope]) -> (Maybe S.ShortText, [Rope])
intoPieces :: (Char -> Bool)
-> ShortText
-> (Maybe ShortText, [Rope])
-> (Maybe ShortText, [Rope])
intoPieces Char -> Bool
predicate ShortText
piece (Maybe ShortText
stream, [Rope]
list) =
    let piece' :: ShortText
piece' = case Maybe ShortText
stream of
            Maybe ShortText
Nothing -> ShortText
piece
            Just ShortText
previous -> ShortText
piece ShortText -> ShortText -> ShortText
forall a. Semigroup a => a -> a -> a
<> ShortText
previous -- more rope, less text?
        pieces :: [Rope]
pieces = (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
predicate ShortText
piece'
     in case [Rope] -> Maybe (Rope, [Rope])
forall a. [a] -> Maybe (a, [a])
uncons [Rope]
pieces of
            Maybe (Rope, [Rope])
Nothing -> (Maybe ShortText
forall a. Maybe a
Nothing, [Rope]
list)
            Just (Rope
text, [Rope]
remainder) -> (ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just (Rope -> ShortText
forall α. Textual α => Rope -> α
fromRope Rope
text), [Rope]
remainder [Rope] -> [Rope] -> [Rope]
forall a. [a] -> [a] -> [a]
++ [Rope]
list)

--
-- λ> S.break isSpace "a d"
-- ("a"," d")
--
-- λ> S.break isSpace " and"
-- (""," and")
--
-- λ> S.break isSpace "and "
-- ("and"," ")
--
-- λ> S.break isSpace ""
-- ("","")
--
-- λ> S.break isSpace " "
-- (""," ")
--

{-
This was more easily expressed as

  let
    remainder' = S.drop 1 remainder
  in
    if remainder == " "

for the case when we were breaking on spaces. But generalized to a predicate
we have to strip off the leading character and test that its the only character;
this is cheaper than S.length etc.
-}
intoChunks :: (Char -> Bool) -> S.ShortText -> [Rope]
intoChunks :: (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
_ ShortText
piece | ShortText -> Bool
S.null ShortText
piece = []
intoChunks Char -> Bool
predicate ShortText
piece =
    let (ShortText
chunk, ShortText
remainder) = (Char -> Bool) -> ShortText -> (ShortText, ShortText)
S.break Char -> Bool
predicate ShortText
piece

        -- Handle the special case that a trailing " " (generalized to predicate)
        -- is the only character left.
        (Bool
trailing, ShortText
remainder') = case ShortText -> Maybe (Char, ShortText)
S.uncons ShortText
remainder of
            Maybe (Char, ShortText)
Nothing -> (Bool
False, ShortText
S.empty)
            Just (Char
c, ShortText
remaining) ->
                if ShortText -> Bool
S.null ShortText
remaining
                    then (Char -> Bool
predicate Char
c, ShortText
S.empty)
                    else (Bool
False, ShortText
remaining)
     in if Bool
trailing
            then ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
chunk Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: Rope
emptyRope Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: []
            else ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
chunk Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
predicate ShortText
remainder'