{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_HADDOCK hide #-}

-- This is an Internal module, hidden from Haddock
module Core.Text.Breaking (
    breakWords,
    breakLines,
    breakPieces,
    intoPieces,
    intoChunks,
    isNewline,
) where

import Core.Text.Rope
import Data.Char (isSpace)
import Data.List (uncons)
import qualified Data.Text.Short as S (ShortText, break, empty, null, uncons)

{- |
Split a passage of text into a list of words. A line is broken wherever there
is one or more whitespace characters, as defined by "Data.Char"'s
'Data.Char.isSpace'.

Examples:

@
λ> __breakWords \"This is a test\"__
[\"This\",\"is\",\"a\",\"test\"]
λ> __breakWords (\"St\" <> \"op and \" <> \"go left\")__
[\"Stop\",\"and\",\"go\",\"left\"]
λ> __breakWords emptyRope__
[]
@
-}
breakWords :: Rope -> [Rope]
breakWords = filter (not . nullRope) . breakPieces isSpace

{- |
Split a paragraph of text into a list of its individual lines. The paragraph
will be broken wherever there is a @'\n'@ character.

Blank lines will be preserved. Note that as a special case you do /not/ get a
blank entry at the end of the a list of newline terminated strings.

@
λ> __breakLines \"Hello\\n\\nWorld\\n\"__
[\"Hello\",\"\",\"World\"]
@
-}
breakLines :: Rope -> [Rope]
breakLines text =
    let result = breakPieces isNewline text
        n = length result - 1
        (fore, aft) = splitAt n result
     in case result of
            [] -> []
            [p] -> [p]
            _ ->
                if aft == [""]
                    then fore
                    else result

{- |
Predicate testing whether a character is a newline. After
'Data.Char.isSpace' et al in "Data.Char".
-}
isNewline :: Char -> Bool
isNewline c = c == '\n'
{-# INLINEABLE isNewline #-}

{- |
Break a Rope into pieces whereever the given predicate function returns
@True@. If found, that character will not be included on either side. Empty
runs, however, *will* be preserved.
-}
breakPieces :: (Char -> Bool) -> Rope -> [Rope]
breakPieces predicate text =
    let x = unRope text
        (final, result) = foldr (intoPieces predicate) (Nothing, []) x
     in case final of
            Nothing -> result
            Just piece -> intoRope piece : result

{-
Was the previous piece a match, or are we in the middle of a run of
characters? If we were, then join the previous run to the current piece
before processing into chunks.
-}
-- now for right fold
intoPieces :: (Char -> Bool) -> S.ShortText -> (Maybe S.ShortText, [Rope]) -> (Maybe S.ShortText, [Rope])
intoPieces predicate piece (stream, list) =
    let piece' = case stream of
            Nothing -> piece
            Just previous -> piece <> previous -- more rope, less text?
        pieces = intoChunks predicate piece'
     in case uncons pieces of
            Nothing -> (Nothing, list)
            Just (text, remainder) -> (Just (fromRope text), remainder ++ list)

--
-- λ> S.break isSpace "a d"
-- ("a"," d")
--
-- λ> S.break isSpace " and"
-- (""," and")
--
-- λ> S.break isSpace "and "
-- ("and"," ")
--
-- λ> S.break isSpace ""
-- ("","")
--
-- λ> S.break isSpace " "
-- (""," ")
--

{-
This was more easily expressed as

  let
    remainder' = S.drop 1 remainder
  in
    if remainder == " "

for the case when we were breaking on spaces. But generalized to a predicate
we have to strip off the leading character and test that its the only character;
this is cheaper than S.length etc.
-}
intoChunks :: (Char -> Bool) -> S.ShortText -> [Rope]
intoChunks _ piece | S.null piece = []
intoChunks predicate piece =
    let (chunk, remainder) = S.break predicate piece

        -- Handle the special case that a trailing " " (generalized to predicate)
        -- is the only character left.
        (trailing, remainder') = case S.uncons remainder of
            Nothing -> (False, S.empty)
            Just (c, remaining) ->
                if S.null remaining
                    then (predicate c, S.empty)
                    else (False, remaining)
     in if trailing
            then intoRope chunk : emptyRope : []
            else intoRope chunk : intoChunks predicate remainder'