{- -----------------------------------------------------------------------------
Copyright 2020 Kevin P. Barry

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------------------------------------------------------------------------- -}

-- Author: Kevin P. Barry [ta0kira@gmail.com]

-- | Language-specific hyphenation rules.

{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE Safe #-}

module WEditorHyphen.LangHyphen (
  LangHyphen,
  langHyphen,
) where

import Data.Char
import Data.List
import Text.Hyphenation
import WEditor.LineWrap


-- | Word splitter that hyphenates according to a specific 'Language'. It pairs
--   the 'Language' with the 'Hyphenator' used to split words for it. Use
--   'langHyphen' to create one.
data LangHyphen = LangHyphen Language Hyphenator

-- | Creates a word splitter that applies the hyphenation rules of the given
--   'Language'.
--
--   Example usage:
--
-- @
-- import Text.Hyphenation
-- import WEditor.Document
-- import WEditor.LineWrap
-- import WEditorHyphen.LangHyphen
--
-- content = map UnparsedPara (lines "Your document content.")
--
-- doc = editDocument (breakWords (langHyphen English_US)) content
-- @
langHyphen :: Language -> LangHyphen
langHyphen lang = LangHyphen lang hyphenator where
  -- The hyphenator for the language is stored next to the language itself.
  hyphenator = languageHyphenator lang


-- Private below here.

instance Show LangHyphen where
  -- Only the language is shown; the hyphenator is left out.
  show splitter = case splitter of
    LangHyphen lang _ -> show lang

instance WordSplitter LangHyphen Char where
  -- Computes the sizes of the pieces to break the word cs into. k limits the
  -- size of the first piece and w limits every piece that follows a break.
  -- Nothing is returned if w is below the language's minimum width or if k is
  -- larger than w. The size of the final piece is not included in the list.
  splitWord (LangHyphen l h) k w cs
    | w < minWidth l || k > w = Nothing
    | otherwise = Just breaks where
        -- Only the part between leading and trailing punctuation is passed to
        -- the hyphenator; the punctuation is reattached to the outer segments.
        (cb,cs',ce) = trimPunct l cs
        breaks
          -- Move the word to the next line if it has punctuation in the middle.
          | any (noSplitChars l) cs' = []
          | otherwise =
              case hyphenate h cs' of
                -- A break is only possible with at least two segments.
                (s0:ss@(_:_)) -> combine k (cb ++ s0) (appendToLast ce ss)
                -- Zero segments is treated the same as a single segment,
                -- rather than failing on an incomplete pattern match.
                _ -> []
        -- Appends the suffix to the final segment without relying on the
        -- partial functions init and last.
        appendToLast suffix = go where
          go []         = []
          go [s]        = [s ++ suffix]
          go (s:others) = s : go others
        -- combine t x ys: t is the space available for the current piece, x is
        -- the piece accumulated so far and ys are the segments not yet placed.
        combine _ _ [] = []
        combine t x (y:ys)
          -- Move the rest to the next line if the segment is already too large.
          | size x > t = []
          -- Add a break if adding a segment would exceed the remaining space.
          -- NOTE(review): there used to be an extra guard ahead of this one
          -- that compared the plain length when y was the final segment. It
          -- had the same result and was always implied by this guard, so the
          -- final piece is still required to leave room for a hyphen. Confirm
          -- whether that is the intended behavior.
          | size (x ++ y) > t = length x : combine w y ys
          -- Append the next segment to the current segment.
          | otherwise = combine t (x ++ y) ys
        -- Length of a piece including the hyphen that follows it, unless the
        -- piece already ends with the hyphen.
        size s
          | hyphenChar l `isSuffixOf` s = length s
          | otherwise = length s + length (hyphenChar l)
  isWordChar (LangHyphen l _) = wordChars l
  isWhitespace (LangHyphen l _) = whitespaceChars l
  appendHyphen (LangHyphen l _) = (++ hyphenChar l)
  endsWithHyphen (LangHyphen l _) cs
    -- An empty hyphen is a suffix of everything, so it never counts.
    | null (hyphenChar l) = False
    | otherwise           = hyphenChar l `isSuffixOf` cs

-- Minimum line width required before any hyphenation is attempted. Add
-- language-specific alternatives above the catch-all.
minWidth :: Language -> Int
minWidth lang = case lang of
  _ -> 8

-- Decides whether a character is part of a word: it is if its Unicode general
-- category is one of the word categories, if it is a no-split character, or if
-- it is one of the extra characters listed for the language.
wordChars :: Language -> Char -> Bool
wordChars lang c = matchesDefault || c `elem` extraChars lang where
  -- Add language-specific extra word characters here.
  extraChars English_US = "'"
  extraChars English_GB = "'"
  extraChars _          = ""
  -- Override the language-specific character categories here.
  categories _ = baseCategories
  -- Leave the stuff below here alone.
  matchesDefault =
    generalCategory c `elem` categories lang || noSplitChars lang c
  baseCategories = [
      DashPunctuation,
      LowercaseLetter,
      ModifierLetter,
      NonSpacingMark,
      OtherLetter,
      SpacingCombiningMark,
      TitlecaseLetter,
      UppercaseLetter
    ]

-- Decides whether a character prevents a word from being hyphenated when it
-- appears in the middle of the word. A character matches if its Unicode general
-- category is one of the listed categories and the language does not exempt it.
noSplitChars :: Language -> Char -> Bool
noSplitChars lang c = matchesDefault && c `notElem` exempt lang where
  -- Add language-specific exemptions here.
  exempt English_US = "'"
  exempt English_GB = "'"
  exempt _          = ""
  -- Override the language-specific character categories here.
  categories _ = baseCategories
  -- Leave the stuff below here alone.
  matchesDefault = generalCategory c `elem` categories lang
  baseCategories = [
      ConnectorPunctuation,
      CurrencySymbol,
      DecimalNumber,
      FinalQuote,
      InitialQuote,
      OtherNumber,
      OtherPunctuation
    ]

-- Whitespace detection. Add language-specific alternatives above the
-- catch-all.
whitespaceChars :: Language -> Char -> Bool
whitespaceChars lang = case lang of
  _ -> isSeparator

-- Characters appended to a line when a word is broken. Add language-specific
-- alternatives above the catch-all.
hyphenChar :: Language -> [Char]
hyphenChar lang = case lang of
  _ -> "-"

-- Splits a word into leading punctuation, the core of the word, and trailing
-- punctuation, where punctuation means characters matching noSplitChars. Each
-- part keeps its original character order, and concatenating the three parts
-- gives back the original word.
trimPunct :: Language -> [Char] -> ([Char],[Char],[Char])
trimPunct l cs = (leading,core,trailing) where
  (leading,rest) = span (noSplitChars l) cs
  -- Trailing punctuation is taken from what is left after the leading part, so
  -- a word made up entirely of punctuation is not returned twice.
  (trailingRev,coreRev) = span (noSplitChars l) (reverse rest)
  -- Both pieces were collected back to front; restore the original order.
  core     = reverse coreRev
  trailing = reverse trailingRev