{- |
Copyright : (c) 2024 Pierre Le Marre
Maintainer: dev@wismill.eu
Stability   : experimental

Parser for properties files:

* [DerivedCoreProperties.txt](https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt)
* [PropList.txt](https://www.unicode.org/reports/tr44/#PropList.txt)
* [DerivedNormalizationProps.txt](https://www.unicode.org/reports/tr44/#DerivedNormalizationProps.txt)
* [extracted/DerivedCombiningClass.txt](https://www.unicode.org/reports/tr44/#DerivedCombiningClass.txt)

@since 0.1.0
-}
module UCD.Parser.Properties (Entry (..), parse) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.ByteString.Short qualified as BS
import Data.List qualified as L
import UCD.Parser.Common (
  UnicodeRange,
  parseRange,
  pattern HashTag,
  pattern NewLine,
  pattern SemiColon,
 )

-- | An entry from a properties file
--
-- Each entry corresponds to one data line of the file, i.e. a line that is
-- neither empty nor a comment.
--
-- @since 0.1.0
data Entry = Entry
  { _range :: !(UnicodeRange ())
  -- ^ Result of 'parseRange' applied to the text preceding the first @;@
  -- of the line, stripped of surrounding whitespace.
  , _property :: !BS.ShortByteString
  -- ^ Text located between the first @;@ and the trailing @#@ comment
  -- (if any), stripped of surrounding whitespace.
  }
  deriving (Eq, Show)

-- | A parser for properties files
--
-- The input is consumed one line at a time. Each line is stripped of its
-- surrounding whitespace and handed to 'parsePropertyLine'; lines that it
-- rejects are skipped, the others each contribute one 'Entry' to the result.
-- The list is produced lazily via 'L.unfoldr'.
--
-- @since 0.1.0
parse :: B.ByteString -> [Entry]
parse = L.unfoldr go
 where
  -- Produce the next entry together with the remaining input, or 'Nothing'
  -- once the input is exhausted.
  go :: B.ByteString -> Maybe (Entry, B.ByteString)
  go raw
    | B.null raw = Nothing
    | otherwise = case B.span (/= NewLine) raw of
        -- View patterns: strip the current line and drop the line
        -- terminator (if present) from the remaining input.
        (B8.strip -> line, B.drop 1 -> raw') ->
          case parsePropertyLine line of
            -- Not a data line: keep scanning the rest of the input.
            Nothing -> go raw'
            Just entry -> Just (entry, raw')

-- | Parse a single line of a properties file.
--
-- The line is expected to be already stripped of leading whitespace, so that
-- a comment line starts with @#@.
--
-- Returns 'Nothing' when the line:
--
-- * is empty;
-- * starts with @#@ (comment line);
-- * contains no @;@ separator (malformed data line). Such a line is skipped
--   rather than raising the exception a partial @tail@ would throw.
--
-- Otherwise returns the 'Entry' built from the text before the first @;@
-- (parsed with 'parseRange') and the text between that @;@ and the first
-- following @#@, both stripped of surrounding whitespace.
parsePropertyLine :: B.ByteString -> Maybe Entry
parsePropertyLine line = case B.uncons line of
  Nothing -> Nothing
  Just (firstByte, _)
    | firstByte == HashTag -> Nothing
    | otherwise -> parseLine line
 where
  parseLine :: B.ByteString -> Maybe Entry
  parseLine raw = do
    let (rangeLn, line1) = B.span (/= SemiColon) raw
    -- @line1@ starts with the separator when there is one; 'B.uncons'
    -- drops it and fails gracefully when the separator is missing.
    (_, afterSep) <- B.uncons line1
    let property = B.takeWhile (/= HashTag) afterSep
    pure $
      Entry
        (parseRange (B8.strip rangeLn))
        (BS.toShort (B8.strip property))

--------------------------------------------------------------------------------
-- Doctest
--------------------------------------------------------------------------------

{- $
>>> parse "0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>"
[Entry {_range = CharRange {_first = '\t', _last = '\r', _rangeName = ()}, _property = "White_Space"}]
>>> parse "061C          ; Bidi_Control # Cf       ARABIC LETTER MARK"
[Entry {_range = SingleChar {_first = '\1564'}, _property = "Bidi_Control"}]
-}