Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
This module builds on module Control.Foldl.Text, adding stateful transducers and grouping operations.
Synopsis
- decoder :: (ByteString -> Decoding) -> OnDecodeError -> Transducer ByteString Text ()
- utf8 :: OnDecodeError -> Transducer ByteString Text ()
- utf8lenient :: Transducer ByteString Text ()
- utf8strict :: Transducer ByteString Text ()
- decoderE :: MonadIO m => (OnDecodeError -> ByteString -> Decoding) -> TransducerM (ExceptT UnicodeException m) ByteString Text ()
- utf8E :: MonadIO m => TransducerM (ExceptT UnicodeException m) ByteString Text ()
- newline :: Transducer Text Text ()
- stripStart :: Transducer Text Text ()
- stripEnd :: Transducer Text Text ()
- words :: Transducer Text Text ()
- lines :: Transducer Text Text ()
- paragraphs :: Transducer Text Text ()
- sections :: [Text] -> Transducer Text Text ()
- textualSplit :: TextualMonoid m => (Char -> Bool) -> Transducer m m ()
- textualBreak :: TextualMonoid m => (Char -> Bool) -> Transducer m m ()
Decoding transducers
decoder :: (ByteString -> Decoding) -> OnDecodeError -> Transducer ByteString Text () Source #
Builds a decoding Transducer
out of a stream-oriented decoding function
from Data.Text.Encoding and an error handler from
Data.Text.Encoding.Error.
utf8 :: OnDecodeError -> Transducer ByteString Text () Source #
Builds a UTF8-decoding Transducer
. Takes an error handler from
Data.Text.Encoding.Error.
utf8lenient :: Transducer ByteString Text () Source #
UTF8-decoding Transducer
that replaces invalid input bytes with the
Unicode replacement character U+FFFD.
>>>
L.fold (transduce utf8lenient L.list) (map fromString ["decode","this"])
["decode","this"]
>>>
L.fold (transduce utf8lenient L.list) (map fromString ["across \xe2","\x98\x83 boundaries"])
["across ","\9731 boundaries"]
>>>
L.fold (transduce utf8lenient L.list) (map fromString ["invalid \xc3\x28 sequence"])
["invalid \65533( sequence"]
>>>
L.fold (transduce utf8lenient L.list) (map fromString ["incomplete \xe2"])
["incomplete ","\65533"]
utf8strict :: Transducer ByteString Text () Source #
BEWARE!
This Transducer
may throw UnicodeException
.
BEWARE!
>>>
L.fold (transduce utf8strict L.list) (map fromString ["invalid \xc3\x28 sequence"])
*** Exception: Cannot decode byte '\xc3': Data.Text.Internal.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream
>>>
L.fold (transduce utf8strict L.list) (map fromString ["incomplete \xe2"])
*** Exception: Cannot decode input: leftovers
decoderE :: MonadIO m => (OnDecodeError -> ByteString -> Decoding) -> TransducerM (ExceptT UnicodeException m) ByteString Text () Source #
utf8E :: MonadIO m => TransducerM (ExceptT UnicodeException m) ByteString Text () Source #
Like utf8strict
, but catches UnicodeException
in IO
and uses
Except
to communicate the error.
>>>
runExceptT $ L.foldM (transduceM utf8E (L.generalize L.list)) (map fromString ["invalid \xc3\x28 sequence"])
Left Cannot decode byte '\xc3': Data.Text.Internal.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream
>>>
runExceptT $ L.foldM (transduceM utf8E (L.generalize L.list)) (map fromString ["incomplete \xe2"])
Left Cannot decode input: leftovers
Other transducers
newline :: Transducer Text Text () Source #
Appends a newline at the end of the stream.
>>>
L.fold (transduce newline L.list) (map T.pack ["without","newline"])
["without","newline","\n"]
stripStart :: Transducer Text Text () Source #
Remove leading white space from a stream of Text
.
>>>
L.fold (transduce stripStart L.list) (map T.pack [" ","", " text "])
["text "]
stripEnd :: Transducer Text Text () Source #
Remove trailing white space from a stream of Text
.
BEWARE! This function naively accumulates in memory any arriving "blank blocks" of text until a non-blank block or end-of-stream arrives, and therefore it is potentially dangerous. Do not use with untrusted inputs.
>>>
L.fold (transduce stripEnd L.list) (map T.pack [" ", " \n text ", " ", "" , " "])
[" "," \n text"]
Splitters
words :: Transducer Text Text () Source #
Splits a stream of text into words, removing whitespace.
>>>
L.fold (folds words L.list L.list) (map T.pack [" a","aa ", "bb c","cc dd ","ee f","f"])
[["a","aa"],["bb"],["c","cc"],["dd"],["ee"],["f","f"]]
Used with transduce
, it simply removes all whitespace:
>>>
L.fold (L.transduce words L.list) (map T.pack [" a","aa ", "bb c","cc dd ","ee f","f"])
["a","aa","bb","c","cc","dd","ee","f","f"]
lines :: Transducer Text Text () Source #
Splits a stream of text into lines, removing the newlines.
>>>
L.fold (L.groups lines (surround [T.pack "x"] []) L.list) (map T.pack ["line 1\n line 2\n"])
["x","line 1","x"," line 2"]
>>>
L.fold (L.groups lines newline L.list) (map T.pack ["line 1\n line 2\n"])
["line 1","\n"," line 2","\n"]
Used with transduce
, it simply removes newlines:
>>>
L.fold (L.transduce lines L.list) (map T.pack ["line 1\n line 2\n"])
["line 1"," line 2"]
paragraphs :: Transducer Text Text () Source #
Splits a stream of text into paragraphs, removing empty lines and trimming newspace from the start of each line.
>>>
map mconcat (L.fold (folds paragraphs L.list L.list) (map T.pack [" \n aaa","\naa ", " \n\nbb\n"]))
["aaa\naa \n","bb\n"]
Used with transduce
, it removes empty lines and trims newspace from the
start of each line.
sections :: [Text] -> Transducer Text Text () Source #
Given a (possibly infinite) list of section headings, split the stream into sections and remove the headings.
>>>
map mconcat (L.fold (folds (sections (map T.pack ["#1\n","#2\n"])) L.list L.list) (map T.pack [" #1\naa\n#","2\nbb"]))
[" ","aa\n","bb"]
>>>
map mconcat (L.fold (folds (sections (map T.pack ["1234"])) L.list L.list) (map T.pack [" 1","2","x","1","2","3","4","5"]))
[" 12x","5"]
Used with transduce
, it simply removes all headings.
Textual
Transducers that work on Text
and other text-like types.
textualSplit :: TextualMonoid m => (Char -> Bool) -> Transducer m m () Source #
>>>
L.fold (folds (textualSplit (=='.')) L.list L.list) [".","bb.bb","c.c."]
[[""],["","bb"],["bb","c"],["c"],[""]]
textualBreak :: TextualMonoid m => (Char -> Bool) -> Transducer m m () Source #
>>>
L.fold (bisect (textualBreak (=='.')) (reify id) ignore L.list) ["aa","bb.bb","cc"]
["aa","bb"]