| Copyright | © 2021 Julian Ospald | 
|---|---|
| License | MIT | 
| Maintainer | Julian Ospald <hasufell@posteo.de> | 
| Stability | experimental | 
| Portability | portable | 
| Safe Haskell | Safe-Inferred | 
| Language | Haskell2010 | 
System.OsString
Description
An implementation of platform specific short OsString, which is:
- on windows wide char bytes ([Word16])
- on unix char bytes ([Word8])
It captures the notion of syscall specific encoding (or the lack thereof) to avoid roundtrip issues and memory fragmentation by using unpinned byte arrays. Bytes are not touched or interpreted.
Synopsis
- data OsString
- encodeUtf :: MonadThrow m => String -> m OsString
- unsafeEncodeUtf :: HasCallStack => String -> OsString
- encodeWith :: TextEncoding -> TextEncoding -> String -> Either EncodingException OsString
- encodeFS :: String -> IO OsString
- osstr :: QuasiQuoter
- empty :: OsString
- singleton :: OsChar -> OsString
- pack :: [OsChar] -> OsString
- decodeUtf :: MonadThrow m => OsString -> m String
- decodeWith :: TextEncoding -> TextEncoding -> OsString -> Either EncodingException String
- decodeFS :: OsString -> IO String
- unpack :: OsString -> [OsChar]
- data OsChar
- unsafeFromChar :: Char -> OsChar
- toChar :: OsChar -> Char
- snoc :: OsString -> OsChar -> OsString
- cons :: OsChar -> OsString -> OsString
- last :: HasCallStack => OsString -> OsChar
- tail :: HasCallStack => OsString -> OsString
- uncons :: OsString -> Maybe (OsChar, OsString)
- head :: HasCallStack => OsString -> OsChar
- init :: HasCallStack => OsString -> OsString
- unsnoc :: OsString -> Maybe (OsString, OsChar)
- null :: OsString -> Bool
- length :: OsString -> Int
- map :: (OsChar -> OsChar) -> OsString -> OsString
- reverse :: OsString -> OsString
- intercalate :: OsString -> [OsString] -> OsString
- foldl :: forall a. (a -> OsChar -> a) -> a -> OsString -> a
- foldl' :: forall a. (a -> OsChar -> a) -> a -> OsString -> a
- foldl1 :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar
- foldl1' :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar
- foldr :: forall a. (OsChar -> a -> a) -> a -> OsString -> a
- foldr' :: forall a. (OsChar -> a -> a) -> a -> OsString -> a
- foldr1 :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar
- foldr1' :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar
- all :: (OsChar -> Bool) -> OsString -> Bool
- any :: (OsChar -> Bool) -> OsString -> Bool
- concat :: [OsString] -> OsString
- replicate :: Int -> OsChar -> OsString
- unfoldr :: forall a. (a -> Maybe (OsChar, a)) -> a -> OsString
- unfoldrN :: forall a. Int -> (a -> Maybe (OsChar, a)) -> a -> (OsString, Maybe a)
- take :: Int -> OsString -> OsString
- takeEnd :: Int -> OsString -> OsString
- takeWhileEnd :: (OsChar -> Bool) -> OsString -> OsString
- takeWhile :: (OsChar -> Bool) -> OsString -> OsString
- drop :: Int -> OsString -> OsString
- dropEnd :: Int -> OsString -> OsString
- dropWhileEnd :: (OsChar -> Bool) -> OsString -> OsString
- dropWhile :: (OsChar -> Bool) -> OsString -> OsString
- break :: (OsChar -> Bool) -> OsString -> (OsString, OsString)
- breakEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString)
- span :: (OsChar -> Bool) -> OsString -> (OsString, OsString)
- spanEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString)
- splitAt :: Int -> OsString -> (OsString, OsString)
- split :: OsChar -> OsString -> [OsString]
- splitWith :: (OsChar -> Bool) -> OsString -> [OsString]
- stripSuffix :: OsString -> OsString -> Maybe OsString
- stripPrefix :: OsString -> OsString -> Maybe OsString
- isInfixOf :: OsString -> OsString -> Bool
- isPrefixOf :: OsString -> OsString -> Bool
- isSuffixOf :: OsString -> OsString -> Bool
- breakSubstring :: OsString -> OsString -> (OsString, OsString)
- elem :: OsChar -> OsString -> Bool
- find :: (OsChar -> Bool) -> OsString -> Maybe OsChar
- filter :: (OsChar -> Bool) -> OsString -> OsString
- partition :: (OsChar -> Bool) -> OsString -> (OsString, OsString)
- index :: HasCallStack => OsString -> Int -> OsChar
- indexMaybe :: OsString -> Int -> Maybe OsChar
- (!?) :: OsString -> Int -> Maybe OsChar
- elemIndex :: OsChar -> OsString -> Maybe Int
- elemIndices :: OsChar -> OsString -> [Int]
- count :: OsChar -> OsString -> Int
- findIndex :: (OsChar -> Bool) -> OsString -> Maybe Int
- findIndices :: (OsChar -> Bool) -> OsString -> [Int]
- coercionToPlatformTypes :: Either (Coercion OsChar WindowsChar, Coercion OsString WindowsString) (Coercion OsChar PosixChar, Coercion OsString PosixString)
String types
Newtype representing short operating system specific strings.
Internally this is either WindowsString or PosixString,
 depending on the platform. Both use unpinned
 ShortByteString for efficiency.
The constructor is only exported via System.OsString.Internal.Types, since dealing with the internals isn't generally recommended, but supported in case you need to write platform specific code.
Instances
| Monoid OsString Source # | "String-Concatenation" for OsString. | 
| Semigroup OsString Source # | |
| Generic OsString Source # | |
| Show OsString Source # | On windows, decodes as UCS-2. On unix prints the raw bytes without decoding. | 
| NFData OsString Source # | |
| Defined in System.OsString.Internal.Types | |
| Eq OsString Source # | Byte equality of the internal representation. | 
| Ord OsString Source # | Byte ordering of the internal representation. | 
| Defined in System.OsString.Internal.Types | |
| Lift OsString Source # | |
| type Rep OsString Source # | |
| Defined in System.OsString.Internal.Types type Rep OsString = D1 ('MetaData "OsString" "System.OsString.Internal.Types" "os-string-2.0.4-inplace" 'True) (C1 ('MetaCons "OsString" 'PrefixI 'True) (S1 ('MetaSel ('Just "getOsString") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 PlatformString))) | |
OsString construction
encodeUtf :: MonadThrow m => String -> m OsString Source #
Partial unicode friendly encoding.
On windows this encodes as UTF16-LE (strictly), which is a pretty good guess. On unix this encodes as UTF8 (strictly), which is a good guess.
Throws an EncodingException if encoding fails. If the input does not
 contain surrogate chars, you can use unsafeEncodeUtf.
unsafeEncodeUtf :: HasCallStack => String -> OsString Source #
Unsafe unicode friendly encoding.
Like encodeUtf, except it crashes when the input contains
 surrogate chars. For sanitized input, this can be useful.
Arguments
| :: TextEncoding | unix text encoding | 
| -> TextEncoding | windows text encoding (wide char) | 
| -> String | |
| -> Either EncodingException OsString | 
Encode a FilePath with the specified encoding.
Note: on windows, we expect a "wide char" encoding (e.g. UCS-2 or UTF-16). Anything
 that works with Word16 boundaries. Picking an incompatible encoding may crash
 filepath operations.
encodeFS :: String -> IO OsString Source #
Like encodeUtf, except this mimics the behavior of the base library when doing filesystem
 operations, which is:
- on unix, uses shady PEP 383 style encoding (based on the current locale, but PEP 383 only works properly on UTF-8 encodings, so good luck)
- on windows does permissive UTF-16 encoding, where coding errors generate Chars in the surrogate range
Looking up the locale requires IO. If you're not worried about calls
 to setFileSystemEncoding, then unsafePerformIO may be feasible (make sure
 to deeply evaluate the result to catch exceptions).
osstr :: QuasiQuoter Source #
QuasiQuote an OsString. This accepts Unicode characters
 and encodes as UTF-8 on unix and UTF-16 on windows.
 If used as pattern, requires turning on the ViewPatterns extension.
pack :: [OsChar] -> OsString Source #
Pack a list of OsChar to an OsString
Note that using this in conjunction with unsafeFromChar to
 convert from [Char] to OsString is probably not what
 you want, because it will truncate unicode code points.
OsString deconstruction
decodeUtf :: MonadThrow m => OsString -> m String Source #
Partial unicode friendly decoding.
On windows this decodes as UTF16-LE (strictly), which is a pretty good guess. On unix this decodes as UTF8 (strictly), which is a good guess. Note that filenames on unix are encoding agnostic char arrays.
Throws an EncodingException if decoding fails.
Arguments
| :: TextEncoding | unix text encoding | 
| -> TextEncoding | windows text encoding | 
| -> OsString | |
| -> Either EncodingException String | 
Decode an OsString with the specified encoding.
The String is forced into memory to catch all exceptions.
decodeFS :: OsString -> IO String Source #
Like decodeUtf, except this mimics the behavior of the base library when doing filesystem
 operations, which is:
- on unix, uses shady PEP 383 style encoding (based on the current locale, but PEP 383 only works properly on UTF-8 encodings, so good luck)
- on windows does permissive UTF-16 encoding, where coding errors generate Chars in the surrogate range
Looking up the locale requires IO. If you're not worried about calls
 to setFileSystemEncoding, then unsafePerformIO may be feasible (make sure
 to deeply evaluate the result to catch exceptions).
Word types
Newtype representing a code unit.
On Windows, this is restricted to two-octet code units (Word16),
 on POSIX to one-octet code units (Word8).
Instances
| Generic OsChar Source # | |
| Show OsChar Source # | |
| NFData OsChar Source # | |
| Defined in System.OsString.Internal.Types | |
| Eq OsChar Source # | Byte equality of the internal representation. | 
| Ord OsChar Source # | Byte ordering of the internal representation. | 
| type Rep OsChar Source # | |
| Defined in System.OsString.Internal.Types type Rep OsChar = D1 ('MetaData "OsChar" "System.OsString.Internal.Types" "os-string-2.0.4-inplace" 'True) (C1 ('MetaCons "OsChar" 'PrefixI 'True) (S1 ('MetaSel ('Just "getOsChar") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 PlatformChar))) | |
Word construction
unsafeFromChar :: Char -> OsChar Source #
Truncates on unix to 1 and on Windows to 2 octets.
Word deconstruction
Basic interface
snoc :: OsString -> OsChar -> OsString Source #
O(n) Append a byte to the end of an OsString
Since: 1.4.200.0
cons :: OsChar -> OsString -> OsString Source #
O(n) cons is analogous to (:) for lists.
Since: 1.4.200.0
last :: HasCallStack => OsString -> OsChar Source #
O(1) Extract the last element of an OsString, which must be finite and non-empty. An exception will be thrown in the case of an empty OsString.
This is a partial function, consider using unsnoc instead.
Since: 1.4.200.0
tail :: HasCallStack => OsString -> OsString Source #
O(n) Extract the elements after the head of an OsString, which must be non-empty. An exception will be thrown in the case of an empty OsString.
This is a partial function, consider using uncons instead.
Since: 1.4.200.0
head :: HasCallStack => OsString -> OsChar Source #
O(1) Extract the first element of an OsString, which must be non-empty. An exception will be thrown in the case of an empty OsString.
This is a partial function, consider using uncons instead.
Since: 1.4.200.0
Transforming OsString
map :: (OsChar -> OsChar) -> OsString -> OsString Source #
O(n) map f xs is the OsString obtained by applying f to each
 element of xs.
Since: 1.4.200.0
reverse :: OsString -> OsString Source #
O(n) reverse xs efficiently returns the elements of xs in reverse order.
Since: 1.4.200.0
intercalate :: OsString -> [OsString] -> OsString Source #
O(n) The intercalate function takes an OsString and a list of
 OsStrings and concatenates the list after interspersing the first
 argument between each element of the list.
Since: 1.4.200.0
Reducing OsStrings (folds)
foldl :: forall a. (a -> OsChar -> a) -> a -> OsString -> a Source #
foldl, applied to a binary operator, a starting value (typically
 the left-identity of the operator), and an OsString, reduces the
 OsString using the binary operator, from left to right.
Since: 1.4.200.0
foldr :: forall a. (OsChar -> a -> a) -> a -> OsString -> a Source #
foldr, applied to a binary operator, a starting value
 (typically the right-identity of the operator), and an OsString,
 reduces the OsString using the binary operator, from right to left.
Since: 1.4.200.0
Special folds
Generating and unfolding OsStrings
replicate :: Int -> OsChar -> OsString Source #
O(n) replicate n x is an OsString of length n with x
 the value of every element. The following holds:
replicate w c = fst (unfoldrN w (\u -> Just (u,u)) c)
Since: 1.4.200.0
unfoldr :: forall a. (a -> Maybe (OsChar, a)) -> a -> OsString Source #
O(n), where n is the length of the result.  The unfoldr
 function is analogous to the List 'unfoldr'.  unfoldr builds an
 OsString from a seed value.  The function takes the element and
 returns Nothing if it is done producing the OsString or returns
 Just (a,b), in which case, a is the next byte in the string,
 and b is the seed value for further production.
This function is not efficient/safe. It will build a list of [Word8]
 and run the generator until it returns Nothing, otherwise recurse infinitely,
 then finally create an OsString.
If you know the maximum length, consider using unfoldrN.
Examples:
unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 == pack [0, 1, 2, 3, 4, 5]
Since: 1.4.200.0
unfoldrN :: forall a. Int -> (a -> Maybe (OsChar, a)) -> a -> (OsString, Maybe a) Source #
O(n) Like unfoldr, unfoldrN builds an OsString from a seed
 value.  However, the length of the result is limited by the first
 argument to unfoldrN.  This function is more efficient than unfoldr
 when the maximum length of the result is known.
The following equation relates unfoldrN and unfoldr:
fst (unfoldrN n f s) == take n (unfoldr f s)
Since: 1.4.200.0
Substrings
Breaking strings
takeWhileEnd :: (OsChar -> Bool) -> OsString -> OsString Source #
Returns the longest (possibly empty) suffix of elements satisfying the predicate.
takeWhileEnd p is equivalent to reverse . takeWhile p . reverse.
Since: 1.4.200.0
takeWhile :: (OsChar -> Bool) -> OsString -> OsString Source #
Similar to the list function takeWhile,
 returns the longest (possibly empty) prefix of elements
 satisfying the predicate.
Since: 1.4.200.0
dropWhileEnd :: (OsChar -> Bool) -> OsString -> OsString Source #
Similar to the list function dropWhileEnd,
 drops the longest (possibly empty) suffix of elements
 satisfying the predicate and returns the remainder.
dropWhileEnd p is equivalent to reverse . dropWhile p . reverse.
Since: 1.4.200.0
dropWhile :: (OsChar -> Bool) -> OsString -> OsString Source #
Similar to the list function dropWhile,
 drops the longest (possibly empty) prefix of elements
 satisfying the predicate and returns the remainder.
Since: 1.4.200.0
breakEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString) Source #
Returns the longest (possibly empty) suffix of elements which do not satisfy the predicate and the remainder of the string.
breakEnd p is equivalent to spanEnd (not . p) and to (dropWhileEnd (not . p) &&& takeWhileEnd (not . p)).
Since: 1.4.200.0
spanEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString) Source #
Returns the longest (possibly empty) suffix of elements satisfying the predicate and the remainder of the string.
spanEnd p is equivalent to breakEnd (not . p) and to (dropWhileEnd p &&& takeWhileEnd p).
We have
spanEnd (not . isSpace) "x y z" == ("x y ", "z")
and
spanEnd (not . isSpace) sbs == let (x, y) = span (not . isSpace) (reverse sbs) in (reverse y, reverse x)
Since: 1.4.200.0
split :: OsChar -> OsString -> [OsString] Source #
O(n) Break an OsString into pieces separated by the byte
 argument, consuming the delimiter. I.e.
split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10
split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97
split 120 "x" == ["",""] -- fromEnum 'x' == 120
split undefined "" == [] -- and not [""]
and
intercalate (singleton c) . split c == id
split == splitWith . (==)
Since: 1.4.200.0
splitWith :: (OsChar -> Bool) -> OsString -> [OsString] Source #
O(n) Splits an OsString into components delimited by
 separators, where the predicate returns True for a separator element.
 The resulting components do not contain the separators.  Two adjacent
 separators result in an empty component in the output.  eg.
splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97
splitWith undefined "" == [] -- and not [""]
Since: 1.4.200.0
stripSuffix :: OsString -> OsString -> Maybe OsString Source #
O(n) The stripSuffix function takes two OsStrings and returns Just
 the remainder of the second iff the first is its suffix, and otherwise
 Nothing.
Since: 1.4.200.0
stripPrefix :: OsString -> OsString -> Maybe OsString Source #
O(n) The stripPrefix function takes two OsStrings and returns Just
 the remainder of the second iff the first is its prefix, and otherwise
 Nothing.
Since: 1.4.200.0
Predicates
isInfixOf :: OsString -> OsString -> Bool Source #
Check whether one string is a substring of another.
Since: 1.4.200.0
isPrefixOf :: OsString -> OsString -> Bool Source #
O(n) The isPrefixOf function takes two OsStrings and returns True
 iff the first is a prefix of the second.
Since: 1.4.200.0
isSuffixOf :: OsString -> OsString -> Bool Source #
O(n) The isSuffixOf function takes two OsStrings and returns True
 iff the first is a suffix of the second.
The following holds:
isSuffixOf x y == reverse x `isPrefixOf` reverse y
Since: 1.4.200.0
Search for arbitrary substrings
breakSubstring :: OsString -> OsString -> (OsString, OsString) Source #
Break a string on a substring, returning a pair of the part of the string prior to the match, and the rest of the string.
The following relationships hold:
break (== c) l == breakSubstring (singleton c) l
For example, to tokenise a string, dropping delimiters:
tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
    where (h,t) = breakSubstring x y
To skip to the first occurrence of a string:
snd (breakSubstring x y)
To take the parts of a string before a delimiter:
fst (breakSubstring x y)
Note that calling `breakSubstring x` does some preprocessing work, so you should avoid unnecessarily duplicating breakSubstring calls with the same pattern.
Since: 1.4.200.0
Searching OsStrings
Searching by equality
filter :: (OsChar -> Bool) -> OsString -> OsString Source #
O(n) filter, applied to a predicate and an OsString,
 returns an OsString containing those characters that satisfy the
 predicate.
Since: 1.4.200.0
partition :: (OsChar -> Bool) -> OsString -> (OsString, OsString) Source #
O(n) The partition function takes a predicate and an OsString and returns
 the pair of OsStrings with elements which do and do not satisfy the
 predicate, respectively; i.e.,
partition p sbs == (filter p sbs, filter (not . p) sbs)
Since: 1.4.200.0
Indexing OsStrings
index :: HasCallStack => OsString -> Int -> OsChar Source #
O(1) OsString index (subscript) operator, starting from 0.
Since: 1.4.200.0
elemIndices :: OsChar -> OsString -> [Int] Source #
O(n) The elemIndices function extends elemIndex, by returning
 the indices of all elements equal to the query element, in ascending order.
Since: 1.4.200.0
count :: OsChar -> OsString -> Int Source #
count returns the number of times its argument appears in the OsString
Since: 1.4.200.0
findIndices :: (OsChar -> Bool) -> OsString -> [Int] Source #
O(n) The findIndices function extends findIndex, by returning the
 indices of all elements satisfying the predicate, in ascending order.
Since: 1.4.200.0
Coercions
coercionToPlatformTypes :: Either (Coercion OsChar WindowsChar, Coercion OsString WindowsString) (Coercion OsChar PosixChar, Coercion OsString PosixString) Source #
This is a type-level evidence that OsChar is a newtype wrapper
 over WindowsChar or PosixChar and OsString is a newtype wrapper
 over WindowsString or PosixString. If you pattern match on
 coercionToPlatformTypes, GHC will know that relevant types
 are coercible to each other. This helps to avoid CPP in certain scenarios.