{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Source.Source
( Source
, bytes
, fromUTF8
, Source.Source.length
, Source.Source.null
, totalRange
, totalSpan
, fromText
, toText
, slice
, drop
, take
, Source.Source.lines
, lineRanges
, lineRangesWithin
, newlineIndices
) where
import Prelude hiding (drop, take)
import Control.Arrow ((&&&))
import Control.DeepSeq (NFData)
import Data.Aeson (FromJSON (..), withText)
import qualified Data.ByteString as B
import Data.Char (ord)
import Data.Maybe (fromMaybe)
import Data.Monoid (Last (..))
import Data.Semilattice.Lower
import Data.String (IsString (..))
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Text.Encoding.Error (lenientDecode)
import GHC.Generics (Generic)
import Source.Range
import Source.Span (Pos (..), Span (Span))
newtype Source = Source { bytes :: B.ByteString }
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic, NFData)
fromUTF8 :: B.ByteString -> Source
fromUTF8 = Source
instance FromJSON Source where
parseJSON = withText "Source" (pure . fromText)
length :: Source -> Int
length = B.length . bytes
null :: Source -> Bool
null = B.null . bytes
totalRange :: Source -> Range
totalRange = Range 0 . B.length . bytes
totalSpan :: Source -> Span
totalSpan source = Span (Pos 1 1) (Pos (Prelude.length ranges) (succ (end lastRange - start lastRange))) where
ranges = lineRanges source
lastRange = fromMaybe lowerBound (getLast (foldMap (Last . Just) ranges))
fromText :: T.Text -> Source
fromText = Source . T.encodeUtf8
toText :: Source -> T.Text
toText = T.decodeUtf8With lenientDecode . bytes
slice :: Source -> Range -> Source
slice source range = taking $ dropping source where
dropping = drop (start range)
taking = take (rangeLength range)
drop :: Int -> Source -> Source
drop i = Source . B.drop i . bytes
take :: Int -> Source -> Source
take i = Source . B.take i . bytes
lines :: Source -> [Source]
lines source = slice source <$> lineRanges source
lineRanges :: Source -> [Range]
lineRanges source = lineRangesWithin source (totalRange source)
lineRangesWithin :: Source -> Range -> [Range]
lineRangesWithin source range
= uncurry (zipWith Range)
. ((start range:) &&& (<> [ end range ]))
. fmap (+ succ (start range))
. newlineIndices
. bytes
$ slice source range
newlineIndices :: B.ByteString -> [Int]
newlineIndices = go 0 where
go n bs
| B.null bs = []
| otherwise = case (searchCR bs, searchLF bs) of
(Nothing, Nothing) -> []
(Just i, Nothing) -> recur n i bs
(Nothing, Just i) -> recur n i bs
(Just crI, Just lfI)
| succ crI == lfI -> recur n lfI bs
| otherwise -> recur n (min crI lfI) bs
recur n i bs = let j = n + i in j : go (succ j) (B.drop (succ i) bs)
searchLF = B.elemIndex (toEnum (ord '\n'))
searchCR = B.elemIndex (toEnum (ord '\r'))
{-# INLINE newlineIndices #-}