{-# LANGUAGE DataKinds #-} {-# LANGUAGE DerivingVia #-} {-# LANGUAGE RoleAnnotations #-} {-# LANGUAGE TypeFamilies #-} -- | Strongly typed indices and offsets. -- -- The goal of this module is to make it as easy as possible to keep track -- of the various indexing schemes used by functions in the nvim API. -- -- The core abstraction is the type 'Index' tagged with the sort of things it -- is indexing (@Byte@, @CodePoint@, @Line@) and whether things are 0-indexed -- or 1-indexed. -- -- Two constructors and two destructors are provided: 'toZeroIndexed', -- 'toOneIndexed', 'fromZeroIndexed', and 'fromOneIndexed'. They should only -- be used to make external API calls, to unwrap input indices and to wrap -- output indices. The names of those functions are self-documenting, indicating -- the indexing scheme used by every index that goes in and out of the external -- API. -- -- Within Cornelis, indices remain typed at all times, using dedicated functions -- to convert between 0/1-indexing ('zeroIndex', 'oneIndex') and between -- @Byte@ and @CodePoint@ indexing ('toByte', 'fromByte'). -- -- Usually, indices are relative to a common origin (beginning of the same buffer -- or line), so it doesn't make sense to add them. There is a separate type of -- 'Offset' which can be added to indices using the operator @('.+')@. -- And @('.-.')@ gives the offset between two indices. -- -- @ -- i :: Index 'Byte 'ZeroIndexed -- i .+ Offset 42 :: Index 'Byte 'ZeroIndexed -- @ -- -- Types of 'Pos'isitions (pairs of line and column indices) and 'Interval's -- (pairs of positions or indices) are also provided, and should be used -- as much as possible to reduce the likelihood of mixing up indices. -- -- When talking about 'Pos', "(i,j)-indexed" means "i-indexed lines, j-indexed -- columns". -- -- Agda's indexing scheme (codepoints, (1,1)-indexed) is the preferred one -- (0- vs 1-indexing is heavily checked, so it doesn't matter much which -- we choose; codepoint indexing is preferred for manipulating unicode text -- (fewer invalid states than byte indexing)). -- -- A secondary indexing scheme is bytes, (0,0)-indexed, used as a unified -- low-level representation right before talking to the nvim API. module Cornelis.Offsets ( Index() , Indexing(..) , Unit(..) , Offset(..) , Pos(..) , Interval(..) , LineNumber , AgdaIndex , AgdaOffset , AgdaPos , AgdaInterval , VimIndex , VimOffset , VimPos , VimInterval , toZeroIndexed , toOneIndexed , fromZeroIndexed , fromOneIndexed , zeroIndex , oneIndex , incIndex , (.+) , (.-.) , offsetPlus , textToBytes , charToBytes , toBytes , fromBytes , containsPoint , addCol ) where import Data.Aeson (FromJSON) import qualified Data.ByteString as BS import Data.Coerce (coerce) import Data.Monoid (Sum(..)) import qualified Data.Text as T import Data.Text.Encoding (encodeUtf8) import GHC.Generics (Generic) import GHC.Stack (HasCallStack) import GHC.Show (showSpace) import Prettyprinter (Pretty) import qualified DiffLoc as D -- | Indexing scheme: whether the first index is zero or one. data Indexing = OneIndexed | ZeroIndexed -- | What are we counting? data Unit = Byte | CodePoint | Line -- | The constructor is hidden, use 'toZeroIndexed' and 'toOneIndexed' to construct it, -- and 'fromZeroIndexed' and 'fromOneIndexed' to destruct it. newtype Index (e :: Unit) (i :: Indexing) = Index Int deriving newtype (Eq, Ord, Show, Read, FromJSON, Pretty) type role Index nominal nominal -- | It doesn't seem worth the trouble to hide this constructor. newtype Offset (e :: Unit) = Offset Int deriving newtype (Eq, Ord, Show, Read, FromJSON, Pretty) deriving (Semigroup, Monoid) via Sum Int type role Offset nominal -- | Position in a text file as line-column numbers. This type is indexed by -- the units of the columns (@Byte@ or @CodePoint@) and by the indexing scheme -- of lines and columns. data Pos e i j = Pos { p_line :: Index 'Line i , p_col :: Index e j } deriving (Eq, Ord, Generic) instance Show (Pos e i j) where showsPrec n (Pos l c) = showParen (n >= 11) $ showString "Pn () 0 " . showsPrec 11 l . showSpace . showsPrec 11 c data Interval p = Interval { iStart, iEnd :: !p } deriving (Eq, Ord, Functor, Foldable, Traversable, Generic) instance Show p => Show (Interval p) where showsPrec n (Interval s e) = showParen (n >= 11) $ showString "Interval " . showsPrec 11 s . showSpace . showsPrec 11 e -- Common specializations type LineNumber = Index 'Line type AgdaIndex = Index 'CodePoint 'OneIndexed type AgdaOffset = Offset 'CodePoint type AgdaPos = Pos 'CodePoint 'OneIndexed 'OneIndexed type AgdaInterval = Interval AgdaPos type VimIndex = Index 'Byte 'ZeroIndexed type VimOffset = Offset 'Byte type VimPos = Pos 'Byte 'ZeroIndexed 'ZeroIndexed type VimInterval = Interval VimPos -- To pass indices to and from external sources. -- | Mark a raw index as zero-indexed. toZeroIndexed :: Integral a => a -> Index e 'ZeroIndexed toZeroIndexed a = Index (fromIntegral a) -- | Mark a raw index as one-indexed. toOneIndexed :: Integral a => a -> Index e 'OneIndexed toOneIndexed a = Index (fromIntegral a) -- | Unwrap a raw zero-indexed index. fromZeroIndexed :: Num a => Index e 'ZeroIndexed -> a fromZeroIndexed (Index a) = fromIntegral a -- | Unwrap a raw zero-indexed index. fromOneIndexed :: Num a => Index e 'OneIndexed -> a fromOneIndexed (Index a) = fromIntegral a -- | Convert from one- to zero-indexed. zeroIndex :: Index e 'OneIndexed -> Index e 'ZeroIndexed zeroIndex (Index i) = Index (i - 1) -- | Convert from zero- to one-indexed. oneIndex :: Index e 'ZeroIndexed -> Index e 'OneIndexed oneIndex (Index i) = Index (i + 1) -- | Increment index. incIndex :: Index e i -> Index e i incIndex (Index i) = Index (i + 1) -- | Add an offset to an index. (.+) :: Index e i -> Offset e -> Index e i Index i .+ Offset n = Index (i + n) (.-.) :: Index e i -> Index e i -> Offset e Index i .-. Index j = Offset (i - j) -- offsetPlus :: Offset a -> Offset a -> Offset a offsetPlus = coerce $ (+) @Int -- containsPoint :: Ord p => Interval p -> p -> Bool containsPoint (Interval s e) p = s <= p && p < e -- | Number of bytes in a 'T.Text'. textToBytes :: T.Text -> Int textToBytes t = BS.length (encodeUtf8 t) -- | Number of bytes in a 'Char'. charToBytes :: Char -> Int charToBytes c = textToBytes (T.singleton c) ------------------------------------------------------------------------------ -- | Convert a character-based index into a byte-indexed one toBytes :: T.Text -> Index 'CodePoint 'ZeroIndexed -> Index 'Byte 'ZeroIndexed toBytes s (Index i) = Index $ textToBytes $ T.take (fromIntegral i) s ------------------------------------------------------------------------------ -- | Convert a byte-based index into a character-indexed one. fromBytes :: HasCallStack => T.Text -> Index 'Byte 'ZeroIndexed -> Index 'CodePoint 'ZeroIndexed fromBytes t (Index i) | i < 0 = error $ "from bytes underflow" <> show (t, i) fromBytes _ (Index 0) = Index 0 fromBytes t (Index i) | Just (c, str) <- T.uncons t = let diff = BS.length $ encodeUtf8 $ T.singleton c in case i - diff >= 0 of True -> Index $ 1 + coerce (fromBytes str (Index (i - diff))) -- We ran out of bytes in the middle of a multibyte character. Just -- return the one we're on, and don't underflow! False -> Index 0 fromBytes t i = error $ "missing bytes: " <> show (t, i) addCol :: Pos e i j -> Offset e -> Pos e i j addCol (Pos l c) dc = Pos l (c .+ dc) -- | Ordered monoid action of offsets on indices. instance D.Amor (Index e i) where type Trans (Index e i) = Offset e (.+) = (Cornelis.Offsets..+) i .-.? j | i >= j = Just (i .-. j) | otherwise = Nothing -- | The zero in zero-indexing. instance D.Origin (Index e 'ZeroIndexed) where origin = toZeroIndexed (0 :: Int)