-- |
-- Module      :  ELynx.Data.Character.Character
-- Description :  Character interface
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct 12 16:24:02 2018.
--
-- See header of 'ELynx.Data.Alphabet.Alphabet'.
module ELynx.Data.Character.Character
  ( Character (..),
    fromChar,
    toChar,
    fromString,
    toString,
    CharacterX (..),
    isGap,
    CharacterI (..),
    isUnknown,
    isIUPAC,
    isStandard,
    convert,
  )
where

import Data.ByteString.Internal (c2w, w2c)
import qualified Data.Set as S
import Data.Vector.Unboxed.Base (Unbox)
import Data.Word8 (Word8)

-- XXX: Remove name clash with ELynx.Data.Alphabet.Alphabet.Character?

-- | A set of characters forms an 'ELynx.Data.Alphabet.Alphabet'. At the
-- moment, 'Word8' is used as the underlying representation, since none of the
-- alphabets has more than 255 characters.
--
-- The conversions to and from 'Char' and 'String' provided by this module are
-- all built on top of 'toWord' and 'fromWord'.
class (Show a, Read a, Eq a, Ord a, Enum a, Bounded a, Unbox a) => Character a where
  -- | Write a character, i.e., convert it to its 'Word8' representation.
  toWord :: a -> Word8

  -- | Read a character, i.e., construct it from its 'Word8' representation.
  --
  -- NOTE(review): instances may define this partially (the documentation of
  -- 'convert' mentions that it may throw an error); confirm per instance.
  fromWord :: Word8 -> a

-- | Conversion to 'Char'.
--
-- The character is first written to its 'Word8' representation with 'toWord',
-- which is then reinterpreted as a 'Char' with 'w2c'.
toChar :: Character a => a -> Char
toChar = w2c . toWord

-- | Conversion from 'Char'.
--
-- The 'Char' is converted to a 'Word8' with 'c2w' and then read with
-- 'fromWord'.
--
-- NOTE(review): 'c2w' is documented to truncate code points that do not fit
-- into a single byte, so only 8-bit characters are presumably meaningful
-- input here; confirm against callers.
fromChar :: Character a => Char -> a
fromChar = fromWord . c2w

-- | Conversion to 'String'.
--
-- Applies 'toChar' to every element of the list; the empty list yields the
-- empty string.
toString :: Character a => [a] -> String
toString = map toChar

-- | Conversion from 'String'.
--
-- Applies 'fromChar' to every element of the string; the empty string yields
-- the empty list.
fromString :: Character a => String -> [a]
fromString = map fromChar

-- | An extended character type with gaps and unknowns.
class Character a => CharacterX a where
  -- | The character denoting a gap; 'isGap' compares against this value.
  gap :: a

-- | Is the character a gap, i.e., equal to 'gap'?
--
-- Only 'gap' is tested here. Unknown characters of 'CharacterI' types are
-- detected with 'isUnknown'.
isGap :: CharacterX a => a -> Bool
isGap c = c == gap

-- | IUPAC characters with a mapping to extended characters.
class CharacterX a => CharacterI a where
  -- | The character denoting an unknown (e.g., N for nucleotides); 'isUnknown'
  -- compares against this value.
  unknown :: a

  -- | The list of IUPAC characters; 'isIUPAC' tests membership in this list.
  iupac :: [a]

  -- | Map a character to a list of characters.
  --
  -- NOTE(review): presumably the standard characters a (possibly ambiguous)
  -- IUPAC character stands for; confirm against the instances.
  toStandard :: a -> [a]

-- | Check if an IUPAC 'CharacterI' is unknown (e.g., N for nucleotides), i.e.,
-- equal to 'unknown'.
isUnknown :: CharacterI a => a -> Bool
isUnknown c = c == unknown

-- | The IUPAC characters of a 'CharacterI' type as a set, used for membership
-- tests by 'isIUPAC'.
--
-- NOTE(review): this value is class-polymorphic, so it is presumably not
-- shared between calls unless GHC specializes it; confirm if the lookup ever
-- shows up in profiles.
iupacLookup :: CharacterI a => S.Set a
iupacLookup = S.fromList iupac

-- | Is the given character an IUPAC character, i.e., an element of 'iupac'?
isIUPAC :: CharacterI a => a -> Bool
isIUPAC c = c `S.member` iupacLookup

-- | Is the given character a standard character?
--
-- Defined as the negation of 'isIUPAC': every character that is not listed in
-- 'iupac' counts as standard.
isStandard :: CharacterI a => a -> Bool
isStandard = not . isIUPAC

-- | Convert between character classes. May throw error.
--
-- The character is written to its 'Word8' representation with 'toWord' of the
-- source type and read back with 'fromWord' of the target type.
--
-- NOTE(review): any failure presumably originates from a partial 'fromWord' of
-- the target type, when the word does not denote one of its characters;
-- confirm against the instances.
convert :: (Character a, Character b) => a -> b
convert = fromWord . toWord