{-# LANGUAGE EmptyDataDecls #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE CPP #-}

module Generics.Deriving.Base (
-- * Introduction
--
-- |
--
-- Datatype-generic functions are are based on the idea of converting values of
-- a datatype @T@ into corresponding values of a (nearly) isomorphic type @'Rep' T@.
-- The type @'Rep' T@ is
-- built from a limited set of type constructors, all provided by this module. A
-- datatype-generic function is then an overloaded function with instances
-- for most of these type constructors, together with a wrapper that performs
-- the mapping between @T@ and @'Rep' T@. By using this technique, we merely need
-- a few generic instances in order to implement functionality that works for any
-- representable type.
--
-- Representable types are collected in the 'Generic' class, which defines the
-- associated type 'Rep' as well as conversion functions 'from' and 'to'.
-- Typically, you will not define 'Generic' instances by hand, but have the compiler
-- derive them for you.

-- ** Representing datatypes
--
-- |
--
-- The key to defining your own datatype-generic functions is to understand how to
-- represent datatypes using the given set of type constructors.
--
-- Let us look at an example first:
--
-- @
-- data Tree a = Leaf a | Node (Tree a) (Tree a)
--   deriving 'Generic'
-- @
--
-- The above declaration (which requires the language pragma @DeriveGeneric@)
-- causes the following representation to be generated:
--
-- @
-- class 'Generic' (Tree a) where
--   type 'Rep' (Tree a) =
--     'D1' D1Tree
--       ('C1' C1_0Tree
--          ('S1' 'NoSelector' ('Par0' a))
--        ':+:'
--        'C1' C1_1Tree
--          ('S1' 'NoSelector' ('Rec0' (Tree a))
--           ':*:'
--           'S1' 'NoSelector' ('Rec0' (Tree a))))
--   ...
-- @
--
-- /Hint:/ You can obtain information about the code being generated from GHC by passing
-- the @-ddump-deriv@ flag. In GHCi, you can expand a type family such as 'Rep' using
-- the @:kind!@ command.
--
#if 0
-- /TODO:/ Newer GHC versions abandon the distinction between 'Par0' and 'Rec0' and will
-- use 'Rec0' everywhere.
--
#endif
-- This is a lot of information! However, most of it is actually merely meta-information
-- that makes names of datatypes and constructors and more available on the type level.
--
-- Here is a reduced representation for 'Tree' with nearly all meta-information removed,
-- for now keeping only the most essential aspects:
--
-- @
-- instance 'Generic' (Tree a) where
--   type 'Rep' (Tree a) =
--     'Par0' a
--     ':+:'
--     ('Rec0' (Tree a) ':*:' 'Rec0' (Tree a))
-- @
--
-- The @Tree@ datatype has two constructors. The representation of individual constructors
-- is combined using the binary type constructor ':+:'.
--
-- The first constructor consists of a single field, which is the parameter @a@. This is
-- represented as @'Par0' a@.
--
-- The second constructor consists of two fields. Each is a recursive field of type @Tree a@,
-- represented as @'Rec0' (Tree a)@. Representations of individual fields are combined using
-- the binary type constructor ':*:'.
--
-- Now let us explain the additional tags being used in the complete representation:
--
--    * The @'S1' 'NoSelector'@ indicates that there is no record field selector associated with
--      this field of the constructor.
--
--    * The @'C1' C1_0Tree@ and @'C1' C1_1Tree@ invocations indicate that the enclosed part is
--      the representation of the first and second constructor of datatype @Tree@, respectively.
--      Here, @C1_0Tree@ and @C1_1Tree@ are datatypes generated by the compiler as part of
--      @deriving 'Generic'@. These datatypes are proxy types with no values. They are useful
--      because they are instances of the type class 'Constructor'. This type class can be used
--      to obtain information about the constructor in question, such as its name
--      or infix priority.
--
--    * The @'D1' D1Tree@ tag indicates that the enclosed part is the representation of the
--      datatype @Tree@. Again, @D1Tree@ is a datatype generated by the compiler. It is a
--      proxy type, and is useful by being an instance of class 'Datatype', which
--      can be used to obtain the name of a datatype, the module it has been defined in, and
--      whether it has been defined using @data@ or @newtype@.

-- ** Derived and fundamental representation types
--
-- |
--
-- There are many datatype-generic functions that do not distinguish between positions that
-- are parameters or positions that are recursive calls. There are also many datatype-generic
-- functions that do not care about the names of datatypes and constructors at all. To keep
-- the number of cases to consider in generic functions in such a situation to a minimum,
-- it turns out that many of the type constructors introduced above are actually synonyms,
-- defining them to be variants of a smaller set of constructors.

-- *** Individual fields of constructors: 'K1'
--
-- |
--
-- The type constructors 'Par0' and 'Rec0' are variants of 'K1':
--
-- @
-- type 'Par0' = 'K1' 'P'
-- type 'Rec0' = 'K1' 'R'
-- @
--
-- Here, 'P' and 'R' are type-level proxies again that do not have any associated values.

-- *** Meta information: 'M1'
--
-- |
--
-- The type constructors 'S1', 'C1' and 'D1' are all variants of 'M1':
--
-- @
-- type 'S1' = 'M1' 'S'
-- type 'C1' = 'M1' 'C'
-- type 'D1' = 'M1' 'D'
-- @
--
-- The types 'S', 'C' and 'R' are once again type-level proxies, just used to create
-- several variants of 'M1'.

-- *** Additional generic representation type constructors
--
-- |
--
-- Next to 'K1', 'M1', ':+:' and ':*:' there are a few more type constructors that occur
-- in the representations of other datatypes.

-- **** Empty datatypes: 'V1'
--
-- |
--
-- For empty datatypes, 'V1' is used as a representation. For example,
--
-- @
-- data Empty deriving 'Generic'
-- @
--
-- yields
--
-- @
-- instance 'Generic' Empty where
--   type 'Rep' Empty = 'D1' D1Empty 'V1'
-- @

-- **** Constructors without fields: 'U1'
--
-- |
--
-- If a constructor has no arguments, then 'U1' is used as its representation. For example
-- the representation of 'Bool' is
--
-- @
-- instance 'Generic' Bool where
--   type 'Rep' Bool =
--     'D1' D1Bool
--       ('C1' C1_0Bool 'U1' ':+:' 'C1' C1_1Bool 'U1')
-- @

-- *** Representation of types with many constructors or many fields
--
-- |
--
-- As ':+:' and ':*:' are just binary operators, one might ask what happens if the
-- datatype has more than two constructors, or a constructor with more than two
-- fields. The answer is simple: the operators are used several times, to combine
-- all the constructors and fields as needed. However, users /should not rely on
-- a specific nesting strategy/ for ':+:' and ':*:' being used. The compiler is
-- free to choose any nesting it prefers. (In practice, the current implementation
-- tries to produce a more or less balanced nesting, so that the traversal of the
-- structure of the datatype from the root to a particular component can be performed
-- in logarithmic rather than linear time.)

-- ** Defining datatype-generic functions
--
-- |
--
-- A datatype-generic function comprises two parts:
--
--    1. /Generic instances/ for the function, implementing it for most of the representation
--       type constructors introduced above.
--
--    2. A /wrapper/ that for any datatype that is in `Generic`, performs the conversion
--       between the original value and its `Rep`-based representation and then invokes the
--       generic instances.
--
-- As an example, let us look at a function 'encode' that produces a naive, but lossless
-- bit encoding of values of various datatypes. So we are aiming to define a function
--
-- @
-- encode :: 'Generic' a => a -> [Bool]
-- @
--
-- where we use 'Bool' as our datatype for bits.
--
-- For part 1, we define a class @Encode'@. Perhaps surprisingly, this class is parameterized
-- over a type constructor @f@ of kind @* -> *@. This is a technicality: all the representation
-- type constructors operate with kind @* -> *@ as base kind. But the type argument is never
-- being used. This may be changed at some point in the future. The class has a single method,
-- and we use the type we want our final function to have, but we replace the occurrences of
-- the generic type argument @a@ with @f p@ (where the @p@ is any argument; it will not be used).
--
-- > class Encode' f where
-- >   encode' :: f p -> [Bool]
--
-- With the goal in mind to make @encode@ work on @Tree@ and other datatypes, we now define
-- instances for the representation type constructors 'V1', 'U1', ':+:', ':*:', 'K1', and 'M1'.

-- *** Definition of the generic representation types
--
-- |
--
-- In order to be able to do this, we need to know the actual definitions of these types:
--
-- @
-- data    'V1'        p                       -- lifted version of Empty
-- data    'U1'        p = 'U1'                  -- lifted version of ()
-- data    (':+:') f g p = 'L1' (f p) | 'R1' (g p) -- lifted version of 'Either'
-- data    (':*:') f g p = (f p) ':*:' (g p)     -- lifted version of (,) 
-- newtype 'K1'    i c p = 'K1' { 'unK1' :: c }    -- a container for a c
-- newtype 'M1'  i t f p = 'M1' { 'unM1' :: f p }  -- a wrapper
-- @
--
-- So, 'U1' is just the unit type, ':+:' is just a binary choice like 'Either',
-- ':*:' is a binary pair like the pair constructor @(,)@, and 'K1' is a value
-- of a specific type @c@, and 'M1' wraps a value of the generic type argument,
-- which in the lifted world is an @f p@ (where we do not care about @p@).

-- *** Generic instances
--
-- |
--
-- The instance for 'V1' is slightly awkward (but also rarely used):
--
-- @
-- instance Encode' 'V1' where
--   encode' x = undefined
-- @
--
-- There are no values of type @V1 p@ to pass (except undefined), so this is
-- actually impossible. One can ask why it is useful to define an instance for
-- 'V1' at all in this case? Well, an empty type can be used as an argument to
-- a non-empty type, and you might still want to encode the resulting type.
-- As a somewhat contrived example, consider @[Empty]@, which is not an empty
-- type, but contains just the empty list. The 'V1' instance ensures that we
-- can call the generic function on such types.
--
-- There is exactly one value of type 'U1', so encoding it requires no
-- knowledge, and we can use zero bits:
--
-- @
-- instance Encode' 'U1' where
--   encode' 'U1' = []
-- @
--
-- In the case for ':+:', we produce 'False' or 'True' depending on whether
-- the constructor of the value provided is located on the left or on the right:
--
-- @
-- instance (Encode' f, Encode' g) => Encode' (f ':+:' g) where
--   encode' ('L1' x) = False : encode' x
--   encode' ('R1' x) = True  : encode' x
-- @
--
-- In the case for ':*:', we append the encodings of the two subcomponents:
--
-- @
-- instance (Encode' f, Encode' g) => Encode' (f ':*:' g) where
--   encode' (x ':*:' y) = encode' x ++ encode' y
-- @
--
-- The case for 'K1' is rather interesting. Here, we call the final function
-- 'encode' that we yet have to define, recursively. We will use another type
-- class 'Encode' for that function:
--
-- @
-- instance (Encode c) => Encode' ('K1' i c) where
--   encode' ('K1' x) = encode x
-- @
--
-- Note how 'Par0' and 'Rec0' both being mapped to 'K1' allows us to define
-- a uniform instance here.
--
-- Similarly, we can define a uniform instance for 'M1', because we completely
-- disregard all meta-information:
--
-- @
-- instance (Encode' f) => Encode' ('M1' i t f) where
--   encode' ('M1' x) = encode' x
-- @
--
-- Unlike in 'K1', the instance for 'M1' refers to 'encode'', not 'encode'.

-- *** The wrapper and generic default
--
-- |
--
-- We now define class 'Encode' for the actual 'encode' function:
--
-- @
-- class Encode a where
--   encode :: a -> [Bool]
--   default encode :: ('Generic' a) => a -> [Bool]
--   encode x = encode' ('from' x)
-- @
--
-- The incoming 'x' is converted using 'from', then we dispatch to the
-- generic instances using 'encode''. We use this as a default definition
-- for 'encode'. We need the 'default encode' signature because ordinary
-- Haskell default methods must not introduce additional class constraints,
-- but our generic default does.
--
-- Defining a particular instance is now as simple as saying
--
-- @
-- instance (Encode a) => Encode (Tree a)
-- @
--
#if 0
-- /TODO:/ Add usage example?
--
#endif
-- The generic default is being used. In the future, it will hopefully be
-- possible to use @deriving Encode@ as well, but GHC does not yet support
-- that syntax for this situation.
--
-- Having 'Encode' as a class has the advantage that we can define
-- non-generic special cases, which is particularly useful for abstract
-- datatypes that have no structural representation. For example, given
-- a suitable integer encoding function 'encodeInt', we can define
--
-- @
-- instance Encode Int where
--   encode = encodeInt
-- @

-- *** Omitting generic instances
--
-- |
--
-- It is not always required to provide instances for all the generic
-- representation types, but omitting instances restricts the set of
-- datatypes the functions will work for:
--
--    * If no ':+:' instance is given, the function may still work for
--      empty datatypes or datatypes that have a single constructor,
--      but will fail on datatypes with more than one constructor.
--
--    * If no ':*:' instance is given, the function may still work for
--      datatypes where each constructor has just zero or one field,
--      in particular for enumeration types.
--
--    * If no 'K1' instance is given, the function may still work for
--      enumeration types, where no constructor has any fields.
--
--    * If no 'V1' instance is given, the function may still work for
--      any datatype that is not empty.
--
--    * If no 'U1' instance is given, the function may still work for
--      any datatype where each constructor has at least one field.
--
-- An 'M1' instance is always required (but it can just ignore the
-- meta-information, as is the case for 'encode' above).
#if 0
-- *** Using meta-information
--
-- |
--
-- TODO
#endif
-- ** Generic constructor classes
--
-- |
--
-- Datatype-generic functions as defined above work for a large class
-- of datatypes, including parameterized datatypes. (We have used 'Tree'
-- as our example above, which is of kind @* -> *@.) However, the
-- 'Generic' class ranges over types of kind @*@, and therefore, the
-- resulting generic functions (such as 'encode') must be parameterized
-- by a generic type argument of kind @*@.
--
-- What if we want to define generic classes that range over type
-- constructors (such as 'Functor', 'Traversable', or 'Foldable')?

-- *** The 'Generic1' class
--
-- |
--
-- Like 'Generic', there is a class 'Generic1' that defines a
-- representation 'Rep1' and conversion functions 'from1' and 'to1',
-- only that 'Generic1' ranges over types of kind @* -> *@.
-- The 'Generic1' class is also derivable.
--
-- The representation 'Rep1' is ever so slightly different from 'Rep'.
-- Let us look at 'Tree' as an example again:
--
-- @
-- data Tree a = Leaf a | Node (Tree a) (Tree a)
--   deriving 'Generic1'
-- @
--
-- The above declaration causes the following representation to be generated:
--
-- class 'Generic1' Tree where
--   type 'Rep1' Tree =
--     'D1' D1Tree
--       ('C1' C1_0Tree
--          ('S1' 'NoSelector' 'Par1')
--        ':+:'
--        'C1' C1_1Tree
--          ('S1' 'NoSelector' ('Rec1' Tree)
--           ':*:'
--           'S1' 'NoSelector' ('Rec1' Tree)))
--   ...
--
-- The representation reuses 'D1', 'C1', 'S1' (and thereby 'M1') as well
-- as ':+:' and ':*:' from 'Rep'. (This reusability is the reason that we
-- carry around the dummy type argument for kind-@*@-types, but there are
-- already enough different names involved without duplicating each of
-- these.)
--
-- What's different is that we now use 'Par1' to refer to the parameter
-- (and that parameter, which used to be @a@), is not mentioned explicitly
-- by name anywhere; and we use 'Rec1' to refer to a recursive use of @Tree a@.

-- *** Representation of @* -> *@ types
--
-- |
--
-- Unlike 'Par0' and 'Rec0', the 'Par1' and 'Rec1' type constructors do not
-- map to 'K1'. They are defined directly, as follows:
--
-- @
-- newtype 'Par1'   p = 'Par1' { 'unPar1' ::   p } -- gives access to parameter p
-- newtype 'Rec1' f p = 'Rec1' { 'unRec1' :: f p } -- a wrapper
-- @
--
-- In 'Par1', the parameter @p@ is used for the first time, whereas 'Rec1' simply
-- wraps an application of @f@ to @p@.
--
-- Note that 'K1' (in the guise of 'Rec0') can still occur in a 'Rep1' representation,
-- namely when the datatype has a field that does not mention the parameter.
--
-- The declaration
--
-- @
-- data WithInt a = WithInt Int a
--   deriving 'Generic1'
-- @
--
-- yields
--
-- @
-- class 'Rep1' WithInt where
--   type 'Rep1' WithInt =
--     'D1' D1WithInt
--       ('C1' C1_0WithInt
--         ('S1' 'NoSelector' ('Rec0' Int)
--          ':*:'
--          'S1' 'NoSelector' 'Par1'))
-- @
--
-- If the parameter @a@ appears underneath a composition of other type constructors,
-- then the representation involves composition, too:
--
-- @
-- data Rose a = Fork a [Rose a]
-- @
--
-- yields
--
-- @
-- class 'Rep1' Rose where
--   type 'Rep1' Rose =
--     'D1' D1Rose
--       ('C1' C1_0Rose
--         ('S1' 'NoSelector' 'Par1'
--          ':*:'
--          'S1' 'NoSelector' ([] ':.:' 'Rec1' Rose)
-- @
--
-- where
--
-- @
-- newtype (':.:') f g p = 'Comp1' { 'unComp1' :: f (g p) }
-- @
#if 0
-- *** Limitations
--
-- |
--
-- /TODO/
--
-- /TODO:/ Also clear up confusion about 'Rec0' and 'Rec1' not really indicating recursion.
--
#endif
#if __GLASGOW_HASKELL__ < 701
  -- * Generic representation types
    V1, U1(..), Par1(..), Rec1(..), K1(..), M1(..)
  , (:+:)(..), (:*:)(..), (:.:)(..)

  -- ** Synonyms for convenience
  , Rec0, Par0, R, P
  , D1, C1, S1, D, C, S

  -- * Meta-information
  , Datatype(..), Constructor(..), Selector(..), NoSelector
  , Fixity(..), Associativity(..), Arity(..), prec

  -- * Generic type classes
  , Generic(..), Generic1(..)

#else
  module GHC.Generics,
#endif
  ) where


#if __GLASGOW_HASKELL__ >= 701
import GHC.Generics

#else
--------------------------------------------------------------------------------
-- Representation types
--------------------------------------------------------------------------------

-- | Void: used for datatypes without constructors
data V1 p

-- | Unit: used for constructors without arguments
data U1 p = U1

-- | Used for marking occurrences of the parameter
newtype Par1 p = Par1 { unPar1 :: p }


-- | Recursive calls of kind * -> *
newtype Rec1 f p = Rec1 { unRec1 :: f p }

-- | Constants, additional parameters and recursion of kind *
newtype K1 i c p = K1 { unK1 :: c }

-- | Meta-information (constructor names, etc.)
newtype M1 i c f p = M1 { unM1 :: f p }

-- | Sums: encode choice between constructors
infixr 5 :+:
data (:+:) f g p = L1 { unL1 :: f p } | R1 { unR1 :: g p }

-- | Products: encode multiple arguments to constructors
infixr 6 :*:
data (:*:) f g p = f p :*: g p

-- | Composition of functors
infixr 7 :.:
newtype (:.:) f g p = Comp1 { unComp1 :: f (g p) }

-- | Tag for K1: recursion (of kind *)
data R
-- | Tag for K1: parameters (other than the last)
data P

-- | Type synonym for encoding recursion (of kind *)
type Rec0  = K1 R
-- | Type synonym for encoding parameters (other than the last)
type Par0  = K1 P

-- | Tag for M1: datatype
data D
-- | Tag for M1: constructor
data C
-- | Tag for M1: record selector
data S

-- | Type synonym for encoding meta-information for datatypes
type D1 = M1 D

-- | Type synonym for encoding meta-information for constructors
type C1 = M1 C

-- | Type synonym for encoding meta-information for record selectors
type S1 = M1 S

-- | Class for datatypes that represent datatypes
class Datatype d where
  -- | The name of the datatype, fully qualified
  datatypeName :: t d (f :: * -> *) a -> String
  moduleName   :: t d (f :: * -> *) a -> String

-- | Class for datatypes that represent records
class Selector s where
  -- | The name of the selector
  selName :: t s (f :: * -> *) a -> String

-- | Used for constructor fields without a name
data NoSelector

instance Selector NoSelector where selName _ = ""

-- | Class for datatypes that represent data constructors
class Constructor c where
  -- | The name of the constructor
  conName :: t c (f :: * -> *) a -> String

  -- | The fixity of the constructor
  conFixity :: t c (f :: * -> *) a -> Fixity
  conFixity = const Prefix

  -- | Marks if this constructor is a record
  conIsRecord :: t c (f :: * -> *) a -> Bool
  conIsRecord = const False


-- | Datatype to represent the arity of a tuple.
data Arity = NoArity | Arity Int
  deriving (Eq, Show, Ord, Read)

-- | Datatype to represent the fixity of a constructor. An infix
-- | declaration directly corresponds to an application of 'Infix'.
data Fixity = Prefix | Infix Associativity Int
  deriving (Eq, Show, Ord, Read)

-- | Get the precedence of a fixity value.
prec :: Fixity -> Int
prec Prefix      = 10
prec (Infix _ n) = n

-- | Datatype to represent the associativy of a constructor
data Associativity =  LeftAssociative 
                   |  RightAssociative
                   |  NotAssociative
  deriving (Eq, Show, Ord, Read)

-- | Representable types of kind *
class Generic a where
  type Rep a :: * -> *
  -- | Convert from the datatype to its representation
  from  :: a -> Rep a x
  -- | Convert from the representation to the datatype
  to    :: Rep a x -> a

-- | Representable types of kind * -> *
class Generic1 f where
  type Rep1 f :: * -> *
  -- | Convert from the datatype to its representation
  from1  :: f a -> Rep1 f a
  -- | Convert from the representation to the datatype
  to1    :: Rep1 f a -> f a

#endif