Algebraic datatypes are a family of constructions arising out of two
operations, products (a * b
) and sums (a + b
) (sometimes also called
coproducts). A product encodes multiple arguments to constructors and sums
encode choice between constructors.
{-# LANGUAGE TypeOperators #-}
data Unit = Unit -- 1
data Empty -- 0
data (a * b) = Product a b -- a * b
data (a + b) = Inl a | Inr b -- a + b
data Exp a b = Exp (a -> b) -- a^b
data Rec f = Rec (f (Rec f)) -- \mu
The two constructors Inl
and Inr
are the left and right injections for
the sum. These allows us to construct sums.
Inl :: a -> a + b
Inr :: b -> a + b
Likewise for the product there are two function fst
and snd
which are
projections which de construct products.
fst :: a * b -> a
snd :: a * b -> b
Once a language is endowed with the capacity to write a single product or a single sum, all higher order products can written in terms of sums of products. For example a 3-tuple can be written in terms of the composite of two 2-tuples. And indeed any n-tuple or record type can be written in terms of compositions of products.
type Prod3 a b c = a*(b*c)
data Prod3' a b c
= Prod3 a b c
prod3 :: Prod3 Int Int Int
prod3 = Product 1 (Product 2 3)
Or a sum type of three options can be written in terms of two sums:
type Sum3 a b c = (a+b)+c
data Sum3' a b c
= Opt1 a
| Opt2 b
| Opt3 c
sum3 :: Sum3 Int Int Int
sum3 = Inl (Inl 2)
data Option a = None | Some a
type Option' a = Unit + a
some :: Unit + a
some = Inl Unit
none :: a -> Unit + a
none a = Inr a
In Haskell the convention for the sum and product notation is as follows:
Notation Haskell Type
a * b
(a,b)
a + b
Either a b
Inl
Left
Inr
Right
Empty
Void
Unit
()
roll :: Rec f -> f (Rec f)
roll (Rec f) = f
unroll :: f (Rec f) -> Rec f
unroll f = Rec f
Peano numbers:
type Nat = Rec NatF
data NatF s = Zero | Succ s
zero :: Nat
zero = Rec Zero
succ :: Nat -> Nat
succ x = Rec (Succ x)
Lists:
type List a = Rec (ListF a)
data ListF a b = Nil | Cons a b
nil :: List a
nil = Rec Nil
cons :: a -> List a -> List a
cons x y = Rec (Cons x y)
Just as the type-level representation
typedef union {
int a;
float b;
} Sum;
typedef struct {
int a;
float b;
} Prod;
int main()
{
Prod x = { .a = 1, .b = 2.0 };
Sum sum1 = { .a = 1 };
Sum sum2 = { .b = 2.0 };
}
#include <stddef.h>
typedef struct T
{
enum { NONE, SOME } tag;
union
{
void *none;
int some;
} value;
} Option;
int main()
{
Option a = { .tag = NONE, .value = { .none = NULL } };
Option b = { .tag = SOME, .value = { .some = 3 } };
}
In Haskell:
data T
= Add T T
| Mul T T
| Div T T
| Sub T T
| Num Int
eval :: T -> Int
eval x = case x of
Add a b -> eval a + eval b
Mul a b -> eval a * eval b
Div a b -> eval a / eval b
Sub a b -> eval a - eval b
Num a -> a
In C:
typedef struct T {
enum { ADD, MUL, DIV, SUB, NUM } tag;
union {
struct {
struct T *left, *right;
} node;
int value;
};
} Expr;
int eval(Expr t)
{
switch (t.tag) {
case ADD:
return eval(*t.node.left) + eval(*t.node.right);
break;
case MUL:
return eval(*t.node.left) * eval(*t.node.right);
break;
case DIV:
return eval(*t.node.left) / eval(*t.node.right);
break;
case SUB:
return eval(*t.node.left) - eval(*t.node.right);
break;
case NUM:
return t.value;
break;
}
}
class Generic a where
type family Rep a :: * -> *
to :: a -> Rep a x
from :: Rep a x -> a
Constructor Models
V1
Void: used for datatypes without constructors
U1
Unit: used for constructors without arguments
K1
Constants, additional parameters.
:*:
Products: encode multiple arguments to constructors
:+:
Sums: encode choice between constructors
L1
Left hand side of a sum.
R1
Right hand side of a sum.
M1
Meta-information (constructor names, etc.)
newtype M1 i c f p = M1 (f p)
newtype K1 i c p = K1 c
data U p = U
data (:*:) a b p = a p :*: b p
data (:+:) a b p = L1 (a p) | R1 (b p)
Implementation:
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE DefaultSignatures #-}
import GHC.Generics
-- Auxiliary class
class GEq' f where
geq' :: f a -> f a -> Bool
instance GEq' U1 where
geq' _ _ = True
instance (GEq c) => GEq' (K1 i c) where
geq' (K1 a) (K1 b) = geq a b
instance (GEq' a) => GEq' (M1 i c a) where
geq' (M1 a) (M1 b) = geq' a b
instance (GEq' a, GEq' b) => GEq' (a :+: b) where
geq' (L1 a) (L1 b) = geq' a b
geq' (R1 a) (R1 b) = geq' a b
geq' _ _ = False
instance (GEq' a, GEq' b) => GEq' (a :*: b) where
geq' (a1 :*: b1) (a2 :*: b2) = geq' a1 a2 && geq' b1 b2
--
class GEq a where
geq :: a -> a -> Bool
default geq :: (Generic a, GEq' (Rep a)) => a -> a -> Bool
geq x y = geq' (from x) (from y)
-- Base equalities
instance GEq Char where geq = (==)
instance GEq Int where geq = (==)
instance GEq Float where geq = (==)
-- Equalities derived from structure of (:+:) and (:*:) for free
instance GEq a => GEq (Maybe a)
instance (GEq a, GEq b) => GEq (a,b)
main :: IO ()
main = do
print $ geq 2 (3 :: Int) -- False
print $ geq 'a' 'b' -- False
print $ geq (Just 'a') (Just 'a') -- True
print $ geq ('a','b') ('a', 'b') -- True
\clearpage