{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# OPTIONS_GHC -Wall #-}
{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
-- | An implementation of Lawrence S. Moss' @1\#@ language and Text
-- Register Machine ().
--
-- This module also includes a slightly higher-level language, @1\#L@,
-- that replaces the forward and backward relative jumps of @1\#@ with
-- labels and goto instructions.
module Language.TRM.Base (
-- * Basic Text Register Machine
-- ** Letters and Words
Letter (..)
, Word (..)
, wordToString
-- ** Registers, Instructions, and Programs
, Register (..)
, Instruction (..)
, instructionToString
, Program
, programToString
, parseProgram
-- ** Machine Implementation
, Machine (..)
, step
, run
, phi
-- * Labels and Gotos
-- ** Language Definition
, Label
, LInstruction (..)
, LProgram
-- ** Conversion Between Languages
, toLabeledProgram
, fromLabeledProgram
-- ** Concrete Syntax and Semantics
, LSymantics (..)
, LComp (..)
, compileL
, runL
, runL'
-- ** Useful helpers
, do_
, freshLabelHere
-- * Examples
-- * Backwards-Binary Notation
, encodeBB
, decodeBB
) where
import Control.Applicative
import Control.Monad
import "mtl" Control.Monad.State
import "mtl" Control.Monad.Writer
import Data.Char (isSpace)
import Data.List hiding ((++), break)
import Data.Maybe
import Data.Monoid ()
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Vector (Vector)
import qualified Data.Vector as Vector
import GHC.Exts hiding (Word)
import Prelude hiding ((++), break, compare)
import Text.Printf
(++) :: Monoid a => a -> a -> a
(++) = mappend
--------------------------------------------------------------------------------
-- Basic 1# parser and machine
-- | Typed representation of the @1#@ letters.
data Letter = One | Hash deriving (Eq)
-- | A wrapper around a list of 'Letter's with an 'IsString' instance,
-- so that literal strings of @1@s, @#@s, and whitespace can be used
-- instead of lists of 'One's and 'Hash'es. This requires the
-- @-XOverloadedStrings@ flag.
--
-- > loop :: Word
-- > loop = "1### 11####"
newtype Word = W [Letter] deriving (Eq, Monoid)
instance IsString Word where
fromString [] = W []
fromString (x:xs) =
let (W ls) = fromString xs
in case x of
'1' -> W (One:ls)
'#' -> W (Hash:ls)
c | isSpace c -> W ls
_ -> error $ "invalid 1# string: " ++ (x:xs)
-- | Convert a 'Word' back into a 'String'.
wordToString :: Word -> String
wordToString (W []) = ""
wordToString (W (One :ls)) = '1':wordToString (W ls)
wordToString (W (Hash:ls)) = '#':wordToString (W ls)
instance Show Word where
show = show . wordToString
-- | Register identifiers.
newtype Register = R Int deriving (Eq, Ord, Show, Enum, Real, Integral, Num)
-- | Abstract syntax for the primitive @1#@ instructions.
data Instruction = SnocOne Register
| SnocHash Register
| Forward Int
| Backward Int
| Case Register
deriving (Eq, Show)
-- | Convert an 'Instruction' to concrete syntax.
instructionToString :: Instruction -> String
instructionToString (SnocOne (R r)) = replicate r '1' ++ "#"
instructionToString (SnocHash (R r)) = replicate r '1' ++ "##"
instructionToString (Forward i) = replicate i '1' ++ "###"
instructionToString (Backward i) = replicate i '1' ++ "####"
instructionToString (Case (R r)) = replicate r '1' ++ "#####"
-- | A @1#@ program is a 'Vector' of 'Instruction's.
type Program = Vector Instruction
-- | Convert a 'Program' to concrete syntax.
programToString :: Program -> String
programToString = (intercalate " ") . (map instructionToString) . Vector.toList
parseInstruction :: StateT Word Maybe Instruction
parseInstruction = do
(W ls) <- get
guard $ not (null ls)
let (ones , ls' ) = span (One ==) ls
(hashes, ls'') = span (Hash ==) ls'
put (W ls'')
case (length ones, length hashes) of
(r, 1) -> return $ SnocOne (R r)
(r, 2) -> return $ SnocHash (R r)
(i, 3) -> return $ Forward i
(i, 4) -> return $ Backward i
(r, 5) -> return $ Case (R r)
_ -> mzero
-- | Parse a 'Word' into a 'Program'; returns 'Nothing' if an invalid
-- instruction is found.
parseProgram :: Word -> Maybe Program
parseProgram w = Vector.fromList <$> evalStateT loop w
where loop = do (W ls) <- get
case ls of
[] -> return []
_ -> (:) <$> parseInstruction <*> loop
-- | A 'Machine' consists of a 'Program', a program counter, and a
-- 'Map' from registers to the words they contain.
data Machine = M { program :: Program
, pc :: Int
, regs :: Map Register Word
} deriving (Eq, Show)
snocReg :: Register -> Letter -> Map Register Word -> Map Register Word
snocReg r l regs = Map.insertWith (flip (++)) r (W [l]) regs
unsnocReg :: Register -> Map Register Word -> Maybe (Letter, Map Register Word)
unsnocReg r regs =
case Map.lookup r regs of
Nothing -> mzero
Just (W []) -> mzero
Just (W (One :ls)) -> Just (One , Map.insert r (W ls) regs)
Just (W (Hash:ls)) -> Just (Hash, Map.insert r (W ls) regs)
-- | Performs the single 'Instruction' indicated by the program
-- counter, if available. Returns 'Left mach' if a step cannot be
-- performed, and 'Right mach' with an updated 'Machine' otherwise.
step :: Machine -> Either Machine Machine
step mach@M { program, pc }
| pc < 0 || pc >= Vector.length program = Left mach
step mach@M { program, pc, regs } =
case program Vector.! pc of
SnocOne r -> return $ mach { pc = pc+1, regs = snocReg r One regs }
SnocHash r -> return $ mach { pc = pc+1, regs = snocReg r Hash regs }
Forward i -> return $ mach { pc = pc+i }
Backward i -> return $ mach { pc = pc-i }
Case r ->
case unsnocReg r regs of
Nothing -> return $ mach { pc = pc+1 }
Just (One , regs') -> return $ mach { pc = pc+2, regs = regs' }
Just (Hash, regs') -> return $ mach { pc = pc+3, regs = regs' }
-- | Given a 'Program' and the initial state of the registers, return
-- the final state of the registers.
run :: Program -> Map Register Word -> Map Register Word
run p rs = regs $ final
where Left final = loop M { program = p, pc = 0, regs = rs }
loop mach = step mach >>= loop
checkState :: Map Register Word -> Maybe ()
checkState regs = do
_ <- Map.lookup 1 regs
let regs' = Map.delete 1 regs
guard $ all (W [] ==) (Map.elems regs')
-- | Wrapper around 'run' that parses the given 'Word' into a
-- 'Program', and then runs it in the given register state. Returns
-- the value in register 1 once the program halts.
--
-- Returns 'Nothing' when either the given 'Word' fails to parse, or
-- if the machine halts abnormally with an invalid program counter or
-- values in registers other than register 1.
phi :: Word -> [(Register, Word)] -> Maybe Word
phi p rs = do p' <- parseProgram p
let final = run p' $ Map.fromList rs
checkState final
Map.lookup 1 $! final
--------------------------------------------------------------------------------
-- 1#L: Labels and Gotos instead of Forward and Backward
-- | Label representation.
type Label = Int
-- | Abstract syntax for a variant of @1\#@, @1\#L@ with labels and
-- gotos instead of forward and backward jumps.
data LInstruction = LSnocOne Register
| LSnocHash Register
| LCase Register
| LGoto Label
| LLabel Label
deriving (Eq, Show)
-- | A @1\#L@ program is a 'Vector' of 'LInstruction's.
type LProgram = Vector LInstruction
exposeLabels :: Program -> Map Int Label
exposeLabels p = Vector.ifoldl' exposeLabel Map.empty p
where end = Vector.length p
fresh labs = Map.size labs
exposeLabel :: Map Int Label
-> Int
-> Instruction
-> Map Int Label
exposeLabel labs pos (Forward rel)
| pos + rel <= end && pos + rel >= 0
= Map.insertWith (\_ lab -> lab) (pos+rel) (fresh labs) labs
exposeLabel labs pos (Backward rel)
| pos - rel <= end && pos - rel >= 0
= Map.insertWith (\_ lab -> lab) (pos-rel) (fresh labs) labs
exposeLabel _ _ (Forward _) = error "forward jump out of range"
exposeLabel _ _ (Backward _) = error "backward jump out of range"
exposeLabel labs _ _ = labs
-- | Convert a @1\#@ 'Program' into a semantically-equivalent @1\#L@
-- 'LProgram'. May fail with an error if the original 'Program' is
-- /non-tidy/, that is it contains forward or backward jumps to
-- instructions outside of the program.
toLabeledProgram :: Program -> LProgram
toLabeledProgram p = Vector.concat (insertLabels 0)
where labels = exposeLabels p
p' = Vector.imap substLabel p
substLabel _ (SnocOne r) = LSnocOne r
substLabel _ (SnocHash r) = LSnocHash r
substLabel _ (Case r) = LCase r
substLabel pos (Forward rel) =
case Map.lookup (pos+rel) labels of
Just lab -> LGoto lab
Nothing -> error "couldn't find label for position"
substLabel pos (Backward rel) =
case Map.lookup (pos-rel) labels of
Just lab -> LGoto lab
Nothing -> error "couldn't find label for position"
insertLabels i | i == Vector.length p' =
case Map.lookup i labels of
Nothing -> []
Just lab -> [Vector.singleton $ LLabel lab]
insertLabels i =
case Map.lookup i labels of
Nothing -> (Vector.singleton $ p' Vector.! i) : insertLabels (i+1)
Just lab -> (Vector.fromList $ [LLabel lab, p' Vector.! i])
: insertLabels (i+1)
exposePositions :: LProgram -> Map Label Int
exposePositions lp = fst $ Vector.ifoldl' exposePosition (Map.empty, 0) lp
where exposePosition (poss, seen) pos (LLabel lab) =
( Map.insertWith (error $ "duplicate label " ++ show lab)
lab (pos-seen) poss
, seen+1 )
exposePosition p _ _ = p
-- | Convert a @1\#L@ 'LProgram' into a semantically-equivalent @1\#@
-- 'Program'. May fail with an error if the 'LProgram' contains
-- duplicate labels, jumps to undefined labels. An error will also
-- occur if the 'LProgram' contains a goto that would translate into a
-- jump of 0 instructions, as this is impossible to express in @1\#@.
fromLabeledProgram :: LProgram -> Program
fromLabeledProgram lp = insertJumps . removeLabels $ lp
where removeLabels = Vector.filter (not . isLabel)
isLabel (LLabel _) = True
isLabel _ = False
poss = exposePositions lp
insertJumps = Vector.imap insertJump
insertJump _ (LSnocOne r) = SnocOne r
insertJump _ (LSnocHash r) = SnocHash r
insertJump _ (LCase r) = Case r
insertJump pos (LGoto lab) =
case Map.lookup lab poss of
Nothing -> error $ "unbound label " ++ show lab
Just dest | dest > pos -> Forward (dest - pos)
| dest < pos -> Backward (pos - dest)
| otherwise -> error "can't jump to self"
insertJump _ (LLabel _) = error "labels shouldn't exist here"
-- | Concrete syntax for @1\#L@, indexed by backend representation in
-- the typed tagless style
-- ().
class LSymantics repr where
-- | Append a @1@ to the end of the given 'Register'.
snocOne :: Register -> repr ()
-- | Append a @#@ to the end of the given 'Register'.
snocHash :: Register -> repr ()
-- | Return a fresh 'Label' to be used in a call to 'label' or 'goto'.
freshLabel :: repr Label
-- | Place a 'Label' at the given point in the program. Note that a
-- particular 'Label' may be used only once per program.
label :: Label -> repr ()
-- | Unconditional jump to the given 'Label'.
goto :: Label -> repr ()
-- | Case analysis; pops a 'Letter' from the front of the
-- scrutinized 'Register', if non-empty. Note that in the default
-- backend, new labels are automatically created and placed for the
-- branches of the 'cond'.
cond :: Register -- ^ The 'Register' to scrutinize.
-> repr () -- ^ Run if the 'Register' is empty.
-> repr () -- ^ Run if the front of the 'Register' is a @1@.
-> repr () -- ^ Run if the front of the 'Register' is a @#@.
-> repr ()
-- | The default backend for 'LSymantics'.
newtype LComp a = LC { unLC :: StateT (Int, Set Label) (Writer LProgram) a }
deriving ( Functor, Applicative, Monad, MonadFix
, MonadState (Int, Set Label), MonadWriter LProgram)
instance LSymantics LComp where
snocOne = tell . Vector.singleton . LSnocOne
snocHash = tell . Vector.singleton . LSnocHash
freshLabel = do (l, ls) <- get
put (l+1, ls)
return l
label l = do (l', ls) <- get
case Set.member l ls of
True -> error $ printf "duplicate label %s" l
False -> do put (l', Set.insert l ls)
tell . Vector.singleton $ LLabel l
goto = tell . Vector.singleton . LGoto
cond r bEmpty bOne bHash = do
[lEmpty, lOne, lHash] <- replicateM 3 freshLabel
tell . Vector.singleton $ LCase r
goto lEmpty >> goto lOne >> goto lHash
label lEmpty >> bEmpty
label lOne >> bOne
label lHash >> bHash
-- | Convenience function to create a fresh label and place it at the
-- current position.
freshLabelHere :: (Monad repr, LSymantics repr) => repr Label
freshLabelHere = do l <- freshLabel ; label l ; return l
-- | Compiles an 'LComp' program into an 'LProgram'.
compileL :: LComp () -> LProgram
compileL prog = execWriter (evalStateT (unLC prog) (0, Set.empty))
-- | Given an 'LComp' program and an initial register state, and then
-- runs it in the given register state. May return 'Nothing' if the
-- program does not halt cleanly, as with 'run'.
runL :: LComp () -> [(Register, Word)] -> Maybe Word
runL p rs = do
let final = (run . fromLabeledProgram . compileL $ p) (Map.fromList rs)
checkState final
Map.lookup 1 final
-- | Given an 'LComp' program and an initial register state, and then
-- runs it in the given register state. May return 'Nothing' if the
-- program does not halt cleanly, as with 'run'.
runL' :: LComp () -> [(Register, Word)] -> [(Register, Word)]
runL' p rs = Map.toList final
where final = (run . fromLabeledProgram . compileL $ p) (Map.fromList rs)
--------------------------------------------------------------------------------
-- Backwards binary encoding
-- | Encodes an 'Integral' type into a 'Word' of backwards-binary
-- digits using @1@s and @#@s for @1@s and @0@s, respectively. Note
-- that the representation of zero is a single @#@ rather than the
-- empty 'Word'.
encodeBB :: Integral a => a -> Word
encodeBB x | toInteger x == 0 = W [Hash]
| otherwise = W (enc (toInteger x))
where enc 0 = []
enc n | odd n = One : (enc $ n `div` 2)
| even n = Hash : (enc $ n `div` 2)
enc _ = error "encodeBB only accepts non-negative integers"
-- | Decodes a 'Word' containing backwards-binary digits into a 'Num'
-- type. Fails with an error if the 'Word' is empty.
decodeBB :: Num a => Word -> a
decodeBB (W []) = error "Backwards-binary words cannot be empty"
decodeBB (W ys) = fromInteger $ dec ys
where dec [] = 0
dec (Hash:xs) = 2 * dec xs
dec (One:xs) = 1 + (2 * dec xs)
-- | A combinator to cleanly implement looping structures in 'LComp' code.
--
-- Takes a function that expects two arguments, @continue@ and
-- @break@. The body of the function is a block of 'LComp' code that
-- gets repeated whenever @continue@ is run. If @break@ is run,
-- control jumps to the instruction after the call to 'do_'.
do_ :: (LComp () -> LComp () -> LComp ()) -> LComp ()
do_ f = do
break <- freshLabel
continue <- freshLabelHere
f (goto continue) (goto break)
label break