Let us consider the imperative programming language IMP, featuring assignment, arrays, sequence, conditionals, and while loops.
To write an interpreter for IMP, we have to go through the following definitions:
emptyenv
, bind
, applyenv
emptystore
, applystore
, update
sem_exp
sem_dec
sem_com
sem_prog
The environment is a mapping from identifiers ide
to denoted values dval
.
type ide = string;; type loc = int;;
The type dval
represent the semantic domain of environments:
type dval = Unbound | DConst of int | DVar of loc | DArray of loc ;;
The type env
is then defined as follows:
type env = Env of (ide -> dval)
exception UnboundIde of ide;; exception TypeMismatch of string;;
The specification of env
requires the following functions:
emptyenv
evaluates to the empty environment (i.e. mapping all identifiers to Unbound
)bind r x d
evaluates to an environment extending r
with a new binding for x
to d
applyenv r x
evaluates to the value bound to x
, if any. It raises an exception if no binding for x
is present in r
.# let emptyenv = fun () -> Env (fun x -> Unbound);; val emptyenv : unit -> env
# let bind (Env rho) x d = Env (fun y -> if y=x then d else rho y);; val bind : env -> ide -> dval -> env
# let applyenv (Env rho) x = match rho x with Unbound -> raise (UnboundIde x) | _ as d -> d ;; val applyenv : env -> ide -> dval
The store is modelled as a pair.
The first element of the pair is a mapping from locations loc
to memorized values mval
.
The second element of the pair is an integer that records the size of the store.
type mval = int;; type store = Store of (loc -> mval) * int;;
The specification of store
requires the following functions:
emptystore ()
evaluates to the “empty” store (which mapps all locations to the integer 0). The size of the store is 216 (in hexadecimal, 0x10000
)update s l m
evaluates to an store where the value m
is memorized at location l
applystore s l
evaluates to the value memorized at location l
. It raises an AddressOutOfBounds
exception if l
if a negative number, or exceeds the size of the store.# let emptystore = fun () -> Store ((fun x -> 0), 0x10000);; val emptystore : unit -> store = <fun>
exception AddressOutOfBounds of loc;;
# let applystore (Store (sigma,dim)) l = if 0 <= l && l < dim then sigma l else raise (AddressOutOfBounds l) ;; val applystore : store -> loc -> mval = <fun>
# let update (Store (sigma,dim)) l m = if 0 <= l && l < dim then Store ((fun l' -> if l'=l then m else sigma l'), dim) else raise (AddressOutOfBounds l) val update : store -> loc -> mval -> store = <fun>
Expressions have the following abstract syntax:
type exp = N of int | Val of ide | ArrayVal of ide * exp | Add of exp * exp | Sub of exp * exp | Mul of exp * exp | Eq of exp * exp | Lt of exp * exp | Not of exp | And of exp * exp | Or of exp * exp ;;
The evaluation of an expression e
in an environment rho
and a store sigma
is defined by the function sem_exp
, by induction on the structure of e
.
An exception is raised when trying to access variables or arrays with the wrong construct.
exception TypeMismatch of string let rec sem_exp e (r, s) = match e with N n -> n | Val x -> (match applyenv r x with DVar l -> applystore s l | DConst c -> c | DArray l -> raise (TypeMismatch ( "You have tried to access array" ^ x ^ " as a variable")) | _ -> raise (UnboundIde x) ) | ArrayVal (x,e') -> (match applyenv r x with DArray l -> applystore s (l + sem_exp e' (r,s)) | DVar l -> raise (TypeMismatch ( "You have tried to access variable " ^ x ^ " as an array")) | DConst c -> raise (TypeMismatch ( "You have tried to access constant " ^ x ^ " as an array")) | Unbound -> raise (UnboundIde x) ) | Add (e1,e2) -> sem_exp e1 (r,s) + sem_exp e2 (r,s) | Sub (e1,e2) -> sem_exp e1 (r,s) - sem_exp e2 (r,s) | Mul (e1,e2) -> sem_exp e1 (r,s) * sem_exp e2 (r,s) | Eq (e1,e2) -> if sem_exp e1 (r,s) = sem_exp e2 (r,s) then 1 else 0 | Lt (e1,e2) -> if sem_exp e1 (r,s) < sem_exp e2 (r,s) then 1 else 0 | Not e' -> if sem_exp e' (r,s) = 0 then 1 else 0 | And (e1,e2) -> if sem_exp e1 (r,s) = 0 || sem_exp e2 (r,s) = 0 then 0 else 1 | Or (e1,e2) -> if sem_exp e1 (r,s) = 0 && sem_exp e2 (r,s) = 0 then 0 else 1 ;; val sem_exp : exp -> env * store -> mval = <fun>
Declarations comprise the empty one, the declaration of a constant, the declaration of a variable, the declaration of an array, and the sequence of two declarations.
type dec = Empty | Const of ide * int | Var of ide | Array of ide * int | Dseq of dec * dec ;;
The semantics of a declaration d
in an environment rho
and a starting location l
is defined by the function sem_dec
, with the following signature.
val sem_dec : dec -> env * loc -> env * loc
let rec sem_dec d (r,l) = match d with Empty -> (r,l) | Const(x,v) -> (bind r x (DConst v), l) | Var x -> (bind r x (DVar l), l+1) | Array (x,v) -> (bind r x (DArray l), l+v) | Dseq(d1,d2) -> sem_dec d2 (sem_dec d1 (r,l)) ;;
sem_dec
as a function dec → env * loc → env * loc
.
Updating the store is needed when you can declare and initialize a variable at the same time (try it as an exercise).
A command is either a skip, an assignment to a variable or an array, a sequence of commands, an if-then-else, or a while loop.
type com = Skip | AssignVar of ide * exp | AssignArray of ide * exp * exp | Cseq of com * com | If of exp * com * com | While of exp * com ;;
The semantics of a command c
in an environment r
and a store s
is defined by the function sem_com
(note that commands do not modify the environment):
val sem_com : com -> env * store -> store
let rec sem_com c (rho,sigma) = match c with Skip -> sigma | AssignVar(x,e) -> (match applyenv rho x with DVar l -> update sigma l (sem_exp e (rho,sigma)) | _ -> raise (TypeMismatch ("You have tried to assign to non-variable" ^ x))) | AssignArray(x,e,e') -> (match applyenv rho x with DArray l -> update sigma (l + sem_exp e (rho,sigma)) (sem_exp e' (rho,sigma)) | _ -> raise (TypeMismatch ("You have tried to assign to non-array" ^ x))) | Cseq(c1,c2) -> sem_com c2 (rho,sem_com c1 (rho,sigma)) | If(e,c1,c2) -> if sem_exp e (rho,sigma) = 0 then sem_com c2 (rho,sigma) else sem_com c1 (rho,sigma) | While(e,c') -> if sem_exp e (rho,sigma) = 0 then sigma else sem_com c (rho,sem_com c' (rho,sigma)) ;;
A program is a pair (declaration, command). Its semantics starts from the empty environment and empty store, then applies the semantics of the declaration and of the comment.
type prog = Program of dec * com;;
let sem_prog (Program (d,c)) = let (rho,l) = sem_dec d (emptyenv(),0) in sem_com c (rho,emptystore()) ;;
The signature of sem_prog
is as follows:
val sem_prog : prog -> store = <fun>
We now introduce some auxiliary functions to debug programs:
let rec range a b = if a>b then [] else a::(range (a+1) b);; let rec ide = function Empty -> [] | Const (x,v) -> [x] | Var x -> [x] | Array (x,v) -> [x] | Dseq (d1,d2) -> (ide d1) @ (ide d2) ;; let dump (Program (d,c)) = let (r,l) = sem_dec d (emptyenv(),0) in let s' = sem_com c (r,emptystore()) in (List.map (fun x -> (x,applyenv r x)) (ide d), List.map (applystore s') (range 0 (l-1))) ;;
Let us write an IMP program to compute the factorial function:
let fact x = Program( Dseq(Const ("n", x),Dseq(Var "i",Var "f")), Cseq(Cseq(AssignVar ("i", N 1),AssignVar ("f", N 1)), While(Or(Lt (Val "i", Val "n"),Eq (Val "i", Val "n")), Cseq(AssignVar ("f", Mul (Val "f",Val "i")), AssignVar ("i", Add (Val "i",N 1)))))) ;;
# dump (fact 5);; ([("n", DConst 5); ("i", DVar 0); ("f", DVar 1)], [6; 120])