Plov003 : build a recursive descent parser
After the traditional scanner, involving massive overhead to keep track of where we would be in a source file,
I decided to do as Professor Wirth: create a recursive descent parser, entirely based upon the EBNF definition
of Plov.
Pay attention to the function 'ConstantDeclaration'. Lots of checks and lots of things to do....
Current source of Plov003.mod
Below is the source code for Plov003 as it is on October 6, 2007. This is a snapshot. The page will change regularly.
MODULE Plov003;
(* Plov is the PL/0 compiler as modified by Verhoeven *)
(* Ver Comment : Date *)
(* ----- ----------------------------------------------------- ------------ *)
(* 0.01 First version: read symbols and store them in a list 01 Oct 2007 *)
(* 0.02 Introduce a limited symbol processor; aborted 02 Oct 2007 *)
(* 0.03 Introduce a recursive descent parser *)
IMPORT ASCII, InOut, MemPools, Strings, SYSTEM;
TYPE Identifier = ARRAY [0..31] OF CHAR;
Opcode = ARRAY [0..7] OF CHAR;
SymbolType = (Progtype, Proctype, Constype, Vartype, Keytype);
SymbolPtr = POINTER TO SymbolNode;
SymbolNode = RECORD
Name : Identifier;
previous,
next : SymbolPtr;
type : SymbolType;
value,
address : CARDINAL
END;
VAR token : Identifier;
op : Opcode;
SyntaxError,
Exhausted : BOOLEAN;
Symbols : MemPools.MemPool;
firstSymbol,
lastSymbol,
thisSymbol : SymbolPtr;
currentType : SymbolType;
LOOPdepth,
PROCdepth,
currentValue : CARDINAL;
PROCEDURE ErrorMessage (n : CARDINAL);
BEGIN
CASE n OF
0 : InOut.WriteString ("Error filling Symbol table") |
1 : InOut.WriteString ("Invalid digit in : ");
InOut.WriteString (token) |
2 : InOut.WriteString ("Invalid letter in : ");
InOut.WriteString (token) |
3 : InOut.WriteString ("Program names do not match.") |
4 : InOut.WriteString ("BEGIN expected") |
5 : InOut.WriteString ("Illegal identifier name : ");
InOut.WriteString (token) |
6 : InOut.WriteString ("Duplicate identifier : ");
InOut.WriteString (token) |
7 : InOut.WriteString ("'=' expected") |
8 : InOut.WriteString ("END expected") |
9 : InOut.WriteString ("Undefined symbol : ");
InOut.WriteString (token) |
10 : InOut.WriteString ("Error in StatementSequence") |
11 : InOut.WriteString ("EXIT without LOOP") |
12 : InOut.WriteString ("THEN expected") |
13 : InOut.WriteString ("Illegal comparator : ");
InOut.WriteString (token) |
14 : InOut.WriteString ("Illegal operator in factor : ");
InOut.WriteString (token) |
15 : InOut.WriteString ("Error in factor : ");
InOut.WriteString (token) |
16 : InOut.WriteString ("Missing right parenthesis") |
17 : InOut.WriteString ("Missing assigment operator") |
18 : InOut.WriteString ("PROGRAM expected")
ELSE
InOut.WriteString ("Unknown error detected.")
END;
SyntaxError := TRUE;
InOut.WriteLn
END ErrorMessage;
PROCEDURE StoreSymbol (str : Identifier) : BOOLEAN;
VAR thisOne, nextOne : SymbolPtr;
BEGIN
thisOne := firstSymbol;
LOOP
IF Strings.StrEq (thisOne^.Name, str) THEN RETURN FALSE END;
IF thisOne^.next = NIL THEN EXIT END;
thisOne := thisOne^.next
END;
MemPools.PoolAllocate (Symbols, nextOne, SYSTEM.TSIZE (SymbolNode));
thisOne^.next := nextOne;
WITH nextOne^ DO
Name := str;
type := currentType;
next := NIL;
previous := thisOne
END;
lastSymbol := nextOne;
thisSymbol := thisOne;
RETURN TRUE
END StoreSymbol;
PROCEDURE FindSymbol (str : Identifier) : BOOLEAN;
VAR thisOne, nextOne : SymbolPtr;
BEGIN
thisOne := firstSymbol;
LOOP
IF Strings.StrEq (thisOne^.Name, str) THEN
thisSymbol := thisOne;
RETURN TRUE
END;
IF thisOne^.next = NIL THEN EXIT END;
thisOne := thisOne^.next
END;
RETURN FALSE
END FindSymbol;
PROCEDURE PrintNames;
VAR thisOne : SymbolPtr;
BEGIN
thisOne := firstSymbol;
LOOP
InOut.WriteString (thisOne^.Name); InOut.Write (11C);
CASE thisOne^.type OF
Progtype : InOut.WriteString ("Program") |
Proctype : InOut.WriteString ("Procedure") |
Constype : InOut.WriteString ("Constant = ");
InOut.WriteCard (thisOne^.value, 5) |
Vartype : InOut.WriteString ("Variable") |
Keytype : InOut.WriteString ("Keyword")
END;
InOut.WriteString (" type."); InOut.WriteLn;
IF thisOne^.next = NIL THEN
EXIT
ELSE
thisOne := thisOne^.next
END
END
END PrintNames;
PROCEDURE GetSymbol;
BEGIN
IF InOut.EOF () = FALSE THEN
InOut.ReadString (token)
END;
InOut.WriteString (token); InOut.Write (11C)
END GetSymbol;
PROCEDURE Synchronize;
VAR chr : CHAR;
BEGIN
REPEAT InOut.Read (chr) UNTIL chr = ASCII.LF;
SyntaxError := FALSE
END Synchronize;
PROCEDURE isDigit (chr : CHAR) : BOOLEAN;
BEGIN
IF (chr >= '0') AND (chr <= '9') THEN
RETURN TRUE
END;
RETURN FALSE
END isDigit;
PROCEDURE isAlpha (chr : CHAR) : BOOLEAN;
BEGIN
chr := CAP (chr);
IF (chr >= 'A') AND (chr <= 'Z') THEN
RETURN TRUE
END;
RETURN FALSE
END isAlpha;
PROCEDURE isAlphaNum (chr : CHAR) : BOOLEAN;
BEGIN
RETURN isAlpha (chr) OR isDigit (chr)
END isAlphaNum;
PROCEDURE isNumber (str : Identifier) : BOOLEAN;
VAR i : CARDINAL;
BEGIN
i := 0;
REPEAT
IF isDigit (str [i]) = FALSE THEN RETURN FALSE END;
INC (i)
UNTIL (str [i] = 0C) OR (i > HIGH (str));
RETURN TRUE
END isNumber;
PROCEDURE isIdentifier (str : Identifier) : BOOLEAN;
VAR i : CARDINAL;
BEGIN
IF NOT isAlpha (str [0]) THEN RETURN FALSE END;
i := 1;
REPEAT
IF NOT isAlphaNum (str [i]) THEN RETURN FALSE END;
INC (i)
UNTIL (str [i] = 0C) OR (i > HIGH (str));
RETURN TRUE
END isIdentifier;
PROCEDURE Program;
BEGIN
GetSymbol;
IF Strings.StrEq (token, "PROGRAM") THEN
GetSymbol;
IF isIdentifier (token) THEN
WITH firstSymbol^ DO
Name := token;
type := Progtype
END;
GetSymbol;
Block;
IF Strings.StrEq (token, firstSymbol^.Name) = FALSE THEN
ErrorMessage (3)
END
ELSE
ErrorMessage (5) (* Illegal identifier *)
END
ELSE
ErrorMessage (18) (* 'PROGRAM' expected *)
END
END Program;
PROCEDURE Block;
BEGIN
IF Strings.StrEq (token, "CONSTANTS") THEN ConstantDeclaration END;
IF Strings.StrEq (token, "VARIABLES") THEN VariableDeclaration END;
WHILE Strings.StrEq (token, "PROCEDURE") DO ProcedureDeclaration END;
IF Strings.StrEq (token, "BEGIN") THEN
BlockBody
ELSIF Strings.StrEq (token, "END") = FALSE THEN
ErrorMessage (4)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END Block;
PROCEDURE ConstantDeclaration;
VAR Val : CARDINAL;
BEGIN
GetSymbol;
REPEAT
currentType := Constype;
IF isIdentifier (token) THEN
IF StoreSymbol (token) = TRUE THEN
GetSymbol;
IF token [0] = '=' THEN
InOut.ReadCard (Val);
IF InOut.Done () THEN
WITH thisSymbol^ DO
value := Val;
type := Constype
END;
GetSymbol
ELSE
ErrorMessage (1) (* Invalid digit *)
END
ELSE
ErrorMessage (7) (* '=' expected *)
END
ELSE
ErrorMessage (6) (* Duplicate identifier *)
END
ELSE
ErrorMessage (5) (* Illegal Identifier *)
END;
UNTIL Strings.StrEq (token, "END");
IF SyntaxError THEN
Synchronize
END;
GetSymbol
END ConstantDeclaration;
PROCEDURE VariableDeclaration;
BEGIN
REPEAT
currentType := Vartype;
GetSymbol;
IF isIdentifier (token) THEN
IF StoreSymbol (token) = TRUE THEN
GetSymbol
ELSE
ErrorMessage (6) (* Duplicate identifier *)
END
ELSE
ErrorMessage (5) (* Illegal Identifier *)
END
UNTIL Strings.StrEq (token, "END");
IF SyntaxError THEN Synchronize END;
GetSymbol
END VariableDeclaration;
PROCEDURE ProcedureDeclaration;
BEGIN
REPEAT
currentType := Proctype;
GetSymbol;
IF isIdentifier (token) THEN
IF StoreSymbol (token) = TRUE THEN
INC (PROCdepth);
GetSymbol;
BlockBody;
DEC (PROCdepth)
ELSE
ErrorMessage (6) (* Duplicate identifier *)
END
ELSE
ErrorMessage (5) (* Illegal identifier *)
END
UNTIL Strings.StrEq (token, "BEGIN");
IF SyntaxError THEN Synchronize END;
GetSymbol
END ProcedureDeclaration;
PROCEDURE BlockBody;
BEGIN
IF Strings.StrEq (token, "BEGIN") = TRUE THEN
GetSymbol;
StatementSequence;
IF Strings.StrEq (token, "END") = FALSE THEN
ErrorMessage (8) (* END expected *)
END
ELSE
ErrorMessage (4) (* BEGIN expected *)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END BlockBody;
PROCEDURE StatementSequence;
BEGIN
REPEAT
IF FindSymbol (token) = TRUE THEN
IF thisSymbol^.type = Vartype THEN Assignment
ELSIF thisSymbol^.type = Proctype THEN Procedurecall
ELSIF Strings.StrEq (token, "IF") THEN isIF
ELSIF Strings.StrEq (token, "LOOP") THEN isLOOP
ELSIF Strings.StrEq (token, "RETURN") THEN isRETURN
ELSIF Strings.StrEq (token, "EXIT") THEN isEXIT
ELSE
ErrorMessage (10) (* Error in StatementSequence *)
END
ELSE
ErrorMessage (9) (* Undefined symbol *)
END;
GetSymbol
UNTIL Strings.StrEq (token, "END");
IF SyntaxError THEN Synchronize END;
GetSymbol
END StatementSequence;
PROCEDURE Procedurecall;
BEGIN
END Procedurecall;
PROCEDURE Assignment;
BEGIN
GetSymbol;
IF Strings.StrEq (token, ':=') THEN
GetSymbol;
Expression
ELSE
ErrorMessage (17)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END Assignment;
PROCEDURE isLOOP;
BEGIN
INC (LOOPdepth);
GetSymbol;
StatementSequence;
IF Strings.StrEq (token, "END") THEN
DEC (LOOPdepth)
ELSE
ErrorMessage (8) (* END expected *)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END isLOOP;
PROCEDURE isEXIT;
BEGIN
IF LOOPdepth = 0 THEN
ErrorMessage (11) (* EXIT without LOOP *)
ELSE
DEC (LOOPdepth)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END isEXIT;
PROCEDURE isRETURN;
BEGIN
IF PROCdepth = 0 THEN
ErrorMessage (19) (* RETURN without PROCEDURE *)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END isRETURN;
PROCEDURE isIF;
BEGIN
Condition;
GetSymbol;
IF Strings.StrEq (token, "THEN") THEN
GetSymbol;
StatementSequence;
WHILE Strings.StrEq (token, "ELSIF") DO
Condition;
IF Strings.StrEq (token, "THEN") THEN
GetSymbol;
StatementSequence
ELSE
ErrorMessage (12) (* THEN expected *)
END
END;
IF Strings.StrEq (token, "ELSE") THEN StatementSequence END;
IF Strings.StrEq (token, "END") = FALSE THEN
ErrorMessage (8) (* END expected *)
END
ELSE
ErrorMessage (12) (* THEN expected *)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END isIF;
PROCEDURE Condition;
BEGIN
GetSymbol;
IF Strings.StrEq (token, "ODD") THEN
Expression
ELSE
Expression;
Comparison;
Expression
END
END Condition;
PROCEDURE Comparison;
BEGIN
GetSymbol;
CASE token [0] OF
'=' : op := 'EQL' |
'<' : CASE token [1] OF
0C : op := 'LESS' |
'>' : op := 'NEQL' |
'=' : op := 'LEQL'
ELSE
ErrorMessage (13) (* Illegal comparator *)
END |
'>' : CASE token [1] OF
0C : op := 'GRTR' |
'=' : op := 'GEQL'
ELSE
ErrorMessage (13) (* Illegal comparator *)
END |
'#' : op := 'NEQL'
ELSE
ErrorMessage (13) (* Illegal comparator *)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END Comparison;
PROCEDURE Expression;
VAR OP : CHAR;
BEGIN
Term;
OP := token [0];
WHILE (OP = '+') OR (OP = '-') DO
Term;
OP := token [0]
END
END Expression;
PROCEDURE Term;
VAR Operator : CHAR;
BEGIN
Factor;
Operator := token [0];
CASE Operator OF
'x', '*' : op := 'MUL' |
':', '/' : op := 'DIV' |
'%', 'M' : IF (token [1] = 'O') AND (token [2] = 'D') THEN
op := 'MOD'
ELSE
ErrorMessage (14) (* Error in TERM *)
END
ELSE
ErrorMessage (14) (* Error in TERM *)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END Term;
PROCEDURE Factor;
BEGIN
GetSymbol;
IF FindSymbol (token) = TRUE THEN
IF (thisSymbol^.type = Constype) OR (thisSymbol^.type = Vartype) THEN op := 'PUSH'
ELSIF isNumber (token) THEN op := 'PUSH'
ELSIF token [0] = '(' THEN
Expression;
GetSymbol;
IF token [0] # ')' THEN ErrorMessage (16) END
ELSE
ErrorMessage (15) (* Error in Factor *)
END
ELSE
ErrorMessage (9) (* Undefined identifier *)
END;
IF SyntaxError THEN Synchronize END;
GetSymbol
END Factor;
PROCEDURE Init;
BEGIN
MemPools.NewPool (Symbols);
MemPools.PoolAllocate (Symbols, firstSymbol, SYSTEM.TSIZE (SymbolNode));
WITH firstSymbol^ DO
Name := "|";
next := NIL;
previous := NIL
END;
currentType := Keytype;
IF StoreSymbol ("RETURN") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("EXIT") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("LOOP") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("BEGIN") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("END") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("IF") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("THEN") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("ELSIF") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("ODD") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("PROCEDURE") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("PROGRAM") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("CONSTANTS") = FALSE THEN ErrorMessage (0) END;
IF StoreSymbol ("VARIABLES") = FALSE THEN ErrorMessage (0) END;
currentType := Progtype;
LOOPdepth := 0;
PROCdepth := 0;
Exhausted := FALSE
END Init;
BEGIN
Init;
Program;
InOut.WriteLn;
InOut.WriteLn;
PrintNames;
MemPools.KillPool (Symbols)
END Plov003.
For the time being, the symbol table is printed at program end. And the symbols which are read are being
printed to screen as well. This is the first proof of concept. Until now, the semicolon and similar
punctuation marks are not needed which makes reading and writing a lot easier.
Plov003 : compile 'test1'
The file 'test1' is very simple. Yet, it is essential that it runs. Stepwise refinement! Here comes what happens:
PROGRAM Number1 END Number1resulting in:
jan@beryllium:~/modula/Plov$ Plov003 <test1 PROGRAM Number1 END Number1 Number1 Program type. RETURN Keyword type. EXIT Keyword type. LOOP Keyword type. BEGIN Keyword type. END Keyword type. IF Keyword type. THEN Keyword type. ELSIF Keyword type. ODD Keyword type. PROCEDURE Keyword type. PROGRAM Keyword type. CONSTANTS Keyword type. VARIABLES Keyword type. jan@beryllium:~/modula/Plov$
Plov003 : compile 'test2'
Program source 'test2' is already slightly more complex.
PROGRAM Number1 CONSTANTS pi = 314 ee = 272 END END Number1resulting in
jan@beryllium:~/modula/Plov$ Plov003 <test2 PROGRAM Number1 CONSTANTS pi = ee = END END Number1 Number1 Program type. RETURN Keyword type. EXIT Keyword type. LOOP Keyword type. BEGIN Keyword type. END Keyword type. IF Keyword type. THEN Keyword type. ELSIF Keyword type. ODD Keyword type. PROCEDURE Keyword type. PROGRAM Keyword type. CONSTANTS Keyword type. VARIABLES Constant = 314 type. pi Constant = 272 type. ee Constant = 0 type. jan@beryllium:~/modula/Plov$I see it only now, when writing the webpage: The values of the constants are offset one symbol entry backwards. So there is an error with handling 'thisSymbol'. It was fixed by the new PROCEDURE StoreSymbol listed below:
PROCEDURE StoreSymbol (str : Identifier) : BOOLEAN;
VAR thisOne, nextOne : SymbolPtr;
BEGIN
thisOne := firstSymbol;
LOOP
IF Strings.StrEq (thisOne^.Name, str) THEN RETURN FALSE END;
IF thisOne^.next = NIL THEN EXIT END;
thisOne := thisOne^.next
END;
MemPools.PoolAllocate (Symbols, nextOne, SYSTEM.TSIZE (SymbolNode));
thisOne^.next := nextOne;
WITH nextOne^ DO
Name := str;
type := currentType;
next := NIL;
previous := thisOne
END;
lastSymbol := nextOne;
thisSymbol := nextOne; (* Used to be 'thisOne' *)
RETURN TRUE
END StoreSymbol;
Page created on 3 October 2007 and
Page equipped with FroogleBuster technology