Plov003 : build a recursive descent parser

After the traditional scanner, involving massive overhead to keep track of where we would be in a source file, I decided to do as Professor Wirth: create a recursive descent parser, entirely based upon the EBNF definition of Plov.
Pay attention to the function 'ConstantDeclaration'. Lots of checks and lots of things to do....

Current source of Plov003.mod

Below is the source code for Plov003 as it is on October 6, 2007. This is a snapshot. The page will change regularly.

MODULE Plov003;

(*  Plov is the PL/0 compiler as modified by Verhoeven				*)
(*  Ver	  Comment						:     Date  	*)
(*  ----- -----------------------------------------------------   ------------ 	*)
(*  0.01  First version: read symbols and store them in a list	  01 Oct 2007	*)
(*  0.02  Introduce a limited symbol processor; aborted		  02 Oct 2007	*)
(*  0.03  Introduce a recursive descent parser					*)

IMPORT	ASCII, InOut, MemPools, Strings, SYSTEM;


TYPE	Identifier	= ARRAY [0..31] OF CHAR;
	Opcode		= ARRAY [0..7]  OF CHAR;
	SymbolType	= (Progtype, Proctype, Constype, Vartype, Keytype);
	SymbolPtr	= POINTER TO SymbolNode;
	SymbolNode	= RECORD
			    Name	: Identifier;
			    previous,
			    next	: SymbolPtr;
			    type	: SymbolType;
			    value,
			    address	: CARDINAL
			  END;

VAR	token		: Identifier;
	op		: Opcode;
	SyntaxError,
	Exhausted	: BOOLEAN;
	Symbols		: MemPools.MemPool;
	firstSymbol,
	lastSymbol,
	thisSymbol	: SymbolPtr;
	currentType	: SymbolType;
	LOOPdepth,
	PROCdepth,
	currentValue	: CARDINAL;


PROCEDURE ErrorMessage (n  : CARDINAL);

BEGIN
   CASE  n  OF
      0 : InOut.WriteString ("Error filling Symbol table")		|
      1 : InOut.WriteString ("Invalid digit in : ");
      	  InOut.WriteString (token)					|
      2 : InOut.WriteString ("Invalid letter in : ");
      	  InOut.WriteString (token)					|
      3 : InOut.WriteString ("Program names do not match.")		|
      4 : InOut.WriteString ("BEGIN expected") 				|
      5 : InOut.WriteString ("Illegal identifier name : ");
      	  InOut.WriteString (token)					|
      6 : InOut.WriteString ("Duplicate identifier : ");
      	  InOut.WriteString (token)					|
      7 : InOut.WriteString ("'=' expected")				|
      8 : InOut.WriteString ("END expected")				|
      9 : InOut.WriteString ("Undefined symbol : ");
      	  InOut.WriteString (token)					|
     10 : InOut.WriteString ("Error in StatementSequence")		|
     11 : InOut.WriteString ("EXIT without LOOP")			|
     12 : InOut.WriteString ("THEN expected")				|
     13 : InOut.WriteString ("Illegal comparator : ");
     	  InOut.WriteString (token)					|
     14 : InOut.WriteString ("Illegal operator in factor : ");
     	  InOut.WriteString (token)					|
     15 : InOut.WriteString ("Error in factor : ");
     	  InOut.WriteString (token)					|
     16 : InOut.WriteString ("Missing right parenthesis")		|
     17 : InOut.WriteString ("Missing assigment operator")		|
     18 : InOut.WriteString ("PROGRAM expected")
   ELSE
      InOut.WriteString ("Unknown error detected.")
   END;
   SyntaxError := TRUE;
   InOut.WriteLn
END ErrorMessage;


PROCEDURE StoreSymbol (str   : Identifier) : BOOLEAN;

VAR	thisOne, nextOne	: SymbolPtr;

BEGIN
   thisOne := firstSymbol;
   LOOP
      IF  Strings.StrEq (thisOne^.Name, str)  THEN  RETURN FALSE  END;
      IF  thisOne^.next = NIL  THEN  EXIT  END;
      thisOne := thisOne^.next
   END;
   MemPools.PoolAllocate (Symbols, nextOne, SYSTEM.TSIZE (SymbolNode));
   thisOne^.next := nextOne;
   WITH  nextOne^  DO
      Name := str;
      type := currentType;
      next := NIL;
      previous := thisOne
   END;
   lastSymbol := nextOne;
   thisSymbol := thisOne;
   RETURN TRUE
END StoreSymbol;


PROCEDURE FindSymbol (str  : Identifier) : BOOLEAN;

VAR	thisOne, nextOne	: SymbolPtr;

BEGIN
   thisOne := firstSymbol;
   LOOP
      IF  Strings.StrEq (thisOne^.Name, str)  THEN
	    thisSymbol := thisOne;
	    RETURN TRUE
      END;
      IF  thisOne^.next = NIL  THEN  EXIT  END;
      thisOne := thisOne^.next
   END;
   RETURN  FALSE
END FindSymbol;


PROCEDURE PrintNames;

VAR	  thisOne    		: SymbolPtr;

BEGIN
   thisOne := firstSymbol;
   LOOP
      InOut.WriteString (thisOne^.Name);		InOut.Write (11C);
      CASE  thisOne^.type  OF
        Progtype : InOut.WriteString ("Program")	|
	Proctype : InOut.WriteString ("Procedure")	|
	Constype : InOut.WriteString ("Constant = ");
		   InOut.WriteCard (thisOne^.value, 5)	|
	Vartype	 : InOut.WriteString ("Variable")	|
	Keytype	 : InOut.WriteString ("Keyword")
      END;
      InOut.WriteString (" type.");			InOut.WriteLn;
      IF  thisOne^.next = NIL  THEN
         EXIT
      ELSE
         thisOne := thisOne^.next
      END
   END
END PrintNames;


PROCEDURE GetSymbol;

BEGIN
   IF  InOut.EOF () = FALSE  THEN
      InOut.ReadString (token)
   END;
   InOut.WriteString (token);	InOut.Write (11C)
END GetSymbol;


PROCEDURE Synchronize;

VAR	chr	: CHAR;

BEGIN
   REPEAT  InOut.Read (chr)  UNTIL  chr = ASCII.LF;
   SyntaxError := FALSE
END Synchronize;


PROCEDURE isDigit (chr : CHAR) : BOOLEAN;

BEGIN
   IF  (chr >= '0') AND (chr <= '9')  THEN
      RETURN TRUE
   END;
   RETURN FALSE
END isDigit;


PROCEDURE isAlpha (chr  : CHAR) : BOOLEAN;

BEGIN
   chr := CAP (chr);
   IF  (chr >= 'A') AND (chr <= 'Z')  THEN
      RETURN TRUE
   END;
   RETURN FALSE
END isAlpha;


PROCEDURE isAlphaNum (chr  : CHAR) : BOOLEAN;

BEGIN
   RETURN  isAlpha (chr) OR isDigit (chr)
END isAlphaNum;


PROCEDURE isNumber (str  : Identifier) : BOOLEAN;

VAR	i	: CARDINAL;

BEGIN
   i := 0;
   REPEAT
      IF  isDigit (str [i]) = FALSE  THEN  RETURN FALSE  END;
      INC (i)
   UNTIL  (str [i] = 0C) OR (i > HIGH (str));
   RETURN TRUE
END isNumber;


PROCEDURE isIdentifier (str  : Identifier) : BOOLEAN;

VAR	i	: CARDINAL;

BEGIN
   IF  NOT isAlpha (str [0])  THEN  RETURN FALSE  END;
   i := 1;
   REPEAT
      IF  NOT isAlphaNum (str [i])  THEN  RETURN FALSE  END;
      INC (i)
   UNTIL  (str [i] = 0C) OR (i > HIGH (str));
   RETURN TRUE
END isIdentifier;


PROCEDURE Program;

BEGIN
   GetSymbol;
   IF  Strings.StrEq (token, "PROGRAM")  THEN
      GetSymbol;
      IF  isIdentifier (token)  THEN
         WITH  firstSymbol^  DO
	    Name := token;
	    type := Progtype
	 END;
	 GetSymbol;
	 Block;
	 IF  Strings.StrEq (token, firstSymbol^.Name) = FALSE  THEN
	    ErrorMessage (3)
	 END
      ELSE
         ErrorMessage (5)	(* Illegal identifier		*)
      END
   ELSE
      ErrorMessage (18)		(* 'PROGRAM' expected		*)
   END
END Program;


PROCEDURE Block;

BEGIN
   IF	  Strings.StrEq (token, "CONSTANTS")  THEN  ConstantDeclaration   END;
   IF	  Strings.StrEq (token, "VARIABLES")  THEN  VariableDeclaration   END;
   WHILE  Strings.StrEq (token, "PROCEDURE")  DO    ProcedureDeclaration  END;

   IF     Strings.StrEq (token, "BEGIN")  THEN  
      BlockBody
   ELSIF  Strings.StrEq (token, "END") = FALSE  THEN
      ErrorMessage (4)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END Block;


PROCEDURE ConstantDeclaration;

VAR	Val	: CARDINAL;

BEGIN
   GetSymbol;
   REPEAT
      currentType := Constype;
      IF  isIdentifier (token)  THEN
         IF  StoreSymbol (token) = TRUE  THEN
	    GetSymbol;
	    IF  token [0] = '='  THEN
	       InOut.ReadCard (Val);
	       IF  InOut.Done ()  THEN
		  WITH  thisSymbol^  DO
		     value := Val;
		     type := Constype
		  END;
		  GetSymbol
	       ELSE
	          ErrorMessage (1)	(* Invalid digit	*)
	       END
	    ELSE
	       ErrorMessage (7)		(* '=' expected		*)
	    END
	 ELSE
	    ErrorMessage (6)		(* Duplicate identifier	*)
	 END
      ELSE
         ErrorMessage (5)		(* Illegal Identifier	*)
      END;
   UNTIL  Strings.StrEq (token, "END");
   IF  SyntaxError  THEN  
      Synchronize
   END;
   GetSymbol
END ConstantDeclaration;


PROCEDURE VariableDeclaration;

BEGIN
   REPEAT
      currentType := Vartype;
      GetSymbol;
      IF  isIdentifier (token)  THEN
         IF  StoreSymbol (token) = TRUE  THEN
	    GetSymbol
	 ELSE
	    ErrorMessage (6)		(* Duplicate identifier *)
	 END
      ELSE
         ErrorMessage (5)		(* Illegal Identifier	*)
      END
   UNTIL Strings.StrEq (token, "END");
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END VariableDeclaration;


PROCEDURE ProcedureDeclaration;

BEGIN
   REPEAT
      currentType := Proctype;
      GetSymbol;
      IF  isIdentifier (token)  THEN
	 IF  StoreSymbol (token) = TRUE  THEN
	    INC (PROCdepth);
	    GetSymbol;
	    BlockBody;
	    DEC (PROCdepth)
         ELSE
	    ErrorMessage (6)		(* Duplicate identifier	*)
	 END
      ELSE
         ErrorMessage (5)		(* Illegal identifier 	*)
      END
   UNTIL  Strings.StrEq (token, "BEGIN");
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END ProcedureDeclaration;


PROCEDURE BlockBody;

BEGIN
   IF  Strings.StrEq (token, "BEGIN") = TRUE  THEN
      GetSymbol;
      StatementSequence;
      IF  Strings.StrEq (token, "END") = FALSE  THEN
         ErrorMessage (8)		(* END expected		*)
      END
   ELSE
      ErrorMessage (4)			(* BEGIN expected	*)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END BlockBody;


PROCEDURE StatementSequence;

BEGIN
   REPEAT
      IF  FindSymbol (token) = TRUE  THEN
         IF	thisSymbol^.type = Vartype  	THEN  Assignment
	 ELSIF	thisSymbol^.type = Proctype 	THEN  Procedurecall
	 ELSIF	Strings.StrEq (token, "IF")  	THEN  isIF
	 ELSIF  Strings.StrEq (token, "LOOP")	THEN  isLOOP
	 ELSIF  Strings.StrEq (token, "RETURN")	THEN  isRETURN
	 ELSIF  Strings.StrEq (token, "EXIT")	THEN  isEXIT
	 ELSE
	    ErrorMessage (10)		(* Error in StatementSequence	*)
	 END
      ELSE
	 ErrorMessage (9)		(* Undefined symbol	*)
      END;
      GetSymbol
   UNTIL  Strings.StrEq (token, "END");
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END StatementSequence;


PROCEDURE Procedurecall;

BEGIN
END Procedurecall;


PROCEDURE Assignment;

BEGIN
   GetSymbol;
   IF  Strings.StrEq (token, ':=')  THEN
      GetSymbol;
      Expression
   ELSE
      ErrorMessage (17)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END Assignment;


PROCEDURE isLOOP;

BEGIN
   INC (LOOPdepth);
   GetSymbol;
   StatementSequence;
   IF  Strings.StrEq (token, "END")  THEN
      DEC (LOOPdepth)
   ELSE
      ErrorMessage (8)			(* END expected		*)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END isLOOP;


PROCEDURE isEXIT;

BEGIN
   IF  LOOPdepth = 0  THEN
      ErrorMessage (11)			(* EXIT without LOOP	*)
   ELSE
      DEC (LOOPdepth)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END isEXIT;


PROCEDURE isRETURN;

BEGIN
   IF  PROCdepth = 0  THEN
      ErrorMessage (19)			(* RETURN without PROCEDURE	*)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END isRETURN;


PROCEDURE isIF;

BEGIN
   Condition;
   GetSymbol;
   IF  Strings.StrEq (token, "THEN")  THEN
      GetSymbol;
      StatementSequence;
      WHILE  Strings.StrEq (token, "ELSIF")  DO
         Condition;
	 IF  Strings.StrEq (token, "THEN")  THEN
	    GetSymbol;
	    StatementSequence
	 ELSE
	    ErrorMessage (12)		(* THEN expected	*)
	 END
      END;
      IF  Strings.StrEq (token, "ELSE")  THEN  StatementSequence  END;
      IF  Strings.StrEq (token, "END") = FALSE  THEN
         ErrorMessage (8)		(* END expected		*)
      END
   ELSE
      ErrorMessage (12)			(* THEN expected	*)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END isIF;


PROCEDURE Condition;

BEGIN
   GetSymbol;
   IF  Strings.StrEq (token, "ODD")  THEN
      Expression
   ELSE
      Expression;
      Comparison;
      Expression
   END
END Condition;


PROCEDURE Comparison;

BEGIN
   GetSymbol;
   CASE  token [0]  OF
    '=' :  op := 'EQL'			|
    '<' :  CASE  token [1]  OF
	     0C  : op := 'LESS'	|
	     '>' : op := 'NEQL'	|
	     '=' : op := 'LEQL'
	   ELSE
	     ErrorMessage (13)		(* Illegal comparator   *)
	   END				|
    '>' :  CASE  token [1]  OF
	     0C  : op := 'GRTR'	|
	     '=' : op := 'GEQL'
	   ELSE
	     ErrorMessage (13)		(* Illegal comparator   *)
	   END				|
    '#' : op := 'NEQL'
   ELSE
      ErrorMessage (13)			(* Illegal comparator	*)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END Comparison;


PROCEDURE Expression;

VAR	OP	: CHAR;

BEGIN
   Term;
   OP := token [0];
   WHILE  (OP = '+') OR (OP = '-')  DO
      Term;
      OP := token [0]
   END
END Expression;


PROCEDURE Term;

VAR	Operator	: CHAR;

BEGIN
   Factor;
   Operator := token [0];
   CASE  Operator  OF
     'x', '*' : op := 'MUL'		|
     ':', '/' : op := 'DIV'		|
     '%', 'M' : IF  (token [1] = 'O') AND (token [2] = 'D')  THEN
		   op := 'MOD'
		ELSE
		   ErrorMessage (14)	(* Error in TERM	*)
		END
   ELSE
      ErrorMessage (14)			(* Error in TERM	*)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END Term;


PROCEDURE Factor;

BEGIN
   GetSymbol;
   IF  FindSymbol (token) = TRUE  THEN
      IF  (thisSymbol^.type = Constype) OR (thisSymbol^.type = Vartype)  THEN  op := 'PUSH'
      ELSIF  isNumber (token)  THEN  op := 'PUSH'
      ELSIF  token [0] = '(' THEN
	 Expression;
	 GetSymbol;
	 IF  token [0] # ')'  THEN  ErrorMessage (16)  END
      ELSE
         ErrorMessage (15)		(* Error in Factor	*)
      END
   ELSE
      ErrorMessage (9)			(* Undefined identifier	*)
   END;
   IF  SyntaxError  THEN  Synchronize  END;
   GetSymbol
END Factor;


PROCEDURE Init;

BEGIN
   MemPools.NewPool (Symbols);
   MemPools.PoolAllocate (Symbols, firstSymbol, SYSTEM.TSIZE (SymbolNode));
   WITH  firstSymbol^  DO
      Name := "|";
      next := NIL;
      previous := NIL
   END;
   currentType := Keytype;
   IF  StoreSymbol ("RETURN")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("EXIT") 	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("LOOP")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("BEGIN")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("END")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("IF")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("THEN")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("ELSIF")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("ODD") 	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("PROCEDURE") = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("PROGRAM")	 = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("CONSTANTS") = FALSE  THEN  ErrorMessage (0)  END;
   IF  StoreSymbol ("VARIABLES") = FALSE  THEN  ErrorMessage (0)  END;
   currentType := Progtype;
   LOOPdepth := 0;
   PROCdepth := 0;
   Exhausted := FALSE
END Init;


BEGIN
   Init;
   Program;
   InOut.WriteLn;
   InOut.WriteLn;
   PrintNames;
   MemPools.KillPool (Symbols)
END Plov003.
   
For the time being, the symbol table is printed at program end. And the symbols which are read are being printed to screen as well. This is the first proof of concept. Until now, the semicolon and similar punctuation marks are not needed which makes reading and writing a lot easier.

Plov003 : compile 'test1'

The file 'test1' is very simple. Yet, it is essential that it runs. Stepwise refinement! Here comes what happens:

      
	PROGRAM Number1

	END Number1
   
resulting in:
jan@beryllium:~/modula/Plov$ Plov003 <test1

PROGRAM Number1 END     Number1

Number1 Program type.
RETURN  Keyword type.
EXIT    Keyword type.
LOOP    Keyword type.
BEGIN   Keyword type.
END     Keyword type.
IF      Keyword type.
THEN    Keyword type.
ELSIF   Keyword type.
ODD     Keyword type.
PROCEDURE       Keyword type.
PROGRAM Keyword type.
CONSTANTS       Keyword type.
VARIABLES       Keyword type.

jan@beryllium:~/modula/Plov$
   

Plov003 : compile 'test2'

Program source 'test2' is already slightly more complex.

	PROGRAM Number1

	CONSTANTS  pi = 314
		   ee = 272
		   END

	END Number1
   
resulting in
jan@beryllium:~/modula/Plov$ Plov003 <test2
PROGRAM Number1 CONSTANTS       pi      =       ee      =       END     END     Number1

Number1 Program type.
RETURN  Keyword type.
EXIT    Keyword type.
LOOP    Keyword type.
BEGIN   Keyword type.
END     Keyword type.
IF      Keyword type.
THEN    Keyword type.
ELSIF   Keyword type.
ODD     Keyword type.
PROCEDURE       Keyword type.
PROGRAM Keyword type.
CONSTANTS       Keyword type.
VARIABLES       Constant =   314 type.
pi      Constant =   272 type.
ee      Constant =     0 type.

jan@beryllium:~/modula/Plov$
   
I see it only now, when writing the webpage: The values of the constants are offset one symbol entry backwards. So there is an error with handling 'thisSymbol'. It was fixed by the new PROCEDURE StoreSymbol listed below:
PROCEDURE StoreSymbol (str   : Identifier) : BOOLEAN;

VAR	thisOne, nextOne	: SymbolPtr;

BEGIN
   thisOne := firstSymbol;
   LOOP
      IF  Strings.StrEq (thisOne^.Name, str)  THEN  RETURN FALSE  END;
      IF  thisOne^.next = NIL  THEN  EXIT  END;
      thisOne := thisOne^.next
   END;
   MemPools.PoolAllocate (Symbols, nextOne, SYSTEM.TSIZE (SymbolNode));
   thisOne^.next := nextOne;
   WITH  nextOne^  DO
      Name := str;
      type := currentType;
      next := NIL;
      previous := thisOne
   END;
   lastSymbol := nextOne;
   thisSymbol := nextOne;		(*      Used to be 'thisOne'		*)
   RETURN TRUE
END StoreSymbol;
   

Page created on 3 October 2007 and

Page equipped with FroogleBuster technology