Compiler/fall2019/looplang
From Maryville College CS Wiki
BNF
< program > ::= < statement-list >
< statement-list > ::= < statement > |
< statement > < statement-list >
< statement > ::= < assignment-stmnt > |
< loop-stmnt > |
< print-stmnt > |
< expression >
< assignment-stmnt > ::= < identifier > = < expression >
< loop-stmnt > ::= while < condition > < statement-list > end
< print-stmnt > ::= print < expression >
< expression > ::= < operand > |
< operand > < operator > < expression >
< operand > ::= < identifier > | < integer >
< operator > ::= + | -
< condition > ::= < expression > < comparison > < expression >
< comparison > ::= < | <= | > | >= | == | !=
< identifier > ::= < letter > | < letter > < identifier' >
< identifier' > ::= \lambda |
< identifier > < digit > |
< identifier > < letter >
< integer > ::= < digit > | < digit > < integer >
< digit > ::= 0|1|2|3|4|5|6|7|8|9
< letter > ::= a-z | A-Z
Regular Grammar Portion (Lexer Grammar)
ASSIGN_TOK = =
PLUS_TOK = +
MINUS_TOK = -
LESS_TOK = <
LESS_EQUAL_TOK = <=
GREATER_TOK = >
GREATER_EQUAL_TOK = >=
EQUAL_TOK = ==
NOT_EQUAL_TOK = !=
INTEGER_TOK = (0|1|2|3|4|5|6|7|8|9)+
IDENTIFIER_TOK = (a-zA-Z)((a-zA-Z)|(0|1|2|3|4|5|6|7|8|9))*
WHILE_TOK = while
END_TOK = end
PRINT_TOK = print
lexer.h
//Definitions for the lexical analyzer
#ifndef LEXER_H
#define LEXER_H
#include <iostream>
#include <string>
enum Token_Type { INVALID_TOK=0, ASSIGN_TOK, PLUS_TOK, MINUS_TOK, LESS_TOK,
LESS_EQUAL_TOK, GREATER_TOK, GREATER_EQUAL_TOK, EQUAL_TOK,
NOT_EQUAL_TOK, INTEGER_TOK, IDENTIFIER_TOK, WHILE_TOK,
END_TOK, PRINT_TOK, EOF_TOK};
struct Token {
Token_Type type; //type of the token
std::string repn; //representation of the token
};
//lexer interface
void next_symbol(); //get the next symbol
Token curr_symbol(); //return the current symbol
bool have(Token_Type t); //if we have the token, advance and return true
//false (with no advance) o.w.
void mustbe(Token_Type t); //like have, but prints error message on failure
std::ostream & operator<<(std::ostream &os, Token t);
#endif
lexer.cpp
// Lexical analyzer for loop lang
#include <iostream>
#include <string>
#include <cctype>
#include "lexer.h"
using namespace std;
static const char* TOKEN_LABEL[] = { "INVALID_TOK", "ASSIGN_TOK", "PLUS_TOK",
"MINUS_TOK", "LESS_TOK", "LESS_EQUAL_TOK",
"GREATER_TOK", "GREATER_EQUAL_TOK",
"EQUAL_TOK", "NOT_EQUAL_TOK", "INTEGER_TOK",
"IDENTIFIER_TOK", "WHILE_TOK", "END_TOK",
"PRINT_TOK", "EOF_TOK"};
static Token cur; //the current symbol
///////////////////////////////////
//helper functions for the lexer
///////////////////////////////////
static void lt()
{
cur.repn = cin.get();
//check for <=
if(cin.peek() == '=') {
//<=
cur.repn += cin.get();
cur.type = LESS_EQUAL_TOK;
} else {
cur.type = LESS_TOK;
}
}
static void gt()
{
cur.repn = cin.get();
//check for >=
if(cin.peek() == '=') {
//>=
cur.repn += cin.get();
cur.type = GREATER_EQUAL_TOK;
} else {
cur.type = GREATER_TOK;
}
}
static void eq()
{
cur.repn = cin.get();
//check for ==
if(cin.peek() == '=') {
//==
cur.repn += cin.get();
cur.type = EQUAL_TOK;
} else {
cur.type = ASSIGN_TOK;
}
}
//!= processor
static void neq()
{
cur.repn = cin.get();
if(cin.peek() != '=') {
cur.type = INVALID_TOK;
return;
}
//consume the =
cur.repn += cin.get();
cur.type = NOT_EQUAL_TOK;
}
//integers
static void integer()
{
//blank integer
cur.repn = "";
cur.type = INTEGER_TOK;
do {
//add the digit to the repn
cur.repn+= cin.get();
} while(isdigit(cin.peek()));
}
//identifiers
static void identifier()
{
//set our type correctly
cur.type = IDENTIFIER_TOK;
//consume the identifier
while(isalnum(cin.peek())) {
cur.repn += cin.get();
}
}
//keyword
static void keyword(string k, Token_Type t)
{
//start with empty string
cur.repn = "";
//consume until we no longer match
for(int i=0; i<k.length(); i++) {
//look for mismatch
if(cin.peek() != k[i]) {
identifier();
return;
}
cur.repn += cin.get();
}
//detect the end of this chain
if(isalnum(cin.peek())) {
identifier();
return;
}
//yay it's the keyword!
cur.type = t;
}
//get the next symbol
void next_symbol()
{
//detect end of file
if(cin.eof()) {
cur.type = EOF_TOK;
cur.repn = "";
return;
}
//skip spaces
while(isspace(cin.peek())) { cin.get(); }
char c = cin.peek();
if(c == '+') {
//the plus token
cur.type=PLUS_TOK;
cur.repn = cin.get();
} else if(c == '-') {
//the minus token
cur.type=MINUS_TOK;
cur.repn = cin.get();
} else if(c == '<') {
//< group
lt();
} else if(c == '>') {
// > group
gt();
} else if(c == '=') {
// = group
eq();
} else if(c == '!') {
// !=
neq();
} else if(isdigit(c)) {
integer();
} else if(c == 'w') {
keyword("while", WHILE_TOK);
} else if(c == 'e') {
keyword("end", END_TOK);
} else if(c == 'p') {
keyword("print", PRINT_TOK);
} else if(isalpha(c)) {
cur.repn = "";
identifier();
} else {
cur.type = INVALID_TOK;
cur.repn = cin.get();
}
}
//return the current symbol
Token curr_symbol()
{
return cur;
}
//if we have the token, advance and return true
//false (with no advance) o.w.
bool have(Token_Type t)
{
if(cur.type == t) {
next_symbol();
return true;
}
return false;
}
//like have, but prints error message on failure
void mustbe(Token_Type t)
{
if(have(t)) {
return;
}
cerr << "Unexpected token: " << cur << endl;
}
//print the token
std::ostream & operator<<(std::ostream &os, Token t)
{
return os << TOKEN_LABEL[t.type] << ": \"" << t.repn << "\"";
}
main.cpp
#include <iostream>
#include "lexer.h"
using namespace std;
int main()
{
do {
next_symbol();
cout << curr_symbol() << endl;
} while(not have(EOF_TOK));
}