Compiler/fall2019/looplang

From Maryville College CS Wiki
Jump to: navigation, search

BNF

< program > ::= < statement-list >

< statement-list > ::= < statement > |
                      < statement > < statement-list >

< statement > ::= < assignment-stmnt > |
                 < loop-stmnt > |
                 < print-stmnt > |
                 < expression >

< assignment-stmnt > ::= < identifier > = < expression >

< loop-stmnt > ::= while < condition > < statement-list > end

< print-stmnt > ::= print < expression >

< expression > ::= < operand > |
                  < operand > < operator > < expression >

< operand > ::= < identifier > | < integer >

< operator > ::= + | -

< condition > ::= < expression > < comparison > < expression >

< comparison > ::= < | <= | > | >= | == | !=

< identifier > ::= < letter > | < letter > < identifier' >

< identifier' > ::= \lambda | 
                    < identifier > < digit >  |
                    < identifier > < letter >

< integer > ::= < digit > | < digit > < integer >

< digit >  ::= 0|1|2|3|4|5|6|7|8|9

< letter > ::= a-z | A-Z

Regular Grammar Portion (Lexer Grammar)

ASSIGN_TOK = =
PLUS_TOK = +
MINUS_TOK = -
LESS_TOK = <
LESS_EQUAL_TOK = <=
GREATER_TOK = >
GREATER_EQUAL_TOK = >=
EQUAL_TOK = ==
NOT_EQUAL_TOK = !=
INTEGER_TOK = (0|1|2|3|4|5|6|7|8|9)+
IDENTIFIER_TOK = (a-zA-Z)((a-zA-Z)|(0|1|2|3|4|5|6|7|8|9))*
WHILE_TOK = while
END_TOK = end
PRINT_TOK = print

lexer.h

//Definitions for the lexical analyzer
#ifndef LEXER_H
#define LEXER_H
#include <iostream>
#include <string>

enum Token_Type { INVALID_TOK=0, ASSIGN_TOK, PLUS_TOK, MINUS_TOK, LESS_TOK,
                  LESS_EQUAL_TOK, GREATER_TOK, GREATER_EQUAL_TOK, EQUAL_TOK,
                  NOT_EQUAL_TOK, INTEGER_TOK, IDENTIFIER_TOK, WHILE_TOK,
                  END_TOK, PRINT_TOK, EOF_TOK}; 
struct Token {
    Token_Type type;  //type of the token
    std::string repn; //representation of the token
};

//lexer interface
void next_symbol();        //get the next symbol
Token curr_symbol();       //return the current symbol
bool have(Token_Type t);   //if we have the token, advance and return true 
                           //false (with no advance) o.w.
void mustbe(Token_Type t); //like have, but prints error message on failure
std::ostream & operator<<(std::ostream &os, Token t);
#endif

lexer.cpp

// Lexical analyzer for loop lang
#include <iostream>
#include <string>
#include <cctype>
#include "lexer.h"

using namespace std;

static const char* TOKEN_LABEL[] = { "INVALID_TOK", "ASSIGN_TOK", "PLUS_TOK",
                                     "MINUS_TOK", "LESS_TOK", "LESS_EQUAL_TOK",
                                     "GREATER_TOK", "GREATER_EQUAL_TOK",
                                     "EQUAL_TOK", "NOT_EQUAL_TOK", "INTEGER_TOK",
                                     "IDENTIFIER_TOK", "WHILE_TOK", "END_TOK",
                                     "PRINT_TOK", "EOF_TOK"}; 
static Token cur;  //the current symbol


///////////////////////////////////
//helper functions for the lexer
///////////////////////////////////

static void lt() 
{
    cur.repn = cin.get();

    //check for <=
    if(cin.peek() == '=') {
        //<= 
        cur.repn += cin.get();
        cur.type = LESS_EQUAL_TOK;
    } else {
        cur.type = LESS_TOK;
    }
}


static void gt() 
{
    cur.repn = cin.get();

    //check for >=
    if(cin.peek() == '=') {
        //>= 
        cur.repn += cin.get();
        cur.type = GREATER_EQUAL_TOK;
    } else {
        cur.type = GREATER_TOK;
    }
}


static void eq()
{
    cur.repn = cin.get();

    //check for ==
    if(cin.peek() == '=') {
        //== 
        cur.repn += cin.get();
        cur.type = EQUAL_TOK;
    } else {
        cur.type = ASSIGN_TOK;
    }
}


//!= processor
static void neq()
{
    cur.repn = cin.get();

    if(cin.peek() != '=') {
        cur.type = INVALID_TOK;
        return;
    }

    //consume the =
    cur.repn += cin.get();
    cur.type = NOT_EQUAL_TOK;
}


//integers
static void integer()
{
    //blank integer 
    cur.repn = "";
    cur.type = INTEGER_TOK;

    do {
        //add the digit to the repn
        cur.repn+= cin.get();
    } while(isdigit(cin.peek()));
}


//identifiers
static void identifier()
{
    //set our type correctly
    cur.type = IDENTIFIER_TOK;

    //consume the identifier
    while(isalnum(cin.peek())) {
        cur.repn += cin.get();
    }
}


//keyword
static void keyword(string k, Token_Type t)
{
    //start with empty string
    cur.repn = "";

    //consume until we no longer match
    for(int i=0; i<k.length(); i++) {
        //look for mismatch
        if(cin.peek() != k[i]) {
            identifier();
            return;
        }
        cur.repn += cin.get();
    }

    //detect the end of this chain
    if(isalnum(cin.peek())) {
        identifier();
        return;
    }

    //yay it's the keyword!
    cur.type = t; 
}


//get the next symbol
void next_symbol()
{
    //detect end of file
    if(cin.eof()) {
        cur.type = EOF_TOK;
        cur.repn = "";
        return;
    }

    //skip spaces
    while(isspace(cin.peek())) { cin.get(); }

    char c = cin.peek();
    if(c == '+') {
        //the plus token
        cur.type=PLUS_TOK;
        cur.repn = cin.get();
    } else if(c == '-') {
        //the minus token
        cur.type=MINUS_TOK;
        cur.repn = cin.get();
    } else if(c == '<') {
        //< group
        lt();
    } else if(c == '>') {
        // > group
        gt();
    } else if(c == '=') {
        // = group
        eq();
    } else if(c == '!') {
        // !=
        neq();
    } else if(isdigit(c)) {
        integer();
    } else if(c == 'w') {
        keyword("while", WHILE_TOK);
    } else if(c == 'e') {
        keyword("end", END_TOK);
    } else if(c == 'p') {
        keyword("print", PRINT_TOK);
    } else if(isalpha(c)) {
        cur.repn = "";
        identifier();
    } else {
        cur.type = INVALID_TOK;
        cur.repn = cin.get();
    }
}


//return the current symbol
Token curr_symbol()
{
    return cur;
}

//if we have the token, advance and return true 
//false (with no advance) o.w.
bool have(Token_Type t)  
{
    if(cur.type == t) {
        next_symbol();
        return true;
    }

    return false;
}

//like have, but prints error message on failure
void mustbe(Token_Type t)
{
    if(have(t)) {
        return;
    }

    cerr << "Unexpected token: " << cur << endl;
}


//print the token
std::ostream & operator<<(std::ostream &os, Token t)
{
    return os << TOKEN_LABEL[t.type] << ": \"" << t.repn << "\"";
}

main.cpp

#include <iostream>
#include "lexer.h"

using namespace std;

int main()
{
    do {
        next_symbol();
        cout << curr_symbol() << endl;
    } while(not have(EOF_TOK));
}