ZOMFG IT TOKENIZES!!11eleven

This commit is contained in:
Markus Hauschild
2013-05-31 22:20:31 +02:00
parent 2f0e5b08d3
commit 713b5078cd
3 changed files with 80 additions and 8 deletions

View File

@@ -15,8 +15,10 @@ INCLUDE_DIRECTORIES (${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES (${CMAKE_SOURCE_DIR}/inc) INCLUDE_DIRECTORIES (${CMAKE_SOURCE_DIR}/inc)
ADD_DEFINITIONS("-std=c++0x")
ADD_EXECUTABLE (lemon grammar/lemon.c) ADD_EXECUTABLE (lemon grammar/lemon.c)
ADD_EXECUTABLE (makeheaders grammar/makeheaders.c) #ADD_EXECUTABLE (makeheaders grammar/makeheaders.c)
ADD_CUSTOM_COMMAND( ADD_CUSTOM_COMMAND(
COMMAND cp COMMAND cp
@@ -24,17 +26,18 @@ ADD_CUSTOM_COMMAND(
COMMAND cp COMMAND cp
ARGS ${CMAKE_CURRENT_SOURCE_DIR}/grammar/lempar.c ${CMAKE_CURRENT_BINARY_DIR} ARGS ${CMAKE_CURRENT_SOURCE_DIR}/grammar/lempar.c ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/lemon COMMAND ${CMAKE_CURRENT_BINARY_DIR}/lemon
ARGS -q -m ${CMAKE_CURRENT_BINARY_DIR}/grammar.y # ARGS -q -m ${CMAKE_CURRENT_BINARY_DIR}/grammar.y
ARGS -q ${CMAKE_CURRENT_BINARY_DIR}/grammar.y
COMMAND rm COMMAND rm
ARGS ${CMAKE_CURRENT_BINARY_DIR}/lempar.c ARGS ${CMAKE_CURRENT_BINARY_DIR}/lempar.c
COMMAND rm COMMAND rm
ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.y ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.y
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/makeheaders # COMMAND ${CMAKE_CURRENT_BINARY_DIR}/makeheaders
ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.c # ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.c
COMMAND mv COMMAND mv
ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.c ${CMAKE_CURRENT_BINARY_DIR}/grammar.cpp ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.c ${CMAKE_CURRENT_BINARY_DIR}/grammar.cpp
DEPENDS lemon DEPENDS lemon
DEPENDS makeheaders # DEPENDS makeheaders
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/grammar/grammar.y DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/grammar/grammar.y
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/grammar.cpp OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/grammar.cpp
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/grammar.h OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/grammar.h

View File

@@ -9,6 +9,9 @@
%token_type {int} %token_type {int}
// whitespace and comments
%type T_WHITESPACE {int}
%right T_ASSIGN. %right T_ASSIGN.
%left T_EQUALS. %left T_EQUALS.
%left T_LESS. %left T_LESS.

View File

@@ -5,8 +5,7 @@
*/ */
// this file is auto generated from grammar/grammar.y // this file is auto generated from grammar/grammar.y
// but it does not work yet #include "grammar.h"
// #include "grammar.h"
#include "lexertl/generator.hpp" #include "lexertl/generator.hpp"
#include "lexertl/lookup.hpp" #include "lexertl/lookup.hpp"
@@ -14,5 +13,72 @@
#include "lexertl/state_machine.hpp" #include "lexertl/state_machine.hpp"
int main() { int main() {
return 0; lexertl::rules rules;
lexertl::state_machine state_machine;
//keywords
rules.add("bool", T_BOOL);
rules.add("for", T_FOR);
rules.add("if", T_IF);
rules.add("int", T_INT);
rules.add("return", T_RETURN);
rules.add("void", T_VOID);
rules.add("rfor", T_RFOR);
rules.add("rif", T_RIF);
// special characters
rules.add("\"(\"", T_LPAREN);
rules.add("\")\"", T_RPAREN);
rules.add("mulder", T_BEGIN);
rules.add("scully", T_END);
rules.add(",", T_COMMA);
rules.add(";", T_SEMICOLON);
// operators
rules.add("=", T_ASSIGN);
rules.add("==", T_EQUALS);
rules.add("<", T_LESS);
rules.add("\"+\"", T_PLUS);
rules.add("\"-\"", T_MINUS);
rules.add("\"*\"", T_TIMES);
rules.add("\"/\"", T_DIV);
// constants
rules.add("true", T_TRUE);
rules.add("false", T_FALSE);
rules.add("\\d+", T_CINT);
// identifier
rules.add("[a-zA-Z_][a-zA-Z_0-9]*", T_IDENTIFIER);
// whitespace
rules.add("\\s+", T_WHITESPACE);
lexertl::generator::build(rules, state_machine);
state_machine.minimise();
std::cout << "The scully programming languae v0.1" << std::endl;
while (true) {
std::cout << "> ";
std::string input;
std::getline(std::cin, input);
if (std::cin.eof()) {
std::cout << std::endl << "Bye." << std::endl;
return 0;
}
auto iter = input.begin();
auto end = input.end();
lexertl::smatch results(iter, end);
do {
lexertl::lookup (state_machine, results);
std::string s(results.start, results.end);
if (results.id != T_WHITESPACE) {
std::cout << "Id: " << results.id << ", Token: " << s << std::endl;
}
} while (results.id != 0);
}
return 0;
} }