From 713b5078cddd0b7605b478931fe24ae984326592 Mon Sep 17 00:00:00 2001 From: Markus Hauschild Date: Fri, 31 May 2013 22:20:31 +0200 Subject: [PATCH] ZOMFG IT TOKENIZES!!11eleven --- CMakeLists.txt | 13 +++++---- grammar/grammar.y | 3 ++ src/test.cpp | 72 +++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b9ddf96..7f6e77e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,8 +15,10 @@ INCLUDE_DIRECTORIES (${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES (${CMAKE_SOURCE_DIR}/inc) +ADD_DEFINITIONS("-std=c++0x") + ADD_EXECUTABLE (lemon grammar/lemon.c) -ADD_EXECUTABLE (makeheaders grammar/makeheaders.c) +#ADD_EXECUTABLE (makeheaders grammar/makeheaders.c) ADD_CUSTOM_COMMAND( COMMAND cp @@ -24,17 +26,18 @@ ADD_CUSTOM_COMMAND( COMMAND cp ARGS ${CMAKE_CURRENT_SOURCE_DIR}/grammar/lempar.c ${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/lemon - ARGS -q -m ${CMAKE_CURRENT_BINARY_DIR}/grammar.y +# ARGS -q -m ${CMAKE_CURRENT_BINARY_DIR}/grammar.y + ARGS -q ${CMAKE_CURRENT_BINARY_DIR}/grammar.y COMMAND rm ARGS ${CMAKE_CURRENT_BINARY_DIR}/lempar.c COMMAND rm ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.y - COMMAND ${CMAKE_CURRENT_BINARY_DIR}/makeheaders - ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.c +# COMMAND ${CMAKE_CURRENT_BINARY_DIR}/makeheaders +# ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.c COMMAND mv ARGS ${CMAKE_CURRENT_BINARY_DIR}/grammar.c ${CMAKE_CURRENT_BINARY_DIR}/grammar.cpp DEPENDS lemon - DEPENDS makeheaders +# DEPENDS makeheaders DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/grammar/grammar.y OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/grammar.cpp OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/grammar.h diff --git a/grammar/grammar.y b/grammar/grammar.y index 6264236..2453f46 100644 --- a/grammar/grammar.y +++ b/grammar/grammar.y @@ -9,6 +9,9 @@ %token_type {int} +// whitespace and comments +%type T_WHITESPACE {int} + %right T_ASSIGN. %left T_EQUALS. %left T_LESS. 
diff --git a/src/test.cpp b/src/test.cpp
index c4c52f7..0dac0d0 100644
--- a/src/test.cpp
+++ b/src/test.cpp
@@ -5,8 +5,7 @@
  */
 
 // this file is auto generated from grammar/grammar.y
-// but it does not work yet
-// #include "grammar.h"
+#include "grammar.h"
 
 #include "lexertl/generator.hpp"
 #include "lexertl/lookup.hpp"
@@ -14,5 +13,72 @@
 #include "lexertl/state_machine.hpp"
 
 int main() {
-    return 0;
+    // Interactive tokenizer: prints the id and text of each token read from stdin.
+    lexertl::rules rules;
+    lexertl::state_machine state_machine;
+
+    // keywords
+    rules.add("bool", T_BOOL);
+    rules.add("for", T_FOR);
+    rules.add("if", T_IF);
+    rules.add("int", T_INT);
+    rules.add("return", T_RETURN);
+    rules.add("void", T_VOID);
+    rules.add("rfor", T_RFOR);
+    rules.add("rif", T_RIF);
+
+    // special characters
+    rules.add("\"(\"", T_LPAREN);
+    rules.add("\")\"", T_RPAREN);
+    rules.add("mulder", T_BEGIN);
+    rules.add("scully", T_END);
+    rules.add(",", T_COMMA);
+    rules.add(";", T_SEMICOLON);
+
+    // operators
+    rules.add("=", T_ASSIGN);
+    rules.add("==", T_EQUALS);
+    rules.add("<", T_LESS);
+    rules.add("\"+\"", T_PLUS);
+    rules.add("\"-\"", T_MINUS);
+    rules.add("\"*\"", T_TIMES);
+    rules.add("\"/\"", T_DIV);
+
+    // constants
+    rules.add("true", T_TRUE);
+    rules.add("false", T_FALSE);
+    rules.add("\\d+", T_CINT);
+
+    // identifier
+    rules.add("[a-zA-Z_][a-zA-Z_0-9]*", T_IDENTIFIER);
+
+    // whitespace (matched, but never printed by the loop below)
+    rules.add("\\s+", T_WHITESPACE);
+
+    lexertl::generator::build(rules, state_machine);
+    state_machine.minimise();
+
+    std::cout << "The scully programming language v0.1" << std::endl;
+
+    while (true) {
+        std::cout << "> ";
+        std::string input;
+        if (!std::getline(std::cin, input)) { // still succeeds for a last line without '\n'
+            break; // EOF or any other read failure ends the session instead of looping forever
+        }
+
+        auto iter = input.begin();
+        auto end = input.end();
+        lexertl::smatch results(iter, end);
+
+        do {
+            lexertl::lookup(state_machine, results);
+            std::string s(results.start, results.end);
+            if (results.id != T_WHITESPACE) {
+                std::cout << "Id: " << results.id << ", Token: " << s << std::endl;
+            }
+        } while (results.id != 0); // the result with id 0 is printed too, then the line is done
+    }
+    std::cout << std::endl << "Bye." << std::endl;
+    return 0;
 }