Fix typo in grammar.

Add lexertl.
2013-05-31 21:35:44 +02:00
parent 7be9545e02
commit f3d3389f54
44 changed files with 12465 additions and 2 deletions
--- a/inc/lexertl/parser/parser.hpp
+++ b/inc/lexertl/parser/parser.hpp
--- a/inc/lexertl/parser/tokeniser/re_token.hpp
+++ b/inc/lexertl/parser/tokeniser/re_token.hpp
@@ -0,0 +1,100 @@
+// re_token.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKEN_HPP
+#define LEXERTL_RE_TOKEN_HPP
+
+#include "../../string_token.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Token kinds used by the regex tokeniser and parser.
+// The enumerator order is significant: the values are used as row and column
+// indexes into basic_re_token's precedence table and as indexes into its
+// table of printable names, so new kinds must be added to all three.
+enum token_type
+{
+    BEGIN,
+    REGEX,
+    OREXP,
+    SEQUENCE,
+    SUB,
+    EXPRESSION,
+    REPEAT,
+    DUP,
+    OR,
+    CHARSET,
+    BOL,
+    EOL,
+    MACRO,
+    OPENPAREN,
+    CLOSEPAREN,
+    OPT,
+    AOPT,
+    ZEROORMORE,
+    AZEROORMORE,
+    ONEORMORE,
+    AONEORMORE,
+    REPEATN,
+    AREPEATN,
+    END
+};
+
+// A single token of a regular expression.
+//
+// input_char_type is the character type of the rule text (used for _extra);
+// char_type is the character type of the charset stored in _str.
+template<typename input_char_type, typename char_type>
+struct basic_re_token
+{
+    typedef basic_string_token<char_type> string_token;
+    typedef std::basic_string<input_char_type> string;
+
+    // Kind of token.
+    token_type _type;
+    // Raw text attached to the token (the tokeniser stores the digits of a
+    // {n[,[m]]} repeat or the name of a {MACRO} here).
+    string _extra;
+    // Character set carried by CHARSET tokens.
+    string_token _str;
+
+    // Creates a token of the given kind with empty _extra and _str.
+    basic_re_token (const token_type type_ = BEGIN) :
+        _type (type_), _extra (), _str ()
+    {
+    }
+
+    // Resets the token to a freshly constructed BEGIN token.
+    void clear ()
+    {
+        _str.clear ();
+        _extra.clear ();
+        _type = BEGIN;
+    }
+
+    // Member-wise copy of rhs_.
+    basic_re_token &operator = (const basic_re_token &rhs_)
+    {
+        _type = rhs_._type;
+        _extra = rhs_._extra;
+        _str = rhs_._str;
+
+        return *this;
+    }
+
+    // Looks up the table entry for this token's kind (row) against type_
+    // (column). The result is one of ' ', '<', '=' or '>'.
+    // NOTE(review): presumably these are operator precedence relations
+    // consumed by the parser, with ' ' meaning "no relation" - confirm there.
+    char precedence (const token_type type_) const
+    {
+        // Moved in here for Solaris compiler.
+        static const char table_[END + 1][END + 1] = {
+//        BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP,  | , CHR, BOL, EOL, MCR,  ( ,  ) ,  ? , ?? ,  * , *? ,  + , +? , {n}, {n}?, END
+/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>'},
+/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  |  */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* BOL */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* EOL */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*  (  */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*  )  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*  ?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* ??  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  *  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* *?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  +  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* +?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{nm}?*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}
+};
+        const char (&row_)[END + 1] = table_[_type];
+
+        return row_[type_];
+    }
+
+    // Returns a printable name for this token's kind (for diagnostics).
+    const char *precedence_string () const
+    {
+        // Moved in here for Solaris compiler.
+        static const char *names_[END + 1] =
+        {
+            "BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION",
+            "REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO",
+            "(", ")", "?", "??", "*", "*?", "+", "+?",
+            "{n[,[m]]}", "{n[,[m]]}?", "END"
+        };
+
+        return names_[_type];
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tokeniser/re_tokeniser.hpp
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser.hpp
@@ -0,0 +1,829 @@
+// re_tokeniser.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKENISER_HPP
+#define LEXERTL_RE_TOKENISER_HPP
+
+#include <cstring>
+#include "re_token.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <sstream>
+#include "../../string_token.hpp"
+#include "re_tokeniser_helper.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename rules_char_type, typename char_type, typename id_type>
+class basic_re_tokeniser
+{
+public:
+    typedef basic_re_token<rules_char_type, char_type> re_token;
+    typedef basic_re_tokeniser_helper<rules_char_type, char_type, id_type>
+        tokeniser_helper;
+    typedef typename tokeniser_helper::char_state char_state;
+    typedef typename tokeniser_helper::state state;
+    typedef basic_string_token<char_type> string_token;
+
+    // Reads the next regex token from state_ into *token_.
+    //
+    // lhs_ is only forwarded to open_curly (), where the {-} and {+}
+    // operators combine it with the charset that follows.
+    // Throws runtime_error on malformed input.
+    static void next (re_token *lhs_, state &state_, re_token *token_)
+    {
+        typedef typename string_token::range range;
+
+        rules_char_type ch_ = 0;
+        bool eos_ = state_.next (ch_);
+        bool skipped_ = false;
+
+        token_->clear ();
+
+        // Keep going until a pass neither toggles past a comment nor skips
+        // any whitespace.
+        do
+        {
+            // Each unescaped '"' toggles string mode and is consumed.
+            for (; !eos_ && ch_ == '"'; eos_ = state_.next (ch_))
+            {
+                state_._in_string ^= 1;
+            }
+
+            // (?# ...) comments first, then whitespace / C style comments
+            // (only when the skip_ws flag is set). Both are always called.
+            const bool comment_ = comment (eos_, ch_, state_);
+            const bool ws_ = skip (eos_, ch_, state_);
+
+            skipped_ = comment_ || ws_;
+        } while (skipped_);
+
+        if (eos_)
+        {
+            // Pointless returning index if at end of string
+            if (state_._in_string)
+            {
+                std::ostringstream err_;
+
+                err_ << "Unexpected end of regex (missing '\"') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (err_.str ());
+            }
+
+            if (state_._paren_count)
+            {
+                std::ostringstream err_;
+
+                err_ << "Unexpected end of regex (missing ')') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (err_.str ());
+            }
+
+            token_->_type = END;
+            return;
+        }
+
+        if (ch_ == '\\')
+        {
+            // Escape sequences are honoured inside strings too.
+            token_->_type = CHARSET;
+            escape (state_, token_->_str);
+            return;
+        }
+
+        if (state_._in_string)
+        {
+            // Inside a string every other character is a literal.
+            token_->_type = CHARSET;
+            token_->_str.insert (range (ch_, ch_));
+            return;
+        }
+
+        // Neither escaped nor quoted: look for meta characters.
+        switch (ch_)
+        {
+            case '(':
+                token_->_type = OPENPAREN;
+                ++state_._paren_count;
+                read_options (state_);
+                break;
+            case ')':
+                --state_._paren_count;
+
+                if (state_._paren_count < 0)
+                {
+                    std::ostringstream err_;
+
+                    err_ << "Number of open parenthesis < 0 "
+                        "at index " << state_.index () - 1 <<
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (err_.str ());
+                }
+
+                token_->_type = CLOSEPAREN;
+
+                // Restore the flags saved by read_options () (if any).
+                if (!state_._flags_stack.empty ())
+                {
+                    state_._flags = state_._flags_stack.top ();
+                    state_._flags_stack.pop ();
+                }
+
+                break;
+            case '?':
+                token_->_type = OPT;
+
+                // A trailing '?' selects the "??" form.
+                if (!state_.eos () && *state_._curr == '?')
+                {
+                    token_->_type = AOPT;
+                    state_.increment ();
+                }
+
+                break;
+            case '*':
+                token_->_type = ZEROORMORE;
+
+                // A trailing '?' selects the "*?" form.
+                if (!state_.eos () && *state_._curr == '?')
+                {
+                    token_->_type = AZEROORMORE;
+                    state_.increment ();
+                }
+
+                break;
+            case '+':
+                token_->_type = ONEORMORE;
+
+                // A trailing '?' selects the "+?" form.
+                if (!state_.eos () && *state_._curr == '?')
+                {
+                    token_->_type = AONEORMORE;
+                    state_.increment ();
+                }
+
+                break;
+            case '{':
+                open_curly (lhs_, state_, token_);
+                break;
+            case '|':
+                token_->_type = OR;
+                break;
+            case '^':
+                // Only an anchor as the very first character of a
+                // non-macro regex; otherwise a literal.
+                if (state_._macro || state_._curr - 1 != state_._start)
+                {
+                    token_->_type = CHARSET;
+                    token_->_str.insert (range (ch_, ch_));
+                }
+                else
+                {
+                    token_->_type = BOL;
+                }
+
+                break;
+            case '$':
+                // Only an anchor as the very last character of a
+                // non-macro regex; otherwise a literal.
+                if (state_._macro || state_._curr != state_._end)
+                {
+                    token_->_type = CHARSET;
+                    token_->_str.insert (range (ch_, ch_));
+                }
+                else
+                {
+                    token_->_type = EOL;
+                }
+
+                break;
+            case '.':
+                token_->_type = CHARSET;
+
+                // Built as the complement: start from nothing (or just
+                // '\n' when dot_not_newline is set) and negate.
+                if (state_._flags & dot_not_newline)
+                {
+                    token_->_str.insert (range ('\n', '\n'));
+                }
+
+                token_->_str.negate ();
+                break;
+            case '[':
+                token_->_type = CHARSET;
+                tokeniser_helper::charset (state_, token_->_str);
+                break;
+            case '/':
+            {
+                std::ostringstream err_;
+
+                err_ << "Lookahead ('/') is not supported yet in " <<
+                    "rule id " << state_._id << '.';
+                throw runtime_error (err_.str ());
+            }
+            default:
+            {
+                // With icase set, a cased letter matches both of its cases.
+                const bool both_cases_ = (state_._flags & icase) &&
+                    (std::isupper (ch_, state_._locale) ||
+                    std::islower (ch_, state_._locale));
+
+                token_->_type = CHARSET;
+
+                if (both_cases_)
+                {
+                    char_type upper_ = std::toupper (ch_, state_._locale);
+                    char_type lower_ = std::tolower (ch_, state_._locale);
+
+                    token_->_str.insert (range (upper_, upper_));
+                    token_->_str.insert (range (lower_, lower_));
+                }
+                else
+                {
+                    token_->_str.insert (range (ch_, ch_));
+                }
+
+                break;
+            }
+        }
+    }
+
+private:
+    // Skips a "(?# ...)" comment if ch_ starts one (and we are not inside a
+    // string). Nested parentheses inside the comment are balanced.
+    //
+    // On success ch_/eos_ are advanced to the character after the closing
+    // ')' and true is returned; otherwise nothing is consumed and false is
+    // returned. Throws runtime_error if the comment is not terminated.
+    static bool comment (bool &eos_, rules_char_type &ch_, state &state_)
+    {
+        // ch_ holds the '(' already; state_._curr points just past it.
+        if (eos_ || state_._in_string || ch_ != '(' ||
+            state_.eos () || *state_._curr != '?' ||
+            !(state_._curr + 1 < state_._end) || *(state_._curr + 1) != '#')
+        {
+            return false;
+        }
+
+        std::size_t depth_ = 1;
+
+        // Step over "?#".
+        state_.increment ();
+        state_.increment ();
+
+        for (;;)
+        {
+            eos_ = state_.next (ch_);
+
+            if (ch_ == '(')
+            {
+                ++depth_;
+            }
+            else if (ch_ == ')')
+            {
+                --depth_;
+            }
+
+            // Stop at end of input or at the ')' that closes the comment.
+            if (eos_ || (ch_ == ')' && depth_ == 0))
+            {
+                break;
+            }
+        }
+
+        if (eos_)
+        {
+            std::ostringstream err_;
+
+            // Pointless returning index if at end of string
+            err_ << "Unexpected end of regex (unterminated comment) " <<
+                "in rule id " << state_._id << '.';
+            throw runtime_error (err_.str ());
+        }
+
+        // Load the first character after the comment.
+        eos_ = state_.next (ch_);
+        return true;
+    }
+
+    // When the skip_ws flag is set (and we are not inside a string), skips
+    // any run of whitespace characters and C style /* ... */ comments that
+    // starts at ch_.
+    //
+    // ch_/eos_ are advanced past whatever was skipped. Returns true if
+    // anything was skipped. Throws runtime_error on an unterminated C style
+    // comment.
+    static bool skip (bool &eos_, rules_char_type &ch_, state &state_)
+    {
+        bool skipped_ = false;
+
+        if (eos_ || !(state_._flags & skip_ws) || state_._in_string)
+        {
+            return skipped_;
+        }
+
+        for (;;)
+        {
+            const bool c_comment_ = ch_ == '/' && !state_.eos () &&
+                *state_._curr == '*';
+
+            if (c_comment_)
+            {
+                // Step over the '*' of the opening "/*".
+                state_.increment ();
+                eos_ = state_.next (ch_);
+
+                // Scan for the closing "*/".
+                while (!eos_ && !(ch_ == '*' && !state_.eos () &&
+                    *state_._curr == '/'))
+                {
+                    eos_ = state_.next (ch_);
+                }
+
+                if (eos_)
+                {
+                    std::ostringstream err_;
+
+                    // Pointless returning index if at end of string
+                    err_ << "Unexpected end of regex (unterminated " <<
+                        "C style comment) in rule id " <<
+                        state_._id << '.';
+                    throw runtime_error (err_.str ());
+                }
+
+                // Step over the '/' of "*/" and load what follows.
+                state_.increment ();
+                eos_ = state_.next (ch_);
+                skipped_ = true;
+                continue;
+            }
+
+            const bool space_ = ch_ == ' ' || ch_ == '\t' ||
+                ch_ == '\n' || ch_ == '\r' || ch_ == '\f' || ch_ == '\v';
+
+            if (!space_)
+            {
+                break;
+            }
+
+            eos_ = state_.next (ch_);
+            skipped_ = true;
+        }
+
+        return skipped_;
+    }
+
+    // Called just after a '(' has been consumed. Handles the inline option
+    // syntax "(?flags:" where flags is any sequence of 'i', 's' and 'x',
+    // each optionally preceded by '-' to switch the option off.
+    //
+    // The current flags are pushed on state_._flags_stack so that the
+    // matching ')' can restore them. For a plain '(' the flags are only
+    // pushed when the stack is already in use.
+    // Throws runtime_error on an unknown option character.
+    static void read_options (state &state_)
+    {
+        if (state_.eos () || *state_._curr != '?')
+        {
+            if (!state_._flags_stack.empty ())
+            {
+                state_._flags_stack.push (state_._flags);
+            }
+
+            return;
+        }
+
+        rules_char_type ch_ = 0;
+        bool negate_ = false;
+
+        // Step over the '?' and load the first option character.
+        state_.increment ();
+
+        bool eos_ = state_.next (ch_);
+
+        state_._flags_stack.push (state_._flags);
+
+        for (; !eos_ && ch_ != ':'; eos_ = state_.next (ch_))
+        {
+            switch (ch_)
+            {
+                case '-':
+                    negate_ = !negate_;
+                    break;
+                case 'i':
+                    state_._flags = negate_ ?
+                        (state_._flags & ~icase) :
+                        (state_._flags | icase);
+                    negate_ = false;
+                    break;
+                case 's':
+                    // Inverted on purpose: (?s) lets '.' match newline, so
+                    // it clears dot_not_newline; (?-s) sets it.
+                    state_._flags = negate_ ?
+                        (state_._flags | dot_not_newline) :
+                        (state_._flags & ~dot_not_newline);
+                    negate_ = false;
+                    break;
+                case 'x':
+                    state_._flags = negate_ ?
+                        (state_._flags & ~skip_ws) :
+                        (state_._flags | skip_ws);
+                    negate_ = false;
+                    break;
+                default:
+                {
+                    std::ostringstream err_;
+
+                    err_ << "Unknown option at index " <<
+                        state_.index () - 1 << " in rule id " <<
+                        state_._id << '.';
+                    throw runtime_error (err_.str ());
+                }
+            }
+        }
+
+        // End of string handler will handle early termination
+    }
+
+    // Decodes the escape sequence at the current position of state_ and adds
+    // the characters it denotes to token_.
+    //
+    // tokeniser_helper::escape_sequence () either yields a single character
+    // or a string; a string is run through tokeniser_helper::charset ().
+    // NOTE(review): the "+ 1" presumably skips the leading '[' of a
+    // bracketed charset string - confirm against escape_sequence ().
+    static void escape (state &state_, string_token &token_)
+    {
+        char_type single_ = 0;
+        std::size_t len_ = 0;
+        const char *set_ = tokeniser_helper::escape_sequence (state_,
+            single_, len_);
+
+        if (!set_)
+        {
+            token_.insert (typename string_token::range (single_, single_));
+            return;
+        }
+
+        char_state set_state_ (set_ + 1, set_ + len_, state_._id,
+            state_._flags, state_._locale, false);
+
+        tokeniser_helper::charset (set_state_, token_);
+    }
+
+    // Dispatches on the character following a '{':
+    //   '-'   -> charset difference operator {-}
+    //   '+'   -> charset union operator {+}
+    //   digit -> repeat count {n[,[m]]}
+    //   other -> macro reference {NAME}
+    // Throws runtime_error if the regex ends right after the '{'.
+    static void open_curly (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        if (state_.eos ())
+        {
+            std::ostringstream err_;
+
+            // Pointless returning index if at end of string
+            err_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (err_.str ());
+        }
+
+        if (*state_._curr == '-')
+        {
+            charset_difference (lhs_, state_, token_);
+            return;
+        }
+
+        if (*state_._curr == '+')
+        {
+            charset_union (lhs_, state_, token_);
+            return;
+        }
+
+        if (*state_._curr >= '0' && *state_._curr <= '9')
+        {
+            repeat_n (state_, token_);
+            return;
+        }
+
+        macro (state_, token_);
+    }
+
+    // Shared front end of the {-} and {+} charset operators.
+    //
+    // op_ is the operator character ('-' or '+'); it is only used to build
+    // the error messages. On entry state_ is positioned on that operator
+    // character (the '{' has already been consumed). The function checks
+    // that *lhs_ is a CHARSET, consumes "op}" and reads the following token
+    // into rhs_, which must also be a CHARSET.
+    // Throws runtime_error if any of those checks fails.
+    static void read_charset_operand (const char op_, re_token *lhs_,
+        state &state_, re_token &rhs_)
+    {
+        rules_char_type ch_ = 0;
+
+        if (lhs_->_type != CHARSET)
+        {
+            std::ostringstream ss_;
+
+            ss_ << "CHARSET must precede {" << op_ << "} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        // Consume the operator character; open_curly () has already
+        // established that it is there.
+        state_.next (ch_);
+
+        if (state_.next (ch_))
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        if (ch_ != '}')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        next (lhs_, state_, &rhs_);
+
+        if (rhs_._type != CHARSET)
+        {
+            std::ostringstream ss_;
+
+            ss_ << "CHARSET must follow {" << op_ << "} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    // Implements "lhs{-}rhs": removes the characters of the charset that
+    // follows from *lhs_, then reads the next token into *token_.
+    // Throws runtime_error if either operand is not a CHARSET, if "{-}" is
+    // malformed, or if the resulting charset is empty.
+    static void charset_difference (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        re_token rhs_;
+
+        read_charset_operand ('-', lhs_, state_, rhs_);
+        lhs_->_str.remove (rhs_._str);
+
+        if (lhs_->_str.empty ())
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Empty charset created by {-} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        next (lhs_, state_, token_);
+    }
+
+    // Implements "lhs{+}rhs": adds the characters of the charset that
+    // follows to *lhs_, then reads the next token into *token_.
+    // Throws runtime_error if either operand is not a CHARSET or if "{+}"
+    // is malformed.
+    static void charset_union (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        re_token rhs_;
+
+        read_charset_operand ('+', lhs_, state_, rhs_);
+        lhs_->_str.insert (rhs_._str);
+        next (lhs_, state_, token_);
+    }
+
+    // Parses a repeat count. On entry the '{' has been consumed and the
+    // current character is the first digit.
+    //
+    // SYNTAX:
+    //   {n[,[m]]}
+    // SEMANTIC RULES:
+    //   {0} - INVALID (throw exception)
+    //   {0,} = *
+    //   {0,0} - INVALID (throw exception)
+    //   {0,1} = ?
+    //   {1,} = +
+    //   {min,max} where min == max - {min}
+    //   {min,max} where max < min - INVALID (throw exception)
+    //
+    // The digits (and comma) are collected in token_->_extra. Whatever form
+    // results, a directly following '?' selects the corresponding "A..."
+    // token type (AREPEATN, AZEROORMORE, AONEORMORE, AOPT).
+    static void repeat_n (state &state_, re_token *token_)
+    {
+        rules_char_type ch_ = 0;
+        bool eos_ = state_.next (ch_);
+        std::size_t min_ = 0;
+        std::size_t max_ = 0;
+
+        // Lower bound.
+        for (; !eos_ && ch_ >= '0' && ch_ <= '9'; eos_ = state_.next (ch_))
+        {
+            min_ = min_ * 10 + (ch_ - '0');
+            token_->_extra += ch_;
+        }
+
+        if (eos_)
+        {
+            std::ostringstream err_;
+
+            // Pointless returning index if at end of string
+            err_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (err_.str ());
+        }
+
+        // min_max_: an explicit upper bound different from min_ was given.
+        // repeatn_: the token stays a counted repeat (not *, + or ?).
+        bool min_max_ = false;
+        bool repeatn_ = true;
+
+        if (ch_ == ',')
+        {
+            token_->_extra += ch_;
+            eos_ = state_.next (ch_);
+
+            if (eos_)
+            {
+                std::ostringstream err_;
+
+                // Pointless returning index if at end of string
+                err_ << "Unexpected end of regex (missing '}') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (err_.str ());
+            }
+
+            if (ch_ == '}')
+            {
+                // Small optimisation: {0,} is '*' and {1,} is '+'.
+                if (min_ <= 1)
+                {
+                    token_->_type = min_ == 0 ? ZEROORMORE : ONEORMORE;
+                    repeatn_ = false;
+                }
+            }
+            else
+            {
+                if (ch_ < '0' || ch_ > '9')
+                {
+                    std::ostringstream err_;
+
+                    err_ << "Missing '}' at index " << state_.index () - 1 <<
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (err_.str ());
+                }
+
+                min_max_ = true;
+
+                // Upper bound (at least one digit is known to be present).
+                do
+                {
+                    max_ = max_ * 10 + (ch_ - '0');
+                    token_->_extra += ch_;
+                    eos_ = state_.next (ch_);
+                } while (!eos_ && ch_ >= '0' && ch_ <= '9');
+
+                if (eos_)
+                {
+                    std::ostringstream err_;
+
+                    // Pointless returning index if at end of string
+                    err_ << "Unexpected end of regex (missing '}') "
+                        "in rule id " << state_._id << '.';
+                    throw runtime_error (err_.str ());
+                }
+
+                if (min_ == 0 && max_ == 1)
+                {
+                    // Small optimisation: {0,1} is '?'.
+                    token_->_type = OPT;
+                    repeatn_ = false;
+                }
+                else if (min_ == max_)
+                {
+                    // Small optimisation: {n,n} is {n}, so drop ",n".
+                    token_->_extra.erase (token_->_extra.find (','));
+                    min_max_ = false;
+                    max_ = 0;
+                }
+            }
+        }
+
+        if (ch_ != '}')
+        {
+            std::ostringstream err_;
+
+            err_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (err_.str ());
+        }
+
+        if (repeatn_)
+        {
+            // SEMANTIC VALIDATION follows:
+            // NOTE: {0,} has already become *
+            // therefore we don't check for a comma.
+            if (min_ == 0 && max_ == 0)
+            {
+                std::ostringstream err_;
+
+                err_ << "Cannot have exactly zero repeats preceding index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (err_.str ());
+            }
+
+            if (min_max_ && max_ < min_)
+            {
+                std::ostringstream err_;
+
+                err_ << "Max less than min preceding index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (err_.str ());
+            }
+
+            token_->_type = REPEATN;
+        }
+
+        // Pick the token type a trailing '?' would turn this one into.
+        token_type with_suffix_ = END;
+
+        switch (token_->_type)
+        {
+            case REPEATN:
+                with_suffix_ = AREPEATN;
+                break;
+            case ZEROORMORE:
+                with_suffix_ = AZEROORMORE;
+                break;
+            case ONEORMORE:
+                with_suffix_ = AONEORMORE;
+                break;
+            case OPT:
+                with_suffix_ = AOPT;
+                break;
+            default:
+                return;
+        }
+
+        if (!state_.eos () && *state_._curr == '?')
+        {
+            token_->_type = with_suffix_;
+            state_.increment ();
+        }
+    }
+
+    // Parses a macro reference "{NAME}". On entry the '{' has been consumed.
+    //
+    // NAME must start with a letter or '_' and may continue with letters,
+    // digits, '_' and '-'. The name is appended to token_->_extra and the
+    // token type is set to MACRO.
+    // Throws runtime_error on an invalid name or a missing '}'.
+    static void macro (state &state_, re_token *token_)
+    {
+        rules_char_type ch_ = 0;
+
+        state_.next (ch_);
+
+        const bool valid_start_ = ch_ == '_' ||
+            (ch_ >= 'A' && ch_ <= 'Z') || (ch_ >= 'a' && ch_ <= 'z');
+
+        if (!valid_start_)
+        {
+            std::ostringstream err_;
+
+            err_ << "Invalid MACRO name at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (err_.str ());
+        }
+
+        for (;;)
+        {
+            token_->_extra += ch_;
+
+            if (state_.next (ch_))
+            {
+                std::ostringstream err_;
+
+                // Pointless returning index if at end of string
+                err_ << "Unexpected end of regex " <<
+                    "(missing '}') in rule id " << state_._id << '.';
+                throw runtime_error (err_.str ());
+            }
+
+            const bool name_char_ = ch_ == '_' || ch_ == '-' ||
+                (ch_ >= 'A' && ch_ <= 'Z') || (ch_ >= 'a' && ch_ <= 'z') ||
+                (ch_ >= '0' && ch_ <= '9');
+
+            if (!name_char_)
+            {
+                break;
+            }
+        }
+
+        if (ch_ != '}')
+        {
+            std::ostringstream err_;
+
+            err_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (err_.str ());
+        }
+
+        token_->_type = MACRO;
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp
--- a/inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp
@@ -0,0 +1,115 @@
+// re_tokeniser_state.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKENISER_STATE_HPP
+#define LEXERTL_RE_TOKENISER_STATE_HPP
+
+#include "../../char_traits.hpp"
+#include "../../enums.hpp"
+#include <locale>
+#include "../../size_t.hpp"
+#include <stack>
+
+namespace lexertl
+{
+namespace detail
+{
+// State carried while tokenising a single regex: the character range
+// [_start, _end), the read cursor _curr, the rule id, the current flags
+// together with a stack of flag values, the locale, and a few bookkeeping
+// members (_macro, _paren_count, _in_string, _nl_id).
+template<typename ch_type, typename id_type>
+struct basic_re_tokeniser_state
+{
+    typedef ch_type char_type;
+    typedef typename basic_char_traits<char_type>::index_type index_type;
+
+    // _start and _end are deliberately not const pointers: assign () and
+    // operator = have to be able to rebind them to the source's range.
+    const char_type *_start;
+    const char_type *_end;
+    const char_type *_curr;
+    id_type _id;
+    std::size_t _flags;
+    std::stack<std::size_t> _flags_stack;
+    std::locale _locale;
+    bool _macro;
+    long _paren_count;
+    bool _in_string;
+    id_type _nl_id;
+
+    // Creates a state positioned at start_ with an empty flags stack, a
+    // zero paren count, _in_string false and _nl_id set to the all-bits-set
+    // value of id_type.
+    basic_re_tokeniser_state (const char_type *start_,
+        const char_type * const end_, id_type id_, const std::size_t flags_,
+        const std::locale locale_, const bool macro_) :
+        _start (start_),
+        _end (end_),
+        _curr (start_),
+        _id (id_),
+        _flags (flags_),
+        _flags_stack (),
+        _locale (locale_),
+        _macro (macro_),
+        _paren_count (0),
+        _in_string (false),
+        _nl_id (static_cast<id_type>(~0))
+    {
+    }
+
+    // Member-wise copy. Every member is initialised directly from rhs_
+    // rather than default constructed and then assigned.
+    basic_re_tokeniser_state (const basic_re_tokeniser_state &rhs_) :
+        _start (rhs_._start),
+        _end (rhs_._end),
+        _curr (rhs_._curr),
+        _id (rhs_._id),
+        _flags (rhs_._flags),
+        _flags_stack (rhs_._flags_stack),
+        _locale (rhs_._locale),
+        _macro (rhs_._macro),
+        _paren_count (rhs_._paren_count),
+        _in_string (rhs_._in_string),
+        _nl_id (rhs_._nl_id)
+    {
+    }
+
+    // Explicit assignment operator (originally added to silence a
+    // VC++ 7.1 warning). Copies every member and returns *this.
+    const basic_re_tokeniser_state &operator =
+        (const basic_re_tokeniser_state &rhs_)
+    {
+        assign (rhs_);
+        return *this;
+    }
+
+    // Overwrites every member of this state with the value held by rhs_.
+    void assign (const basic_re_tokeniser_state &rhs_)
+    {
+        _start = rhs_._start;
+        _end = rhs_._end;
+        _curr = rhs_._curr;
+        _id = rhs_._id;
+        _flags = rhs_._flags;
+        _flags_stack = rhs_._flags_stack;
+        _locale = rhs_._locale;
+        _macro = rhs_._macro;
+        _paren_count = rhs_._paren_count;
+        _in_string = rhs_._in_string;
+        _nl_id = rhs_._nl_id;
+    }
+
+    // Fetches the character under the cursor into ch_ and advances.
+    // Returns true (and sets ch_ to 0) when the cursor is already at or
+    // past _end; returns false when a character was read.
+    inline bool next (char_type &ch_)
+    {
+        if (_curr >= _end)
+        {
+            ch_ = 0;
+            return true;
+        }
+        else
+        {
+            ch_ = *_curr;
+            increment ();
+            return false;
+        }
+    }
+
+    // Advances the cursor by one character without any bounds check.
+    inline void increment ()
+    {
+        ++_curr;
+    }
+
+    // Offset of the cursor from _start, in characters.
+    inline std::size_t index () const
+    {
+        return static_cast<std::size_t>(_curr - _start);
+    }
+
+    // True when the cursor has reached (or passed) _end.
+    inline bool eos () const
+    {
+        return _curr >= _end;
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/end_node.hpp
+++ b/inc/lexertl/parser/tree/end_node.hpp
@@ -0,0 +1,112 @@
+// end_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_END_NODE_HPP
+#define LEXERTL_END_NODE_HPP
+
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node that terminates a rule. It stores the ids and DFA switching
+// values supplied at construction, reports itself as an end state and has
+// no children.
+template<typename id_type>
+class basic_end_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // The node is constructed as non-nullable and becomes the single
+    // entry of both its firstpos and its lastpos sets.
+    basic_end_node (const id_type id_, const id_type user_id_,
+        const id_type next_dfa_, const id_type push_dfa_,
+        const bool pop_dfa_) :
+        basic_node<id_type> (false),
+        _id (id_),
+        _user_id (user_id_),
+        _next_dfa (next_dfa_),
+        _push_dfa (push_dfa_),
+        _pop_dfa (pop_dfa_),
+        _followpos ()
+    {
+        node *self_ = this;
+
+        node::_firstpos.push_back (self_);
+        node::_lastpos.push_back (self_);
+    }
+
+    virtual ~basic_end_node ()
+    {
+    }
+
+    // Node classification.
+
+    virtual node_type what_type () const
+    {
+        return node::END;
+    }
+
+    virtual bool end_state () const
+    {
+        return true;
+    }
+
+    // There is nothing below an end node, so nothing is pushed onto
+    // either stack and false is returned.
+    virtual bool traverse (const_node_stack &/*node_stack_*/,
+        bool_stack &/*perform_op_stack_*/) const
+    {
+        return false;
+    }
+
+    // Nothing in this class ever adds to _followpos, so the returned
+    // vector is always empty.
+    virtual const node_vector &followpos () const
+    {
+        return _followpos;
+    }
+
+    // Accessors for the values supplied at construction.
+
+    virtual id_type id () const
+    {
+        return _id;
+    }
+
+    virtual id_type user_id () const
+    {
+        return _user_id;
+    }
+
+    virtual id_type next_dfa () const
+    {
+        return _next_dfa;
+    }
+
+    virtual id_type push_dfa () const
+    {
+        return _push_dfa;
+    }
+
+    virtual bool pop_dfa () const
+    {
+        return _pop_dfa;
+    }
+
+private:
+    id_type _id;
+    id_type _user_id;
+    id_type _next_dfa;
+    id_type _push_dfa;
+    bool _pop_dfa;
+    node_vector _followpos;
+
+    // Intentionally empty: end nodes are not copied.
+    virtual void copy_node (node_ptr_vector &/*node_ptr_vector_*/,
+        node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/,
+        bool &/*down_*/) const
+    {
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/iteration_node.hpp
+++ b/inc/lexertl/parser/tree/iteration_node.hpp
@@ -0,0 +1,103 @@
+// iteration_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_ITERATION_NODE_HPP
+#define LEXERTL_ITERATION_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node that wraps a single child node (_next) and a greedy flag.
+// The node is always nullable and shares the firstpos and lastpos sets
+// of its child.
+template<typename id_type>
+class basic_iteration_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // Copies the child's firstpos/lastpos into this node, then makes every
+    // lastpos node be followed by every firstpos node, and finally passes
+    // greedy_ on to every firstpos node.
+    basic_iteration_node (basic_node<id_type> *next_, const bool greedy_) :
+        basic_node<id_type> (true),
+        _next (next_),
+        _greedy (greedy_)
+    {
+        node_vector &first_ = node::_firstpos;
+        node_vector &last_ = node::_lastpos;
+
+        _next->append_firstpos (first_);
+        _next->append_lastpos (last_);
+
+        for (typename node_vector::iterator iter_ = last_.begin (),
+            end_ = last_.end (); iter_ != end_; ++iter_)
+        {
+            (*iter_)->append_followpos (first_);
+        }
+
+        for (typename node_vector::iterator iter_ = first_.begin (),
+            end_ = first_.end (); iter_ != end_; ++iter_)
+        {
+            (*iter_)->greedy (greedy_);
+        }
+    }
+
+    virtual ~basic_iteration_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::ITERATION;
+    }
+
+    // Schedules the child for processing: pushes true onto
+    // perform_op_stack_ and the child onto node_stack_, then returns true.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+        node_stack_.push (_next);
+        return true;
+    }
+
+private:
+    // Not owner of this pointer...
+    basic_node<id_type> *_next;
+    bool _greedy;
+
+    // When the flag on top of perform_op_stack_ is set, replaces the node
+    // on top of new_node_stack_ with a new iteration node wrapping it;
+    // otherwise sets down_. The flag is popped in both cases.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            basic_node<id_type> *child_ = new_node_stack_.top ();
+
+            // A null slot is appended before the node is allocated and the
+            // new node is then stored straight into that slot.
+            node_ptr_vector_->push_back
+                (static_cast<basic_iteration_node<id_type> *>(0));
+            node_ptr_vector_->back () = new basic_iteration_node
+                (child_, _greedy);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_iteration_node (const basic_iteration_node &);
+    // No assignment.
+    const basic_iteration_node &operator = (const basic_iteration_node &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/leaf_node.hpp
+++ b/inc/lexertl/parser/tree/leaf_node.hpp
@@ -0,0 +1,114 @@
+// leaf_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_LEAF_NODE_HPP
+#define LEXERTL_LEAF_NODE_HPP
+
+#include "../../enums.hpp" // null_token
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node holding a single token id, a greedy flag and its own
+// followpos set. A leaf built from node::null_token () is nullable and
+// has empty firstpos/lastpos sets.
+template<typename id_type>
+class basic_leaf_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // _set_greedy starts out as !greedy_, so a leaf created non-greedy
+    // ignores later calls to greedy (bool).
+    basic_leaf_node (const id_type token_, const bool greedy_) :
+        basic_node<id_type> (token_ == node::null_token ()),
+        _token (token_),
+        _set_greedy (!greedy_),
+        _greedy (greedy_),
+        _followpos ()
+    {
+        // A nullable leaf contributes no positions.
+        if (node::_nullable) return;
+
+        node *self_ = this;
+
+        node::_firstpos.push_back (self_);
+        node::_lastpos.push_back (self_);
+    }
+
+    virtual ~basic_leaf_node ()
+    {
+    }
+
+    // Appends every entry of followpos_ to this leaf's followpos set,
+    // keeping the order and any duplicates.
+    virtual void append_followpos (const node_vector &followpos_)
+    {
+        _followpos.insert (_followpos.end (),
+            followpos_.begin (), followpos_.end ());
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::LEAF;
+    }
+
+    // A leaf has no children: nothing is pushed and false is returned.
+    virtual bool traverse (const_node_stack &/*node_stack_*/,
+        bool_stack &/*perform_op_stack_*/) const
+    {
+        return false;
+    }
+
+    virtual id_type token () const
+    {
+        return _token;
+    }
+
+    // Only the first call made while _set_greedy is still false has any
+    // effect; afterwards the flag is locked.
+    virtual void greedy (const bool greedy_)
+    {
+        if (_set_greedy) return;
+
+        _greedy = greedy_;
+        _set_greedy = true;
+    }
+
+    virtual bool greedy () const
+    {
+        return _greedy;
+    }
+
+    virtual const node_vector &followpos () const
+    {
+        return _followpos;
+    }
+
+    virtual node_vector &followpos ()
+    {
+        return _followpos;
+    }
+
+private:
+    id_type _token;
+    bool _set_greedy;
+    bool _greedy;
+    node_vector _followpos;
+
+    // Creates a new leaf with this leaf's token and current greedy value,
+    // stores it in node_ptr_vector_ and pushes it onto new_node_stack_.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/,
+        bool &/*down_*/) const
+    {
+        // A null slot is appended before the node is allocated and the
+        // new node is then stored straight into that slot.
+        node_ptr_vector_->push_back (static_cast<basic_leaf_node *>(0));
+        node_ptr_vector_->back () = new basic_leaf_node (_token, _greedy);
+        new_node_stack_.push (node_ptr_vector_->back ());
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/node.hpp
+++ b/inc/lexertl/parser/tree/node.hpp
@@ -0,0 +1,241 @@
+// node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_NODE_HPP
+#define LEXERTL_NODE_HPP
+
+#include <assert.h>
+#include "../../containers/ptr_vector.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <stack>
+#include <vector>
+
+namespace lexertl
+{
+namespace detail
+{
+// Abstract base class of the syntax tree nodes. It holds the nullable
+// flag and the firstpos/lastpos sets common to every node, provides a
+// non-recursive tree copy, and declares the virtual interface that the
+// concrete node classes override. The default implementations of the
+// node-specific accessors throw runtime_error.
+template<typename id_type>
+class basic_node
+{
+public:
+    enum node_type {LEAF, SEQUENCE, SELECTION, ITERATION, END};
+
+    typedef std::stack<bool> bool_stack;
+    typedef std::stack<basic_node<id_type> *> node_stack;
+    // stack and vector not owner of node pointers
+    typedef std::stack<const basic_node<id_type> *> const_node_stack;
+    typedef std::vector<basic_node<id_type> *> node_vector;
+    typedef ptr_vector<basic_node<id_type> > node_ptr_vector;
+
+    // Creates a non-nullable node with empty firstpos/lastpos sets.
+    basic_node () :
+        _nullable (false),
+        _firstpos (),
+        _lastpos ()
+    {
+    }
+
+    // Creates a node with the given nullable flag and empty
+    // firstpos/lastpos sets.
+    basic_node (const bool nullable_) :
+        _nullable (nullable_),
+        _firstpos (),
+        _lastpos ()
+    {
+    }
+
+    virtual ~basic_node ()
+    {
+    }
+
+    // The all-bits-set value of id_type.
+    static id_type null_token ()
+    {
+        return static_cast<id_type>(~0);
+    }
+
+    bool nullable () const
+    {
+        return _nullable;
+    }
+
+    // Appends this node's firstpos entries to the end of firstpos_.
+    void append_firstpos (node_vector &firstpos_) const
+    {
+        firstpos_.insert (firstpos_.end (),
+            _firstpos.begin (), _firstpos.end ());
+    }
+
+    // Appends this node's lastpos entries to the end of lastpos_.
+    void append_lastpos (node_vector &lastpos_) const
+    {
+        lastpos_.insert (lastpos_.end (),
+            _lastpos.begin (), _lastpos.end ());
+    }
+
+    // Throws unless overridden by a derived class.
+    virtual void append_followpos (const node_vector &/*followpos_*/)
+    {
+        throw runtime_error ("Internal error node::append_followpos().");
+    }
+
+    // Copies the tree rooted at this node without recursion and returns
+    // the root of the copy. traverse () pushes nodes onto node_stack_
+    // while descending; copy_node () is called on the way back up and may
+    // set down_ to request another descent. The copied nodes are passed
+    // node_ptr_vector_ to be stored in.
+    basic_node *copy (node_ptr_vector &node_ptr_vector_) const
+    {
+        basic_node *new_root_ = 0;
+        const_node_stack node_stack_;
+        bool_stack perform_op_stack_;
+        bool down_ = true;
+        node_stack new_node_stack_;
+
+        node_stack_.push (this);
+
+        while (!node_stack_.empty ())
+        {
+            while (down_)
+            {
+                down_ = node_stack_.top ()->traverse (node_stack_,
+                    perform_op_stack_);
+            }
+
+            while (!down_ && !node_stack_.empty ())
+            {
+                const basic_node *top_ = node_stack_.top ();
+
+                top_->copy_node (node_ptr_vector_, new_node_stack_,
+                    perform_op_stack_, down_);
+
+                // A node that asked for another descent stays on the
+                // stack so that it is visited again afterwards.
+                if (!down_) node_stack_.pop ();
+            }
+        }
+
+        // Exactly one node, the new root, must be left over.
+        assert (new_node_stack_.size () == 1);
+        new_root_ = new_node_stack_.top ();
+        new_node_stack_.pop ();
+        return new_root_;
+    }
+
+    virtual node_type what_type () const = 0;
+
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const = 0;
+
+    node_vector &firstpos ()
+    {
+        return _firstpos;
+    }
+
+    const node_vector &firstpos () const
+    {
+        return _firstpos;
+    }
+
+    // _lastpos modified externally, so not const &
+    node_vector &lastpos ()
+    {
+        return _lastpos;
+    }
+
+    virtual bool end_state () const
+    {
+        return false;
+    }
+
+    // The accessors below throw unless overridden by a derived class.
+    // The return statements after each throw are unreachable and exist
+    // only to keep the Solaris compiler quiet.
+
+    virtual id_type id () const
+    {
+        throw runtime_error ("Internal error node::id().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type user_id () const
+    {
+        throw runtime_error ("Internal error node::user_id().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type next_dfa () const
+    {
+        throw runtime_error ("Internal error node::next_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type push_dfa () const
+    {
+        throw runtime_error ("Internal error node::push_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual bool pop_dfa () const
+    {
+        throw runtime_error ("Internal error node::pop_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return false;
+#endif
+    }
+
+    virtual id_type token () const
+    {
+        throw runtime_error ("Internal error node::token().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual void greedy (const bool /*greedy_*/)
+    {
+        throw runtime_error ("Internal error node::greedy(bool).");
+    }
+
+    virtual bool greedy () const
+    {
+        throw runtime_error ("Internal error node::greedy().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return false;
+#endif
+    }
+
+    virtual const node_vector &followpos () const
+    {
+        throw runtime_error ("Internal error node::followpos().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning.
+        // Must name the data member: firstpos is a member function.
+        return _firstpos;
+#endif
+    }
+
+    virtual node_vector &followpos ()
+    {
+        throw runtime_error ("Internal error node::followpos().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning.
+        // Must name the data member: firstpos is a member function.
+        return _firstpos;
+#endif
+    }
+
+protected:
+    const bool _nullable;
+    node_vector _firstpos;
+    node_vector _lastpos;
+
+    // Called by copy () for the node on top of its traversal stack.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const = 0;
+
+private:
+    basic_node (const basic_node &); // No copy construction.
+    const basic_node &operator = (const basic_node &); // No assignment.
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/selection_node.hpp
+++ b/inc/lexertl/parser/tree/selection_node.hpp
@@ -0,0 +1,106 @@
+// selection_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SELECTION_NODE_HPP
+#define LEXERTL_SELECTION_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node with two children. It is nullable when either child is
+// nullable, and its firstpos/lastpos sets are the children's sets
+// concatenated (left first, then right).
+template<typename id_type>
+class basic_selection_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+
+    basic_selection_node (basic_node<id_type> *left_,
+        basic_node<id_type> *right_) :
+        basic_node<id_type> (left_->nullable () || right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        // firstpos: left entries followed by right entries.
+        _left->append_firstpos (node::_firstpos);
+        _right->append_firstpos (node::_firstpos);
+
+        // lastpos: left entries followed by right entries.
+        _left->append_lastpos (node::_lastpos);
+        _right->append_lastpos (node::_lastpos);
+    }
+
+    virtual ~basic_selection_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::SELECTION;
+    }
+
+    // Pushes true for this node, plus an extra false when the right child
+    // is itself a SEQUENCE, SELECTION or ITERATION node. The children are
+    // pushed right first so that the left child ends up on top.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        const node_type right_type_ = _right->what_type ();
+        const bool right_is_operator_ = right_type_ == node::SEQUENCE ||
+            right_type_ == node::SELECTION ||
+            right_type_ == node::ITERATION;
+
+        perform_op_stack_.push (true);
+
+        if (right_is_operator_)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Not owner of these pointers...
+    basic_node<id_type> *_left;
+    basic_node<id_type> *_right;
+
+    // When the flag on top of perform_op_stack_ is set, combines the two
+    // nodes on top of new_node_stack_ (right on top, left beneath) into a
+    // new selection node that takes their place; otherwise sets down_.
+    // The flag is popped in both cases.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            basic_node<id_type> *right_copy_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            basic_node<id_type> *left_copy_ = new_node_stack_.top ();
+
+            // A null slot is appended before the node is allocated and the
+            // new node is then stored straight into that slot.
+            node_ptr_vector_->push_back
+                (static_cast<basic_selection_node *>(0));
+            node_ptr_vector_->back () = new basic_selection_node
+                (left_copy_, right_copy_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_selection_node (const basic_selection_node &);
+    // No assignment.
+    const basic_selection_node &operator = (const basic_selection_node &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/sequence_node.hpp
+++ b/inc/lexertl/parser/tree/sequence_node.hpp
@@ -0,0 +1,126 @@
+// sequence_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SEQUENCE_NODE_HPP
+#define LEXERTL_SEQUENCE_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node with two children, left followed by right. It is nullable
+// only when both children are nullable. On construction every lastpos
+// node of the left child gets the right child's firstpos appended to its
+// followpos.
+template<typename id_type>
+class basic_sequence_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    basic_sequence_node (basic_node<id_type> *left_,
+        basic_node<id_type> *right_) :
+        basic_node<id_type> (left_->nullable () && right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        const bool left_nullable_ = _left->nullable ();
+        const bool right_nullable_ = _right->nullable ();
+
+        // firstpos: the left child's, plus the right child's when the
+        // left child is nullable.
+        _left->append_firstpos (node::_firstpos);
+
+        if (left_nullable_)
+        {
+            _right->append_firstpos (node::_firstpos);
+        }
+
+        // lastpos: the left child's when the right child is nullable,
+        // followed by the right child's.
+        if (right_nullable_)
+        {
+            _left->append_lastpos (node::_lastpos);
+        }
+
+        _right->append_lastpos (node::_lastpos);
+
+        // Everything that can end the left child is followed by
+        // everything that can start the right child.
+        const node_vector &right_first_ = _right->firstpos ();
+        node_vector &left_last_ = _left->lastpos ();
+        typename node_vector::iterator iter_ = left_last_.begin ();
+        const typename node_vector::iterator end_ = left_last_.end ();
+
+        while (iter_ != end_)
+        {
+            (*iter_)->append_followpos (right_first_);
+            ++iter_;
+        }
+    }
+
+    virtual ~basic_sequence_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::SEQUENCE;
+    }
+
+    // Pushes true for this node, plus an extra false when the right child
+    // is itself a SEQUENCE, SELECTION or ITERATION node. The children are
+    // pushed right first so that the left child ends up on top.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        const node_type right_type_ = _right->what_type ();
+        const bool right_is_operator_ = right_type_ == node::SEQUENCE ||
+            right_type_ == node::SELECTION ||
+            right_type_ == node::ITERATION;
+
+        perform_op_stack_.push (true);
+
+        if (right_is_operator_)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Not owner of these pointers...
+    basic_node<id_type> *_left;
+    basic_node<id_type> *_right;
+
+    // When the flag on top of perform_op_stack_ is set, combines the two
+    // nodes on top of new_node_stack_ (right on top, left beneath) into a
+    // new sequence node that takes their place; otherwise sets down_.
+    // The flag is popped in both cases.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            basic_node<id_type> *right_copy_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            basic_node<id_type> *left_copy_ = new_node_stack_.top ();
+
+            // A null slot is appended before the node is allocated and the
+            // new node is then stored straight into that slot.
+            node_ptr_vector_->push_back
+                (static_cast<basic_sequence_node<id_type> *>(0));
+            node_ptr_vector_->back () = new basic_sequence_node<id_type>
+                (left_copy_, right_copy_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_sequence_node (const basic_sequence_node &);
+    // No assignment.
+    const basic_sequence_node &operator = (const basic_sequence_node &);
+};
+}
+}
+
+#endif