Fix typo in grammar.

Add lexertl.
2013-05-31 21:35:44 +02:00
parent 7be9545e02
commit f3d3389f54
44 changed files with 12465 additions and 2 deletions
--- a/inc/lexertl/bool.hpp
+++ b/inc/lexertl/bool.hpp
@@ -0,0 +1,22 @@
+// bool.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_BOOL_H
+#define LEXERTL_BOOL_H
+
+namespace lexertl
+{
+// Compile-time boolean tag: bool_<true> and bool_<false> are two distinct,
+// empty types.
+// Named template param for compiler compatibility
+template<bool b>
+struct bool_
+{
+};
+
+// Short names for the two instantiations.
+typedef bool_<true> true_;
+typedef bool_<false> false_;
+}
+
+#endif
--- a/inc/lexertl/char_traits.hpp
+++ b/inc/lexertl/char_traits.hpp
@@ -0,0 +1,50 @@
+// char_traits.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_CHAR_TRAITS_H
+#define LEXERTL_CHAR_TRAITS_H
+
+#include <cstddef>
+
+namespace lexertl
+{
+// Generic character traits: the character type doubles as the index type.
+template<typename ch_type>
+struct basic_char_traits
+{
+    typedef ch_type char_type;
+    typedef ch_type index_type;
+
+    // Returns the character unchanged.
+    static index_type index (const char_type ch)
+    {
+        const index_type idx_ = ch;
+
+        return idx_;
+    }
+
+    // Returns 0x10ffff for character types wider than two bytes, otherwise
+    // ~0 converted to index_type.
+    static index_type max_val ()
+    {
+        const bool wide_ = sizeof(char_type) > 2;
+
+        return wide_ ? 0x10ffff : static_cast<index_type>(~0);
+    }
+};
+
+// Specialisation for char: indexes are unsigned char, so index () never
+// yields a negative value whatever the signedness of plain char.
+template<>
+struct basic_char_traits<char>
+{
+    typedef char char_type;
+    typedef unsigned char index_type;
+
+    // Returns ch converted to unsigned char.
+    static index_type index (const char ch)
+    {
+        const index_type idx_ = static_cast<index_type>(ch);
+
+        return idx_;
+    }
+
+    // Returns ~0 converted to unsigned char.
+    static index_type max_val ()
+    {
+        const index_type all_bits_ = static_cast<index_type>(~0);
+
+        return all_bits_;
+    }
+};
+}
+
+#endif
--- a/inc/lexertl/compile_assert.hpp
+++ b/inc/lexertl/compile_assert.hpp
@@ -0,0 +1,24 @@
+// compile_assert.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_COMPILE_ASSERT_H
+#define LEXERTL_COMPILE_ASSERT_H
+
+namespace lexertl
+{
+// Compile-time assertion helper. The primary template is only declared here;
+// the <true> specialisation below is the only definition in this header.
+// Named template param for compiler compatibility
+template<bool b>
+struct compile_assert;
+
+// Defined case: exposes value == 1.
+// enum for compiler compatibility
+template<>
+struct compile_assert<true>
+{
+    enum {value = 1};
+};
+}
+
+#endif
--- a/inc/lexertl/containers/bitvector.hpp
+++ b/inc/lexertl/containers/bitvector.hpp
@@ -0,0 +1,228 @@
+// bitvector.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_BITVECTOR_HPP
+#define LEXERTL_BITVECTOR_HPP
+
+#include <vector>
+
+namespace lexertl
+{
+// Bit set stored as a std::vector of blocks of type T (sizeof(T) * 8 bits per
+// block). The number of blocks is fixed at construction.
+// All single-bit masks are built as std::size_t, so T is expected to be an
+// unsigned integer type no wider than std::size_t.
+template<typename T>
+class basic_bitvector
+{
+public:
+    // Proxy for one bit inside a block. Returned by the non-const
+    // operator [] so that a single bit can be read or assigned.
+    template<typename Ty>
+    class reference
+    {
+    public:
+        // block_: the block holding the bit, mask_: mask selecting the bit.
+        reference (Ty &block_, const std::size_t mask_) :
+            _block (block_),
+            _mask (mask_)
+        {
+        }
+
+        // True when the referenced bit is set.
+        operator bool () const
+        {
+            return (_block & _mask) != 0;
+        }
+
+        // Sets or clears the referenced bit.
+        reference<Ty> &operator = (const bool bit_)
+        {
+            if (bit_)
+            {
+                _block |= _mask;
+            }
+            else
+            {
+                _block &= ~_mask;
+            }
+
+            return *this;
+        }
+
+        // Copies the value of the bit rhs_ refers to (not the binding).
+        // The original version had no return statement, which is undefined
+        // behaviour for a function returning a reference.
+        reference<Ty> &operator = (const reference<Ty> &rhs_)
+        {
+            return *this = static_cast<bool>(rhs_);
+        }
+
+    private:
+        Ty &_block;
+        const std::size_t _mask;
+    };
+
+    // Creates enough zeroed blocks to hold size_ bits.
+    basic_bitvector (const std::size_t size_) :
+        _vec (block (size_) + (bit (size_) ? 1 : 0), 0)
+    {
+    }
+
+    basic_bitvector (const basic_bitvector &rhs_) :
+        _vec (rhs_._vec)
+    {
+    }
+
+    basic_bitvector &operator = (const basic_bitvector &rhs_)
+    {
+        if (&rhs_ != this)
+        {
+            _vec = rhs_._vec;
+        }
+
+        return *this;
+    }
+
+    // Reads bit index_. No bounds checking is performed.
+    bool operator [] (const std::size_t index_) const
+    {
+        return (_vec[block (index_)] & mask (index_)) != 0;
+    }
+
+    // Returns an assignable proxy for bit index_. No bounds checking.
+    reference<T> operator [] (const std::size_t index_)
+    {
+        return reference<T> (_vec[block (index_)], mask (index_));
+    }
+
+    // Block-wise OR; only the blocks present in both operands are combined.
+    basic_bitvector<T> &operator |= (const basic_bitvector<T> &rhs_)
+    {
+        typename t_vector::iterator lhs_iter_ = _vec.begin ();
+        typename t_vector::iterator lhs_end_ = _vec.end ();
+        typename t_vector::const_iterator rhs_iter_ = rhs_._vec.begin ();
+        typename t_vector::const_iterator rhs_end_ = rhs_._vec.end ();
+
+        for (; lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_;
+            ++lhs_iter_, ++rhs_iter_)
+        {
+            *lhs_iter_ |= *rhs_iter_;
+        }
+
+        return *this;
+    }
+
+    // Block-wise AND; only the blocks present in both operands are combined.
+    basic_bitvector<T> &operator &= (const basic_bitvector<T> &rhs_)
+    {
+        typename t_vector::iterator lhs_iter_ = _vec.begin ();
+        typename t_vector::iterator lhs_end_ = _vec.end ();
+        typename t_vector::const_iterator rhs_iter_ = rhs_._vec.begin ();
+        typename t_vector::const_iterator rhs_end_ = rhs_._vec.end ();
+
+        for (; lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_;
+            ++lhs_iter_, ++rhs_iter_)
+        {
+            *lhs_iter_ &= *rhs_iter_;
+        }
+
+        return *this;
+    }
+
+    // Zeroes every block; the number of blocks is unchanged.
+    void clear ()
+    {
+        typename t_vector::iterator iter_ = _vec.begin ();
+        typename t_vector::iterator end_ = _vec.end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            *iter_ = 0;
+        }
+    }
+
+    // True when at least one block is non-zero.
+    bool any () const
+    {
+        typename t_vector::const_iterator iter_ = _vec.begin ();
+        typename t_vector::const_iterator end_ = _vec.end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            if (*iter_) break;
+        }
+
+        return iter_ != end_;
+    }
+
+    // Inverts every block. The requested bit count is not stored, so any
+    // spare bits in the last block are inverted as well.
+    void negate ()
+    {
+        typename t_vector::iterator iter_ = _vec.begin ();
+        typename t_vector::iterator end_ = _vec.end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            *iter_ = ~*iter_;
+        }
+    }
+
+    // Index of the lowest set bit, or npos () when no bit is set.
+    std::size_t find_first () const
+    {
+        return find_next (npos ());
+    }
+
+    // Index of the first set bit after index_, or npos () when there is none.
+    // Passing npos () starts the search at bit 0.
+    std::size_t find_next (const std::size_t index_) const
+    {
+        std::size_t ret_ = npos ();
+        const std::size_t block_ = index_ == npos () ? 0 : block (index_ + 1);
+        std::size_t bit_ = index_ == npos () ? 0 : bit (index_ + 1);
+
+        // Starting beyond the last block: nothing to search, and forming
+        // begin () + block_ would be undefined.
+        if (block_ >= _vec.size ()) return ret_;
+
+        typename t_vector::const_iterator iter_ = _vec.begin () + block_;
+        typename t_vector::const_iterator end_ = _vec.end ();
+
+        for (std::size_t i_ = block_; iter_ != end_; ++iter_, ++i_)
+        {
+            // Any bit set at position bit_ or above in this block?
+            const bool bits_ = (*iter_ & (static_cast<T>(~0) << bit_)) != 0;
+
+            if (bits_)
+            {
+                std::size_t j_ = bit_;
+                // Shift a std::size_t, not an int, so that bit positions of
+                // 31 and above are handled.
+                std::size_t b_ = static_cast<std::size_t>(1) << bit_;
+                bool found_ = false;
+
+                for (; j_ < sizeof(T) * 8; ++j_, b_ <<= 1)
+                {
+                    if (*iter_ & b_)
+                    {
+                        found_ = true;
+                        break;
+                    }
+                }
+
+                if (found_)
+                {
+                    ret_ = i_ * sizeof(T) * 8 + j_;
+                    break;
+                }
+            }
+
+            // Later blocks are scanned from their first bit.
+            bit_ = 0;
+        }
+
+        return ret_;
+    }
+
+    // Sentinel meaning "no bit found".
+    std::size_t npos () const
+    {
+        return static_cast<std::size_t>(~0);
+    }
+
+private:
+    typedef std::vector<T> t_vector;
+
+    t_vector _vec;
+
+    // Index of the block containing bit index_.
+    std::size_t block (const std::size_t index_) const
+    {
+        return index_ / (sizeof(T) * 8);
+    }
+
+    // Position of bit index_ within its block.
+    std::size_t bit (const std::size_t index_) const
+    {
+        return index_ % (sizeof(T) * 8);
+    }
+
+    // Mask selecting bit index_ within its block. Built from a std::size_t
+    // because shifting the int literal 1 is undefined for shift counts of
+    // 31 and above.
+    std::size_t mask (const std::size_t index_) const
+    {
+        return static_cast<std::size_t>(1) << bit (index_);
+    }
+};
+}
+
+#endif
--- a/inc/lexertl/containers/ptr_list.hpp
+++ b/inc/lexertl/containers/ptr_list.hpp
@@ -0,0 +1,69 @@
+// ptr_list.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_LIST_HPP
+#define LEXERTL_PTR_LIST_HPP
+
+#include <list>
+
+namespace lexertl
+{
+namespace detail
+{
+// Wrapper around a std::list of raw pointers. Every pointer still stored in
+// the list is deleted by clear () and therefore by the destructor.
+template<typename ptr_type>
+class ptr_list
+{
+public:
+    typedef std::list<ptr_type *> list;
+
+    ptr_list () :
+        _list ()
+    {
+    }
+
+    ~ptr_list ()
+    {
+        clear ();
+    }
+
+    // Direct access to the wrapped list.
+    list &operator * ()
+    {
+        return _list;
+    }
+
+    const list &operator * () const
+    {
+        return _list;
+    }
+
+    // Member access on the wrapped list (e.g. ptrs_->push_back (...)).
+    list *operator -> ()
+    {
+        return &**this;
+    }
+
+    const list *operator -> () const
+    {
+        return &**this;
+    }
+
+    // Deletes the front pointer and removes it, until the list is empty.
+    void clear ()
+    {
+        for (; !_list.empty (); _list.pop_front ())
+        {
+            delete _list.front ();
+        }
+    }
+
+private:
+    list _list;
+
+    // Declared but not defined: copying is not supported.
+    ptr_list (const ptr_list &);
+    ptr_list &operator = (const ptr_list &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/containers/ptr_map.hpp
+++ b/inc/lexertl/containers/ptr_map.hpp
@@ -0,0 +1,72 @@
+// ptr_map.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_MAP_HPP
+#define LEXERTL_PTR_MAP_HPP
+
+#include <map>
+
+namespace lexertl
+{
+namespace detail
+{
+// Wrapper around a std::map whose mapped values are raw pointers. Every
+// mapped pointer is deleted by clear () and therefore by the destructor.
+template<typename key_type, typename ptr_type>
+class ptr_map
+{
+public:
+    typedef std::map<key_type, ptr_type *> map;
+    typedef std::pair<key_type, ptr_type *> pair;
+    typedef std::pair<typename map::iterator, bool> iter_pair;
+
+    ptr_map ()
+    {
+    }
+
+    ~ptr_map ()
+    {
+        clear ();
+    }
+
+    // Direct access to the wrapped map.
+    map &operator * ()
+    {
+        return _map;
+    }
+
+    const map &operator * () const
+    {
+        return _map;
+    }
+
+    // Member access on the wrapped map (e.g. ptrs_->insert (...)).
+    map *operator -> ()
+    {
+        return &**this;
+    }
+
+    const map *operator -> () const
+    {
+        return &**this;
+    }
+
+    // Deletes every mapped pointer, then empties the map.
+    void clear ()
+    {
+        typename map::iterator curr_ = _map.begin ();
+        const typename map::iterator last_ = _map.end ();
+
+        while (curr_ != last_)
+        {
+            delete curr_->second;
+            ++curr_;
+        }
+
+        _map.clear ();
+    }
+
+private:
+    map _map;
+
+    // Declared but not defined: copying is not supported.
+    ptr_map (const ptr_map &);
+    ptr_map &operator = (const ptr_map &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/containers/ptr_stack.hpp
+++ b/inc/lexertl/containers/ptr_stack.hpp
@@ -0,0 +1,69 @@
+// ptr_stack.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_STACK_HPP
+#define LEXERTL_PTR_STACK_HPP
+
+#include <stack>
+
+namespace lexertl
+{
+namespace detail
+{
+// Wrapper around a std::stack of raw pointers. Every pointer still on the
+// stack is deleted by clear () and therefore by the destructor.
+template<typename ptr_type>
+class ptr_stack
+{
+public:
+    typedef std::stack<ptr_type *> stack;
+
+    ptr_stack () :
+        _stack ()
+    {
+    }
+
+    ~ptr_stack ()
+    {
+        clear ();
+    }
+
+    // Direct access to the wrapped stack.
+    stack &operator * ()
+    {
+        return _stack;
+    }
+
+    const stack &operator * () const
+    {
+        return _stack;
+    }
+
+    // Member access on the wrapped stack (e.g. ptrs_->push (...)).
+    stack *operator -> ()
+    {
+        return &**this;
+    }
+
+    const stack *operator -> () const
+    {
+        return &**this;
+    }
+
+    // Deletes the top pointer and pops it, until the stack is empty.
+    void clear ()
+    {
+        for (; !_stack.empty (); _stack.pop ())
+        {
+            delete _stack.top ();
+        }
+    }
+
+private:
+    stack _stack;
+
+    // Declared but not defined: copying is not supported.
+    ptr_stack (const ptr_stack &);
+    ptr_stack &operator = (const ptr_stack &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/containers/ptr_vector.hpp
+++ b/inc/lexertl/containers/ptr_vector.hpp
@@ -0,0 +1,106 @@
+// ptr_vector.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_VECTOR_HPP
+#define LEXERTL_PTR_VECTOR_HPP
+
+#include "../size_t.hpp"
+#include <vector>
+
+namespace lexertl
+{
+namespace detail
+{
+// Wrapper around a std::vector of raw pointers. Every stored pointer is
+// deleted by clear () and therefore by the destructor.
+template<typename ptr_type>
+class ptr_vector
+{
+public:
+    typedef std::vector<ptr_type *> vector;
+
+    ptr_vector () :
+        _vector ()
+    {
+    }
+
+    ~ptr_vector ()
+    {
+        clear ();
+    }
+
+    // Direct access to the wrapped vector.
+    vector &operator * ()
+    {
+        return _vector;
+    }
+
+    const vector &operator * () const
+    {
+        return _vector;
+    }
+
+    // Member access on the wrapped vector (e.g. ptrs_->push_back (...)).
+    vector *operator -> ()
+    {
+        return &**this;
+    }
+
+    const vector *operator -> () const
+    {
+        return &**this;
+    }
+
+    // Unchecked element access; yields the stored pointer itself.
+    ptr_type * &operator [] (const std::size_t index_)
+    {
+        return _vector[index_];
+    }
+
+    ptr_type * const &operator [] (const std::size_t index_) const
+    {
+        return _vector[index_];
+    }
+
+    // Equal when both vectors have the same size and the pointed-to objects
+    // compare equal pairwise. Every stored pointer is dereferenced.
+    bool operator == (const ptr_vector &rhs_) const
+    {
+        const std::size_t size_ = _vector.size ();
+
+        if (size_ != rhs_._vector.size ()) return false;
+
+        for (std::size_t i_ = 0; i_ < size_; ++i_)
+        {
+            const bool same_ = *_vector[i_] == *rhs_._vector[i_];
+
+            if (!same_) return false;
+        }
+
+        return true;
+    }
+
+    // Deletes every stored pointer in index order, then empties the vector.
+    void clear ()
+    {
+        for (std::size_t i_ = 0, size_ = _vector.size (); i_ < size_; ++i_)
+        {
+            delete _vector[i_];
+        }
+
+        _vector.clear ();
+    }
+
+private:
+    vector _vector;
+
+    // Declared but not defined: copying is not supported.
+    ptr_vector (const ptr_vector &);
+    ptr_vector &operator = (const ptr_vector &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/debug.hpp
+++ b/inc/lexertl/debug.hpp
@@ -0,0 +1,353 @@
+// debug.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_DEBUG_HPP
+#define LEXERTL_DEBUG_HPP
+
+#include <map>
+#include <ostream>
+#include "rules.hpp"
+#include "size_t.hpp"
+#include "state_machine.hpp"
+#include "string_token.hpp"
+#include <vector>
+
+namespace lexertl
+{
+// Writes a textual dump of a state machine to a narrow or wide output stream.
+// sm is the state machine type to dump; it is first converted to a
+// basic_char_state_machine, which is what actually gets printed.
+template<typename sm, typename char_type, typename id_type = std::size_t,
+    bool is_dfa = true>
+class basic_debug
+{
+public:
+    typedef lexertl::basic_char_state_machine<char_type, id_type, is_dfa>
+        char_state_machine;
+    typedef std::basic_ostream<char_type> ostream;
+    typedef lexertl::basic_rules<char_type, id_type> rules;
+    typedef std::basic_string<char_type> string;
+
+    // Converts sm_ and dumps it, labelling each lexer state with
+    // rules_.state (index).
+    static void dump (const sm &sm_, rules &rules_, ostream &stream_)
+    {
+        char_state_machine csm_;
+
+        sm_to_csm (sm_, csm_);
+        dump (csm_, rules_, stream_);
+    }
+
+    // Converts sm_ and dumps it, labelling each lexer state with its index.
+    static void dump (const sm &sm_, ostream &stream_)
+    {
+        char_state_machine csm_;
+
+        sm_to_csm (sm_, csm_);
+        dump (csm_, stream_);
+    }
+
+    // Dumps every dfa in csm_, each headed by "Lexer state: " followed by
+    // rules_.state (index).
+    static void dump (const char_state_machine &csm_, rules &rules_,
+        ostream &stream_)
+    {
+        for (std::size_t dfa_ = 0, dfas_ = csm_.size (); dfa_ < dfas_; ++dfa_)
+        {
+            lexer_state (stream_);
+            stream_ << rules_.state (dfa_) << std::endl << std::endl;
+
+            dump_ex (csm_._sm_deque[dfa_], stream_);
+        }
+    }
+
+    // Dumps every dfa in csm_, each headed by "Lexer state: " followed by
+    // the dfa index.
+    static void dump (const char_state_machine &csm_, ostream &stream_)
+    {
+        for (std::size_t dfa_ = 0, dfas_ = csm_.size (); dfa_ < dfas_; ++dfa_)
+        {
+            lexer_state (stream_);
+            stream_ << dfa_ << std::endl << std::endl;
+
+            dump_ex (csm_._sm_deque[dfa_], stream_);
+        }
+    }
+
+protected:
+    typedef typename char_state_machine::state dfa_state;
+    typedef typename dfa_state::string_token string_token;
+    typedef std::basic_stringstream<char_type> stringstream;
+
+    // Fills csm_ from the internal tables of sm_. For every dfa with a
+    // non-zero alphabet size, the 256 lookup entries are walked and each
+    // character is added to the token of the column its entry selects.
+    static void sm_to_csm (const sm &sm_, char_state_machine &csm_)
+    {
+        const detail::basic_internals<id_type> &internals_ = sm_.data ();
+        const std::size_t dfas_ = internals_._dfa->size ();
+
+        for (id_type i_ = 0; i_ < dfas_; ++i_)
+        {
+            if (internals_._dfa_alphabet[i_] == 0) continue;
+
+            // Columns below transitions_index are not transition columns.
+            const std::size_t alphabet_ = internals_._dfa_alphabet[i_] -
+                transitions_index;
+            typename char_state_machine::string_token_vector token_vector_
+                (alphabet_, string_token ());
+            id_type *ptr_ = &internals_._lookup[i_]->front ();
+
+            for (std::size_t c_ = 0; c_ < 256; ++c_, ++ptr_)
+            {
+                if (*ptr_ >= transitions_index)
+                {
+                    string_token &token_ = token_vector_
+                        [*ptr_ - transitions_index];
+
+                    token_.insert (typename string_token::range
+                        (typename string_token::index_type (c_),
+                        typename string_token::index_type (c_)));
+                }
+            }
+
+            csm_.append (token_vector_, internals_, i_);
+        }
+    }
+
+    // Writes every state of dfa_: the state number, end state details when
+    // present, BOL/EOL targets when set, then one "[chars] -> target" line
+    // per transition.
+    static void dump_ex (const typename char_state_machine::dfa &dfa_,
+        ostream &stream_)
+    {
+        const std::size_t states_ = dfa_._states.size ();
+        const id_type bol_index_ = dfa_._bol_index;
+        typename dfa_state::id_type_string_token_map::const_iterator iter_;
+        typename dfa_state::id_type_string_token_map::const_iterator end_;
+
+        for (std::size_t i_ = 0; i_ < states_; ++i_)
+        {
+            const dfa_state &state_ = dfa_._states[i_];
+
+            state (stream_);
+            stream_ << i_ << std::endl;
+
+            if (state_._end_state)
+            {
+                end_state (stream_);
+
+                if (state_._push_pop_dfa == dfa_state::push_dfa)
+                {
+                    push (stream_);
+                    stream_ << state_._push_dfa;
+                }
+                else if (state_._push_pop_dfa == dfa_state::pop_dfa)
+                {
+                    pop (stream_);
+                }
+
+                id (stream_);
+                stream_ << static_cast<std::size_t>(state_._id);
+                user_id (stream_);
+                stream_ << static_cast<std::size_t>(state_._user_id);
+                dfa (stream_);
+                stream_ << static_cast<std::size_t>(state_._next_dfa);
+                stream_ << std::endl;
+            }
+
+            // The BOL target belongs to the dfa, so it is only shown once,
+            // with state 0.
+            if (i_ == 0 && bol_index_ != char_state_machine::npos ())
+            {
+                bol (stream_);
+                stream_ << static_cast<std::size_t>(bol_index_) << std::endl;
+            }
+
+            if (state_._eol_index != char_state_machine::npos ())
+            {
+                eol (stream_);
+                stream_ << static_cast<std::size_t>(state_._eol_index) <<
+                    std::endl;
+            }
+
+            iter_ = state_._transitions.begin ();
+            end_ = state_._transitions.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                // Work on a copy because the token may be negated below.
+                string_token token_ = iter_->second;
+
+                open_bracket (stream_);
+
+                if (!iter_->second.any () && iter_->second.negatable ())
+                {
+                    token_.negate ();
+                    negated (stream_);
+                }
+
+                string chars_;
+                typename string_token::range_vector::const_iterator
+                    ranges_iter_ = token_._ranges.begin ();
+                typename string_token::range_vector::const_iterator
+                    ranges_end_ = token_._ranges.end ();
+
+                for (; ranges_iter_ != ranges_end_; ++ranges_iter_)
+                {
+                    if (ranges_iter_->first == '^' ||
+                        ranges_iter_->first == ']')
+                    {
+                        stream_ << '\\';
+                    }
+
+                    chars_ = string_token::escape_char
+                        (ranges_iter_->first);
+
+                    if (ranges_iter_->first != ranges_iter_->second)
+                    {
+                        if (ranges_iter_->first + 1 < ranges_iter_->second)
+                        {
+                            chars_ += '-';
+                        }
+
+                        if (ranges_iter_->second == '^' ||
+                            ranges_iter_->second == ']')
+                        {
+                            // Append to chars_ rather than writing to the
+                            // stream: chars_ has not been written yet, so a
+                            // backslash sent to the stream here would land
+                            // in front of the range's first character.
+                            chars_ += '\\';
+                        }
+
+                        chars_ += string_token::escape_char
+                            (ranges_iter_->second);
+                    }
+
+                    stream_ << chars_;
+                }
+
+                close_bracket (stream_);
+                stream_ << static_cast<std::size_t>(iter_->first) <<
+                    std::endl;
+            }
+
+            stream_ << std::endl;
+        }
+    }
+
+    // Label writers. Each label has a narrow and a wide overload so that the
+    // literal matches the character type of the stream.
+
+    static void lexer_state (std::ostream &stream_)
+    {
+        stream_ << "Lexer state: ";
+    }
+
+    static void lexer_state (std::wostream &stream_)
+    {
+        stream_ << L"Lexer state: ";
+    }
+
+    static void state (std::ostream &stream_)
+    {
+        stream_ << "State: ";
+    }
+
+    static void state (std::wostream &stream_)
+    {
+        stream_ << L"State: ";
+    }
+
+    static void bol (std::ostream &stream_)
+    {
+        stream_ << "  BOL -> ";
+    }
+
+    static void bol (std::wostream &stream_)
+    {
+        stream_ << L"  BOL -> ";
+    }
+
+    static void eol (std::ostream &stream_)
+    {
+        stream_ << "  EOL -> ";
+    }
+
+    static void eol (std::wostream &stream_)
+    {
+        stream_ << L"  EOL -> ";
+    }
+
+    static void end_state (std::ostream &stream_)
+    {
+        stream_ << "  END STATE";
+    }
+
+    static void end_state (std::wostream &stream_)
+    {
+        stream_ << L"  END STATE";
+    }
+
+    static void id (std::ostream &stream_)
+    {
+        stream_ << ", Id = ";
+    }
+
+    static void id (std::wostream &stream_)
+    {
+        stream_ << L", Id = ";
+    }
+
+    static void push (std::ostream &stream_)
+    {
+        stream_ << ", PUSH ";
+    }
+
+    static void push (std::wostream &stream_)
+    {
+        stream_ << L", PUSH ";
+    }
+
+    static void pop (std::ostream &stream_)
+    {
+        stream_ << ", POP";
+    }
+
+    static void pop (std::wostream &stream_)
+    {
+        stream_ << L", POP";
+    }
+
+    static void user_id (std::ostream &stream_)
+    {
+        stream_ << ", User Id = ";
+    }
+
+    static void user_id (std::wostream &stream_)
+    {
+        stream_ << L", User Id = ";
+    }
+
+    static void open_bracket (std::ostream &stream_)
+    {
+        stream_ << "  [";
+    }
+
+    static void open_bracket (std::wostream &stream_)
+    {
+        stream_ << L"  [";
+    }
+
+    static void negated (std::ostream &stream_)
+    {
+        stream_ << "^";
+    }
+
+    static void negated (std::wostream &stream_)
+    {
+        stream_ << L"^";
+    }
+
+    static void close_bracket (std::ostream &stream_)
+    {
+        stream_ << "] -> ";
+    }
+
+    static void close_bracket (std::wostream &stream_)
+    {
+        stream_ << L"] -> ";
+    }
+
+    static void dfa (std::ostream &stream_)
+    {
+        stream_ << ", dfa = ";
+    }
+
+    static void dfa (std::wostream &stream_)
+    {
+        stream_ << L", dfa = ";
+    }
+};
+
+// Ready-made dumpers for the char and wchar_t state machines.
+typedef basic_debug<basic_state_machine<char>, char> debug;
+typedef basic_debug<basic_state_machine<wchar_t>, wchar_t> wdebug;
+}
+
+#endif
--- a/inc/lexertl/enums.hpp
+++ b/inc/lexertl/enums.hpp
@@ -0,0 +1,25 @@
+// enums.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_ENUMS_H
+#define LEXERTL_ENUMS_H
+
+namespace lexertl
+{
+    // Regex flags (bit values):
+    enum regex_flags
+    {
+        icase = 1,
+        dot_not_newline = 2,
+        skip_ws = 4,
+        match_zero_len = 8
+    };
+    // 0 = end state, 1 = id, 2 = user id, 3 = push dfa, 4 = next dfa,
+    // 5 = eol, 6 = dead state, 7 = transitions
+    enum
+    {
+        end_state_index = 0,
+        id_index = 1,
+        user_id_index = 2,
+        push_dfa_index = 3,
+        next_dfa_index = 4,
+        eol_index = 5,
+        dead_state_index = 6,
+        transitions_index = 7
+    };
+    // Rule flags (bit values):
+    enum feature_flags
+    {
+        bol_bit = 1,
+        eol_bit = 2,
+        skip_bit = 4,
+        again_bit = 8,
+        multi_state_bit = 16,
+        recursive_bit = 32,
+        advance_bit = 64
+    };
+    // End state flags (bit values):
+    enum
+    {
+        end_state_bit = 1,
+        pop_dfa_bit = 2
+    };
+}
+
+#endif
--- a/inc/lexertl/generate_cpp.hpp
+++ b/inc/lexertl/generate_cpp.hpp
--- a/inc/lexertl/generator.hpp
+++ b/inc/lexertl/generator.hpp
@@ -0,0 +1,829 @@
+// generator.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_GENERATOR_HPP
+#define LEXERTL_GENERATOR_HPP
+
+#include <algorithm>
+#include "bool.hpp"
+#include "partition/charset.hpp"
+#include "char_traits.hpp"
+#include "partition/equivset.hpp"
+#include <memory>
+#include "parser/parser.hpp"
+#include "containers/ptr_list.hpp"
+#include "rules.hpp"
+#include "size_t.hpp"
+#include "state_machine.hpp"
+
+namespace lexertl
+{
+template<typename rules, typename sm, typename char_traits = basic_char_traits
+    <typename sm::traits::input_char_type> >
+class basic_generator
+{
+public:
+    typedef typename rules::id_type id_type;
+    typedef typename rules::char_type rules_char_type;
+    typedef typename sm::traits sm_traits;
+    typedef detail::basic_parser<rules_char_type, sm_traits> parser;
+    typedef typename parser::charset_map charset_map;
+    typedef typename parser::node node;
+    typedef typename parser::node_ptr_vector node_ptr_vector;
+
+    static void build (const rules &rules_, sm &sm_)
+    { // Builds a DFA for every lexer state of rules_ and swaps it into sm_.
+        const std::size_t size_ = rules_.statemap ().size ();
+        // Strong exception guarantee: all work is done on locals, sm_ is
+        // only swapped at the end (boost.org/community/exception_safety.html)
+        internals internals_;
+        sm temp_sm_;
+        node_ptr_vector node_ptr_vector_;
+
+        internals_._eoi = rules_.eoi ();
+        internals_.add_states (size_);
+
+        for (id_type index_ = 0; index_ < size_; ++index_)
+        {
+            if (rules_.regexes ()[index_].empty ())
+            {
+                std::ostringstream ss_;
+                // Cast so that a char-sized id_type is printed as a number.
+                ss_ << "Lexer states with no rules are not allowed "
+                    "(lexer state " << static_cast<std::size_t>(index_) << ".)";
+                throw runtime_error (ss_.str ());
+            }
+            else
+            {
+                // Note that the following variables are per DFA.
+                // Map of regex charset tokens (strings) to index
+                charset_map charset_map_;
+                // Used to fix up $ and \n clashes.
+                id_type nl_id_ = sm_traits::npos ();
+                // Regex syntax tree
+                node *root_ = build_tree (rules_, index_, node_ptr_vector_,
+                    charset_map_, nl_id_);
+
+                build_dfa (charset_map_, root_, internals_, temp_sm_, index_,
+                    nl_id_);
+
+                if (internals_._dfa[index_]->size () /
+                    internals_._dfa_alphabet[index_] >= sm_traits::npos ())
+                {
+                    // Overflow: the row count does not fit below npos ().
+                    throw runtime_error ("The data type you have chosen "
+                        "cannot hold this many DFA rows.");
+                }
+            }
+        }
+
+        // If you get a compile error here the id_type from rules and
+        // state machine do not match.
+        create (internals_, temp_sm_, rules_.features (), lookup ());
+        sm_.swap (temp_sm_);
+    }
+
+    static node *build_tree (const rules &rules_, const std::size_t dfa_,
+        node_ptr_vector &node_ptr_vector_, charset_map &charset_map_,
+        id_type &nl_id_)
+    { // Parses each regex of lexer state dfa_; joins trees via selection_node.
+        typename parser::macro_map macro_map_;
+        parser parser_ (rules_.locale (), node_ptr_vector_, macro_map_,
+            charset_map_, rules_.eoi ());
+        const typename rules::string_deque_deque &regexes_ =
+            rules_.regexes ();
+        typename rules::string_deque::const_iterator regex_iter_ =
+            regexes_[dfa_].begin ();
+        typename rules::string_deque::const_iterator regex_iter_end_ =
+            regexes_[dfa_].end ();
+        const typename rules::string &regex_ = *regex_iter_; // needs >= 1 regex
+        const typename rules::id_vector_deque &ids_ = rules_.ids ();
+        const typename rules::id_vector_deque &user_ids_ =
+            rules_.user_ids ();
+        typename rules::id_vector::const_iterator id_iter_ =
+            ids_[dfa_].begin ();
+        typename rules::id_vector::const_iterator user_id_iter_ =
+            user_ids_[dfa_].begin ();
+        const typename rules::id_vector_deque &next_dfas_ =
+            rules_.next_dfas ();
+        const typename rules::id_vector_deque &pushes_ = rules_.pushes ();
+        const typename rules::bool_vector_deque &pops_ = rules_.pops ();
+        typename rules::id_vector::const_iterator next_dfa_iter_ =
+            next_dfas_[dfa_].begin ();
+        typename rules::id_vector::const_iterator push_dfa_iter_ =
+            pushes_[dfa_].begin ();
+        typename rules::bool_vector::const_iterator pop_dfa_iter_ =
+            pops_[dfa_].begin ();
+        const bool seen_bol_ = (rules_.features ()[dfa_] & bol_bit) != 0;
+        node *root_ = 0;
+
+        // Macros have a different context per lexer state
+        // as equivsets (generally) differ.
+        build_macros (rules_, macro_map_, node_ptr_vector_, charset_map_,
+            nl_id_);
+        root_ = parser_.parse (regex_.c_str (),
+            regex_.c_str () + regex_.size (), *id_iter_, *user_id_iter_,
+            *next_dfa_iter_, *push_dfa_iter_, *pop_dfa_iter_,
+            rules_.flags (), nl_id_, seen_bol_, false);
+        ++regex_iter_;
+        ++id_iter_;
+        ++user_id_iter_;
+        ++next_dfa_iter_;
+        ++push_dfa_iter_;
+        ++pop_dfa_iter_;
+
+        // Build syntax trees for the remaining regexes (iterators in step).
+        while (regex_iter_ != regex_iter_end_)
+        {
+            // Re-declare var, otherwise we perform an assignment..!
+            const typename rules::string &regex_ = *regex_iter_;
+            node *rhs_ = parser_.parse (regex_.c_str (),
+                regex_.c_str () + regex_.size (), *id_iter_, *user_id_iter_,
+                *next_dfa_iter_, *push_dfa_iter_, *pop_dfa_iter_,
+                rules_.flags (), nl_id_,
+                seen_bol_, false);
+            // Slot is pushed before 'new' so a throwing push_back cannot leak.
+            node_ptr_vector_->push_back
+                (static_cast<selection_node *>(0));
+            node_ptr_vector_->back () = new selection_node (root_, rhs_);
+            root_ = node_ptr_vector_->back ();
+
+            ++regex_iter_;
+            ++id_iter_;
+            ++user_id_iter_;
+            ++next_dfa_iter_;
+            ++push_dfa_iter_;
+            ++pop_dfa_iter_;
+        }
+
+        return root_;
+    }
+
+protected:
+    typedef bool_<sm_traits::compressed> compressed; // tag for fix_clashes ()
+    typedef detail::basic_equivset<id_type> equivset;
+    typedef detail::ptr_list<equivset> equivset_list;
+    typedef std::auto_ptr<equivset> equivset_ptr; // NOTE: auto_ptr is removed in C++17
+    typedef typename sm_traits::char_type sm_char_type;
+    typedef detail::basic_charset<sm_char_type, id_type> charset;
+    typedef std::auto_ptr<charset> charset_ptr; // NOTE: auto_ptr is removed in C++17
+    typedef detail::ptr_list<charset> charset_list;
+    typedef detail::basic_internals<id_type> internals;
+    typedef typename std::set<id_type> id_type_set;
+    typedef typename internals::id_type_vector id_type_vector;
+    typedef typename charset::index_set index_set;
+    typedef std::vector<index_set> index_set_vector;
+    typedef bool_<sm_traits::is_dfa> is_dfa; // tag: DFA vs NFA partitioning
+    typedef bool_<sm_traits::lookup> lookup; // tag: create () vs append_dfa ()
+    typedef typename parser::macro_map macro_map;
+    typedef typename macro_map::iterator macro_iter;
+    typedef std::pair<macro_iter, bool> macro_iter_pair;
+    typedef std::set<const node *> node_set;
+    typedef detail::ptr_vector<node_set> node_set_vector;
+    typedef typename node::node_vector node_vector;
+    typedef detail::ptr_vector<node_vector> node_vector_vector;
+    typedef std::pair<typename rules::string, const node *> macro_pair;
+    typedef typename parser::selection_node selection_node;
+    typedef typename std::vector<std::size_t> size_t_vector;
+    typedef typename parser::string_token string_token;
+
+    static void build_macros (const rules &rules_,
+        macro_map &macro_map_, node_ptr_vector &node_ptr_vector_,
+        charset_map &charset_map_, id_type &nl_id_)
+    { // Parses each macro in rules_.macrodeque (); stores its tree by name.
+        const typename rules::string_pair_deque &macrodeque_ =
+            rules_.macrodeque ();
+
+        for (typename rules::string_pair_deque::const_iterator iter_ =
+            macrodeque_.begin (), end_ = macrodeque_.end ();
+            iter_ != end_; ++iter_)
+        {
+            const typename rules::string &name_ = iter_->first;
+            const typename rules::string &regex_ = iter_->second;
+            parser parser_ (rules_.locale (), node_ptr_vector_, macro_map_,
+                charset_map_, rules_.eoi ());
+            node *node_ = parser_.parse (regex_.c_str (),
+                regex_.c_str () + regex_.size (), 0, 0, 0, false, false,
+                rules_.flags (), nl_id_, false, true); // last arg is false in build_tree (); presumably a macro flag - confirm
+            macro_iter_pair map_iter_ = macro_map_.insert (macro_pair (name_,
+                static_cast<const node *>(0)));
+            // Assign through the iterator so an existing entry is overwritten.
+            map_iter_.first->second = node_;
+        }
+    }
+
+    static void build_dfa (const charset_map &charset_map_, const node *root_,
+        internals &internals_, sm &sm_, const id_type dfa_index_,
+        id_type &nl_id_)
+    { // Fills internals_._dfa[dfa_index_] from the syntax tree rooted at root_.
+        // partitioned charset list
+        charset_list charset_list_;
+        // vector mapping token indexes to partitioned token index sets
+        index_set_vector set_mapping_;
+        typename internals::id_type_vector &dfa_ =
+            *internals_._dfa[dfa_index_];
+        std::size_t dfa_alphabet_ = 0;
+        const node_vector *followpos_ = &root_->firstpos ();
+        node_set_vector seen_sets_;
+        node_vector_vector seen_vectors_;
+        size_t_vector hash_vector_;
+        id_type zero_id_ = sm_traits::npos ();
+        id_type_set eol_set_;
+
+        set_mapping_.resize (charset_map_.size ());
+        partition_charsets (charset_map_, charset_list_, is_dfa ());
+        build_set_mapping (charset_list_, internals_, dfa_index_,
+            set_mapping_);
+        // NOTE(review): the find () below is dereferenced without an end () check.
+        if (nl_id_ != sm_traits::npos ())
+        {
+            nl_id_ = *set_mapping_[nl_id_].begin ();
+            zero_id_ = sm_traits::compressed ?
+                *set_mapping_[charset_map_.find (string_token (0, 0))->
+                second].begin () : sm_traits::npos ();
+        }
+        // Row width: partitioned charsets + fixed columns (+1 if nl_id_ is set).
+        dfa_alphabet_ = charset_list_->size () + transitions_index +
+            (nl_id_ == sm_traits::npos () ? 0 : 1);
+
+        if (dfa_alphabet_ > sm_traits::npos ())
+        {
+            // Overflow
+            throw runtime_error ("The data type you have chosen cannot hold "
+                "the dfa alphabet.");
+        }
+
+        internals_._dfa_alphabet[dfa_index_] = dfa_alphabet_;
+        // 'jam' state (row 0); the closure () call below adds the start row.
+        dfa_.resize (dfa_alphabet_, 0);
+        closure (followpos_, seen_sets_, seen_vectors_, hash_vector_,
+            dfa_alphabet_, dfa_);
+        // seen_vectors_ grows as closure () finds states; row is index_ + 1.
+        for (id_type index_ = 0; index_ < static_cast<id_type>
+            (seen_vectors_->size ()); ++index_)
+        {
+            equivset_list equiv_list_;
+
+            build_equiv_list (seen_vectors_[index_], set_mapping_,
+                equiv_list_, is_dfa ());
+
+            for (typename equivset_list::list::const_iterator iter_ =
+                equiv_list_->begin (), end_ = equiv_list_->end ();
+                iter_ != end_; ++iter_)
+            {
+                equivset *equivset_ = *iter_;
+                const id_type transition_ = closure
+                    (&equivset_->_followpos, seen_sets_, seen_vectors_,
+                    hash_vector_, dfa_alphabet_, dfa_);
+                // npos means the followpos set was empty: no transition.
+                if (transition_ != sm_traits::npos ())
+                {
+                    id_type *ptr_ = &dfa_.front () + ((index_ + 1) *
+                        dfa_alphabet_); // taken after closure (), which may resize dfa_
+
+                    // Prune abstemious transitions from end states.
+                    if (*ptr_ && !equivset_->_greedy) continue;
+
+                    for (typename equivset::index_vector::const_iterator
+                        equiv_iter_ = equivset_->_index_vector.begin (),
+                        equiv_end_ = equivset_->_index_vector.end ();
+                        equiv_iter_ != equiv_end_; ++equiv_iter_)
+                    {
+                        const id_type i_ = *equiv_iter_;
+
+                        if (i_ == parser::bol_token ())
+                        {
+                            dfa_.front () = transition_; // BOL target: row 0, column 0
+                        }
+                        else if (i_ == parser:: eol_token ())
+                        {
+                            ptr_[eol_index] = transition_;
+                            eol_set_.insert (index_ + 1); // remembered for fix_clashes ()
+                        }
+                        else
+                        {
+                            ptr_[i_ + transitions_index] = transition_;
+                        }
+                    }
+                }
+            }
+        }
+
+        fix_clashes (eol_set_, nl_id_, zero_id_, dfa_, dfa_alphabet_,
+            compressed ());
+        append_dfa (charset_list_, internals_, sm_, dfa_index_, lookup ());
+    }
+
+    // Uncompressed: moves a state's '\n' transition onto its EOL target state.
+    static void fix_clashes (const id_type_set &eol_set_,
+        const id_type nl_id_, const id_type /*zero_id_*/,
+        typename internals::id_type_vector &dfa_,
+        const std::size_t dfa_alphabet_, const false_ &)
+    {
+        typename id_type_set::const_iterator eol_iter_ =
+            eol_set_.begin ();
+        typename id_type_set::const_iterator eol_end_ =
+            eol_set_.end ();
+        // eol_set_ holds the rows that have an EOL transition.
+        for (; eol_iter_ != eol_end_; ++eol_iter_)
+        {
+            id_type *ptr_ = &dfa_.front () + *eol_iter_ * dfa_alphabet_;
+            const id_type eol_state_ = ptr_[eol_index];
+            const id_type nl_state_ = ptr_[nl_id_ + transitions_index];
+
+            if (nl_state_)
+            {
+                ptr_[transitions_index + nl_id_] = 0; // drop it from this row
+                ptr_ = &dfa_.front () + eol_state_ * dfa_alphabet_;
+                // Only install it if the EOL target has no '\n' move already.
+                if (ptr_[transitions_index + nl_id_] == 0)
+                {
+                    ptr_[transitions_index + nl_id_] = nl_state_;
+                }
+            }
+        }
+    }
+
+    // Compressed: '\n' is reached via 1 (2 if char_24_bit) zero_id_ moves.
+    static void fix_clashes (const id_type_set &eol_set_,
+        const id_type nl_id_, const id_type zero_id_,
+        typename internals::id_type_vector &dfa_,
+        const std::size_t dfa_alphabet_, const true_ &)
+    {
+        typename id_type_set::const_iterator eol_iter_ =
+            eol_set_.begin ();
+        typename id_type_set::const_iterator eol_end_ =
+            eol_set_.end ();
+        std::size_t i_ = 0;
+
+        for (; eol_iter_ != eol_end_; ++eol_iter_)
+        {
+            id_type *ptr_ = &dfa_.front () + *eol_iter_ * dfa_alphabet_;
+            const id_type eol_state_ = ptr_[eol_index];
+            id_type nl_state_ = 0;
+            // Restart the walk at 0 for every state; a stale i_ would skip it.
+            for (i_ = 0; i_ < (sm_traits::char_24_bit ? 2 : 1); ++i_)
+            {
+                ptr_ = &dfa_.front () + ptr_[transitions_index + zero_id_] *
+                    dfa_alphabet_;
+            }
+
+            nl_state_ = ptr_[transitions_index + nl_id_];
+
+            if (nl_state_)
+            {
+                ptr_ = &dfa_.front () + eol_state_ * dfa_alphabet_;
+                // Leave the EOL target alone if it already has a zero_id_ move.
+                if (ptr_[transitions_index + zero_id_] != 0) continue;
+                // Chain new rows: zero_id_ (x1 or x2) and finally nl_id_.
+                ptr_[transitions_index + zero_id_] = dfa_.size () /
+                    dfa_alphabet_;
+                dfa_.resize (dfa_.size () + dfa_alphabet_, 0);
+
+                for (i_ = 0; i_ < (sm_traits::char_24_bit ? 1 : 0); ++i_)
+                {
+                    ptr_ = &dfa_.front () + dfa_.size () - dfa_alphabet_;
+                    ptr_[transitions_index + zero_id_] = dfa_.size () /
+                        dfa_alphabet_;
+                    dfa_.resize (dfa_.size () + dfa_alphabet_, 0);
+                }
+                // ptr_ is recomputed because resize () may have reallocated.
+                ptr_ = &dfa_.front () + dfa_.size () - dfa_alphabet_;
+                ptr_[transitions_index + nl_id_] = nl_state_;
+            }
+        }
+    }
+
+    // char_state_machine version: passes the charset tokens to sm_.append ().
+    static void append_dfa (const charset_list &charset_list_,
+        const internals &internals_, sm &sm_, const id_type dfa_index_,
+        const false_ &)
+    {
+        typename charset_list::list::const_iterator list_iter_ =
+            charset_list_->begin ();
+        std::size_t size_ = charset_list_->size ();
+        typename sm::string_token_vector token_vector_;
+
+        token_vector_.reserve (size_);
+        // Copy each charset's _token in list order.
+        for (std::size_t i_ = 0; i_ < size_; ++i_, ++list_iter_)
+        {
+            const charset *charset_ = *list_iter_;
+
+            token_vector_.push_back (charset_->_token);
+        }
+
+        sm_.append (token_vector_, internals_, dfa_index_);
+    }
+
+    // state_machine version: intentionally empty overload (tag true_).
+    static void append_dfa (const charset_list &,
+        const internals &, sm &, const id_type, const true_ &)
+    {
+        // Nothing to do - will use create() instead
+    }
+
+    // char_state_machine version: intentionally empty overload (tag false_).
+    static void create (internals &, sm &, const id_type_vector &,
+        const false_ &)
+    {
+        // Nothing to do - will use append_dfa() instead
+    }
+
+    // state_machine version: merges feature flags, then swaps data into sm_.
+    static void create (internals &internals_, sm &sm_,
+        const id_type_vector &features_, const true_ &)
+    {
+        for (std::size_t i_ = 0, size_ = internals_._dfa->size ();
+            i_ < size_; ++i_)
+        {
+            internals_._features |= features_[i_];
+        }
+        // More than one DFA means more than one lexer state.
+        if (internals_._dfa->size () > 1)
+        {
+            internals_._features |= multi_state_bit;
+        }
+
+        sm_.data ().swap (internals_);
+    }
+
+    // NFA version: copies the charsets into lhs_ without intersecting them.
+    static void partition_charsets (const charset_map &map_,
+        charset_list &lhs_, const false_ &)
+    {
+        fill_rhs_list (map_, lhs_);
+    }
+
+    // DFA version: intersects each charset with those already held in lhs_.
+    static void partition_charsets (const charset_map &map_,
+        charset_list &lhs_, const true_ &)
+    {
+        charset_list rhs_;
+
+        fill_rhs_list (map_, rhs_);
+
+        if (!rhs_->empty ())
+        {
+            typename charset_list::list::iterator iter_;
+            typename charset_list::list::iterator end_;
+            charset_ptr overlap_ (new charset);
+            // Seed lhs_ with the first charset (slot first, then ownership).
+            lhs_->push_back (static_cast<charset *>(0));
+            lhs_->back () = rhs_->front ();
+            rhs_->pop_front ();
+
+            while (!rhs_->empty ())
+            {
+                charset_ptr r_ (rhs_->front ());
+
+                rhs_->pop_front ();
+                iter_ = lhs_->begin ();
+                end_ = lhs_->end ();
+
+                while (!r_->empty () && iter_ != end_)
+                {
+                    typename charset_list::list::iterator l_iter_ = iter_;
+                    // Presumably moves the common part into overlap_ - confirm.
+                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
+
+                    if (overlap_->empty ())
+                    {
+                        ++iter_;
+                    }
+                    else if ((*l_iter_)->empty ())
+                    {
+                        delete *l_iter_; // lhs entry is replaced by the overlap
+                        *l_iter_ = overlap_.release ();
+                        overlap_.reset (new charset);
+                        ++iter_;
+                    }
+                    else if (r_->empty ())
+                    {
+                        delete r_.release (); // r_ continues as the overlap
+                        r_ = overlap_;
+                        overlap_.reset (new charset);
+                        break;
+                    }
+                    else
+                    {
+                        iter_ = lhs_->insert (++iter_,
+                            static_cast<charset *>(0));
+                        *iter_ = overlap_.release ();
+                        overlap_.reset (new charset);
+                        ++iter_;
+                        end_ = lhs_->end ();
+                    }
+                }
+                // Whatever is left of r_ becomes a new entry of its own.
+                if (!r_->empty ())
+                {
+                    lhs_->push_back (static_cast<charset *>(0));
+                    lhs_->back () = r_.release ();
+                }
+            }
+        }
+    }
+
+    static void fill_rhs_list (const charset_map &map_,
+        charset_list &list_)
+    { // Appends one new charset per map_ entry, built from (key, value).
+        typename charset_map::const_iterator iter_ = map_.begin ();
+        typename charset_map::const_iterator end_ = map_.end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            list_->push_back (static_cast<charset *>(0)); // slot before 'new'
+            list_->back () = new charset (iter_->first, iter_->second);
+        }
+    }
+
+    static void build_set_mapping (const charset_list &charset_list_,
+        internals &internals_, const id_type dfa_index_,
+        index_set_vector &set_mapping_)
+    { // Records, per original charset index, the partitioned indexes it uses.
+        typename charset_list::list::const_iterator iter_ =
+            charset_list_->begin ();
+        typename charset_list::list::const_iterator end_ =
+            charset_list_->end ();
+        typename index_set::const_iterator set_iter_;
+        typename index_set::const_iterator set_end_;
+        // index_ is the position of the charset within charset_list_.
+        for (id_type index_ = 0; iter_ != end_; ++iter_, ++index_)
+        {
+            const charset *cs_ = *iter_;
+
+            set_iter_ = cs_->_index_set.begin ();
+            set_end_ = cs_->_index_set.end ();
+            fill_lookup (cs_->_token, internals_._lookup[dfa_index_],
+                index_, lookup ());
+
+            for (; set_iter_ != set_end_; ++set_iter_)
+            {
+                set_mapping_[*set_iter_].insert (index_);
+            }
+        }
+    }
+
+    // char_state_machine version: intentionally empty overload (tag false_).
+    static void fill_lookup (const string_token &, id_type_vector *,
+        const id_type, const false_ &)
+    {
+        // Do nothing (lookup not used)
+    }
+
+    // state_machine version: maps every char of charset_ to column index_.
+    static void fill_lookup (const string_token &charset_,
+        id_type_vector *lookup_, const id_type index_, const true_ &)
+    {
+        typename string_token::range_vector::const_iterator iter_ =
+            charset_._ranges.begin ();
+        typename string_token::range_vector::const_iterator end_ =
+            charset_._ranges.end ();
+        id_type *ptr_ = &lookup_->front ();
+        // Each range is inclusive: [first, second].
+        for (; iter_ != end_; ++iter_)
+        {
+            for (typename char_traits::index_type char_ = iter_->first;
+                char_ < iter_->second; ++char_)
+            {
+                // Note char_ must be unsigned
+                ptr_[char_] = index_ + transitions_index;
+            }
+            // 'second' is stored separately, presumably so ++char_ cannot wrap.
+            // Note iter_->second must be unsigned
+            ptr_[iter_->second] = index_ + transitions_index;
+        }
+    }
+
+    static id_type closure (const node_vector *followpos_,
+        node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
+        size_t_vector &hash_vector_, const id_type size_, id_type_vector &dfa_)
+    { // Returns the 1-based row for this followpos set, adding a row if new.
+        bool end_state_ = false;
+        id_type id_ = 0;
+        id_type user_id_ = sm_traits::npos ();
+        id_type next_dfa_ = 0;
+        id_type push_dfa_ = sm_traits::npos ();
+        bool pop_dfa_ = false;
+        std::size_t hash_ = 0;
+        // An empty set has no state: report npos.
+        if (followpos_->empty ()) return sm_traits::npos ();
+
+        id_type index_ = 0;
+        std::auto_ptr<node_set> set_ptr_ (new node_set);
+        std::auto_ptr<node_vector> vector_ptr_ (new node_vector);
+
+        for (typename node_vector::const_iterator iter_ =
+            followpos_->begin (), end_ = followpos_->end ();
+            iter_ != end_; ++iter_)
+        {
+            closure_ex (*iter_, end_state_, id_, user_id_, next_dfa_,
+                push_dfa_, pop_dfa_, set_ptr_.get (),
+                vector_ptr_.get (), hash_);
+        }
+
+        bool found_ = false;
+        typename size_t_vector::const_iterator hash_iter_ =
+            hash_vector_.begin ();
+        typename size_t_vector::const_iterator hash_end_ =
+            hash_vector_.end ();
+        typename node_set_vector::vector::const_iterator set_iter_ =
+            seen_sets_->begin ();
+        // Compare the cheap hash first, then the full node set.
+        for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
+        {
+            found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
+            ++index_; // bumped before the break, so a match is 1-based
+
+            if (found_) break;
+        }
+
+        if (!found_)
+        {
+            seen_sets_->push_back (static_cast<node_set *>(0));
+            seen_sets_->back () = set_ptr_.release ();
+            seen_vectors_->push_back (static_cast<node_vector *>(0));
+            seen_vectors_->back () = vector_ptr_.release ();
+            hash_vector_.push_back (hash_);
+            // State 0 is the jam state...
+            index_ = static_cast<id_type>(seen_sets_->size ());
+
+            const std::size_t old_size_ = dfa_.size ();
+            // Append a zero-filled row of size_ columns for the new state.
+            dfa_.resize (old_size_ + size_, 0);
+
+            if (end_state_)
+            {
+                dfa_[old_size_] |= end_state_bit;
+
+                if (pop_dfa_)
+                {
+                    dfa_[old_size_] |= pop_dfa_bit;
+                }
+
+                dfa_[old_size_ + id_index] = id_;
+                dfa_[old_size_ + user_id_index] = user_id_;
+                dfa_[old_size_ + push_dfa_index] = push_dfa_;
+                dfa_[old_size_ + next_dfa_index] = next_dfa_;
+            }
+        }
+
+        return index_;
+    }
+
+    static void closure_ex (node *node_, bool &end_state_,
+        id_type &id_, id_type &user_id_, id_type &next_dfa_,
+        id_type &push_dfa_, bool &pop_dfa_, node_set *set_ptr_,
+        node_vector *vector_ptr_, std::size_t &hash_)
+    { // Adds node_ to the set/vector and captures end-state data once.
+        const bool temp_end_state_ = node_->end_state ();
+
+        if (temp_end_state_)
+        {
+            if (!end_state_) // only the first end-state node seen is recorded
+            {
+                end_state_ = true;
+                id_ = node_->id ();
+                user_id_ = node_->user_id ();
+                next_dfa_ = node_->next_dfa ();
+                push_dfa_ = node_->push_dfa ();
+                pop_dfa_ = node_->pop_dfa ();
+            }
+        }
+        // Duplicates are skipped; the hash is the sum of the node addresses.
+        if (set_ptr_->insert (node_).second)
+        {
+            vector_ptr_->push_back (node_);
+            hash_ += reinterpret_cast<std::size_t> (node_);
+        }
+    }
+
+    // NFA version: fills lhs_ from vector_ without intersecting the sets.
+    static void build_equiv_list (const node_vector *vector_,
+        const index_set_vector &set_mapping_, equivset_list &lhs_,
+        const false_ &)
+    {
+        fill_rhs_list (vector_, set_mapping_, lhs_);
+    }
+
+    // DFA version: intersects each equivset with those already held in lhs_.
+    static void build_equiv_list (const node_vector *vector_,
+        const index_set_vector &set_mapping_, equivset_list &lhs_,
+        const true_ &)
+    {
+        equivset_list rhs_;
+
+        fill_rhs_list (vector_, set_mapping_, rhs_);
+
+        if (!rhs_->empty ())
+        {
+            typename equivset_list::list::iterator iter_;
+            typename equivset_list::list::iterator end_;
+            equivset_ptr overlap_ (new equivset);
+            // Seed lhs_ with the first equivset (slot first, then ownership).
+            lhs_->push_back (static_cast<equivset *>(0));
+            lhs_->back () = rhs_->front ();
+            rhs_->pop_front ();
+
+            while (!rhs_->empty ())
+            {
+                equivset_ptr r_ (rhs_->front ());
+
+                rhs_->pop_front ();
+                iter_ = lhs_->begin ();
+                end_ = lhs_->end ();
+
+                while (!r_->empty () && iter_ != end_)
+                {
+                    typename equivset_list::list::iterator l_iter_ = iter_;
+                    // Presumably moves the common part into overlap_ - confirm.
+                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
+
+                    if (overlap_->empty ())
+                    {
+                        ++iter_;
+                    }
+                    else if ((*l_iter_)->empty ())
+                    {
+                        delete *l_iter_; // lhs entry is replaced by the overlap
+                        *l_iter_ = overlap_.release ();
+                        overlap_.reset (new equivset);
+                        ++iter_;
+                    }
+                    else if (r_->empty ())
+                    {
+                        delete r_.release (); // r_ continues as the overlap
+                        r_ = overlap_;
+                        overlap_.reset (new equivset);
+                        break;
+                    }
+                    else
+                    {
+                        iter_ = lhs_->insert (++iter_,
+                            static_cast<equivset *>(0));
+                        *iter_ = overlap_.release ();
+                        overlap_.reset (new equivset);
+                        ++iter_;
+                        end_ = lhs_->end ();
+                    }
+                }
+                // Whatever is left of r_ becomes a new entry of its own.
+                if (!r_->empty ())
+                {
+                    lhs_->push_back (static_cast<equivset *>(0));
+                    lhs_->back () = r_.release ();
+                }
+            }
+        }
+    }
+
+    static void fill_rhs_list (const node_vector *vector_,
+        const index_set_vector &set_mapping_, equivset_list &list_)
+    { // Appends an equivset for each non-end-state node with a non-null token.
+        typename node_vector::const_iterator iter_ =
+            vector_->begin ();
+        typename node_vector::const_iterator end_ =
+            vector_->end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            const node *node_ = *iter_;
+
+            if (!node_->end_state ())
+            {
+                const id_type token_ = node_->token ();
+
+                if (token_ != node::null_token ())
+                {
+                    list_->push_back (static_cast<equivset *>(0));
+                    // BOL/EOL are not charsets: they map to themselves only.
+                    if (token_ == parser::bol_token () ||
+                        token_ == parser::eol_token ())
+                    {
+                        std::set<id_type> index_set_;
+
+                        index_set_.insert (token_);
+                        list_->back () = new equivset (index_set_,
+                            token_, node_->greedy (), node_->followpos ());
+                    }
+                    else
+                    {
+                        list_->back () = new equivset (set_mapping_[token_],
+                            token_, node_->greedy (), node_->followpos ());
+                    }
+                }
+            }
+        }
+    }
+};
+
+typedef basic_generator<rules, state_machine> generator;
+typedef basic_generator<wrules, wstate_machine> wgenerator;
+typedef basic_generator<rules, char_state_machine> char_generator;
+typedef basic_generator<wrules, wchar_state_machine> wchar_generator;
+}
+
+#endif
--- a/inc/lexertl/internals.hpp
+++ b/inc/lexertl/internals.hpp
@@ -0,0 +1,80 @@
+// internals.hpp
+// Copyright (c) 2009-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_INTERNALS_HPP
+#define LEXERTL_INTERNALS_HPP
+
+#include "enums.hpp"
+#include "containers/ptr_vector.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename id_type>
+struct basic_internals
+{ // Table storage: add_states () adds one lookup, alphabet and dfa entry each.
+    typedef std::vector<id_type> id_type_vector;
+    typedef ptr_vector<id_type_vector> id_type_vector_vector;
+
+    id_type _eoi; // initialised to 0
+    id_type_vector_vector _lookup; // one 256-entry table per add_states () entry
+    id_type_vector _dfa_alphabet; // one entry per add_states () entry, initially 0
+    id_type _features; // initialised to 0
+    id_type_vector_vector _dfa; // one (initially empty) vector per entry
+
+    basic_internals () :
+        _eoi (0),
+        _lookup (),
+        _dfa_alphabet (),
+        _features (0),
+        _dfa ()
+    {
+    }
+
+    void clear ()
+    { // Resets every member to its default-constructed state.
+        _eoi = 0;
+        _lookup.clear ();
+        _dfa_alphabet.clear ();
+        _features = 0;
+        _dfa.clear ();
+    }
+
+    bool empty () const
+    { // True when no DFA entries have been added.
+        return _dfa->empty ();
+    }
+
+    void add_states (const std::size_t num_)
+    { // Appends num_ entries to _lookup, _dfa_alphabet and _dfa.
+        for (std::size_t index_ = 0; index_ < num_; ++index_)
+        {
+            _lookup->push_back (static_cast<id_type_vector *>(0));
+            // lookup *always* has a size 256 now.
+            _lookup->back () = new id_type_vector (256, dead_state_index);
+            _dfa_alphabet.push_back (0);
+            _dfa->push_back (static_cast<id_type_vector *>(0));
+            _dfa->back () = new id_type_vector;
+        }
+    }
+
+    void swap (basic_internals &internals_)
+    { // Exchanges all five members with internals_.
+        std::swap (_eoi, internals_._eoi);
+        _lookup->swap (*internals_._lookup);
+        _dfa_alphabet.swap (internals_._dfa_alphabet);
+        std::swap (_features, internals_._features);
+        _dfa->swap (*internals_._dfa);
+    }
+
+private:
+    basic_internals (const basic_internals &); // No copy construction.
+    basic_internals &operator = (const basic_internals &); // No assignment.
+};
+}
+}
+
+#endif
--- a/inc/lexertl/is_same.hpp
+++ b/inc/lexertl/is_same.hpp
@@ -0,0 +1,29 @@
+// is_same.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_IS_SAME_HPP
+#define LEXERTL_IS_SAME_HPP
+
+namespace lexertl
+{
+namespace detail
+{
+// is_same<a, b>::same is true only when a and b name the same type.
+template<typename lhs_type, typename rhs_type>
+struct is_same
+{
+    enum {same = false};
+};
+template<typename type>
+struct is_same<type, type>
+{
+    enum {same = true};
+};
+}
+}
+
+#endif
+
--- a/inc/lexertl/licence_1_0.txt
+++ b/inc/lexertl/licence_1_0.txt
@@ -0,0 +1,24 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
--- a/inc/lexertl/lookup.hpp
+++ b/inc/lexertl/lookup.hpp
@@ -0,0 +1,477 @@
+// lookup.hpp
+// Copyright (c) 2009-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_LOOKUP_HPP
+#define LEXERTL_LOOKUP_HPP
+
+#include <assert.h>
+#include "bool.hpp"
+#include "match_results.hpp"
+#include "state_machine.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Beginning-of-line bookkeeping for lookup_state. The primary template is
+// an empty placeholder that accepts and ignores the initial flag.
+template<bool>
+struct bol_state
+{
+    bol_state (const bool) {}
+};
+
+// Enabled form: both flags start out as the value passed in.
+template<>
+struct bol_state<true>
+{
+    bool _bol;
+    bool _end_bol;
+
+    bol_state (const bool bol_) :
+        _bol (bol_), _end_bol (bol_)
+    {
+    }
+};
+
+// End-of-line bookkeeping for lookup_state. The primary template is empty
+// and is used whenever the bool argument is false.
+template<typename id_type, bool>
+struct eol_state
+{
+};
+
+// Enabled form: holds the EOL state, initially 0.
+template<typename id_type>
+struct eol_state<id_type, true>
+{
+    id_type _EOL_state;
+
+    eol_state () : _EOL_state (0) {}
+};
+
+// Start-state bookkeeping; the primary template stores nothing and simply
+// ignores the state it is constructed with.
+template<typename id_type, bool>
+struct multi_state_state
+{
+    multi_state_state (const id_type) {}
+};
+
+// Enabled form: remembers the start state.
+template<typename id_type>
+struct multi_state_state<id_type, true>
+{
+    id_type _start_state;
+
+    multi_state_state (const id_type state_) : _start_state (state_)
+    {
+    }
+};
+
+// Push/pop bookkeeping for recursive lexers; the primary template is an
+// empty placeholder that ignores the pointer it is constructed with.
+template<typename id_type, bool>
+struct recursive_state
+{
+    recursive_state (const id_type *) {}
+};
+
+// Enabled form: reads the pop flag and the push target through ptr_.
+template<typename id_type>
+struct recursive_state<id_type, true>
+{
+    bool _pop;
+    id_type _push_dfa;
+
+    recursive_state (const id_type *ptr_) :
+        _pop ((ptr_[0] & pop_dfa_bit) != 0), _push_dfa (ptr_[push_dfa_index])
+    {
+    }
+};
+
+template<typename id_type, typename index_type, std::size_t flags>
+struct lookup_state // Scanning cursor over the tables of one lexer state.
+{
+    typedef basic_internals<id_type> internals;
+
+    const id_type *_lookup; // Maps an input character to a DFA column.
+    id_type _dfa_alphabet; // Row width used to index into _dfa.
+    const id_type *_dfa; // First entry of the DFA table.
+    const id_type *_ptr; // Current DFA row.
+    bool _end_state; // True once an end state has been recorded.
+    id_type _id; // Id read from the most recently recorded end state.
+    id_type _uid; // User id read from the same row as _id.
+    bol_state<(flags & bol_bit) != 0> _bol_state;
+    eol_state<id_type, (flags & eol_bit) != 0> _eol_state;
+    multi_state_state<id_type, (flags & multi_state_bit) != 0>
+        _multi_state_state;
+    recursive_state<id_type, (flags & recursive_bit) != 0> _recursive_state;
+
+    // Binds to the tables of state_; _ptr starts _dfa_alphabet entries in.
+    lookup_state (const internals &internals_, const bool bol_,
+        const id_type state_) :
+        _lookup (&internals_._lookup[state_]->front ()),
+        _dfa_alphabet (internals_._dfa_alphabet[state_]),
+        _dfa (&internals_._dfa[state_]->front ()),
+        _ptr (_dfa + _dfa_alphabet),
+        _end_state (*_ptr != 0),
+        _id (*(_ptr + id_index)),
+        _uid (*(_ptr + user_id_index)),
+        _bol_state (bol_),
+        _eol_state (),
+        _multi_state_state (state_),
+        _recursive_state (_ptr)
+    {
+    }
+
+    void reset_recursive (const false_ &)
+    {
+        // No-op: selected when recursive_bit is not set in flags.
+    }
+
+    void reset_recursive (const true_ &) // Re-reads pop/push from _ptr.
+    {
+        _recursive_state._pop = (*_ptr & pop_dfa_bit) != 0;
+        _recursive_state._push_dfa = *(_ptr + push_dfa_index);
+    }
+
+    void bol_start_state (const false_ &)
+    {
+        // No-op: selected when bol_bit is not set in flags.
+    }
+
+    void bol_start_state (const true_ &) // Optional jump taken when at BOL.
+    {
+        if (_bol_state._bol)
+        {
+            const id_type state_ = *_dfa; // First DFA entry: BOL target.
+
+            if (state_) // 0 means there is no BOL transition.
+            {
+                _ptr = &_dfa[state_ * _dfa_alphabet];
+            }
+        }
+    }
+
+    template<typename char_type>
+    bool eol (const char_type, const false_ &) // EOL disabled: never taken.
+    {
+        return false;
+    }
+
+    template<typename char_type>
+    bool eol (const char_type curr_, const true_ &) // True if EOL taken.
+    {
+        bool ret_ = false;
+
+        _eol_state._EOL_state = _ptr[eol_index];
+        ret_ = _eol_state._EOL_state && curr_ == '\n';
+
+        if (ret_) // Move to the EOL row without consuming the '\n'.
+        {
+            _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet];
+        }
+
+        return ret_;
+    }
+
+    template<typename char_type> // Single lookup per character.
+    id_type next_char (const char_type prev_char_, const false_ &)
+    {
+        const id_type state_= _ptr[_lookup
+            [static_cast<index_type>(prev_char_)]];
+
+        if (state_ != 0) // 0 means no transition; _ptr is left unchanged.
+        {
+            _ptr = &_dfa[state_ * _dfa_alphabet];
+        }
+
+        return state_;
+    }
+
+    template<typename char_type> // One lookup per byte, at most 3 bytes.
+    id_type next_char (const char_type prev_char_, const true_ &)
+    {
+        const std::size_t bytes_ = sizeof (char_type) < 3 ?
+            sizeof (char_type) : 3;
+        const std::size_t shift_[] = {0, 8, 16};
+        id_type state_= 0;
+
+        for (std::size_t i_ = 0; i_ < bytes_; ++i_) // Highest byte first.
+        {
+            state_ = _ptr[_lookup[static_cast<unsigned char>((prev_char_ >>
+                shift_[bytes_ - 1 - i_]) & 0xff)]];
+
+            if (state_ == 0) // No transition: stop part way through.
+            {
+                break;
+            }
+
+            _ptr = &_dfa[state_ * _dfa_alphabet];
+        }
+
+        return state_;
+    }
+
+    template<typename char_type>
+    void bol (const char_type, const false_ &)
+    {
+        // No-op: selected when bol_bit is not set in flags.
+    }
+
+    template<typename char_type> // At BOL iff the last character was '\n'.
+    void bol (const char_type prev_char_, const true_ &)
+    {
+        _bol_state._bol = prev_char_ == '\n';
+    }
+
+    void eol (const id_type, const false_ &)
+    {
+        // No-op: selected when eol_bit is not set in flags.
+    }
+
+    void eol (const id_type err_val_, const true_ &) // Overwrites EOL state.
+    {
+        _eol_state._EOL_state = err_val_;
+    }
+
+    void reset_start_state (const false_ &)
+    {
+        // No-op: selected when multi_state_bit is not set in flags.
+    }
+
+    void reset_start_state (const true_ &) // Next start state from _ptr.
+    {
+        _multi_state_state._start_state = *(_ptr + next_dfa_index);
+    }
+
+    void reset_end_bol (const false_ &)
+    {
+        // No-op: selected when bol_bit is not set in flags.
+    }
+
+    void reset_end_bol (const true_ &) // Snapshot _bol for the end state.
+    {
+        _bol_state._end_bol = _bol_state._bol;
+    }
+
+    template<typename iter_type> // Records a match if _ptr is an end state.
+    void end_state (iter_type &end_token_, iter_type &curr_)
+    {
+        if (*_ptr) // Non-zero first entry marks an end state.
+        {
+            _end_state = true;
+            reset_end_bol (bool_<(flags & bol_bit) != 0> ());
+            _id = *(_ptr + id_index);
+            _uid = *(_ptr + user_id_index);
+            reset_recursive (bool_<(flags & recursive_bit) != 0> ());
+            reset_start_state (bool_<(flags & multi_state_bit) != 0> ());
+            end_token_ = curr_; // The token ends at the current position.
+        }
+    }
+
+    template<typename iter_type, typename char_type>
+    void check_eol (iter_type &, iter_type &, const id_type,
+        const char_type, const false_ &)
+    {
+        // No-op: selected when eol_bit is not set in flags.
+    }
+
+    template<typename iter_type, typename char_type> // EOL at end of input.
+    void check_eol (iter_type &end_token_, iter_type &curr_,
+        const id_type npos, const char_type eoi_, const true_ &)
+    {
+        if (_eol_state._EOL_state != npos && curr_ == eoi_)
+        {
+            _eol_state._EOL_state = _ptr[eol_index];
+
+            if (_eol_state._EOL_state) // Follow it and re-test for a match.
+            {
+                _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet];
+                end_state (end_token_, curr_);
+            }
+        }
+    }
+
+    template<typename results>
+    void pop (results &, const false_ &)
+    {
+        // No-op: overload chosen for non-recursive lookups.
+    }
+
+    template<typename results> // Applies the recorded pop or push.
+    void pop (results &results_, const true_ &)
+    {
+        if (_recursive_state._pop) // Resume the state on top of the stack.
+        {
+            _multi_state_state._start_state = results_.stack.top ().first;
+            results_.stack.pop ();
+        }
+        else if (_recursive_state._push_dfa != results::npos ())
+        {
+            results_.stack.push (typename results::id_type_pair
+                (_recursive_state._push_dfa, _id));
+        }
+    }
+
+    template<typename results> // True if the recorded id is eoi_.
+    bool id_eoi (const id_type eoi_, const results &, const false_ &)
+    {
+        return _id == eoi_;
+    }
+
+    template<typename results> // As above, or a pop exposing an eoi_ entry.
+    bool id_eoi (const id_type eoi_, const results &results_, const true_ &)
+    {
+        return _id == eoi_ || (_recursive_state._pop &&
+            !results_.stack.empty () && results_.stack.top ().second == eoi_);
+    }
+
+    void start_state (id_type &, const false_ &)
+    {
+        // No-op: selected when multi_state_bit is not set in flags.
+    }
+
+    void start_state (id_type &start_state_, const true_ &) // Copies it out.
+    {
+        start_state_ = _multi_state_state._start_state;
+    }
+
+    void bol (bool &, const false_ &)
+    {
+        // No-op: selected when bol_bit is not set in flags.
+    }
+
+    void bol (bool &end_bol_, const true_ &) // Copies out the BOL snapshot.
+    {
+        end_bol_ = _bol_state._end_bol;
+    }
+};
+
+// Helper used when no token matched. The false_ overload leaves results_
+// exactly as it was.
+template<typename results>
+void inc_end (results &, const false_ &) {}
+
+// The true_ overload steps results_.end forward by one position.
+template<typename results>
+void inc_end (results &results_, const true_ &)
+{
+    ++results_.end;
+}
+
+template<typename iter_type, std::size_t flags, typename id_type,
+    typename results, bool compressed, bool recursive>
+void next (const basic_state_machine<typename std::iterator_traits
+    <iter_type>::value_type, id_type> &sm_,
+    results &results_, const bool_<compressed> &compressed_,
+    const bool_<recursive> &recursive_)
+{ // Scans from results_.end and stores the longest match in results_.
+    const basic_internals<id_type> &internals_ = sm_.data ();
+    typename results::iter_type end_token_ = results_.end;
+
+skip: // Re-entered after a token whose id equals sm_.skip ().
+    typename results::iter_type curr_ = results_.end;
+
+    results_.start = curr_;
+
+again: // Re-entered without moving results_.start when id_eoi () is true.
+    if (curr_ == results_.eoi)
+    {
+        results_.id = internals_._eoi; // End of input: report the EOI id.
+        results_.user_id = results::npos ();
+        return;
+    }
+
+    lookup_state<id_type, typename results::index_type, flags> lu_state_
+        (internals_, results_.bol, results_.state);
+    lu_state_.bol_start_state (bool_<(flags & bol_bit) != 0> ());
+
+    while (curr_ != results_.eoi)
+    {
+        if (!lu_state_.eol (*curr_, bool_<(flags & eol_bit) != 0> ()))
+        {
+            const typename results::char_type prev_char_ = *curr_++;
+            const id_type state_ = lu_state_.next_char (prev_char_,
+                compressed_);
+
+            lu_state_.bol (prev_char_, bool_<(flags & bol_bit) != 0> ());
+
+            if (state_ == 0) // No transition for this character: stop.
+            {
+                lu_state_.eol (results::npos (),
+                    bool_<(flags & eol_bit) != 0> ());
+                break;
+            }
+        }
+
+        lu_state_.end_state (end_token_, curr_); // Remember any match here.
+    }
+
+    lu_state_.check_eol (end_token_, curr_, results::npos (), results_.eoi,
+        bool_<(flags & eol_bit) != 0> ());
+
+    if (lu_state_._end_state)
+    {
+        // A match was recorded; end_token_ is where the last one ended.
+        lu_state_.pop (results_, recursive_);
+
+        lu_state_.start_state (results_.state,
+            bool_<(flags & multi_state_bit) != 0> ());
+        lu_state_.bol (results_.bol, bool_<(flags & bol_bit) != 0> ());
+        results_.end = end_token_;
+
+        if (lu_state_._id == sm_.skip ()) goto skip;
+
+        if (lu_state_.id_eoi (internals_._eoi, results_, recursive_))
+        {
+            curr_ = end_token_;
+            goto again;
+        }
+    }
+    else
+    {
+        results_.end = end_token_;
+        results_.bol = *results_.end == '\n';
+        results_.start = results_.end;
+        // No match: the character is skipped when advance_bit is set.
+        inc_end (results_, bool_<(flags & advance_bit) != 0> ());
+        lu_state_._id = results::npos ();
+        lu_state_._uid = results::npos ();
+    }
+
+    results_.id = lu_state_._id;
+    results_.user_id = lu_state_._uid;
+}
+}
+
+// Finds the next token for a non-recursive match_results object.
+template<typename iter_type, typename id_type, std::size_t flags>
+void lookup (const basic_state_machine<typename std::iterator_traits
+    <iter_type>::value_type, id_type> &sm_,
+    match_results<iter_type, id_type, flags> &results_)
+{
+    typedef typename std::iterator_traits<iter_type>::value_type char_type;
+    // A failure here means that flags does not cover every feature used
+    // by sm_, or that recursive_match_results should be used instead.
+    assert ((sm_.data ()._features & flags) == sm_.data ()._features);
+    detail::next<iter_type, flags, id_type> (sm_, results_,
+        bool_<(sizeof (char_type) > 1)> (), false_ ());
+}
+
+template<typename iter_type, typename id_type, std::size_t flags>
+void lookup (const basic_state_machine<typename std::iterator_traits
+    <iter_type>::value_type, id_type> &sm_,
+    recursive_match_results<iter_type, id_type, flags> &results_)
+{
+    typedef typename std::iterator_traits<iter_type>::value_type char_type;
+    // A failure here means flags does not cover every feature used by sm_.
+    assert ((sm_.data ()._features & flags) == sm_.data ()._features);
+    detail::next<iter_type, flags | recursive_bit, id_type> (sm_, results_,
+        bool_<(sizeof (char_type) > 1)> (), true_ ());
+}
+}
+
+#endif
--- a/inc/lexertl/match_results.hpp
+++ b/inc/lexertl/match_results.hpp
@@ -0,0 +1,150 @@
+// match_results.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_MATCH_RESULTS_HPP
+#define LEXERTL_MATCH_RESULTS_HPP
+
+#include "char_traits.hpp"
+#include "enums.hpp"
+#include <iterator>
+#include <stack>
+#include <string>
+
+namespace lexertl
+{
+// Result of a lookup: the matched token's ids and range, the end of input
+// position and the lexer's beginning-of-line flag and current state.
+template<typename iter, typename id_type = std::size_t,
+    std::size_t flags = bol_bit | eol_bit | skip_bit | again_bit |
+        multi_state_bit | advance_bit>
+struct match_results
+{
+    typedef iter iter_type;
+    typedef typename std::iterator_traits<iter_type>::value_type char_type;
+    typedef typename basic_char_traits<char_type>::index_type index_type;
+    typedef std::basic_string<char_type> string;
+
+    id_type id;
+    id_type user_id;
+    iter_type start;
+    iter_type end;
+    iter_type eoi;
+    bool bol;
+    id_type state;
+
+    match_results () :
+        id (0), user_id (npos ()),
+        start (iter_type ()), end (iter_type ()), eoi (iter_type ()),
+        bol (true), state (0)
+    {
+    }
+
+    // Prepares to tokenise [start_, end_): the token is empty at start_.
+    match_results (const iter_type &start_, const iter_type &end_) :
+        id (0), user_id (npos ()),
+        start (start_), end (start_), eoi (end_),
+        bol (true), state (0)
+    {
+    }
+
+    virtual ~match_results ()
+    {
+    }
+
+    // Copies the current token text out of [start, end).
+    string str () const
+    {
+        return string (start, end);
+    }
+
+    // Forgets the current token and moves both ends of it to eoi.
+    virtual void clear ()
+    {
+        state = 0;
+        bol = true;
+        end = eoi;
+        start = eoi;
+        user_id = npos ();
+        id  = 0;
+    }
+
+    // Re-targets the object at a new input range [start_, end_).
+    virtual void reset (const iter_type &start_, const iter_type &end_)
+    {
+        state = 0;
+        bol = true;
+        start = start_;
+        end  = start_;
+        eoi = end_;
+        user_id = npos ();
+        id  = 0;
+    }
+
+    // ~0 converted to id_type; used as the "no id" value.
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~0);
+    }
+
+    // ~1 converted to id_type.
+    static id_type skip ()
+    {
+        return static_cast<id_type>(~1);
+    }
+};
+
+// match_results plus the stack of id pairs used by recursive lookups.
+template<typename iter, typename id_type = std::size_t,
+    std::size_t flags = bol_bit | eol_bit | skip_bit | again_bit |
+        multi_state_bit | recursive_bit | advance_bit>
+struct recursive_match_results : public match_results<iter, id_type, flags>
+{
+    typedef std::pair<id_type, id_type> id_type_pair;
+    std::stack<id_type_pair> stack;
+
+    recursive_match_results () :
+        match_results<iter, id_type, flags> (), stack ()
+    {
+    }
+
+    recursive_match_results (const iter &start_, const iter &end_) :
+        match_results<iter, id_type, flags> (start_, end_), stack ()
+    {
+    }
+
+    virtual ~recursive_match_results ()
+    {
+    }
+
+    // As the base class versions below, but the stack is emptied as well.
+    virtual void clear ()
+    {
+        match_results<iter, id_type, flags>::clear ();
+        for (; !stack.empty (); stack.pop ()) {}
+    }
+
+    virtual void reset (const iter &start_, const iter &end_)
+    {
+        match_results<iter, id_type, flags>::reset (start_, end_);
+        for (; !stack.empty (); stack.pop ()) {}
+    }
+
+    // Note: copies made of this object duplicate the stack too.
+};
+
+typedef match_results<std::string::const_iterator> smatch; // std::string
+typedef match_results<const char *> cmatch; // char array
+typedef match_results<std::wstring::const_iterator> wsmatch; // std::wstring
+typedef match_results<const wchar_t *> wcmatch; // wchar_t array
+
+typedef recursive_match_results<std::string::const_iterator>
+    srmatch; // Recursive counterparts of the four typedefs above.
+typedef recursive_match_results<const char *> crmatch;
+typedef recursive_match_results<std::wstring::const_iterator>
+    wsrmatch;
+typedef recursive_match_results<const wchar_t *> wcrmatch;
+}
+
+#endif
--- a/inc/lexertl/memory_file.hpp
+++ b/inc/lexertl/memory_file.hpp
@@ -0,0 +1,112 @@
+// memory_file.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+// Inspired by http://en.wikibooks.org/wiki/Optimizing_C%2B%2B/General_optimization_techniques/Input/Output#Memory-mapped_file
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_MEMORY_FILE_H
+#define LEXERTL_MEMORY_FILE_H
+
+#ifdef __unix__
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#elif defined _WIN32
+#include <windows.h>
+#endif
+
+// Only files small enough to fit into memory are supported.
+namespace lexertl
+{
+// Read-only memory mapping of a whole file, viewed as an array of CharT.
+// data () and size () are both 0 if the file could not be opened or mapped.
+template<typename CharT>
+class basic_memory_file
+{
+public:
+    basic_memory_file (const char *pathname_) :
+        _data (0), _size (0)
+    {
+#ifdef __unix__
+        _bytes = 0;
+        _fh = ::open (pathname_, O_RDONLY);
+
+        if (_fh > -1)
+        {
+            struct stat sbuf_;
+
+            if (::fstat (_fh, &sbuf_) > -1)
+            {
+                void *map_ = ::mmap (0, sbuf_.st_size, PROT_READ, MAP_SHARED,
+                    _fh, 0);
+
+                if (map_ != MAP_FAILED)
+                {
+                    _data = static_cast<const CharT *>(map_);
+                    // munmap () needs bytes; size () counts whole CharTs.
+                    _bytes = static_cast<std::size_t>(sbuf_.st_size);
+                    _size = _bytes / sizeof (CharT);
+                }
+            }
+        }
+#elif defined _WIN32
+        _fh = ::CreateFileA (pathname_, GENERIC_READ, FILE_SHARE_READ, 0,
+            OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
+        _fmh = 0;
+
+        if (_fh != INVALID_HANDLE_VALUE)
+        {
+            _fmh = ::CreateFileMapping (_fh, 0, PAGE_READONLY, 0, 0, 0);
+
+            if (_fmh != 0)
+            {
+                _data = static_cast<CharT *>(::MapViewOfFile
+                    (_fmh, FILE_MAP_READ, 0, 0, 0));
+
+                if (_data) _size = ::GetFileSize (_fh, 0) / sizeof (CharT);
+            }
+        }
+#endif
+    }
+
+    ~basic_memory_file () // Releases only what was actually acquired.
+    {
+#if defined(__unix__)
+        if (_data) ::munmap (const_cast<CharT *>(_data), _bytes);
+        if (_fh > -1) ::close (_fh);
+#elif defined(_WIN32)
+        if (_data) ::UnmapViewOfFile (_data);
+        if (_fmh) ::CloseHandle (_fmh);
+        if (_fh != INVALID_HANDLE_VALUE) ::CloseHandle (_fh);
+#endif
+    }
+
+    // Start of the mapped file, or 0 if nothing is mapped.
+    const CharT *data () const { return _data; }
+
+    // Number of whole CharT elements available at data ().
+    std::size_t size () const { return _size; }
+
+private:
+    const CharT *_data;
+    std::size_t _size;
+#ifdef __unix__
+    int _fh;
+    std::size_t _bytes; // Length of the mapping in bytes.
+#elif defined _WIN32
+    HANDLE _fh;
+    HANDLE _fmh;
+#else
+    #error Only Posix or Windows are supported.
+#endif
+    basic_memory_file (const basic_memory_file &); // No copy construction.
+    basic_memory_file &operator = (const basic_memory_file &); // No assignment.
+};
+
+typedef basic_memory_file<char> memory_file; // File viewed as char.
+typedef basic_memory_file<wchar_t> wmemory_file; // File viewed as wchar_t.
+}
+
+#endif
--- a/inc/lexertl/old/fast_filebuf.hpp
+++ b/inc/lexertl/old/fast_filebuf.hpp
@@ -0,0 +1,45 @@
+// Quick hack...
+// If you find this really is faster than using std::ifstream, let me know
+// as I can always spend some more time to improve it.
+
+namespace lexertl
+{
+// Read-only stream buffer over a C FILE; reads return 0 if the open failed.
+template<typename CharT, class Traits>
+class basic_fast_filebuf : public std::basic_streambuf<CharT, Traits>
+{
+public:
+    basic_fast_filebuf (const char *filename_) :
+        _fp (::fopen (filename_, "r"))
+    {
+    }
+
+    virtual ~basic_fast_filebuf()
+    {
+        if (_fp) ::fclose (_fp); // fclose () must not be given a null FILE.
+    }
+
+protected:
+    FILE *_fp; // 0 when the file could not be opened.
+
+    virtual std::streamsize xsgetn (CharT *ptr_, std::streamsize count_)
+    {
+        if (_fp == 0 || count_ <= 0) return 0; // Nothing can be read.
+        return ::fread (ptr_, sizeof(CharT),
+            static_cast<std::size_t>(count_), _fp);
+    }
+};
+
+typedef basic_fast_filebuf<char, std::char_traits<char> > fast_filebuf;
+typedef basic_fast_filebuf<wchar_t, std::char_traits<wchar_t> > wfast_filebuf;
+}
+
+// Usage:
+// lexertl::rules rules_;
+// lexertl::state_machine state_machine_;
+// fast_filebuf buf ("Unicode/PropList.txt");
+// std::istream if_(&buf);
+// lexertl::stream_shared_iterator iter_ (if_);
+// lexertl::stream_shared_iterator end_;
+// lexertl::match_results<lexertl::stream_shared_iterator>
+//     results_(iter_, end_);
--- a/inc/lexertl/old/string_token.hpp
+++ b/inc/lexertl/old/string_token.hpp
@@ -0,0 +1,561 @@
+// string_token.hpp
+// Copyright (c) 2005-2010 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_STRING_TOKEN_HPP
+#define LEXERTL_STRING_TOKEN_HPP
+
+#include "../char_traits.hpp"
+#include <iostream>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace lexertl
+{
+template<typename char_type>
+struct basic_string_token
+{
+    typedef std::basic_string<char_type> string;
+
+    bool _negated;
+    string _chars;
+
+    basic_string_token () :
+        _negated (false)
+    {
+    }
+
+    basic_string_token (const bool negated_, const string &chars_) :
+        _negated (negated_),
+        _chars (chars_)
+    {
+    }
+
+    void remove_duplicates ()
+    {
+        const char_type *start_ = _chars.c_str ();
+        const char_type *end_ = start_ + _chars.size ();
+
+        // Optimisation for very large charsets:
+        // sorting via pointers is much quicker than
+        // via iterators...
+        std::sort (const_cast<char_type *> (start_), const_cast<char_type *>
+            (end_));
+        _chars.erase (std::unique (_chars.begin (), _chars.end ()),
+            _chars.end ());
+    }
+
+    void normalise ()
+    {
+        const std::size_t max_chars_ = sizeof (char_type) == 1 ?
+            num_chars : num_wchar_ts;
+
+        if (_chars.length () == max_chars_)
+        {
+            _negated = !_negated;
+            _chars.clear ();
+        }
+        else if (_chars.length () > max_chars_ / 2)
+        {
+            negate ();
+        }
+    }
+
+    // Flips _negated and replaces _chars with its complement. Assumes _chars
+    // is sorted and free of duplicates (see remove_duplicates ()).
+    void negate ()
+    {
+        const std::size_t max_chars_ = sizeof (char_type) == 1 ?
+            num_chars : num_wchar_ts;
+        // Must be char_type: this template does not declare any CharT.
+        char_type curr_char_ = std::numeric_limits<char_type>::min ();
+        string temp_;
+        const char_type *curr_ = _chars.c_str ();
+        const char_type *chars_end_ = curr_ + _chars.size ();
+
+        _negated = !_negated;
+        temp_.resize (max_chars_ - _chars.size ());
+
+        char_type *ptr_ = const_cast<char_type *> (temp_.c_str ());
+        std::size_t i_ = 0;
+
+        while (curr_ < chars_end_)
+        {
+            // Emit every value below the next character to be left out.
+            while (*curr_ > curr_char_)
+            {
+                *ptr_ = curr_char_;
+                ++ptr_;
+                ++curr_char_;
+                ++i_;
+            }
+
+            ++curr_char_;
+            ++curr_;
+            ++i_;
+        }
+
+        // Emit the values above the last character that was left out.
+        for (; i_ < max_chars_; ++i_) *ptr_++ = curr_char_++;
+
+        _chars = temp_;
+    }
+
+    bool operator < (const basic_string_token &rhs_) const
+    {
+        return _negated < rhs_._negated ||
+            (_negated == rhs_._negated && _chars < rhs_._chars);
+    }
+
+    bool operator == (const basic_string_token &rhs_) const
+    {
+        return _negated == rhs_._negated && _chars == rhs_._chars;
+    }
+
+    bool empty () const
+    {
+        return _chars.empty () && !_negated;
+    }
+
+    bool any () const
+    {
+        return _chars.empty () && _negated;
+    }
+
+    void clear ()
+    {
+        _negated = false;
+        _chars.clear ();
+    }
+
+    void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
+            !any () && !rhs_.any ()))
+        {
+            intersect_same_types (rhs_, overlap_);
+        }
+        else
+        {
+            intersect_diff_types (rhs_, overlap_);
+        }
+    }
+
+    void merge (const basic_string_token &rhs_,
+        basic_string_token &merged_) const
+    {
+        if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
+            !any () && !rhs_.any ()))
+        {
+            merge_same_types (rhs_, merged_);
+        }
+        else
+        {
+            merge_diff_types (rhs_, merged_);
+        }
+    }
+
+    static string escape_char (const char_type ch_)
+    {
+        string out_;
+
+        switch (ch_)
+        {
+            case '\0':
+                out_ += '\\';
+                out_ += '0';
+                break;
+            case '\a':
+                out_ += '\\';
+                out_ += 'a';
+                break;
+            case '\b':
+                out_ += '\\';
+                out_ += 'b';
+                break;
+            case 27:
+                out_ += '\\';
+                out_ += 'x';
+                out_ += '1';
+                out_ += 'b';
+                break;
+            case '\f':
+                out_ += '\\';
+                out_ += 'f';
+                break;
+            case '\n':
+                out_ += '\\';
+                out_ += 'n';
+                break;
+            case '\r':
+                out_ += '\\';
+                out_ += 'r';
+                break;
+            case '\t':
+                out_ += '\\';
+                out_ += 't';
+                break;
+            case '\v':
+                out_ += '\\';
+                out_ += 'v';
+                break;
+            case '\\':
+                out_ += '\\';
+                out_ += '\\';
+                break;
+            case '"':
+                out_ += '\\';
+                out_ += '"';
+                break;
+            case '\'':
+                out_ += '\\';
+                out_ += '\'';
+                break;
+            default:
+            {
+                if (ch_ < 32)
+                {
+                    std::basic_stringstream<char_type> ss_;
+
+                    out_ += '\\';
+                    out_ += 'x';
+                    ss_ << std::hex <<
+                        static_cast<std::size_t> (ch_);
+                    out_ += ss_.str ();
+                }
+                else
+                {
+                    out_ += ch_;
+                }
+
+                break;
+            }
+        }
+
+        return out_;
+    }
+
+private:
+    void intersect_same_types (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (any ())
+        {
+            clear ();
+            overlap_._negated = true;
+            rhs_.clear ();
+        }
+        else
+        {
+            typename string::iterator iter_ = _chars.begin ();
+            typename string::iterator end_ = _chars.end ();
+            typename string::iterator rhs_iter_ = rhs_._chars.begin ();
+            typename string::iterator rhs_end_ = rhs_._chars.end ();
+
+            overlap_._negated = _negated;
+
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    ++iter_;
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    overlap_._chars += *iter_;
+                    iter_ = _chars.erase (iter_);
+                    end_ = _chars.end ();
+                    rhs_iter_ = rhs_._chars.erase (rhs_iter_);
+                    rhs_end_ = rhs_._chars.end ();
+                }
+            }
+
+            if (_negated)
+            {
+                // duplicates already merged, so safe to merge
+                // using std lib.
+
+                // src, dest
+                merge (_chars, overlap_._chars);
+                // duplicates already merged, so safe to merge
+                // using std lib.
+
+                // src, dest
+                merge (rhs_._chars, overlap_._chars);
+                _negated = false;
+                rhs_._negated = false;
+                std::swap (_chars, rhs_._chars);
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+            else if (!overlap_._chars.empty ())
+            {
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+        }
+    }
+
+    void intersect_diff_types (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (any ())
+        {
+            intersect_any (rhs_, overlap_);
+        }
+        else if (_negated)
+        {
+            intersect_negated (rhs_, overlap_);
+        }
+        else // _negated == false
+        {
+            intersect_charset (rhs_, overlap_);
+        }
+    }
+
+    void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        if (rhs_._negated)
+        {
+            rhs_.intersect_negated (*this, overlap_);
+        }
+        else // rhs._negated == false
+        {
+            rhs_.intersect_charset (*this, overlap_);
+        }
+    }
+
+    void intersect_negated (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (rhs_.any ())
+        {
+            overlap_._negated = true;
+            overlap_._chars = _chars;
+            rhs_._negated = false;
+            rhs_._chars = _chars;
+            clear ();
+        }
+        else // rhs._negated == false
+        {
+            rhs_.intersect_charset (*this, overlap_);
+        }
+    }
+
+    // Intersection routine used when *this is treated as a plain
+    // (non-negated) charset. Called from intersect_diff_types () and, with
+    // the two tokens' roles swapped, from intersect_any () and
+    // intersect_negated ().
+    //
+    // On return overlap_ holds the chars moved out of *this, rhs_ has been
+    // updated in place and *this keeps whatever was not moved.
+    // NOTE(review): both _chars strings are walked as sorted sequences and
+    // fed to std::merge, so they are presumably kept sorted by the class -
+    // confirm against normalise ()/insert ().
+    void intersect_charset (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (rhs_.any ())
+        {
+            // All of our chars go to overlap_; rhs_ becomes the negation of
+            // those same chars and *this is cleared.
+            overlap_._chars = _chars;
+            rhs_._negated = true;
+            rhs_._chars = _chars;
+            clear ();
+        }
+        else // rhs_._negated == true
+        {
+            typename string::iterator iter_ = _chars.begin ();
+            typename string::iterator end_ = _chars.end ();
+            typename string::iterator rhs_iter_ = rhs_._chars.begin ();
+            typename string::iterator rhs_end_ = rhs_._chars.end ();
+
+            // Lock-step walk over both strings.
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    // Our char is missing from rhs_._chars: record it in
+                    // overlap_, add it to rhs_._chars at this position and
+                    // remove it from *this.
+                    overlap_._chars += *iter_;
+                    rhs_iter_ = rhs_._chars.insert (rhs_iter_, *iter_);
+                    // Step past the char just inserted.
+                    ++rhs_iter_;
+                    // insert ()/erase () can invalidate iterators, so the
+                    // end iterators are re-read after each mutation.
+                    rhs_end_ = rhs_._chars.end ();
+                    iter_ = _chars.erase (iter_);
+                    end_ = _chars.end ();
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    // rhs_ char has no counterpart here; skip it.
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    // Present in both lists: the char stays in *this.
+                    ++iter_;
+                    ++rhs_iter_;
+                }
+            }
+
+            if (iter_ != end_)
+            {
+                // nothing bigger in rhs_ than iter_,
+                // so safe to merge using std lib.
+                string temp_ (iter_, end_);
+
+                // src, dest
+                merge (temp_, overlap_._chars);
+                // The tail has moved to overlap_, so drop it from *this.
+                _chars.erase (iter_, end_);
+            }
+
+            if (!overlap_._chars.empty ())
+            {
+                // Fold every overlap char into rhs_._chars. Chars inserted
+                // by the loop above are already there, hence the
+                // std::unique pass.
+                merge (overlap_._chars, rhs_._chars);
+                // possible duplicates, so check for any and erase.
+                rhs_._chars.erase (std::unique (rhs_._chars.begin (),
+                    rhs_._chars.end ()), rhs_._chars.end ());
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+        }
+    }
+
+    // Merges src_ into dest_ using std::merge, leaving src_ unchanged and
+    // dest_ holding the combined sequence (duplicates are NOT removed).
+    // std::merge requires both inputs to be sorted; callers are responsible
+    // for that.
+    //
+    // src_  - chars to merge in (read only).
+    // dest_ - chars to merge into; replaced by the merged result.
+    void merge (string &src_, string &dest_)
+    {
+        // std::merge cannot write into either input, so build the result
+        // in a scratch string of exactly the combined size.
+        string tmp_ (src_.size () + dest_.size (), 0);
+
+        std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (),
+            tmp_.begin ());
+        // Hand the buffer over instead of copy-assigning it: avoids a second
+        // allocation and an O(n) copy of the merged data.
+        dest_.swap (tmp_);
+    }
+
+    // Combines *this with rhs_ into merged_; only ever appends to
+    // merged_._chars and sets merged_._negated.
+    //  - any () on *this: merged_ is just flagged negated, nothing else.
+    //  - _negated: merged_ is negated and receives the chars present in
+    //    BOTH lists.
+    //  - otherwise: merged_ receives the chars present in EITHER list,
+    //    each once.
+    // Both lists are walked in step as ordered sequences.
+    void merge_same_types (const basic_string_token &rhs_,
+        basic_string_token &merged_) const
+    {
+        if (any ())
+        {
+            merged_._negated = true;
+            return;
+        }
+
+        typename string::const_iterator lhs_pos_ = _chars.begin ();
+        const typename string::const_iterator lhs_stop_ = _chars.end ();
+        typename string::const_iterator rhs_pos_ = rhs_._chars.begin ();
+        const typename string::const_iterator rhs_stop_ = rhs_._chars.end ();
+
+        if (_negated)
+        {
+            merged_._negated = _negated;
+
+            // Keep only the chars common to both lists.
+            while (lhs_pos_ != lhs_stop_ && rhs_pos_ != rhs_stop_)
+            {
+                if (*lhs_pos_ == *rhs_pos_)
+                {
+                    merged_._chars.push_back (*lhs_pos_);
+                    ++lhs_pos_;
+                    ++rhs_pos_;
+                }
+                else if (*lhs_pos_ < *rhs_pos_)
+                {
+                    ++lhs_pos_;
+                }
+                else
+                {
+                    ++rhs_pos_;
+                }
+            }
+        }
+        else
+        {
+            // Ordered union of both lists.
+            while (lhs_pos_ != lhs_stop_ && rhs_pos_ != rhs_stop_)
+            {
+                if (*rhs_pos_ < *lhs_pos_)
+                {
+                    merged_._chars.push_back (*rhs_pos_);
+                    ++rhs_pos_;
+                }
+                else
+                {
+                    // lhs char is smaller or equal: emit it, and when it is
+                    // equal also step over the matching rhs char.
+                    if (*lhs_pos_ == *rhs_pos_)
+                    {
+                        ++rhs_pos_;
+                    }
+
+                    merged_._chars.push_back (*lhs_pos_);
+                    ++lhs_pos_;
+                }
+            }
+
+            // Whichever list still has chars left contributes its tail.
+            if (lhs_pos_ != lhs_stop_)
+            {
+                merged_._chars.append (lhs_pos_, lhs_stop_);
+            }
+            else if (rhs_pos_ != rhs_stop_)
+            {
+                merged_._chars.append (rhs_pos_, rhs_stop_);
+            }
+        }
+
+        merged_.normalise ();
+    }
+
+    // Combines *this with rhs_ into merged_ by handing both to
+    // merge_negated (), with whichever token is chosen by this->_negated
+    // passed first: *this when _negated is set, rhs_ otherwise.
+    // merged_ is normalised afterwards.
+    void merge_diff_types (const basic_string_token &rhs_,
+        basic_string_token &merged_) const
+    {
+        const basic_string_token &first_ = _negated ? *this : rhs_;
+        const basic_string_token &second_ = _negated ? rhs_ : *this;
+
+        merge_negated (first_, second_, merged_);
+        merged_.normalise ();
+    }
+
+    // Flags merged_ as negated and appends to merged_._chars every char of
+    // lhs_._chars that does not also occur in rhs_._chars. Both lists are
+    // walked in step as ordered sequences; chars that appear only in rhs_
+    // are ignored.
+    void merge_negated (const basic_string_token &lhs_,
+        const basic_string_token &rhs_, basic_string_token &merged_) const
+    {
+        typename string::const_iterator lhs_pos_ = lhs_._chars.begin ();
+        const typename string::const_iterator lhs_stop_ = lhs_._chars.end ();
+        typename string::const_iterator rhs_pos_ = rhs_._chars.begin ();
+        const typename string::const_iterator rhs_stop_ = rhs_._chars.end ();
+
+        merged_._negated = true;
+
+        while (lhs_pos_ != lhs_stop_ && rhs_pos_ != rhs_stop_)
+        {
+            if (*rhs_pos_ < *lhs_pos_)
+            {
+                // rhs-only char: nothing to record.
+                ++rhs_pos_;
+                continue;
+            }
+
+            if (*lhs_pos_ < *rhs_pos_)
+            {
+                // lhs-only char survives.
+                merged_._chars.push_back (*lhs_pos_);
+            }
+            else
+            {
+                // Present in both: dropped, and rhs moves on as well.
+                ++rhs_pos_;
+            }
+
+            ++lhs_pos_;
+        }
+
+        // Only interested in any remaining 'negated' (lhs_) chars.
+        if (lhs_pos_ != lhs_stop_)
+        {
+            merged_._chars.append (lhs_pos_, lhs_stop_);
+        }
+    }
+};
+}
+
+#endif
--- a/inc/lexertl/parser/parser.hpp
+++ b/inc/lexertl/parser/parser.hpp
--- a/inc/lexertl/parser/tokeniser/re_token.hpp
+++ b/inc/lexertl/parser/tokeniser/re_token.hpp
@@ -0,0 +1,100 @@
+// re_token.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKEN_HPP
+#define LEXERTL_RE_TOKEN_HPP
+
+#include "../../string_token.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Kinds of symbol stored in basic_re_token::_type.
+// The enumerator values are used directly as row/column indices into
+// basic_re_token's precedence_table_ and as indices into its
+// precedence_strings_ (both sized END + 1), so the order here must stay in
+// step with those tables and END must remain the last enumerator.
+enum token_type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT,
+    DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT,
+    ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN,
+    END};
+
+// A single regex token: its kind (_type), an auxiliary string (_extra) and
+// a character set (_str).
+//   input_char_type - character type of the _extra string.
+//   char_type       - character type of the basic_string_token in _str.
+template<typename input_char_type, typename char_type>
+struct basic_re_token
+{
+    typedef basic_string_token<char_type> string_token;
+    typedef std::basic_string<input_char_type> string;
+
+    // Kind of token; also the row index used by precedence ().
+    token_type _type;
+    // Extra text attached to the token (the tokeniser appends the digits
+    // and ',' of a {n,m} repeat here).
+    string _extra;
+    // Character set, filled in by the tokeniser for CHARSET tokens.
+    string_token _str;
+
+    // Creates a token of the given kind (BEGIN by default) with empty
+    // _extra and a default-constructed _str.
+    basic_re_token (const token_type type_ = BEGIN) :
+        _type (type_),
+        _extra (),
+        _str ()
+    {
+    }
+
+    // Resets the token: _type back to BEGIN, _extra and _str cleared.
+    void clear ()
+    {
+        _type = BEGIN;
+        _extra.clear ();
+        _str.clear ();
+    }
+
+    // Memberwise copy of all three fields.
+    basic_re_token &operator = (const basic_re_token &rhs_)
+    {
+        _type = rhs_._type;
+        _extra = rhs_._extra;
+        _str = rhs_._str;
+        return *this;
+    }
+
+    // Returns the table entry for row _type (this token) and column type_.
+    // Entries are one of '<', '=', '>' or ' '.
+    // NOTE(review): presumably these are the relations of an operator
+    // precedence parser, with ' ' meaning "no relation / error" - confirm
+    // against the parser that consumes this.
+    char precedence (const token_type type_) const
+    {
+        // Moved in here for Solaris compiler.
+        // Rows and columns both follow the order of the token_type enum.
+        static const char precedence_table_[END + 1][END + 1] = {
+//        BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP,  | , CHR, BOL, EOL, MCR,  ( ,  ) ,  ? , ?? ,  * , *? ,  + , +? , {n}, {n}?, END
+/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>'},
+/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  |  */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* BOL */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* EOL */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*  (  */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*  )  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*  ?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* ??  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  *  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* *?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  +  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* +?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{nm}?*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}
+};
+
+        return precedence_table_[_type][type_];
+    }
+
+    // Returns a printable name for this token's _type; the array is
+    // indexed by the token_type value.
+    const char *precedence_string () const
+    {
+        // Moved in here for Solaris compiler.
+        static const char *precedence_strings_[END + 1] =
+            {"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION",
+            "REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO", "(", ")",
+            "?", "??", "*", "*?", "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"};
+
+        return precedence_strings_[_type];
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tokeniser/re_tokeniser.hpp
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser.hpp
@@ -0,0 +1,829 @@
+// re_tokeniser.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKENISER_HPP
+#define LEXERTL_RE_TOKENISER_HPP
+
+#include <cstring>
+#include "re_token.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <sstream>
+#include "../../string_token.hpp"
+#include "re_tokeniser_helper.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename rules_char_type, typename char_type, typename id_type>
+class basic_re_tokeniser
+{
+public:
+    typedef basic_re_token<rules_char_type, char_type> re_token;
+    typedef basic_re_tokeniser_helper<rules_char_type, char_type, id_type>
+        tokeniser_helper;
+    typedef typename tokeniser_helper::char_state char_state;
+    typedef typename tokeniser_helper::state state;
+    typedef basic_string_token<char_type> string_token;
+
+    // Reads the next regex token from state_ into *token_.
+    //
+    // lhs_   - forwarded to open_curly (), whose {-}/{+} handlers check
+    //          and modify lhs_->_str.
+    // state_ - tokeniser state; advanced past whatever is consumed.
+    // token_ - cleared first, then filled in. _type is END when the input
+    //          is exhausted.
+    //
+    // Throws runtime_error on unterminated strings/parentheses, unbalanced
+    // ')', and on '/' (lookahead is not supported); helpers called from
+    // here throw for their own error cases.
+    static void next (re_token *lhs_, state &state_, re_token *token_)
+    {
+        rules_char_type ch_ = 0;
+        bool eos_ = state_.next (ch_);
+        bool skipped_ = false;
+
+        token_->clear ();
+
+        // Repeat until a pass skips nothing: every skipped comment or run
+        // of whitespace can be followed by more quotes, comments or
+        // whitespace.
+        do
+        {
+            // string begin/end
+            while (!eos_ && ch_ == '"')
+            {
+                // Each unescaped '"' toggles string mode.
+                state_._in_string ^= 1;
+                eos_ = state_.next (ch_);
+            }
+
+            // (?# ...)
+            skipped_ = comment (eos_, ch_, state_);
+            // skip_ws set
+            skipped_ |= skip (eos_, ch_, state_);
+        } while (skipped_);
+
+        if (eos_)
+        {
+            // Input exhausted: it is an error to still be inside a string
+            // or an unclosed group.
+            if (state_._in_string)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (missing '\"') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            if (state_._paren_count)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (missing ')') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            token_->_type = END;
+        }
+        else
+        {
+            if (ch_ == '\\')
+            {
+                // Even if we are in a string, respect escape sequences...
+                token_->_type = CHARSET;
+                escape (state_, token_->_str);
+            }
+            else if (state_._in_string)
+            {
+                // All other meta characters lose their special meaning
+                // inside a string.
+                token_->_type = CHARSET;
+                token_->_str.insert (typename string_token::range (ch_, ch_));
+            }
+            else
+            {
+                // Not an escape sequence and not inside a string, so
+                // check for meta characters.
+                switch (ch_)
+                {
+                    case '(':
+                        token_->_type = OPENPAREN;
+                        ++state_._paren_count;
+                        // Handles an optional "?flags:" prefix and the
+                        // flags stack.
+                        read_options (state_);
+                        break;
+                    case ')':
+                        --state_._paren_count;
+
+                        if (state_._paren_count < 0)
+                        {
+                            std::ostringstream ss_;
+
+                            ss_ << "Number of open parenthesis < 0 "
+                                "at index " << state_.index () - 1 <<
+                                " in rule id " << state_._id << '.';
+                            throw runtime_error (ss_.str ());
+                        }
+
+                        token_->_type = CLOSEPAREN;
+
+                        // Restore the flags saved by read_options (), if
+                        // any were saved.
+                        if (!state_._flags_stack.empty ())
+                        {
+                            state_._flags = state_._flags_stack.top ();
+                            state_._flags_stack.pop ();
+                        }
+
+                        break;
+                    // For '?', '*' and '+' a directly following '?' is
+                    // consumed and selects the A-prefixed token type.
+                    case '?':
+                        if (!state_.eos () && *state_._curr == '?')
+                        {
+                            token_->_type = AOPT;
+                            state_.increment ();
+                        }
+                        else
+                        {
+                            token_->_type = OPT;
+                        }
+
+                        break;
+                    case '*':
+                        if (!state_.eos () && *state_._curr == '?')
+                        {
+                            token_->_type = AZEROORMORE;
+                            state_.increment ();
+                        }
+                        else
+                        {
+                            token_->_type = ZEROORMORE;
+                        }
+
+                        break;
+                    case '+':
+                        if (!state_.eos () && *state_._curr == '?')
+                        {
+                            token_->_type = AONEORMORE;
+                            state_.increment ();
+                        }
+                        else
+                        {
+                            token_->_type = ONEORMORE;
+                        }
+
+                        break;
+                    case '{':
+                        // {-}, {+}, {n,m} or {MACRO}.
+                        open_curly (lhs_, state_, token_);
+                        break;
+                    case '|':
+                        token_->_type = OR;
+                        break;
+                    case '^':
+                        // BOL only when '^' is the very first character and
+                        // state_._macro is not set; otherwise a literal.
+                        if (!state_._macro && state_._curr - 1 == state_._start)
+                        {
+                            token_->_type = BOL;
+                        }
+                        else
+                        {
+                            token_->_type = CHARSET;
+                            token_->_str.insert (typename string_token::range
+                                (ch_, ch_));
+                        }
+
+                        break;
+                    case '$':
+                        // EOL only when '$' is the very last character and
+                        // state_._macro is not set; otherwise a literal.
+                        if (!state_._macro && state_._curr == state_._end)
+                        {
+                            token_->_type = EOL;
+                        }
+                        else
+                        {
+                            token_->_type = CHARSET;
+                            token_->_str.insert (typename string_token::range
+                                (ch_, ch_));
+                        }
+
+                        break;
+                    case '.':
+                    {
+                        token_->_type = CHARSET;
+
+                        // Built as a negated set: empty, or containing only
+                        // '\n' when dot_not_newline is set.
+                        if (state_._flags & dot_not_newline)
+                        {
+                            token_->_str.insert (typename string_token::range
+                                ('\n', '\n'));
+                        }
+
+                        token_->_str.negate ();
+                        break;
+                    }
+                    case '[':
+                    {
+                        token_->_type = CHARSET;
+                        tokeniser_helper::charset (state_, token_->_str);
+                        break;
+                    }
+                    case '/':
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Lookahead ('/') is not supported yet in " <<
+                            "rule id " << state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                        break;
+                    }
+                    default:
+                        // Ordinary character.
+                        token_->_type = CHARSET;
+
+                        // With icase set, a cased letter matches both its
+                        // upper and lower case forms (per state_._locale).
+                        if ((state_._flags & icase) &&
+                            (std::isupper (ch_, state_._locale) ||
+                            std::islower (ch_, state_._locale)))
+                        {
+                            char_type upper_ = std::toupper
+                                (ch_, state_._locale);
+                            char_type lower_ = std::tolower
+                                (ch_, state_._locale);
+
+                            token_->_str.insert (typename string_token::range
+                                (upper_, upper_));
+                            token_->_str.insert (typename string_token::range
+                                (lower_, lower_));
+                        }
+                        else
+                        {
+                            token_->_str.insert (typename string_token::range
+                                (ch_, ch_));
+                        }
+
+                        break;
+                }
+            }
+        }
+    }
+
+private:
+    // Skips a "(?# ... )" comment if one starts at the current character.
+    //
+    // eos_/ch_ - the caller's current end-of-input flag and character;
+    //            updated to describe the first character after the comment.
+    // Returns true when a comment was skipped, false (with nothing
+    // consumed) otherwise. Parentheses nested inside the comment are
+    // counted, so the comment ends at its matching ')'.
+    // Throws runtime_error if the input ends inside the comment.
+    static bool comment (bool &eos_, rules_char_type &ch_, state &state_)
+    {
+        const bool starts_comment_ = !eos_ && !state_._in_string &&
+            ch_ == '(' && !state_.eos () && *state_._curr == '?' &&
+            state_._curr + 1 < state_._end && *(state_._curr + 1) == '#';
+
+        if (!starts_comment_)
+        {
+            return false;
+        }
+
+        // The opening '(' already counts as one level.
+        std::size_t depth_ = 1;
+
+        // Step over "?#".
+        state_.increment ();
+        state_.increment ();
+
+        for (;;)
+        {
+            eos_ = state_.next (ch_);
+
+            if (ch_ == '(')
+            {
+                ++depth_;
+            }
+            else if (ch_ == ')')
+            {
+                --depth_;
+            }
+
+            if (eos_ || (ch_ == ')' && depth_ == 0))
+            {
+                break;
+            }
+        }
+
+        if (eos_)
+        {
+            std::ostringstream msg_;
+
+            // Pointless returning index if at end of string
+            msg_ << "Unexpected end of regex (unterminated comment) " <<
+                "in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        // Load the character that follows the closing ')'.
+        eos_ = state_.next (ch_);
+        return true;
+    }
+
+    // When the skip_ws flag is set and we are not inside a string, skips
+    // any run of whitespace characters and C style /* ... */ comments
+    // starting at the current character.
+    //
+    // eos_/ch_ - the caller's current end-of-input flag and character;
+    //            updated to describe the first character not skipped.
+    // Returns true if anything was skipped.
+    // Throws runtime_error if the input ends inside a C style comment.
+    static bool skip (bool &eos_, rules_char_type &ch_, state &state_)
+    {
+        if (eos_ || !(state_._flags & skip_ws) || state_._in_string)
+        {
+            return false;
+        }
+
+        bool skipped_ = false;
+
+        for (;;)
+        {
+            if (ch_ == '/' && !state_.eos () && *state_._curr == '*')
+            {
+                // Step over the '*' of "/*" and scan for "*/".
+                state_.increment ();
+                eos_ = state_.next (ch_);
+
+                while (!eos_ && !(ch_ == '*' && !state_.eos () &&
+                    *state_._curr == '/'))
+                {
+                    eos_ = state_.next (ch_);
+                }
+
+                if (eos_)
+                {
+                    std::ostringstream msg_;
+
+                    // Pointless returning index if at end of string
+                    msg_ << "Unexpected end of regex (unterminated " <<
+                        "C style comment) in rule id " <<
+                        state_._id << '.';
+                    throw runtime_error (msg_.str ());
+                }
+
+                // Step over the '/' of "*/" and load what follows.
+                state_.increment ();
+                eos_ = state_.next (ch_);
+            }
+            else if (ch_ == ' ' || ch_ == '\t' || ch_ == '\n' ||
+                ch_ == '\r' || ch_ == '\f' || ch_ == '\v')
+            {
+                eos_ = state_.next (ch_);
+            }
+            else
+            {
+                // Neither a comment nor whitespace: done.
+                break;
+            }
+
+            skipped_ = true;
+        }
+
+        return skipped_;
+    }
+
+    // Called by next () straight after a '('. If the group starts with
+    // '?', the current flags are saved on state_._flags_stack and the
+    // option letters up to ':' are applied to state_._flags:
+    //   i - sets icase            (-i clears it)
+    //   s - clears dot_not_newline (-s sets it)
+    //   x - sets skip_ws          (-x clears it)
+    // A '-' toggles negation for the next option letter only.
+    // For a plain '(' the flags are saved only if the stack is already
+    // non-empty.
+    // Throws runtime_error for any other option character.
+    static void read_options (state &state_)
+    {
+        if (state_.eos () || *state_._curr != '?')
+        {
+            if (!state_._flags_stack.empty ())
+            {
+                state_._flags_stack.push (state_._flags);
+            }
+
+            return;
+        }
+
+        rules_char_type ch_ = 0;
+        bool negate_ = false;
+
+        // Step over the '?'.
+        state_.increment ();
+
+        bool eos_ = state_.next (ch_);
+
+        state_._flags_stack.push (state_._flags);
+
+        // End of string handler will handle early termination
+        for (; !eos_ && ch_ != ':'; eos_ = state_.next (ch_))
+        {
+            if (ch_ == '-')
+            {
+                negate_ = !negate_;
+                continue;
+            }
+
+            switch (ch_)
+            {
+                case 'i':
+                    state_._flags = negate_ ?
+                        (state_._flags & ~icase) :
+                        (state_._flags | icase);
+                    break;
+                case 's':
+                    state_._flags = negate_ ?
+                        (state_._flags | dot_not_newline) :
+                        (state_._flags & ~dot_not_newline);
+                    break;
+                case 'x':
+                    state_._flags = negate_ ?
+                        (state_._flags & ~skip_ws) :
+                        (state_._flags | skip_ws);
+                    break;
+                default:
+                {
+                    std::ostringstream msg_;
+
+                    msg_ << "Unknown option at index " <<
+                        state_.index () - 1 << " in rule id " <<
+                        state_._id << '.';
+                    throw runtime_error (msg_.str ());
+                }
+            }
+
+            // Negation applies to a single option letter.
+            negate_ = false;
+        }
+    }
+
+    // Handles the escape sequence that follows a '\\' and adds the
+    // result to token_. tokeniser_helper::escape_sequence () either yields
+    // a single character (null return), which is inserted directly, or a
+    // charset string, which is parsed by tokeniser_helper::charset ().
+    static void escape (state &state_, string_token &token_)
+    {
+        char_type ch_ = 0;
+        std::size_t str_len_ = 0;
+        const char *charset_str_ = tokeniser_helper::escape_sequence (state_,
+            ch_, str_len_);
+
+        if (!charset_str_)
+        {
+            token_.insert (typename string_token::range (ch_, ch_));
+            return;
+        }
+
+        // The first character of the returned string is skipped before it
+        // is handed to charset ().
+        char_state charset_state_ (charset_str_ + 1, charset_str_ + str_len_,
+            state_._id, state_._flags, state_._locale, false);
+
+        tokeniser_helper::charset (charset_state_, token_);
+    }
+
+    // Called by next () after a '{'. Peeks at the following character to
+    // decide what the brace introduces:
+    //   '-'   -> charset_difference ()  ({-})
+    //   '+'   -> charset_union ()       ({+})
+    //   digit -> repeat_n ()            ({n[,[m]]})
+    //   other -> macro ()
+    // Throws runtime_error if the '{' is the last character.
+    static void open_curly (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        if (state_.eos ())
+        {
+            std::ostringstream msg_;
+
+            // Pointless returning index if at end of string
+            msg_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        if (*state_._curr == '-')
+        {
+            charset_difference (lhs_, state_, token_);
+            return;
+        }
+
+        if (*state_._curr == '+')
+        {
+            charset_union (lhs_, state_, token_);
+            return;
+        }
+
+        if (!(*state_._curr < '0' || *state_._curr > '9'))
+        {
+            repeat_n (state_, token_);
+            return;
+        }
+
+        macro (state_, token_);
+    }
+
+    // Handles the "{-}" operator: removes the charset that follows it from
+    // the charset in *lhs_, then reads the token after that into *token_.
+    //
+    // Throws runtime_error if lhs_ is not a CHARSET, if "{-" is not
+    // closed by '}', if the following token is not a CHARSET, or if the
+    // subtraction leaves lhs_->_str empty.
+    static void charset_difference (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        if (lhs_->_type != CHARSET)
+        {
+            std::ostringstream msg_;
+
+            msg_ << "CHARSET must precede {-} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        rules_char_type ch_ = 0;
+
+        // Consume the '-' that open_curly () peeked at; the return value
+        // is deliberately not checked here.
+        state_.next (ch_);
+
+        // The next character must be the closing '}'.
+        const bool eos_ = state_.next (ch_);
+
+        if (eos_)
+        {
+            std::ostringstream msg_;
+
+            // Pointless returning index if at end of string
+            msg_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+        else if (ch_ != '}')
+        {
+            std::ostringstream msg_;
+
+            msg_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        // Right-hand operand of the difference.
+        re_token rhs_;
+
+        next (lhs_, state_, &rhs_);
+
+        if (rhs_._type != CHARSET)
+        {
+            std::ostringstream msg_;
+
+            msg_ << "CHARSET must follow {-} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        lhs_->_str.remove (rhs_._str);
+
+        if (lhs_->_str.empty ())
+        {
+            std::ostringstream msg_;
+
+            msg_ << "Empty charset created by {-} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        // Fetch the token that follows the whole "lhs{-}rhs" expression.
+        next (lhs_, state_, token_);
+    }
+
+    // Handles the "{+}" operator: adds the charset that follows it to the
+    // charset in *lhs_, then reads the token after that into *token_.
+    //
+    // Throws runtime_error if lhs_ is not a CHARSET, if "{+" is not
+    // closed by '}', or if the following token is not a CHARSET.
+    static void charset_union (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        if (lhs_->_type != CHARSET)
+        {
+            std::ostringstream msg_;
+
+            msg_ << "CHARSET must precede {+} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        rules_char_type ch_ = 0;
+
+        // Consume the '+' that open_curly () peeked at; the return value
+        // is deliberately not checked here.
+        state_.next (ch_);
+
+        // The next character must be the closing '}'.
+        const bool eos_ = state_.next (ch_);
+
+        if (eos_)
+        {
+            std::ostringstream msg_;
+
+            // Pointless returning index if at end of string
+            msg_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+        else if (ch_ != '}')
+        {
+            std::ostringstream msg_;
+
+            msg_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        // Right-hand operand of the union.
+        re_token rhs_;
+
+        next (lhs_, state_, &rhs_);
+
+        if (rhs_._type != CHARSET)
+        {
+            std::ostringstream msg_;
+
+            msg_ << "CHARSET must follow {+} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (msg_.str ());
+        }
+
+        lhs_->_str.insert (rhs_._str);
+        // Fetch the token that follows the whole "lhs{+}rhs" expression.
+        next (lhs_, state_, token_);
+    }
+
+    // SYNTAX:
+    //   {n[,[n]]}
+    // SEMANTIC RULES:
+    //   {0} - INVALID (throw exception)
+    //   {0,} = *
+    //   {0,0} - INVALID (throw exception)
+    //   {0,1} = ?
+    //   {1,} = +
+    //   {min,max} where min == max - {min}
+    //   {min,max} where max < min - INVALID (throw exception)
+    // Reads the body of a counted repeat "n[,[n]]}" from state_ and stores
+    // the resulting token type in token_->_type. The digits and the comma
+    // are appended to token_->_extra as they are read.
+    // Forms equivalent to '*', '+' and '?' become ZEROORMORE, ONEORMORE and
+    // OPT; every other valid form becomes REPEATN. A '?' immediately after
+    // the closing '}' is consumed and selects the A-prefixed token type
+    // (NOTE(review): presumably the non-greedy variants - confirm).
+    // Throws runtime_error on a premature end of the regex, a missing '}',
+    // a repeat count of exactly zero, or max < min.
+    static void repeat_n (state &state_, re_token *token_)
+    {
+        rules_char_type curr_ = 0;
+        std::size_t lower_ = 0;
+        std::size_t upper_ = 0;
+        bool done_ = state_.next (curr_);
+
+        // Leading digits accumulate into the minimum count.
+        for (; !done_ && curr_ >= '0' && curr_ <= '9';
+            done_ = state_.next (curr_))
+        {
+            lower_ = lower_ * 10 + (curr_ - '0');
+            token_->_extra += curr_;
+        }
+
+        if (done_)
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        // ranged_:  an explicit, distinct maximum was supplied.
+        // counted_: the token is still a general counted repeat, i.e. it
+        //           has not been replaced by a '*', '+' or '?' equivalent.
+        bool ranged_ = false;
+        bool counted_ = true;
+
+        if (curr_ == ',')
+        {
+            token_->_extra += curr_;
+            done_ = state_.next (curr_);
+
+            if (done_)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            if (curr_ != '}')
+            {
+                // Only a maximum count may follow the comma.
+                if (!(curr_ >= '0' && curr_ <= '9'))
+                {
+                    std::ostringstream ss_;
+
+                    ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                ranged_ = true;
+
+                do
+                {
+                    upper_ = upper_ * 10 + (curr_ - '0');
+                    token_->_extra += curr_;
+                    done_ = state_.next (curr_);
+                } while (!done_ && curr_ >= '0' && curr_ <= '9');
+
+                if (done_)
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex (missing '}') "
+                        "in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                if (lower_ == upper_)
+                {
+                    // {n,n} is just {n}: drop the comma and everything
+                    // after it, and forget the maximum.
+                    token_->_extra.erase (token_->_extra.find (','));
+                    ranged_ = false;
+                    upper_ = 0;
+                }
+                else if (lower_ == 0 && upper_ == 1)
+                {
+                    // {0,1} is equivalent to '?'.
+                    token_->_type = OPT;
+                    counted_ = false;
+                }
+            }
+            else if (lower_ == 0)
+            {
+                // {0,} is equivalent to '*'.
+                token_->_type = ZEROORMORE;
+                counted_ = false;
+            }
+            else if (lower_ == 1)
+            {
+                // {1,} is equivalent to '+'.
+                token_->_type = ONEORMORE;
+                counted_ = false;
+            }
+        }
+
+        if (curr_ != '}')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        if (counted_)
+        {
+            // SEMANTIC VALIDATION:
+            // {0,} has already become '*', so a zero minimum together with
+            // a zero maximum can only mean {0} or {0,0}.
+            if (lower_ == 0 && upper_ == 0)
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Cannot have exactly zero repeats preceding index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            if (ranged_ && upper_ < lower_)
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Max less than min preceding index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+        }
+
+        // A '?' directly after the '}' switches to the A-prefixed type.
+        const bool suffix_ = !state_.eos () && *state_._curr == '?';
+
+        if (counted_)
+        {
+            if (suffix_)
+            {
+                token_->_type = AREPEATN;
+            }
+            else
+            {
+                token_->_type = REPEATN;
+            }
+        }
+        else if (!suffix_)
+        {
+            // No suffix: the shorthand type chosen above already stands.
+            return;
+        }
+        else if (token_->_type == ZEROORMORE)
+        {
+            token_->_type = AZEROORMORE;
+        }
+        else if (token_->_type == ONEORMORE)
+        {
+            token_->_type = AONEORMORE;
+        }
+        else if (token_->_type == OPT)
+        {
+            token_->_type = AOPT;
+        }
+        else
+        {
+            return;
+        }
+
+        if (suffix_)
+        {
+            // Step over the '?' that was just interpreted.
+            state_.increment ();
+        }
+    }
+
+    // Reads a macro name terminated by '}' from state_.
+    // The name must start with '_' or an ASCII letter and may continue with
+    // '_', '-', ASCII letters and digits. The name is appended to
+    // token_->_extra and token_->_type is set to MACRO.
+    // Throws runtime_error if the first character is not valid, if the
+    // regex ends before '}' is seen, or if the name is followed by
+    // anything other than '}'.
+    static void macro (state &state_, re_token *token_)
+    {
+        rules_char_type curr_ = 0;
+
+        state_.next (curr_);
+
+        const bool valid_start_ = curr_ == '_' ||
+            (curr_ >= 'A' && curr_ <= 'Z') ||
+            (curr_ >= 'a' && curr_ <= 'z');
+
+        if (!valid_start_)
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Invalid MACRO name at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        for (;;)
+        {
+            token_->_extra += curr_;
+
+            if (state_.next (curr_))
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex " <<
+                    "(missing '}') in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            const bool name_char_ = curr_ == '_' || curr_ == '-' ||
+                (curr_ >= 'A' && curr_ <= 'Z') ||
+                (curr_ >= 'a' && curr_ <= 'z') ||
+                (curr_ >= '0' && curr_ <= '9');
+
+            if (!name_char_)
+            {
+                // First character that cannot be part of the name.
+                break;
+            }
+        }
+
+        if (curr_ != '}')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        token_->_type = MACRO;
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp
--- a/inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp
@@ -0,0 +1,115 @@
+// re_tokeniser_state.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKENISER_STATE_HPP
+#define LEXERTL_RE_TOKENISER_STATE_HPP
+
+#include "../../char_traits.hpp"
+#include "../../enums.hpp"
+#include <locale>
+#include "../../size_t.hpp"
+#include <stack>
+
+namespace lexertl
+{
+namespace detail
+{
+// Cursor over the character range [_start, _end) of a regex being
+// tokenised, together with the bookkeeping values that travel with it.
+//
+// ch_type - character type of the regex text.
+// id_type - integral type used for ids (_id, _nl_id).
+template<typename ch_type, typename id_type>
+struct basic_re_tokeniser_state
+{
+    typedef ch_type char_type;
+    typedef typename basic_char_traits<char_type>::index_type index_type;
+
+    // Bounds of the input. The pointers themselves are deliberately not
+    // const: assign() / operator = have to be able to rebind them, which
+    // is impossible (and does not compile) for "* const" members.
+    const char_type *_start;
+    const char_type *_end;
+    // Current read position, _start <= _curr; see next() and eos().
+    const char_type *_curr;
+    // Rule id, used by the tokeniser when formatting error messages.
+    id_type _id;
+    // Current flags plus a stack for saved flag values
+    // (NOTE(review): presumably the regex_flags from enums.hpp - confirm).
+    std::size_t _flags;
+    std::stack<std::size_t> _flags_stack;
+    std::locale _locale;
+    bool _macro;
+    // Starts at 0 and _in_string starts false; both are maintained by the
+    // code that uses this state, not by this struct.
+    long _paren_count;
+    bool _in_string;
+    // Starts as static_cast<id_type>(~0), i.e. all bits set.
+    id_type _nl_id;
+
+    // Positions the cursor at start_; all counters start cleared.
+    basic_re_tokeniser_state (const char_type *start_,
+        const char_type * const end_, id_type id_, const std::size_t flags_,
+        const std::locale locale_, const bool macro_) :
+        _start (start_),
+        _end (end_),
+        _curr (start_),
+        _id (id_),
+        _flags (flags_),
+        _flags_stack (),
+        _locale (locale_),
+        _macro (macro_),
+        _paren_count (0),
+        _in_string (false),
+        _nl_id (static_cast<id_type>(~0))
+    {
+    }
+
+    // Member-wise copy. Done in the initialiser list so that no member is
+    // default-constructed first and then overwritten.
+    basic_re_tokeniser_state (const basic_re_tokeniser_state &rhs_) :
+        _start (rhs_._start),
+        _end (rhs_._end),
+        _curr (rhs_._curr),
+        _id (rhs_._id),
+        _flags (rhs_._flags),
+        _flags_stack (rhs_._flags_stack),
+        _locale (rhs_._locale),
+        _macro (rhs_._macro),
+        _paren_count (rhs_._paren_count),
+        _in_string (rhs_._in_string),
+        _nl_id (rhs_._nl_id)
+    {
+    }
+
+    // prevent VC++ 7.1 warning:
+    const basic_re_tokeniser_state &operator =
+        (const basic_re_tokeniser_state &rhs_)
+    {
+        return assign (rhs_);
+    }
+
+    // Copies every member from rhs_ and returns *this.
+    // Self-assignment is harmless: each member is simply copied onto
+    // itself.
+    basic_re_tokeniser_state &assign (const basic_re_tokeniser_state &rhs_)
+    {
+        _start = rhs_._start;
+        _end = rhs_._end;
+        _curr = rhs_._curr;
+        _id = rhs_._id;
+        _flags = rhs_._flags;
+        _flags_stack = rhs_._flags_stack;
+        _locale = rhs_._locale;
+        _macro = rhs_._macro;
+        _paren_count = rhs_._paren_count;
+        _in_string = rhs_._in_string;
+        _nl_id = rhs_._nl_id;
+        return *this;
+    }
+
+    // Fetches the next character.
+    // Returns true when the input is exhausted (ch_ is then set to 0);
+    // otherwise stores the current character in ch_, advances the cursor
+    // by one and returns false.
+    inline bool next (char_type &ch_)
+    {
+        if (_curr >= _end)
+        {
+            ch_ = 0;
+            return true;
+        }
+        else
+        {
+            ch_ = *_curr;
+            increment ();
+            return false;
+        }
+    }
+
+    // Advances the cursor by one character without any bounds check.
+    inline void increment ()
+    {
+        ++_curr;
+    }
+
+    // Offset of the cursor from the start of the input.
+    inline std::size_t index ()
+    {
+        return _curr - _start;
+    }
+
+    // True once the cursor has reached (or passed) the end of the input.
+    inline bool eos ()
+    {
+        return _curr >= _end;
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/end_node.hpp
+++ b/inc/lexertl/parser/tree/end_node.hpp
@@ -0,0 +1,112 @@
+// end_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_END_NODE_HPP
+#define LEXERTL_END_NODE_HPP
+
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node that marks the end of a rule. It carries the ids reported by
+// id(), user_id(), next_dfa(), push_dfa() and pop_dfa(), is never nullable
+// and is its own firstpos and lastpos.
+template<typename id_type>
+class basic_end_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // Stores the supplied ids and registers this node as the only member
+    // of its own firstpos and lastpos sets.
+    basic_end_node (const id_type id_, const id_type user_id_,
+        const id_type next_dfa_, const id_type push_dfa_,
+        const bool pop_dfa_) :
+        basic_node<id_type> (false),
+        _id (id_),
+        _user_id (user_id_),
+        _next_dfa (next_dfa_),
+        _push_dfa (push_dfa_),
+        _pop_dfa (pop_dfa_),
+        _followpos ()
+    {
+        node::_firstpos.push_back (this);
+        node::_lastpos.push_back (this);
+    }
+
+    virtual ~basic_end_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::END;
+    }
+
+    // An end node has no children: nothing is pushed and false is
+    // returned.
+    virtual bool traverse (const_node_stack &/*node_stack_*/,
+        bool_stack &/*perform_op_stack_*/) const
+    {
+        return false;
+    }
+
+    // Nothing ever adds to _followpos, so the returned vector is empty.
+    virtual const node_vector &followpos () const
+    {
+        return _followpos;
+    }
+
+    virtual bool end_state () const
+    {
+        return true;
+    }
+
+    // Plain accessors for the values given to the constructor.
+    virtual id_type id () const
+    {
+        return _id;
+    }
+
+    virtual id_type user_id () const
+    {
+        return _user_id;
+    }
+
+    virtual id_type next_dfa () const
+    {
+        return _next_dfa;
+    }
+
+    virtual id_type push_dfa () const
+    {
+        return _push_dfa;
+    }
+
+    virtual bool pop_dfa () const
+    {
+        return _pop_dfa;
+    }
+
+private:
+    id_type _id;
+    id_type _user_id;
+    id_type _next_dfa;
+    id_type _push_dfa;
+    bool _pop_dfa;
+    node_vector _followpos;
+
+    // Intentionally a no-op: end nodes are not copied.
+    virtual void copy_node (node_ptr_vector &/*node_ptr_vector_*/,
+        node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/,
+        bool &/*down_*/) const
+    {
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/iteration_node.hpp
+++ b/inc/lexertl/parser/tree/iteration_node.hpp
@@ -0,0 +1,103 @@
+// iteration_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_ITERATION_NODE_HPP
+#define LEXERTL_ITERATION_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node for a repeated sub-expression. It is always nullable, shares
+// the firstpos and lastpos of its child, and links every lastpos node back
+// to the firstpos nodes so that the child can repeat.
+template<typename id_type>
+class basic_iteration_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // next_   - the repeated sub-tree (not owned).
+    // greedy_ - forwarded to greedy (bool) of every firstpos node.
+    basic_iteration_node (basic_node<id_type> *next_, const bool greedy_) :
+        basic_node<id_type> (true),
+        _next (next_),
+        _greedy (greedy_)
+    {
+        _next->append_firstpos (node::_firstpos);
+        _next->append_lastpos (node::_lastpos);
+
+        // Loop back: whatever can end the child may be followed by
+        // whatever can start it.
+        for (typename node_vector::iterator last_ = node::_lastpos.begin (),
+            stop_ = node::_lastpos.end (); last_ != stop_; ++last_)
+        {
+            (*last_)->append_followpos (node::_firstpos);
+        }
+
+        for (typename node_vector::iterator first_ =
+            node::_firstpos.begin (), stop_ = node::_firstpos.end ();
+            first_ != stop_; ++first_)
+        {
+            (*first_)->greedy (greedy_);
+        }
+    }
+
+    virtual ~basic_iteration_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::ITERATION;
+    }
+
+    // Schedules the child for a visit and records that this node's
+    // operation has to be performed afterwards.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+        node_stack_.push (_next);
+        return true;
+    }
+
+private:
+    // Not owner of this pointer...
+    basic_node<id_type> *_next;
+    bool _greedy;
+
+    // When the top of perform_op_stack_ is true, wraps the node on top of
+    // new_node_stack_ in a new iteration node (owned by node_ptr_vector_)
+    // and replaces that top entry with it; otherwise sets down_.
+    // The entry on perform_op_stack_ is popped in both cases.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            basic_node<id_type> *child_ = new_node_stack_.top ();
+
+            // Reserve the slot first so that the new node is owned as
+            // soon as it has been allocated.
+            node_ptr_vector_->push_back
+                (static_cast<basic_iteration_node<id_type> *>(0));
+            node_ptr_vector_->back () = new basic_iteration_node
+                (child_, _greedy);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_iteration_node (const basic_iteration_node &);
+    // No assignment.
+    const basic_iteration_node &operator = (const basic_iteration_node &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/leaf_node.hpp
+++ b/inc/lexertl/parser/tree/leaf_node.hpp
@@ -0,0 +1,114 @@
+// leaf_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_LEAF_NODE_HPP
+#define LEXERTL_LEAF_NODE_HPP
+
+#include "../../enums.hpp" // null_token
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node for a single token. A leaf whose token equals
+// node::null_token () is nullable and has empty firstpos / lastpos; any
+// other leaf is its own firstpos and lastpos.
+template<typename id_type>
+class basic_leaf_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // token_  - token id; null_token () makes the leaf nullable.
+    // greedy_ - initial greedy flag. A leaf created with greedy_ == false
+    //           is treated as already decided, so later calls to
+    //           greedy (bool) leave it untouched.
+    basic_leaf_node (const id_type token_, const bool greedy_) :
+        basic_node<id_type> (token_ == node::null_token ()),
+        _token (token_),
+        _set_greedy (!greedy_),
+        _greedy (greedy_),
+        _followpos ()
+    {
+        if (node::_nullable)
+        {
+            return;
+        }
+
+        node::_firstpos.push_back (this);
+        node::_lastpos.push_back (this);
+    }
+
+    virtual ~basic_leaf_node ()
+    {
+    }
+
+    // Appends every entry of followpos_ to this leaf's followpos,
+    // preserving order (duplicates are not removed).
+    virtual void append_followpos (const node_vector &followpos_)
+    {
+        _followpos.insert (_followpos.end (),
+            followpos_.begin (), followpos_.end ());
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::LEAF;
+    }
+
+    // A leaf has no children: nothing is pushed and false is returned.
+    virtual bool traverse (const_node_stack &/*node_stack_*/,
+        bool_stack &/*perform_op_stack_*/) const
+    {
+        return false;
+    }
+
+    virtual id_type token () const
+    {
+        return _token;
+    }
+
+    // Only the first call on a leaf that has not been decided yet has an
+    // effect; after that the flag is frozen.
+    virtual void greedy (const bool greedy_)
+    {
+        if (_set_greedy)
+        {
+            return;
+        }
+
+        _greedy = greedy_;
+        _set_greedy = true;
+    }
+
+    virtual bool greedy () const
+    {
+        return _greedy;
+    }
+
+    virtual const node_vector &followpos () const
+    {
+        return _followpos;
+    }
+
+    virtual node_vector &followpos ()
+    {
+        return _followpos;
+    }
+
+private:
+    id_type _token;
+    bool _set_greedy;
+    bool _greedy;
+    node_vector _followpos;
+
+    // Creates a new leaf with the same token and current greedy flag,
+    // hands ownership to node_ptr_vector_ and pushes it onto
+    // new_node_stack_.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/,
+        bool &/*down_*/) const
+    {
+        // Reserve the slot first so that the new node is owned as soon as
+        // it has been allocated.
+        node_ptr_vector_->push_back (static_cast<basic_leaf_node *>(0));
+        node_ptr_vector_->back () = new basic_leaf_node (_token, _greedy);
+        new_node_stack_.push (node_ptr_vector_->back ());
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/node.hpp
+++ b/inc/lexertl/parser/tree/node.hpp
@@ -0,0 +1,241 @@
+// node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_NODE_HPP
+#define LEXERTL_NODE_HPP
+
+#include <assert.h>
+#include "../../containers/ptr_vector.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <stack>
+#include <vector>
+
+namespace lexertl
+{
+namespace detail
+{
+// Abstract base class of the regex syntax tree nodes.
+// It stores the nullable flag together with the firstpos and lastpos
+// vectors and drives the iterative copy () algorithm. Accessors that only
+// make sense for particular node kinds (id (), token (), greedy (),
+// followpos (), ...) throw runtime_error here and are overridden by the
+// node classes that support them.
+template<typename id_type>
+class basic_node
+{
+public:
+    enum node_type {LEAF, SEQUENCE, SELECTION, ITERATION, END};
+
+    typedef std::stack<bool> bool_stack;
+    typedef std::stack<basic_node<id_type> *> node_stack;
+    // stack and vector not owner of node pointers
+    typedef std::stack<const basic_node<id_type> *> const_node_stack;
+    typedef std::vector<basic_node<id_type> *> node_vector;
+    typedef ptr_vector<basic_node<id_type> > node_ptr_vector;
+
+    // Creates a non-nullable node with empty firstpos / lastpos.
+    basic_node () :
+        _nullable (false),
+        _firstpos (),
+        _lastpos ()
+    {
+    }
+
+    // Creates a node with the given nullable flag and empty
+    // firstpos / lastpos.
+    basic_node (const bool nullable_) :
+        _nullable (nullable_),
+        _firstpos (),
+        _lastpos ()
+    {
+    }
+
+    virtual ~basic_node ()
+    {
+    }
+
+    // Token id with all bits set.
+    static id_type null_token ()
+    {
+        return static_cast<id_type>(~0);
+    }
+
+    bool nullable () const
+    {
+        return _nullable;
+    }
+
+    // Appends this node's firstpos entries to firstpos_.
+    void append_firstpos (node_vector &firstpos_) const
+    {
+        firstpos_.insert (firstpos_.end (),
+            _firstpos.begin (), _firstpos.end ());
+    }
+
+    // Appends this node's lastpos entries to lastpos_.
+    void append_lastpos (node_vector &lastpos_) const
+    {
+        lastpos_.insert (lastpos_.end (),
+            _lastpos.begin (), _lastpos.end ());
+    }
+
+    // Not supported by the base class: always throws runtime_error.
+    virtual void append_followpos (const node_vector &/*followpos_*/)
+    {
+        throw runtime_error ("Internal error node::append_followpos().");
+    }
+
+    // Copies the tree rooted at this node without recursion and returns
+    // the root of the copy.
+    // traverse () is called repeatedly on the top of node_stack_ while it
+    // reports that it pushed children; copy_node () is then called on the
+    // top of the stack to create the new nodes, and may set down_ to
+    // request another descent instead of being popped.
+    // Every node created is stored in (and owned by) node_ptr_vector_.
+    basic_node *copy (node_ptr_vector &node_ptr_vector_) const
+    {
+        basic_node *new_root_ = 0;
+        const_node_stack node_stack_;
+        bool_stack perform_op_stack_;
+        bool down_ = true;
+        node_stack new_node_stack_;
+
+        node_stack_.push (this);
+
+        while (!node_stack_.empty ())
+        {
+            while (down_)
+            {
+                down_ = node_stack_.top ()->traverse (node_stack_,
+                    perform_op_stack_);
+            }
+
+            while (!down_ && !node_stack_.empty ())
+            {
+                const basic_node *top_ = node_stack_.top ();
+
+                top_->copy_node (node_ptr_vector_, new_node_stack_,
+                    perform_op_stack_, down_);
+
+                // A node that asked to go down again stays on the stack.
+                if (!down_) node_stack_.pop ();
+            }
+        }
+
+        // Exactly one node - the new root - must remain.
+        assert (new_node_stack_.size () == 1);
+        new_root_ = new_node_stack_.top ();
+        new_node_stack_.pop ();
+        return new_root_;
+    }
+
+    virtual node_type what_type () const = 0;
+
+    // Pushes the node's children (if any) for copy (); the return value
+    // tells copy () whether to keep descending.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const = 0;
+
+    node_vector &firstpos ()
+    {
+        return _firstpos;
+    }
+
+    const node_vector &firstpos () const
+    {
+        return _firstpos;
+    }
+
+    // _lastpos modified externally, so not const &
+    node_vector &lastpos ()
+    {
+        return _lastpos;
+    }
+
+    virtual bool end_state () const
+    {
+        return false;
+    }
+
+    // The accessors below are not supported by the base class and always
+    // throw runtime_error. The unreachable return statements exist only
+    // to silence a Solaris compiler warning.
+
+    virtual id_type id () const
+    {
+        throw runtime_error ("Internal error node::id().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type user_id () const
+    {
+        throw runtime_error ("Internal error node::user_id().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type next_dfa () const
+    {
+        throw runtime_error ("Internal error node::next_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type push_dfa () const
+    {
+        throw runtime_error ("Internal error node::push_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual bool pop_dfa () const
+    {
+        throw runtime_error ("Internal error node::pop_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return false;
+#endif
+    }
+
+    virtual id_type token () const
+    {
+        throw runtime_error ("Internal error node::token().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual void greedy (const bool /*greedy_*/)
+    {
+        throw runtime_error ("Internal error node::greedy(bool).");
+    }
+
+    virtual bool greedy () const
+    {
+        throw runtime_error ("Internal error node::greedy().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return false;
+#endif
+    }
+
+    virtual const node_vector &followpos () const
+    {
+        throw runtime_error ("Internal error node::followpos().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning.
+        // Must name the data member: "firstpos" is the overloaded member
+        // function and cannot be returned as a node_vector reference.
+        return _firstpos;
+#endif
+    }
+
+    virtual node_vector &followpos ()
+    {
+        throw runtime_error ("Internal error node::followpos().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning.
+        // Must name the data member: "firstpos" is the overloaded member
+        // function and cannot be returned as a node_vector reference.
+        return _firstpos;
+#endif
+    }
+
+protected:
+    const bool _nullable;
+    node_vector _firstpos;
+    node_vector _lastpos;
+
+    // Creates this node's counterpart during copy (): new nodes are added
+    // to node_ptr_vector_ and new_node_stack_, perform_op_stack_ carries
+    // the flags pushed by traverse (), and down_ may be set to ask
+    // copy () to descend again.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const = 0;
+
+private:
+    basic_node (const basic_node &); // No copy construction.
+    const basic_node &operator = (const basic_node &); // No assignment.
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/selection_node.hpp
+++ b/inc/lexertl/parser/tree/selection_node.hpp
@@ -0,0 +1,106 @@
+// selection_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SELECTION_NODE_HPP
+#define LEXERTL_SELECTION_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node for a choice between two sub-expressions. It is nullable when
+// either operand is, and its firstpos / lastpos are those of the left
+// operand followed by those of the right operand.
+template<typename id_type>
+class basic_selection_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+
+    // Neither operand is owned by this node.
+    basic_selection_node (basic_node<id_type> *left_,
+        basic_node<id_type> *right_) :
+        basic_node<id_type> (left_->nullable () || right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        // firstpos: left then right.
+        _left->append_firstpos (node::_firstpos);
+        _right->append_firstpos (node::_firstpos);
+        // lastpos: left then right.
+        _left->append_lastpos (node::_lastpos);
+        _right->append_lastpos (node::_lastpos);
+    }
+
+    virtual ~basic_selection_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::SELECTION;
+    }
+
+    // Schedules both operands (left on top, so it is visited first) and
+    // records that this node's operation has to be performed. When the
+    // right operand is itself an operator node, an extra false entry is
+    // pushed so that the first copy_node () call descends into it instead
+    // of combining.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+
+        const node_type right_type_ = _right->what_type ();
+
+        if (right_type_ == node::SEQUENCE ||
+            right_type_ == node::SELECTION ||
+            right_type_ == node::ITERATION)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Not owner of these pointers...
+    basic_node<id_type> *_left;
+    basic_node<id_type> *_right;
+
+    // When the top of perform_op_stack_ is true, combines the two nodes on
+    // top of new_node_stack_ (right operand on top, left beneath it) into
+    // a new selection node owned by node_ptr_vector_, leaving the new node
+    // in their place; otherwise sets down_.
+    // The entry on perform_op_stack_ is popped in both cases.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            basic_node<id_type> *second_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            basic_node<id_type> *first_ = new_node_stack_.top ();
+
+            // Reserve the slot first so that the new node is owned as
+            // soon as it has been allocated.
+            node_ptr_vector_->push_back
+                (static_cast<basic_selection_node *>(0));
+            node_ptr_vector_->back () =
+                new basic_selection_node (first_, second_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_selection_node (const basic_selection_node &);
+    // No assignment.
+    const basic_selection_node &operator = (const basic_selection_node &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/parser/tree/sequence_node.hpp
+++ b/inc/lexertl/parser/tree/sequence_node.hpp
@@ -0,0 +1,126 @@
+// sequence_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SEQUENCE_NODE_HPP
+#define LEXERTL_SEQUENCE_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node for two sub-expressions in sequence. It is nullable only when
+// both operands are; firstpos / lastpos take the nullability of the
+// operands into account, and every lastpos node of the left operand is
+// given the firstpos of the right operand as followpos.
+template<typename id_type>
+class basic_sequence_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // Neither operand is owned by this node.
+    basic_sequence_node (basic_node<id_type> *left_,
+        basic_node<id_type> *right_) :
+        basic_node<id_type> (left_->nullable () && right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        // firstpos: the left operand's, plus the right operand's when the
+        // left one can match nothing.
+        _left->append_firstpos (node::_firstpos);
+
+        if (_left->nullable ())
+        {
+            _right->append_firstpos (node::_firstpos);
+        }
+
+        // lastpos: the left operand's when the right one can match
+        // nothing, followed by the right operand's.
+        if (_right->nullable ())
+        {
+            _left->append_lastpos (node::_lastpos);
+        }
+
+        _right->append_lastpos (node::_lastpos);
+
+        // Whatever can end the left operand may be followed by whatever
+        // can start the right operand.
+        node_vector &left_last_ = _left->lastpos ();
+        const node_vector &right_first_ = _right->firstpos ();
+        typename node_vector::iterator pos_ = left_last_.begin ();
+        const typename node_vector::iterator stop_ = left_last_.end ();
+
+        while (pos_ != stop_)
+        {
+            (*pos_)->append_followpos (right_first_);
+            ++pos_;
+        }
+    }
+
+    virtual ~basic_sequence_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::SEQUENCE;
+    }
+
+    // Schedules both operands (left on top, so it is visited first) and
+    // records that this node's operation has to be performed. When the
+    // right operand is itself an operator node, an extra false entry is
+    // pushed so that the first copy_node () call descends into it instead
+    // of combining.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+
+        const node_type right_type_ = _right->what_type ();
+
+        if (right_type_ == node::SEQUENCE ||
+            right_type_ == node::SELECTION ||
+            right_type_ == node::ITERATION)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Not owner of these pointers...
+    basic_node<id_type> *_left;
+    basic_node<id_type> *_right;
+
+    // When the top of perform_op_stack_ is true, combines the two nodes on
+    // top of new_node_stack_ (right operand on top, left beneath it) into
+    // a new sequence node owned by node_ptr_vector_, leaving the new node
+    // in their place; otherwise sets down_.
+    // The entry on perform_op_stack_ is popped in both cases.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            basic_node<id_type> *second_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            basic_node<id_type> *first_ = new_node_stack_.top ();
+
+            // Reserve the slot first so that the new node is owned as
+            // soon as it has been allocated.
+            node_ptr_vector_->push_back
+                (static_cast<basic_sequence_node<id_type> *>(0));
+            node_ptr_vector_->back () = new basic_sequence_node<id_type>
+                (first_, second_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_sequence_node (const basic_sequence_node &);
+    // No assignment.
+    const basic_sequence_node &operator = (const basic_sequence_node &);
+};
+}
+}
+
+#endif
--- a/inc/lexertl/partition/charset.hpp
+++ b/inc/lexertl/partition/charset.hpp
@@ -0,0 +1,73 @@
+// charset.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_CHARSET_HPP
+#define LEXERTL_CHARSET_HPP
+
+#include <algorithm>
+#include <iterator>
+#include <set>
+#include "../size_t.hpp"
+#include "../string_token.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// A string token paired with the set of indexes that refer to it.
+template<typename char_type, typename id_type>
+struct basic_charset
+{
+    typedef basic_string_token<char_type> token;
+    typedef std::set<id_type> index_set;
+
+    token _token;
+    index_set _index_set;
+
+    // Empty token, no indexes.
+    basic_charset () :
+        _token (),
+        _index_set ()
+    {
+    }
+
+    // Starts with token_ and the single index index_ (converted to
+    // id_type).
+    basic_charset (const token &token_, const std::size_t index_) :
+        _token (token_),
+        _index_set ()
+    {
+        _index_set.insert (static_cast<id_type>(index_));
+    }
+
+    // True when there is neither token content nor any index.
+    bool empty () const
+    {
+        return _token.empty () && _index_set.empty ();
+    }
+
+    // Intersects the tokens of *this and rhs_ into overlap_._token.
+    // When that overlap is not empty, overlap_ receives the indexes of
+    // both operands, and any operand whose token is empty afterwards has
+    // its index set cleared.
+    // NOTE(review): this relies on token::intersect () removing the shared
+    // part from both operands - confirm against string_token.hpp.
+    void intersect (basic_charset &rhs_, basic_charset &overlap_)
+    {
+        _token.intersect (rhs_._token, overlap_._token);
+
+        if (overlap_._token.empty ())
+        {
+            // Nothing in common: the index sets stay as they are.
+            return;
+        }
+
+        // The overlap is referenced by every index of either operand.
+        overlap_._index_set.insert (_index_set.begin (), _index_set.end ());
+        overlap_._index_set.insert (rhs_._index_set.begin (),
+            rhs_._index_set.end ());
+
+        if (_token.empty ())
+        {
+            _index_set.clear ();
+        }
+
+        if (rhs_._token.empty ())
+        {
+            rhs_._index_set.clear ();
+        }
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/partition/equivset.hpp
+++ b/inc/lexertl/partition/equivset.hpp
@@ -0,0 +1,134 @@
+// equivset.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_EQUIVSET_HPP
+#define LEXERTL_EQUIVSET_HPP
+
+#include <algorithm>
+#include "../parser/tree/node.hpp"
+#include <set>
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename id_type>
+struct basic_equivset
+{
+    typedef std::set<id_type> index_set;
+    typedef std::vector<id_type> index_vector;
+    // Not owner of nodes:
+    typedef basic_node<id_type> node;
+    typedef std::vector<node *> node_vector;
+
+    index_vector _index_vector; // Filled from a std::set by the ctor.
+    id_type _id;
+    bool _greedy;
+    node_vector _followpos; // Borrowed node pointers, never deleted here.
+
+    basic_equivset () : // Empty, greedy set with id 0.
+        _index_vector (),
+        _id (0),
+        _greedy (true),
+        _followpos ()
+    {
+    }
+
+    basic_equivset (const index_set &index_set_, const id_type id_,
+        const bool greedy_, const node_vector &followpos_) :
+        _index_vector (index_set_.begin (), index_set_.end ()),
+        _id (id_),
+        _greedy (greedy_),
+        _followpos (followpos_) // The pointer vector is copied.
+    {
+    }
+
+    bool empty () const // No indexes and no followpos nodes left.
+    {
+        return _index_vector.empty () && _followpos.empty ();
+    }
+
+    void intersect (basic_equivset &rhs_, basic_equivset &overlap_)
+    {
+        intersect_indexes (rhs_._index_vector, overlap_._index_vector);
+        // Shared indexes now live in overlap_; merge the rest only if any.
+        if (!overlap_._index_vector.empty ())
+        {
+            // Note that the LHS takes priority in order to
+            // respect rule ordering priority in the lex spec.
+            overlap_._id = _id;
+            overlap_._greedy = _greedy;
+            overlap_._followpos = _followpos;
+            // Append RHS followpos nodes that the LHS did not supply.
+            typename node_vector::const_iterator overlap_begin_ =
+                overlap_._followpos.begin ();
+            typename node_vector::const_iterator overlap_end_ =
+                overlap_._followpos.end ();
+            typename node_vector::const_iterator rhs_iter_ =
+                rhs_._followpos.begin ();
+            typename node_vector::const_iterator rhs_end_ =
+                rhs_._followpos.end ();
+
+            for (; rhs_iter_ != rhs_end_; ++rhs_iter_)
+            {
+                node *node_ = *rhs_iter_;
+                // push_back may reallocate, hence the iterator refresh.
+                if (std::find (overlap_begin_, overlap_end_, node_) ==
+                    overlap_end_)
+                {
+                    overlap_._followpos.push_back (node_);
+                    overlap_begin_ = overlap_._followpos.begin ();
+                    overlap_end_ = overlap_._followpos.end ();
+                }
+            }
+            // A side that lost all its indexes drops its followpos too.
+            if (_index_vector.empty ())
+            {
+                _followpos.clear ();
+            }
+
+            if (rhs_._index_vector.empty ())
+            {
+                rhs_._followpos.clear ();
+            }
+        }
+    }
+
+private:
+    void intersect_indexes (index_vector &rhs_, index_vector &overlap_)
+    {
+        typename index_vector::iterator iter_ = _index_vector.begin ();
+        typename index_vector::iterator end_ = _index_vector.end ();
+        typename index_vector::iterator rhs_iter_ = rhs_.begin ();
+        typename index_vector::iterator rhs_end_ = rhs_.end ();
+        // Two-cursor walk; assumes both vectors are sorted ascending.
+        while (iter_ != end_ && rhs_iter_ != rhs_end_)
+        {
+            const id_type index_ = *iter_;
+            const id_type rhs_index_ = *rhs_iter_;
+
+            if (index_ < rhs_index_)
+            {
+                ++iter_;
+            }
+            else if (index_ > rhs_index_)
+            {
+                ++rhs_iter_;
+            }
+            else
+            {
+                overlap_.push_back (index_); // Common: move out of both.
+                iter_ = _index_vector.erase (iter_);
+                end_ = _index_vector.end (); // erase () invalidated it.
+                rhs_iter_ = rhs_.erase (rhs_iter_);
+                rhs_end_ = rhs_.end ();
+            }
+        }
+    }
+};
+}
+}
+
+#endif
--- a/inc/lexertl/rules.hpp
+++ b/inc/lexertl/rules.hpp
@@ -0,0 +1,743 @@
+// rules.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RULES_HPP
+#define LEXERTL_RULES_HPP
+
+#include "compile_assert.hpp"
+#include <deque>
+#include "enums.hpp"
+#include "internals.hpp"
+#include <locale>
+#include <map>
+#include "runtime_error.hpp"
+#include <set>
+#include "size_t.hpp"
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace lexertl
+{
+template<typename ch_type, typename id_ty = std::size_t>
+class basic_rules
+{
+public:
+    typedef std::vector<bool> bool_vector;
+    typedef std::deque<bool_vector> bool_vector_deque;
+    typedef ch_type char_type;
+    typedef id_ty id_type;
+    typedef std::vector<id_type> id_vector;
+    typedef std::deque<id_vector> id_vector_deque;
+    typedef std::basic_string<char_type> string;
+    typedef std::deque<string> string_deque;
+    typedef std::deque<string_deque> string_deque_deque;
+    typedef std::set<string> string_set;
+    typedef std::pair<string, string> string_pair;
+    typedef std::deque<string_pair> string_pair_deque;
+    typedef std::map<string, id_type> string_id_type_map;
+    typedef std::pair<string, id_type> string_id_type_pair;
+
+    // If you get a compile error here you have
+    // failed to define an unsigned id type.
+    compile_assert<(static_cast<id_type>(~0) > 0)>
+        _valid_id_type;
+
+    basic_rules (const std::size_t flags_ = dot_not_newline) :
+        _valid_id_type (),
+        _statemap (),
+        _macrodeque (),
+        _macroset (),
+        _regexes (),
+        _features (),
+        _ids (),
+        _user_ids (),
+        _next_dfas (),
+        _pushes (),
+        _pops (),
+        _flags (flags_),
+        _locale (), // Default-constructed std::locale.
+        _lexer_state_names (),
+        _eoi (0) // The eoi id is 0 until eoi (id) is called.
+    {
+        add_state (initial ()); // INITIAL always exists, as state 0.
+    }
+
+    void clear () // Back to the state of a default-constructed object.
+    {
+        _statemap.clear ();
+        _macrodeque.clear ();
+        _macroset.clear ();
+        _regexes.clear ();
+        _features.clear ();
+        _ids.clear ();
+        _user_ids.clear ();
+        _next_dfas.clear ();
+        _pushes.clear ();
+        _pops.clear ();
+        _flags = dot_not_newline; // The constructor's default flags.
+        _locale = std::locale ();
+        _lexer_state_names.clear ();
+        _eoi = 0;
+        add_state (initial ()); // Re-register the mandatory INITIAL state.
+    }
+
+    void clear (const id_type dfa_) // Removes the rules of one state.
+    {
+        if (_regexes.size () > dfa_) // Out-of-range ids are ignored.
+        {
+            _regexes[dfa_].clear ();
+            _features[dfa_] = 0;
+            _ids[dfa_].clear ();
+            _user_ids[dfa_].clear ();
+            _next_dfas[dfa_].clear ();
+            _pushes[dfa_].clear ();
+            _pops[dfa_].clear (); // State names and macros are kept.
+        }
+    }
+
+    void flags (const std::size_t flags_) // Replaces the stored flags.
+    {
+        _flags = flags_;
+    }
+
+    std::size_t flags () const // Returns the stored flags.
+    {
+        return _flags;
+    }
+
+    static id_type skip () // Reserved rule id that sets skip_bit in add ().
+    {
+        return static_cast<id_type>(~1); // One below npos ().
+    }
+
+    void eoi (const id_type eoi_) // Sets the id that rules may not reuse.
+    {
+        _eoi = eoi_;
+    }
+
+    id_type eoi () const // Returns the current eoi id (0 by default).
+    {
+        return _eoi;
+    }
+
+    // Installs locale_ and hands back the locale it replaced.
+    std::locale imbue (const std::locale &locale_)
+    {
+        const std::locale previous_ (_locale);
+        _locale = locale_;
+        return previous_;
+    }
+
+    const std::locale &locale () const // Locale set by imbue ().
+    {
+        return _locale;
+    }
+
+    // Maps a state id back to its name. Id 0 is always INITIAL; every
+    // other id indexes _lexer_state_names at (id - 1) because INITIAL
+    // itself is not stored there. Returns 0 (null) for an unknown id.
+    const char_type *state (const id_type index_) const
+    {
+        // INITIAL is handled separately as it has no stored name.
+        if (index_ == 0) return initial ();
+
+        const id_type pos_ = index_ - 1;
+
+        // Guard against ids that were never handed out.
+        if (pos_ >= _lexer_state_names.size ())
+        {
+            return 0;
+        }
+
+        const string &name_ = _lexer_state_names[pos_];
+
+        return name_.c_str ();
+    }
+
+    // Returns the id of state name_, or npos () if it was never added.
+    id_type state (const char_type *name_) const
+    {
+        typename string_id_type_map::const_iterator found_ =
+            _statemap.find (name_);
+        id_type id_ = npos ();
+
+        if (found_ != _statemap.end ())
+        {
+            id_ = found_->second;
+        }
+
+        return id_;
+    }
+
+    // Registers lexer start state name_ and returns its id; a name that
+    // is already known yields the id it was given originally.
+    id_type add_state (const char_type *name_)
+    {
+        validate (name_);
+
+        const std::pair<typename string_id_type_map::iterator, bool>
+            result_ = _statemap.insert (string_id_type_pair (name_,
+            _statemap.size ()));
+
+        // Already registered: nothing else to set up.
+        if (!result_.second) return result_.first->second;
+
+        // Every state owns one slot in each of the per-state tables.
+        _regexes.push_back (string_deque ());
+        _features.push_back (0);
+        _ids.push_back (id_vector ());
+        _user_ids.push_back (id_vector ());
+        _next_dfas.push_back (id_vector ());
+        _pushes.push_back (id_vector ());
+        _pops.push_back (bool_vector ());
+
+        // INITIAL is implicit, so only other names are recorded.
+        if (string (name_) != initial ()) _lexer_state_names.push_back (name_);
+
+        if (_next_dfas.size () > npos ())
+        {
+            // Overflow
+            throw runtime_error ("The data type you have chosen cannot hold "
+                "this many lexer start states.");
+        }
+
+        // Initial is not stored, so no need to - 1.
+        return static_cast<id_type>(_lexer_state_names.size ());
+    }
+
+    void add_macro (const char_type *name_, const char_type *regex_)
+    {
+        add_macro (name_, string (regex_)); // regex_ is null-terminated.
+    }
+
+    void add_macro (const char_type *name_, const char_type *regex_start_,
+        const char_type *regex_end_)
+    {
+        add_macro (name_, string (regex_start_, regex_end_)); // [start, end)
+    }
+
+    void add_macro (const char_type *name_, const string &regex_)
+    {
+        validate (name_); // Throws runtime_error on a malformed name.
+
+        typename string_set::const_iterator iter_ = _macroset.find (name_);
+
+        if (iter_ == _macroset.end ())
+        {
+            _macrodeque.push_back (string_pair (name_, regex_));
+            _macroset.insert (name_); // Set mirrors the deque for lookup.
+        }
+        else
+        {
+            std::basic_stringstream<char_type> ss_;
+            std::ostringstream os_;
+            // Redefinition is an error; narrow the name for the message.
+            os_ << "Attempt to redefine MACRO '";
+
+            while (*name_)
+            {
+                os_ << ss_.narrow (*name_++, static_cast<char_type> (' '));
+            }
+
+            os_ << "'.";
+            throw runtime_error (os_.str ());
+        }
+    }
+
+    // Copies every macro of rules_ into *this; throws on a duplicate name.
+    void add_macros (const basic_rules &rules_)
+    {
+        const string_pair_deque &macros_ = rules_.macrodeque ();
+        typename string_pair_deque::const_iterator macro_iter_ =
+            macros_.begin ();
+        typename string_pair_deque::const_iterator macro_end_ =
+            macros_.end ();
+        for (; macro_iter_ != macro_end_; ++macro_iter_)
+        {
+            // Pass the string itself so embedded NULs survive the copy.
+            add_macro (macro_iter_->first.c_str (), macro_iter_->second);
+        }
+    }
+
+    // Like add_macros (), but names already defined here are left alone.
+    void merge_macros (const basic_rules &rules_)
+    {
+        const string_pair_deque &macros_ = rules_.macrodeque ();
+        typename string_pair_deque::const_iterator macro_iter_ =
+            macros_.begin ();
+        typename string_pair_deque::const_iterator macro_end_ =
+            macros_.end ();
+        typename string_set::const_iterator macro_dest_iter_;
+        typename string_set::const_iterator macro_dest_end_ = _macroset.end ();
+
+        for (; macro_iter_ != macro_end_; ++macro_iter_)
+        {
+            macro_dest_iter_ = _macroset.find (macro_iter_->first);
+
+            if (macro_dest_iter_ == macro_dest_end_)
+            {
+                add_macro (macro_iter_->first.c_str (), macro_iter_->second);
+            }
+        }
+    }
+
+    // Add rule to INITIAL (regex_ is a null-terminated string).
+    void add (const char_type *regex_, const id_type id_,
+        const id_type user_id_ = npos ())
+    {
+        add (string (regex_), id_, user_id_); // Uses the string overload.
+    }
+
+    void add (const char_type *regex_start_, const char_type *regex_end_,
+        const id_type id_, const id_type user_id_ = npos ())
+    {
+        add (string (regex_start_, regex_end_), id_, user_id_); // INITIAL
+    }
+
+    void add (const string &regex_, const id_type id_,
+        const id_type user_id_ = npos ())
+    {
+        check_for_invalid_id (id_); // Throws if id_ is the eoi id or npos.
+        _regexes.front ().push_back (regex_); // front () is INITIAL.
+        // Note in _features what this rule requires of the INITIAL state.
+        if (regex_[0] == '^')
+        {
+            _features.front () |= bol_bit;
+        }
+
+        if (regex_.size () > 0 && regex_[regex_.size () - 1] == '$')
+        {
+            _features.front () |= eol_bit;
+        }
+
+        if (id_ == skip ())
+        {
+            _features.front () |= skip_bit;
+        }
+        else if (id_ == eoi ()) // NOTE(review): rejected above, so dead?
+        {
+            _features.front () |= again_bit;
+        }
+        // These rules stay in state 0 and neither push nor pop.
+        _ids.front ().push_back (id_);
+        _user_ids.front ().push_back (user_id_);
+        _next_dfas.front ().push_back (0);
+        _pushes.front ().push_back (npos ());
+        _pops.front ().push_back (false);
+    }
+
+    // Add rule with no id (the eoi id is stored in its place).
+    void add (const char_type *curr_dfa_,
+        const char_type *regex_, const char_type *new_dfa_)
+    {
+        add (curr_dfa_, string (regex_), new_dfa_); // Null-terminated regex.
+    }
+
+    void add (const char_type *curr_dfa_,
+        const char_type *regex_start_, const char_type *regex_end_,
+        const char_type *new_dfa_)
+    {
+        add (curr_dfa_, string (regex_start_, regex_end_), new_dfa_); // No id.
+    }
+
+    void add (const char_type *curr_dfa_, const string &regex_,
+        const char_type *new_dfa_)
+    {
+        add (curr_dfa_, regex_, _eoi, new_dfa_, false); // Id check is off.
+    }
+
+    // Add rule with id (regex_ is a null-terminated string).
+    void add (const char_type *curr_dfa_,
+        const char_type *regex_, const id_type id_,
+        const char_type *new_dfa_, const id_type user_id_ = npos ())
+    {
+        add (curr_dfa_, string (regex_), id_, new_dfa_, user_id_);
+    }
+
+    void add (const char_type *curr_dfa_, const char_type *regex_start_,
+        const char_type *regex_end_, const id_type id_,
+        const char_type *new_dfa_, const id_type user_id_ = npos ())
+    {
+        add (curr_dfa_, string (regex_start_, regex_end_), // [start, end)
+            id_, new_dfa_, user_id_);
+    }
+
+    void add (const char_type *curr_dfa_, const string &regex_,
+        const id_type id_, const char_type *new_dfa_,
+        const id_type user_id_ = npos ())
+    {
+        add (curr_dfa_, regex_, id_, new_dfa_, true, user_id_); // Checks id_.
+    }
+
+    const string_id_type_map &statemap () const
+    {
+        return _statemap; // State name -> state id.
+    }
+
+    const string_pair_deque &macrodeque () const
+    {
+        return _macrodeque; // (name, regex) pairs in definition order.
+    }
+
+    const string_deque_deque &regexes () const
+    {
+        return _regexes; // One deque of regex strings per state.
+    }
+
+    const id_vector &features () const
+    {
+        return _features; // One feature bit mask per state.
+    }
+
+    const id_vector_deque &ids () const
+    {
+        return _ids; // Per state: the id of each rule.
+    }
+
+    const id_vector_deque &user_ids () const
+    {
+        return _user_ids; // Per state: the user id of each rule.
+    }
+
+    const id_vector_deque &next_dfas () const
+    {
+        return _next_dfas; // Per state: the target state of each rule.
+    }
+
+    const id_vector_deque &pushes () const
+    {
+        return _pushes; // Per rule: state to push, or npos () for none.
+    }
+
+    const bool_vector_deque &pops () const
+    {
+        return _pops; // Per rule: true when the rule pops a state.
+    }
+
+    // True when no state holds any regex, i.e. no rule has been added.
+    bool empty () const
+    {
+        typename string_deque_deque::const_iterator state_ =
+            _regexes.begin ();
+        const typename string_deque_deque::const_iterator last_ =
+            _regexes.end ();
+
+        while (state_ != last_)
+        {
+            if (!state_->empty ()) return false;
+
+            ++state_;
+        }
+
+        return true;
+    }
+
+    static const char_type *initial ()
+    {
+        static const char_type initial_[] =
+            {'I', 'N', 'I', 'T', 'I', 'A', 'L', 0};
+        // Spelled out char by char so it works for any char_type.
+        return initial_;
+    }
+
+    static const char_type *dot ()
+    {
+        static const char_type dot_[] = {'.', 0};
+        // As new_dfa_ in add (), "." keeps a rule in its current state.
+        return dot_;
+    }
+
+    static const char_type *all_states ()
+    {
+        static const char_type star_[] = {'*', 0};
+        // As curr_dfa_ in add (), "*" applies a rule to every state.
+        return star_;
+    }
+
+    static id_type npos () // Largest id_type value; a reserved id.
+    {
+        return static_cast<id_type>(~0);
+    }
+
+private:
+    string_id_type_map _statemap;
+    string_pair_deque _macrodeque;
+    string_set _macroset;
+    string_deque_deque _regexes;
+    id_vector _features;
+    id_vector_deque _ids;
+    id_vector_deque _user_ids;
+    id_vector_deque _next_dfas;
+    id_vector_deque _pushes;
+    bool_vector_deque _pops;
+    std::size_t _flags;
+    std::locale _locale;
+    string_deque _lexer_state_names;
+    id_type _eoi;
+
+    // Shared implementation behind the state-aware add () overloads:
+    // appends regex_ to every state named in curr_dfa_ ("*" means all,
+    // otherwise a comma separated list) and records the transition.
+    void add (const char_type *curr_dfa_, const string &regex_,
+        const id_type id_, const char_type *new_dfa_,
+        const bool check_, const id_type user_id_ = npos ())
+    {
+        const bool star_ = *curr_dfa_ == '*' && *(curr_dfa_ + 1) == 0;
+        const bool dot_ = *new_dfa_ == '.' && *(new_dfa_ + 1) == 0;
+        const bool push_ = *new_dfa_ == '>';
+        const char_type *push_dfa_ = 0;
+        const bool pop_ = *new_dfa_ == '<';
+        // '>' (push) and '<' (pop) prefix the state name; step past them.
+        if (push_ || pop_) ++new_dfa_;
+
+        if (check_)
+        {
+            check_for_invalid_id (id_);
+        }
+        // new_dfa_ may carry a second state name after a ':' separator.
+        if (!dot_ && !pop_)
+        {
+            const char_type *temp_ = new_dfa_;
+
+            while (*temp_ && *temp_ != ':')
+            {
+                ++temp_;
+            }
+
+            if (*temp_) push_dfa_ = temp_ + 1;
+
+            validate (new_dfa_, *temp_ ? temp_ : 0);
+
+            if (push_dfa_)
+            {
+                validate (push_dfa_);
+            }
+        }
+
+        // npos means pop here
+        id_type new_dfa_id_ = npos ();
+        id_type push_dfa_id_ = npos ();
+        typename string_id_type_map::const_iterator iter_;
+        typename string_id_type_map::const_iterator end_ = _statemap.end ();
+        id_vector next_dfas_;
+
+        if (!dot_ && !pop_)
+        {
+            if (push_dfa_)
+            {
+                iter_ = _statemap.find (string (new_dfa_, push_dfa_ - 1));
+            }
+            else
+            {
+                iter_ = _statemap.find (new_dfa_);
+            }
+
+            if (iter_ == end_)
+            {
+                std::basic_stringstream<char_type> ss_;
+                std::ostringstream os_;
+
+                os_ << "Unknown state name '";
+
+                while (*new_dfa_)
+                {
+                    os_ << ss_.narrow (*new_dfa_++, ' ');
+                }
+
+                os_ << "'.";
+                throw runtime_error (os_.str ());
+            }
+
+            new_dfa_id_ = iter_->second;
+
+            if (push_dfa_)
+            {
+                iter_ = _statemap.find (push_dfa_);
+
+                if (iter_ == end_)
+                {
+                    std::basic_stringstream<char_type> ss_;
+                    std::ostringstream os_;
+
+                    os_ << "Unknown state name '";
+
+                    while (*push_dfa_)
+                    {
+                        os_ << ss_.narrow (*push_dfa_++, ' ');
+                    }
+
+                    os_ << "'.";
+                    throw runtime_error (os_.str ());
+                }
+
+                push_dfa_id_ = iter_->second;
+            }
+        }
+        // Work out which current states the rule belongs to.
+        if (star_)
+        {
+            const std::size_t size_ = _statemap.size ();
+
+            for (id_type i_ = 0; i_ < size_; ++i_)
+            {
+                next_dfas_.push_back (i_);
+            }
+        }
+        else
+        {
+            const char_type *start_ = curr_dfa_;
+            string next_dfa_;
+
+            while (*curr_dfa_)
+            {
+                while (*curr_dfa_ && *curr_dfa_ != ',')
+                {
+                    ++curr_dfa_;
+                }
+
+                next_dfa_.assign (start_, curr_dfa_);
+
+                if (*curr_dfa_)
+                {
+                    ++curr_dfa_;
+                    start_ = curr_dfa_;
+                }
+
+                validate (next_dfa_.c_str ());
+                iter_ = _statemap.find (next_dfa_.c_str ());
+
+                if (iter_ == end_)
+                {
+                    std::basic_stringstream<char_type> ss_;
+                    std::ostringstream os_;
+
+                    os_ << "Unknown state name '";
+                    curr_dfa_ = next_dfa_.c_str ();
+
+                    while (*curr_dfa_)
+                    {
+                        os_ << ss_.narrow (*curr_dfa_++, ' ');
+                    }
+
+                    os_ << "'.";
+                    throw runtime_error (os_.str ());
+                }
+
+                next_dfas_.push_back (iter_->second);
+            }
+        }
+        // Record the rule in the tables of every selected state.
+        for (std::size_t i_ = 0, size_ = next_dfas_.size ();
+            i_ < size_; ++i_)
+        {
+            const id_type curr_ = next_dfas_[i_];
+
+            _regexes[curr_].push_back (regex_);
+
+            if (regex_[0] == '^')
+            {
+                _features[curr_] |= bol_bit;
+            }
+            // The size check stops an empty regex being indexed at npos.
+            if (regex_.size () > 0 && regex_[regex_.size () - 1] == '$')
+            {
+                _features[curr_] |= eol_bit;
+            }
+
+            if (id_ == skip ())
+            {
+                _features[curr_] |= skip_bit;
+            }
+            else if (id_ == eoi ())
+            {
+                _features[curr_] |= again_bit;
+            }
+
+            if (push_ || pop_)
+            {
+                _features[curr_] |= recursive_bit;
+            }
+
+            _ids[curr_].push_back (id_);
+            _user_ids[curr_].push_back (user_id_);
+            _next_dfas[curr_].push_back (dot_ ? curr_ : new_dfa_id_);
+            _pushes[curr_].push_back (push_ ? (push_dfa_ ?
+                push_dfa_id_ : curr_) : npos ());
+            _pops[curr_].push_back (pop_);
+        }
+    }
+
+    void validate (const char_type *name_, const char_type *end_ = 0) const
+    {
+        const char_type *start_ = name_;
+        // The first character must be '_' or an ASCII letter.
+        if (*name_ != '_' && !(*name_ >= 'A' && *name_ <= 'Z') &&
+            !(*name_ >= 'a' && *name_ <= 'z'))
+        {
+            std::basic_stringstream<char_type> ss_;
+            std::ostringstream os_;
+
+            os_ << "Invalid name '";
+
+            while (*name_)
+            {
+                os_ << ss_.narrow (*name_++, ' ');
+            }
+
+            os_ << "'.";
+            throw runtime_error (os_.str ());
+        }
+        else if (*name_)
+        {
+            ++name_;
+        }
+        // The rest may also hold digits or '-'; scanning stops at end_.
+        while (*name_ && name_ != end_)
+        {
+            if (*name_ != '_' && *name_ != '-' &&
+                !(*name_ >= 'A' && *name_ <= 'Z') &&
+                !(*name_ >= 'a' && *name_ <= 'z') &&
+                !(*name_ >= '0' && *name_ <= '9'))
+            {
+                std::basic_stringstream<char_type> ss_;
+                std::ostringstream os_;
+
+                os_ << "Invalid name '";
+                name_ = start_; // Report the name from its first character.
+
+                while (*name_)
+                {
+                    os_ << ss_.narrow (*name_++, ' ');
+                }
+
+                os_ << "'.";
+                throw runtime_error (os_.str ());
+            }
+
+            ++name_;
+        }
+    }
+
+    // Throws runtime_error if id_ clashes with a reserved id (eoi or npos).
+    void check_for_invalid_id (const id_type id_) const
+    {
+        if (id_ == _eoi)
+        {
+            throw runtime_error ("Cannot reuse the id for eoi.");
+        }
+
+        if (id_ == npos ())
+        {
+            throw runtime_error ("id npos is reserved for the UNKNOWN token.");
+        }
+    }
+};
+
+typedef basic_rules<char> rules;
+typedef basic_rules<wchar_t> wrules;
+}
+
+#endif
--- a/inc/lexertl/runtime_error.hpp
+++ b/inc/lexertl/runtime_error.hpp
@@ -0,0 +1,23 @@
+// runtime_error.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RUNTIME_ERROR_HPP
+#define LEXERTL_RUNTIME_ERROR_HPP
+
+#include <stdexcept>
+
+namespace lexertl
+{
+class runtime_error : public std::runtime_error // lexertl's error type.
+{
+public:
+    runtime_error (const std::string &what_arg_) : // Text shown by what ().
+        std::runtime_error (what_arg_)
+    {
+    }
+};
+}
+
+#endif
--- a/inc/lexertl/serialise.hpp
+++ b/inc/lexertl/serialise.hpp
@@ -0,0 +1,28 @@
+// serialise.hpp
+// Copyright (c) 2007-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SERIALISE_HPP
+#define LEXERTL_SERIALISE_HPP
+
+#include "state_machine.hpp"
+#include <boost/serialization/vector.hpp>
+
+namespace lexertl
+{
+// IMPORTANT! This won't work if you don't enable RTTI!
+template<typename CharT, typename id_type, class Archive>
+void serialise (basic_state_machine<CharT, id_type> &sm_, Archive &ar_)
+{
+    detail::basic_internals<id_type> &internals_ = sm_.data ();
+    // Every field goes through the archive's operator &, in this order.
+    ar_ & internals_._eoi;
+    ar_ & *internals_._lookup; // NOTE(review): assumes _lookup is valid.
+    ar_ & internals_._dfa_alphabet;
+    ar_ & internals_._features;
+    ar_ & *internals_._dfa; // NOTE(review): assumes _dfa is valid.
+}
+}
+
+#endif
--- a/inc/lexertl/size_t.hpp
+++ b/inc/lexertl/size_t.hpp
@@ -0,0 +1,12 @@
+// size_t.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SIZE_T_H
+#define LEXERTL_SIZE_T_H
+
+#include <stddef.h> // ptrdiff_t
+#include <cstring>
+
+#endif
--- a/inc/lexertl/sm_traits.hpp
+++ b/inc/lexertl/sm_traits.hpp
@@ -0,0 +1,44 @@
+// sm_traits.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_SM_TRAITS_H
+#define LEXERTL_SM_TRAITS_H
+
+namespace lexertl
+{
+template<typename ch_type, typename sm_type, bool comp, bool look,
+    bool dfa_nfa>
+struct basic_sm_traits
+{
+    enum {char_24_bit = sizeof(ch_type) > 2, compressed = comp, lookup = look,
+        is_dfa = dfa_nfa};
+    typedef ch_type input_char_type;
+    typedef ch_type char_type; // Same as the input character type.
+    typedef sm_type id_type;
+    // Sentinel id: every bit of id_type set.
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~0);
+    }
+};
+
+template<typename ch_type, typename sm_type, bool look, bool dfa_nfa>
+struct basic_sm_traits<ch_type, sm_type, true, look, dfa_nfa>
+{
+    enum {char_24_bit = sizeof(ch_type) > 2, compressed = true, lookup = look,
+        is_dfa = dfa_nfa};
+    typedef ch_type input_char_type;
+    typedef unsigned char char_type; // Compressed case: byte-sized chars.
+    typedef sm_type id_type;
+    // Sentinel id: every bit of id_type set.
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~0);
+    }
+};
+}
+
+#endif
--- a/inc/lexertl/state_machine.hpp
+++ b/inc/lexertl/state_machine.hpp
@@ -0,0 +1,525 @@
+// state_machine.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_STATE_MACHINE_HPP
+#define LEXERTL_STATE_MACHINE_HPP
+
+#include "compile_assert.hpp"
+// memcmp()
+#include <cstring>
+#include <deque>
+#include "internals.hpp"
+#include <map>
+#include <set>
+#include "sm_traits.hpp"
+#include "string_token.hpp"
+
+namespace lexertl
+{
+// Wraps a detail::basic_internals instance and provides a pass that merges
+// DFA rows which are exact duplicates of one another.
+template<typename char_type, typename id_type = std::size_t>
+class basic_state_machine
+{
+public:
+    typedef detail::basic_internals<id_type> internals;
+    typedef basic_sm_traits<char_type, id_type,
+        (sizeof (char_type) > 1), true, true> traits;
+
+    // If you get a compile error here you have
+    // failed to define an unsigned id type.
+    compile_assert<(static_cast<id_type>(~0) > 0)> _valid_id_type;
+
+    basic_state_machine () :
+        _valid_id_type (),
+        _internals ()
+    {
+    }
+
+    // Forwards to internals::clear ().
+    void clear ()
+    {
+        _internals.clear ();
+    }
+
+    // Mutable access to the wrapped internals.
+    internals &data ()
+    {
+        return _internals;
+    }
+
+    // Read only access to the wrapped internals.
+    const internals &data () const
+    {
+        return _internals;
+    }
+
+    // Forwards to internals::empty ().
+    bool empty () const
+    {
+        return _internals.empty ();
+    }
+
+    // Returns the _eoi value stored in the internals.
+    id_type eoi () const
+    {
+        return _internals._eoi;
+    }
+
+    // Runs minimise_dfa () over every DFA whose alphabet size is non-zero,
+    // repeating on each one until a pass no longer changes its size.
+    void minimise ()
+    {
+        const id_type count_ = static_cast<id_type>(_internals._dfa->size ());
+
+        for (id_type dfa_idx_ = 0; dfa_idx_ < count_; ++dfa_idx_)
+        {
+            const id_type alphabet_ = _internals._dfa_alphabet[dfa_idx_];
+            id_type_vector *table_ = _internals._dfa[dfa_idx_];
+
+            if (alphabet_ != 0)
+            {
+                std::size_t before_ = 0;
+
+                do
+                {
+                    before_ = table_->size ();
+                    minimise_dfa (alphabet_, *table_, before_);
+                } while (table_->size () != before_);
+            }
+        }
+    }
+
+    // Returns id_type with every bit set.
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~static_cast<id_type>(0));
+    }
+
+    // Returns id_type with every bit set except the lowest.
+    static id_type skip ()
+    {
+        return static_cast<id_type>(~static_cast<id_type>(1));
+    }
+
+    // Exchanges the internals of the two state machines.
+    void swap (basic_state_machine &rhs_)
+    {
+        _internals.swap (rhs_._internals);
+    }
+
+private:
+    typedef typename internals::id_type_vector id_type_vector;
+    typedef std::set<id_type> index_set;
+    internals _internals;
+
+    // One merging pass over a single DFA table.
+    //
+    // dfa_ is treated as size_ entries laid out in rows of dfa_alphabet_
+    // entries, one row per state. Rows after the first that are byte for
+    // byte identical to an earlier row are dropped and every state index
+    // stored in the surviving rows is renumbered. dfa_ is only replaced
+    // when at least one duplicate row was found.
+    void minimise_dfa (const id_type dfa_alphabet_,
+        id_type_vector &dfa_, std::size_t size_)
+    {
+        const id_type *row_ = &dfa_.front ();
+        const id_type * const stop_ = row_ + size_;
+        // The first entry of the table is read as the BOL state index.
+        const id_type bol_index_ = *row_;
+        // remap_[old index] == new index; npos () means "not assigned yet".
+        id_type_vector remap_ (size_ / dfa_alphabet_, npos ());
+        id_type *remap_ptr_ = &remap_.front ();
+        // Indexes of the rows that will be removed.
+        index_set dupes_;
+        id_type row_index_ = 1;
+        id_type next_index_ = 1;
+
+        // Only one 'jam' state, so it keeps index 0 and is skipped.
+        remap_ptr_[0] = 0;
+        row_ += dfa_alphabet_;
+
+        while (row_ < stop_)
+        {
+            const id_type *other_ = row_ + dfa_alphabet_;
+            id_type other_index_ = row_index_ + 1;
+
+            for (; other_ < stop_; ++other_index_, other_ += dfa_alphabet_)
+            {
+                // Some systems have memcmp in namespace std.
+                using namespace std;
+
+                // Rows already marked as duplicates are not compared again.
+                if (dupes_.count (other_index_) == 0 &&
+                    memcmp (row_, other_, sizeof (id_type) *
+                    dfa_alphabet_) == 0)
+                {
+                    dupes_.insert (other_index_);
+                    remap_ptr_[other_index_] = next_index_;
+                }
+            }
+
+            // A row that matched nothing earlier receives the next new index.
+            if (remap_ptr_[row_index_] == npos ())
+            {
+                remap_ptr_[row_index_] = next_index_++;
+            }
+
+            row_ += dfa_alphabet_;
+            ++row_index_;
+        }
+
+        if (dupes_.empty ())
+        {
+            // Nothing to merge: leave dfa_ untouched.
+            return;
+        }
+
+        const std::size_t rows_ = size_ / dfa_alphabet_;
+        const id_type *src_ = &dfa_.front ();
+        // Begin with a copy of the first row, then size for the survivors.
+        id_type_vector new_dfa_ (src_, src_ + dfa_alphabet_);
+
+        new_dfa_.resize (size_ - dupes_.size () * dfa_alphabet_, 0);
+
+        if (bol_index_)
+        {
+            new_dfa_.front () = remap_ptr_[bol_index_];
+        }
+
+        id_type *dest_ = &new_dfa_.front () + dfa_alphabet_;
+
+        src_ += dfa_alphabet_;
+
+        for (row_index_ = 1; row_index_ < rows_; ++row_index_)
+        {
+            if (dupes_.count (row_index_) != 0)
+            {
+                // Duplicate row: step over it without emitting anything.
+                src_ += dfa_alphabet_;
+                continue;
+            }
+
+            // The leading fields are copied verbatim, except eol_index
+            // which holds a state index and so goes through the remap.
+            dest_[end_state_index] = src_[end_state_index];
+            dest_[id_index] = src_[id_index];
+            dest_[user_id_index] = src_[user_id_index];
+            dest_[push_dfa_index] = src_[push_dfa_index];
+            dest_[next_dfa_index] = src_[next_dfa_index];
+            dest_[eol_index] = remap_ptr_[src_[eol_index]];
+            dest_ += transitions_index;
+            src_ += transitions_index;
+
+            // Every remaining column is a state index to be renumbered.
+            for (id_type col_ = transitions_index; col_ < dfa_alphabet_;
+                ++col_)
+            {
+                *dest_++ = remap_ptr_[*src_++];
+            }
+        }
+
+        dfa_.swap (new_dfa_);
+    }
+};
+
+typedef basic_state_machine<char> state_machine;
+typedef basic_state_machine<wchar_t> wstate_machine;
+
+// A DFA representation keyed by character sets instead of table columns:
+// each state maps a destination state index to the string_token holding
+// the characters that lead to that destination.
+template<typename char_type, typename id_type = std::size_t,
+    bool is_dfa = true>
+struct basic_char_state_machine
+{
+    typedef basic_sm_traits<char_type, id_type, false, false, is_dfa> traits;
+    typedef detail::basic_internals<id_type> internals;
+    typedef typename internals::id_type_vector id_type_vector;
+
+    // One state of a DFA. Two states are equal when every member,
+    // including the whole transition map, compares equal.
+    struct state
+    {
+        typedef basic_string_token<char_type> string_token;
+        typedef std::map<id_type, string_token> id_type_string_token_map;
+        typedef std::pair<id_type, string_token> id_type_string_token_pair;
+        enum push_pop_dfa {neither, push_dfa, pop_dfa};
+
+        bool _end_state;
+        push_pop_dfa _push_pop_dfa;
+        id_type _id;
+        id_type _user_id;
+        id_type _push_dfa;
+        id_type _next_dfa;
+        id_type _eol_index;
+        // Destination state index -> characters leading to it.
+        id_type_string_token_map _transitions;
+
+        state () :
+            _end_state (false),
+            _push_pop_dfa (neither),
+            _id (0),
+            _user_id (traits::npos ()),
+            _push_dfa (traits::npos ()),
+            _next_dfa (0),
+            _eol_index (traits::npos ()),
+            _transitions ()
+        {
+        }
+
+        // Taken by const reference: minimise_dfa () compares states in a
+        // nested loop and a by-value parameter would copy the transition
+        // map on every single comparison.
+        bool operator == (const state &rhs_) const
+        {
+            return _end_state == rhs_._end_state &&
+                _push_pop_dfa == rhs_._push_pop_dfa &&
+                _id == rhs_._id &&
+                _user_id == rhs_._user_id &&
+                _push_dfa == rhs_._push_dfa &&
+                _next_dfa == rhs_._next_dfa &&
+                _eol_index == rhs_._eol_index &&
+                _transitions == rhs_._transitions;
+        }
+    };
+
+    typedef typename state::string_token string_token;
+    typedef std::vector<state> state_vector;
+    typedef std::vector<string_token> string_token_vector;
+    typedef typename state::id_type_string_token_pair
+        id_type_string_token_pair;
+
+    // A single DFA: its states plus the index of the BOL state
+    // (npos () when there is none).
+    struct dfa
+    {
+        id_type _bol_index;
+        state_vector _states;
+
+        // Creates size_ default constructed states.
+        dfa (const std::size_t size_) :
+            _bol_index (traits::npos ()),
+            _states (state_vector (size_))
+        {
+        }
+
+        std::size_t size () const
+        {
+            return _states.size ();
+        }
+
+        void swap (dfa &rhs_)
+        {
+            std::swap (_bol_index, rhs_._bol_index);
+            _states.swap (rhs_._states);
+        }
+    };
+
+    typedef std::deque<dfa> dfa_deque;
+
+    dfa_deque _sm_deque;
+
+    // If you get a compile error here you have
+    // failed to define an unsigned id type.
+    compile_assert<(static_cast<id_type>(~0) > 0)>
+        _valid_id_type;
+
+    basic_char_state_machine () :
+        _sm_deque (),
+        _valid_id_type ()
+    {
+    }
+
+    // Converts DFA number dfa_index_ of internals_ into a new dfa that is
+    // pushed onto the back of _sm_deque.
+    //
+    // token_vector_[n] supplies the characters for transition column n.
+    // The first table row is skipped and does not become a state; state
+    // indexes stored in the table are therefore one higher than the
+    // indexes used here, with 0 in the table meaning "none".
+    void append (const string_token_vector &token_vector_,
+        const internals &internals_, const id_type dfa_index_)
+    {
+        const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_index_];
+        const std::size_t alphabet_ = dfa_alphabet_ - transitions_index;
+        const id_type_vector &source_dfa_ = *internals_._dfa[dfa_index_];
+        const id_type *ptr_ = &source_dfa_.front ();
+        const std::size_t size_ = (source_dfa_.size () - dfa_alphabet_) /
+            dfa_alphabet_;
+        typename state::id_type_string_token_map::iterator trans_iter_;
+
+        _sm_deque.push_back (dfa (size_));
+
+        dfa &dest_dfa_ = _sm_deque.back ();
+
+        // The first entry of the table is the (one based) BOL state index.
+        if (*ptr_)
+        {
+            dest_dfa_._bol_index = *ptr_ - 1;
+        }
+
+        ptr_ += dfa_alphabet_;
+
+        for (id_type i_ = 0; i_ < size_; ++i_)
+        {
+            state &state_ = dest_dfa_._states[i_];
+
+            state_._end_state = ptr_[end_state_index] != 0;
+
+            if (ptr_[push_dfa_index] != npos ())
+            {
+                state_._push_pop_dfa = state::push_dfa;
+            }
+            else if (ptr_[end_state_index] & pop_dfa_bit)
+            {
+                state_._push_pop_dfa = state::pop_dfa;
+            }
+
+            state_._id = ptr_[id_index];
+            state_._user_id = ptr_[user_id_index];
+            state_._push_dfa = ptr_[push_dfa_index];
+            state_._next_dfa = ptr_[next_dfa_index];
+
+            if (ptr_[eol_index])
+            {
+                state_._eol_index = ptr_[eol_index] - 1;
+            }
+
+            ptr_ += transitions_index;
+
+            // Group the columns by destination: all columns that lead to
+            // the same state are merged into one string_token.
+            for (id_type col_index_ = 0; col_index_ < alphabet_;
+                ++col_index_, ++ptr_)
+            {
+                const id_type next_ = *ptr_;
+
+                if (next_ > 0)
+                {
+                    trans_iter_ = state_._transitions.find (next_ - 1);
+
+                    if (trans_iter_ == state_._transitions.end ())
+                    {
+                        trans_iter_ = state_._transitions.insert
+                            (id_type_string_token_pair (next_ - 1,
+                            token_vector_[col_index_])).first;
+                    }
+                    else
+                    {
+                        trans_iter_->second.insert (token_vector_[col_index_]);
+                    }
+                }
+            }
+        }
+    }
+
+    // Removes every DFA.
+    void clear ()
+    {
+        _sm_deque.clear ();
+    }
+
+    // True when no DFA has been appended.
+    bool empty () const
+    {
+        return _sm_deque.empty ();
+    }
+
+    // Runs minimise_dfa () over every non-empty DFA, repeating on each one
+    // until a pass no longer reduces its number of states.
+    void minimise ()
+    {
+        const id_type dfas_ = static_cast<id_type>(_sm_deque.size ());
+
+        for (id_type i_ = 0; i_ < dfas_; ++i_)
+        {
+            dfa *dfa_ = &_sm_deque[i_];
+
+            if (dfa_->size () > 0)
+            {
+                std::size_t size_ = 0;
+
+                do
+                {
+                    size_ = dfa_->size ();
+                    minimise_dfa (*dfa_, size_);
+                } while (dfa_->size () != size_);
+            }
+        }
+    }
+
+    static id_type npos ()
+    {
+        return traits::npos ();
+    }
+
+    // Number of DFAs held.
+    id_type size () const
+    {
+        return static_cast<id_type>(_sm_deque.size ());
+    }
+
+    // Returns id_type with every bit set except the lowest.
+    static id_type skip ()
+    {
+        return static_cast<id_type>(~1);
+    }
+
+    void swap (basic_char_state_machine &csm_)
+    {
+        _sm_deque.swap (csm_._sm_deque);
+    }
+
+private:
+    typedef std::set<id_type> index_set;
+
+    // One merging pass over dfa_, which must hold size_ (> 0) states.
+    //
+    // States equal (operator ==) to an earlier state are dropped and all
+    // state indexes (_bol_index, _eol_index and the transition keys) are
+    // renumbered. dfa_ is only replaced when a duplicate was found.
+    void minimise_dfa (dfa &dfa_, std::size_t size_)
+    {
+        const state *first_ = &dfa_._states.front ();
+        const state *end_ = first_ + size_;
+        id_type index_ = 0;
+        id_type new_index_ = 0;
+        // lookup_[old index] == new index; npos () means "not assigned".
+        id_type_vector lookup_ (size_, npos ());
+        id_type *lookup_ptr_ = &lookup_.front ();
+        // Indexes of the states that will be removed.
+        index_set index_set_;
+
+        for (; first_ != end_; ++first_, ++index_)
+        {
+            const state *second_ = first_ + 1;
+
+            for (id_type curr_index_ = index_ + 1; second_ != end_;
+                ++curr_index_, ++second_)
+            {
+                if (index_set_.find (curr_index_) != index_set_.end ())
+                {
+                    continue;
+                }
+
+                if (*first_ == *second_)
+                {
+                    index_set_.insert (curr_index_);
+                    lookup_ptr_[curr_index_] = new_index_;
+                }
+            }
+
+            if (lookup_ptr_[index_] == npos ())
+            {
+                lookup_ptr_[index_] = new_index_;
+                ++new_index_;
+            }
+        }
+
+        if (!index_set_.empty ())
+        {
+            const state *front_ = &dfa_._states.front ();
+            // new_index_ now equals the number of surviving states.
+            dfa new_dfa_ (new_index_);
+            typename index_set::const_iterator set_end_ = index_set_.end ();
+            const state *ptr_ = front_;
+            state *new_ptr_ = &new_dfa_._states.front ();
+
+            if (dfa_._bol_index != npos ())
+            {
+                new_dfa_._bol_index = lookup_ptr_[dfa_._bol_index];
+            }
+
+            for (index_ = 0; index_ < size_; ++index_)
+            {
+                if (index_set_.find (index_) != set_end_)
+                {
+                    ++ptr_;
+                    continue;
+                }
+
+                // Copy every non-index member unchanged. _id must come
+                // from _id (not _end_state), and the push/pop members are
+                // part of a state's identity so they are carried over too.
+                new_ptr_->_end_state = ptr_->_end_state;
+                new_ptr_->_push_pop_dfa = ptr_->_push_pop_dfa;
+                new_ptr_->_id = ptr_->_id;
+                new_ptr_->_user_id = ptr_->_user_id;
+                new_ptr_->_push_dfa = ptr_->_push_dfa;
+                new_ptr_->_next_dfa = ptr_->_next_dfa;
+
+                if (ptr_->_eol_index != npos ())
+                {
+                    new_ptr_->_eol_index = lookup_ptr_[ptr_->_eol_index];
+                }
+
+                typename state::id_type_string_token_map::const_iterator
+                    iter_ = ptr_->_transitions.begin ();
+                typename state::id_type_string_token_map::const_iterator
+                    trans_end_ = ptr_->_transitions.end ();
+                typename state::id_type_string_token_map::iterator find_;
+
+                // Destinations that merged into one state have their
+                // character sets combined under the new index.
+                for (; iter_ != trans_end_; ++iter_)
+                {
+                    find_ = new_ptr_->_transitions.find
+                        (lookup_ptr_[iter_->first]);
+
+                    if (find_ == new_ptr_->_transitions.end ())
+                    {
+                        new_ptr_->_transitions.insert
+                            (id_type_string_token_pair
+                            (lookup_ptr_[iter_->first], iter_->second));
+                    }
+                    else
+                    {
+                        find_->second.insert (iter_->second);
+                    }
+                }
+
+                ++ptr_;
+                ++new_ptr_;
+            }
+
+            dfa_.swap (new_dfa_);
+        }
+    }
+};
+
+typedef basic_char_state_machine<char> char_state_machine;
+typedef basic_char_state_machine<wchar_t> wchar_state_machine;
+}
+
+#endif
--- a/inc/lexertl/stream_shared_iterator.hpp
+++ b/inc/lexertl/stream_shared_iterator.hpp
@@ -0,0 +1,350 @@
+// stream_shared_iterator.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_STREAM_SHARED_ITERATOR_H
+#define LEXERTL_STREAM_SHARED_ITERATOR_H
+
+#include <algorithm>
+// memcpy
+#include <cstring>
+#include <iostream>
+#include <list>
+#include <math.h>
+#include "runtime_error.hpp"
+#include "size_t.hpp"
+#include <vector>
+
+namespace lexertl
+{
+// Forward iterator over a std::basic_istream. Every iterator copied from one
+// stream-constructed ("master") iterator shares a single buffer. When more
+// input is needed the buffer drops whatever lies before the lowest index
+// held by a registered iterator, or grows if nothing can be dropped.
+//
+// Invariant relied on below: whenever _shared is non-null, this iterator
+// holds exactly one count in _shared->_ref_count and _iter is a valid
+// iterator into _shared->_clients (its own entry, or end () when it is not
+// registered).
+template<typename char_type>
+class basic_stream_shared_iterator
+{
+public:
+    typedef std::basic_istream<char_type> istream;
+    typedef std::forward_iterator_tag iterator_category;
+    typedef std::size_t difference_type;
+    typedef char_type value_type;
+    typedef char_type *pointer;
+    typedef char_type &reference;
+
+    // End iterator: no buffer, index npos.
+    basic_stream_shared_iterator () :
+        _master (false),
+        _live (false),
+        _index (shared::npos ()),
+        _shared (0)
+    {
+    }
+
+    // Master iterator: creates the shared buffer (initially buff_size_
+    // characters, grown by increment_ when required) and registers itself.
+    basic_stream_shared_iterator (istream &stream_,
+        const std::size_t buff_size_ = 1024,
+        const std::size_t increment_ = 1024) :
+        _master (true),
+        _live (false),
+        _index (shared::npos ()),
+        // For exception safety don't call new yet
+        _shared (0)
+    {
+        // Safe to call potentially throwing new now.
+        _shared = new shared (stream_, buff_size_, increment_);
+
+        try
+        {
+            _iter = _shared->_clients.insert (_shared->_clients.end (), this);
+        }
+        catch (...)
+        {
+            // The destructor never runs for a failed constructor, so the
+            // buffer has to be freed here.
+            delete _shared;
+            throw;
+        }
+
+        ++_shared->_ref_count;
+    }
+
+    basic_stream_shared_iterator (const basic_stream_shared_iterator &rhs_) :
+        _master (false),
+        _live (false),
+        _index (rhs_._master ? rhs_._shared->lowest () : rhs_._index),
+        _shared (rhs_._shared)
+    {
+        if (_shared)
+        {
+            // New copy of an iterator.
+            // The assumption is that any copy must be live
+            // even if the rhs is not (otherwise we will never
+            // have a record of the start of the current range!)
+            // Register before counting: if insert () throws, the shared
+            // reference count has not been touched.
+            _iter = _shared->_clients.insert (_shared->_clients.end (), this);
+            ++_shared->_ref_count;
+            _live = true;
+        }
+    }
+
+    ~basic_stream_shared_iterator ()
+    {
+        release ();
+    }
+
+    // Makes this iterator refer to the same buffer and position as rhs_.
+    // Unlike the copy constructor, the result is live only if rhs_ is.
+    basic_stream_shared_iterator &operator =
+        (const basic_stream_shared_iterator &rhs_)
+    {
+        if (this != &rhs_)
+        {
+            // Read everything needed from rhs_ before any list changes.
+            const std::size_t index_ = rhs_._master ?
+                rhs_._shared->lowest () : rhs_._index;
+            shared *source_ = rhs_._shared;
+
+            // Take the new reference before dropping the old one so that
+            // assigning between iterators on the same buffer can never
+            // free that buffer.
+            if (source_)
+            {
+                ++source_->_ref_count;
+            }
+
+            release ();
+            _shared = source_;
+            _master = false;
+            _index = index_;
+            _live = false;
+
+            if (_shared)
+            {
+                _iter = _shared->_clients.end ();
+
+                if (rhs_._live)
+                {
+                    // Store the list position in *this* iterator; rhs_
+                    // keeps its own entry.
+                    _iter = _shared->_clients.insert (_iter, this);
+                    _live = true;
+                }
+            }
+        }
+
+        return *this;
+    }
+
+    // Equal when the indexes match and either the buffers match or one
+    // side has no buffer while one side's index is npos.
+    bool operator == (const basic_stream_shared_iterator &rhs_) const
+    {
+        return _index == rhs_._index &&
+            (_shared == rhs_._shared ||
+            ((_index == shared::npos () || rhs_._index == shared::npos ()) &&
+            (!_shared || !rhs_._shared)));
+    }
+
+    bool operator != (const basic_stream_shared_iterator &rhs_) const
+    {
+        return !(*this == rhs_);
+    }
+
+    // Throws runtime_error when this iterator has no buffer.
+    const char_type &operator * ()
+    {
+        check_master ();
+        return _shared->_buffer[_index];
+    }
+
+    // Throws runtime_error when this iterator has no buffer.
+    basic_stream_shared_iterator &operator ++ ()
+    {
+        check_master ();
+        ++_index;
+        update_state ();
+        return *this;
+    }
+
+    basic_stream_shared_iterator operator ++ (int)
+    {
+        basic_stream_shared_iterator iter_ = *this;
+
+        check_master ();
+        ++_index;
+        update_state ();
+        return iter_;
+    }
+
+private:
+    // The buffer shared by a family of iterators plus the list of the
+    // iterators registered with it.
+    class shared
+    {
+    public:
+        std::size_t _ref_count;
+        typedef std::vector<char_type> char_vector;
+        typedef std::list<basic_stream_shared_iterator *> iter_list;
+        istream &_stream;
+        std::size_t _increment;
+        // Number of valid characters in _buffer.
+        std::size_t _len;
+        char_vector _buffer;
+        iter_list _clients;
+
+        // Reads up to buff_size_ characters straight away.
+        shared (istream &stream_, const std::size_t buff_size_,
+            const std::size_t increment_) :
+            _ref_count (0),
+            _stream (stream_),
+            _increment (increment_),
+            _len (0)
+        {
+            _buffer.resize (buff_size_);
+            _stream.read (&_buffer.front (), _buffer.size ());
+            _len = static_cast<std::size_t>(_stream.gcount ());
+        }
+
+        // Fetches more input. Returns false when nothing could be read.
+        bool reload_buffer ()
+        {
+            const std::size_t lowest_ = lowest ();
+            std::size_t read_ = 0;
+
+            if (lowest_ == 0)
+            {
+                // Resize buffer
+                const std::size_t old_size_ = _buffer.size ();
+                const std::size_t new_size_ = old_size_ + _increment;
+
+                _buffer.resize (new_size_);
+                _stream.read (&_buffer.front () + old_size_, _increment);
+                read_ = static_cast<std::size_t>(_stream.gcount ());
+
+                if (read_)
+                {
+                    read_ += old_size_;
+                    _len = read_;
+                }
+            }
+            else
+            {
+                // Some systems have memmove in namespace std
+                using namespace std;
+                // Characters kept (everything from lowest_ onwards)...
+                const size_t start_ = _buffer.size () - lowest_;
+                // ...and the room that frees up at the end of the buffer.
+                const size_t len_ = _buffer.size () - start_;
+
+                // Source and destination are ranges of the same buffer
+                // and may overlap, which memcpy does not permit.
+                memmove (&_buffer.front (), &_buffer[lowest_], start_ *
+                    sizeof (char_type));
+                _stream.read (&_buffer.front () + start_, len_);
+                read_ = static_cast<size_t>(_stream.gcount ());
+                // Registered iterators follow the data they point at.
+                subtract (lowest_);
+
+                if (read_)
+                {
+                    read_ += start_;
+                    _len = read_;
+                }
+                else
+                {
+                    _len = highest ();
+                }
+            }
+
+            return read_ != 0;
+        }
+
+        // Unregisters ptr_ if it is currently registered.
+        void erase (basic_stream_shared_iterator *ptr_)
+        {
+            if (ptr_->_iter != _clients.end ())
+            {
+                _clients.erase (ptr_->_iter);
+                ptr_->_iter = _clients.end ();
+            }
+        }
+
+        // Smallest index held by a registered iterator; 0 when there is
+        // none (an index of npos never counts as it is the maximum value).
+        std::size_t lowest () const
+        {
+            std::size_t lowest_ = npos ();
+            typename iter_list::const_iterator iter_ = _clients.begin ();
+            typename iter_list::const_iterator end_ = _clients.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                const basic_stream_shared_iterator *ptr_ = *iter_;
+
+                if (ptr_->_index < lowest_)
+                {
+                    lowest_ = ptr_->_index;
+                }
+            }
+
+            if (lowest_ == npos ())
+            {
+                lowest_ = 0;
+            }
+
+            return lowest_;
+        }
+
+        // Largest non-npos index held by a registered iterator, else 0.
+        std::size_t highest () const
+        {
+            std::size_t highest_ = 0;
+            typename iter_list::const_iterator iter_ = _clients.begin ();
+            typename iter_list::const_iterator end_ = _clients.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                const basic_stream_shared_iterator *ptr_ = *iter_;
+
+                if (ptr_->_index != npos () && ptr_->_index > highest_)
+                {
+                    highest_ = ptr_->_index;
+                }
+            }
+
+            return highest_;
+        }
+
+        // Shifts every registered non-npos index down by lowest_.
+        void subtract (const std::size_t lowest_)
+        {
+            typename iter_list::iterator iter_ = _clients.begin ();
+            typename iter_list::iterator end_ = _clients.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                basic_stream_shared_iterator *ptr_ = *iter_;
+
+                if (ptr_->_index != npos ())
+                {
+                    ptr_->_index -= lowest_;
+                }
+            }
+        }
+
+        static std::size_t npos ()
+        {
+            return static_cast<std::size_t>(~0);
+        }
+
+    private:
+        // Not assignable (declared, never defined).
+        shared &operator = (const shared &rhs_);
+    };
+
+    // True only for an iterator built from a stream that has not yet been
+    // dereferenced, incremented or assigned to.
+    bool _master;
+    // True while this iterator is registered with a valid position.
+    bool _live;
+    std::size_t _index;
+    shared *_shared;
+    // Position of this iterator in _shared->_clients (see class comment).
+    typename shared::iter_list::iterator _iter;
+
+    // Unregisters from the current buffer (if any) and drops this
+    // iterator's reference, destroying the buffer on the last release.
+    void release ()
+    {
+        if (_shared)
+        {
+            _shared->erase (this);
+
+            if (--_shared->_ref_count == 0)
+            {
+                delete _shared;
+            }
+
+            _shared = 0;
+        }
+    }
+
+    // Rejects iterators without a buffer and turns a master iterator into
+    // an ordinary live one positioned at the lowest registered index.
+    void check_master ()
+    {
+        if (!_shared)
+        {
+            throw runtime_error ("Cannot manipulate null (end) "
+                "stream_shared_iterators.");
+        }
+
+        if (_master)
+        {
+            _master = false;
+            _live = true;
+            _index = _shared->lowest ();
+        }
+    }
+
+    // Called after the index moved: reloads when the index has passed the
+    // valid data and becomes an end iterator if no more input arrives.
+    void update_state ()
+    {
+        if (_index >= _shared->_len)
+        {
+            if (!_shared->reload_buffer ())
+            {
+                _shared->erase (this);
+                _index = shared::npos ();
+                _live = false;
+            }
+        }
+    }
+};
+
+typedef basic_stream_shared_iterator<char> stream_shared_iterator;
+typedef basic_stream_shared_iterator<wchar_t> wstream_shared_iterator;
+}
+
+#endif
--- a/inc/lexertl/string_token.hpp
+++ b/inc/lexertl/string_token.hpp
@@ -0,0 +1,421 @@
+// string_token.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_STRING_TOKEN_HPP
+#define LEXERTL_STRING_TOKEN_HPP
+
+#include "char_traits.hpp"
+#include <ios> // Needed by GCC 4.4
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace lexertl
+{
+template<typename ch_type>
+struct basic_string_token
+{
+    typedef ch_type char_type;
+    typedef basic_char_traits<char_type> char_traits;
+    typedef typename char_traits::index_type index_type;
+    typedef std::pair<index_type, index_type> range;
+    typedef std::vector<range> range_vector;
+    typedef std::basic_string<char_type> string;
+    typedef basic_string_token<char_type> string_token;
+
+    range_vector _ranges;
+
+    basic_string_token () :
+        _ranges ()
+    {
+    }
+
+    basic_string_token (char_type ch_) :
+        _ranges ()
+    {
+        insert (range (ch_, ch_));
+    }
+
+    basic_string_token (char_type first_, char_type second_) :
+        _ranges ()
+    {
+        insert (range (first_, second_));
+    }
+
+    void clear ()
+    {
+        _ranges.clear ();
+    }
+
+    bool empty () const
+    {
+        return _ranges.empty ();
+    }
+
+    bool any () const
+    {
+        return _ranges.size () == 1 && _ranges.front ().first == 0 &&
+            _ranges.front ().second == char_traits::max_val ();
+    }
+
+    bool operator < (const basic_string_token &rhs_) const
+    {
+        return _ranges < rhs_._ranges;
+    }
+
+    bool operator == (const basic_string_token &rhs_) const
+    {
+        return _ranges == rhs_._ranges;
+    }
+
+    bool negatable () const
+    {
+        std::size_t size_ = 0;
+        typename range_vector::const_iterator iter_ = _ranges.begin ();
+        typename range_vector::const_iterator end_ = _ranges.end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            size_ += static_cast<std::size_t>(iter_->second) + 1 -
+                static_cast<std::size_t>(iter_->first);
+        }
+
+        return size_ > static_cast<std::size_t>(char_traits::max_val ()) / 2;
+    }
+
+    void swap (basic_string_token &rhs_)
+    {
+        _ranges.swap (rhs_._ranges);
+    }
+
+    void insert (const basic_string_token &rhs_)
+    {
+        typename range_vector::const_iterator iter_ = rhs_._ranges.begin ();
+        typename range_vector::const_iterator end_ = rhs_._ranges.end ();
+
+        for (; iter_ != end_; ++iter_)
+        {
+            insert (*iter_);
+        }
+    }
+
+    // Deliberately pass by value - may modify
+    typename range_vector::iterator insert (range rhs_)
+    {
+        bool insert_ = true;
+        typename range_vector::iterator iter_ = _ranges.begin ();
+        typename range_vector::const_iterator end_ = _ranges.end ();
+
+        while (iter_ != end_)
+        {
+            // follows current item
+            if (rhs_.first > iter_->second)
+            {
+                if (rhs_.first == iter_->second + 1)
+                {
+                    // Auto normalise
+                    rhs_.first = iter_->first;
+                }
+                else
+                {
+                    // No intersection, consider next
+                    ++iter_;
+                    continue;
+                }
+            }
+            // Precedes current item
+            else if (rhs_.second < iter_->first)
+            {
+                if (rhs_.second == iter_->first - 1)
+                {
+                    // Auto normalise
+                    rhs_.second = iter_->second;
+                }
+                else
+                {
+                    // insert here
+                    break;
+                }
+            }
+            else
+            {
+                // overlap (under)
+                if (rhs_.first < iter_->first)
+                {
+                    if (rhs_.second < iter_->second)
+                    {
+                        rhs_.second = iter_->second;
+                    }
+                }
+                // overlap (over)
+                else if (rhs_.second > iter_->second)
+                {
+                    if (rhs_.first > iter_->first)
+                    {
+                        rhs_.first = iter_->first;
+                    }
+                }
+                // subset
+                else
+                {
+                    insert_ = false;
+                    iter_ = _ranges.end ();
+                    break;
+                }
+            }
+
+            // Code minimisation: this always applies unless we have already
+            // exited the loop, or "continue" executed.
+            iter_ = _ranges.erase (iter_);
+            end_ = _ranges.end ();
+        }
+
+        if (insert_)
+        {
+            iter_ = _ranges.insert(iter_, rhs_);
+        }
+
+        return iter_;
+    }
+
+    void negate ()
+    {
+        index_type next_ = 0;
+        const index_type max_ = char_traits::max_val ();
+        string_token temp_;
+        typename range_vector::iterator iter_ = _ranges.begin ();
+        typename range_vector::const_iterator end_ = _ranges.end ();
+        bool finished_ = false;
+
+        for (; iter_ != end_; ++iter_)
+        {
+            if (next_ < iter_->first)
+            {
+                temp_.insert (range (next_, iter_->first - 1));
+            }
+
+            if (iter_->second < max_)
+            {
+                next_ = iter_->second + 1;
+            }
+            else
+            {
+                finished_ = true;
+                break;
+            }
+        }
+
+        if (!finished_)
+        {
+            temp_.insert (range (next_, max_));
+        }
+
+        swap (temp_);
+    }
+
+    // Moves the values common to this token and rhs_ into overlap_.
+    // Every overlapping stretch that is found is cut out of both tokens
+    // (see adjust ()) and then inserted into overlap_, so both this token
+    // and rhs_ are modified.
+    void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        typename range_vector::iterator mine_ = _ranges.begin ();
+        typename range_vector::const_iterator mine_end_ = _ranges.end ();
+        typename range_vector::iterator theirs_ = rhs_._ranges.begin ();
+        typename range_vector::const_iterator theirs_end_ =
+            rhs_._ranges.end ();
+
+        while (mine_ != mine_end_ && theirs_ != theirs_end_)
+        {
+            // Our current range finishes before theirs starts.
+            if (theirs_->first > mine_->second)
+            {
+                ++mine_;
+                continue;
+            }
+
+            // Their current range finishes before ours starts.
+            if (theirs_->second < mine_->first)
+            {
+                ++theirs_;
+                continue;
+            }
+
+            // The two ranges overlap: the shared part runs from the larger
+            // of the two starts to the smaller of the two ends.
+            range common_;
+
+            common_.first = theirs_->first > mine_->first ?
+                theirs_->first : mine_->first;
+            common_.second = theirs_->second < mine_->second ?
+                theirs_->second : mine_->second;
+
+            // adjust () may erase or insert, so it refreshes the iterators
+            // it is handed.
+            adjust (common_, *this, mine_, mine_end_);
+            adjust (common_, rhs_, theirs_, theirs_end_);
+            overlap_.insert (common_);
+        }
+    }
+
+    // Removes from this token every value that also occurs in rhs_.
+    // rhs_ is only walked here; adjust () is applied to this token alone.
+    void remove (basic_string_token &rhs_)
+    {
+        typename range_vector::iterator mine_ = _ranges.begin ();
+        typename range_vector::const_iterator mine_end_ = _ranges.end ();
+        typename range_vector::iterator theirs_ = rhs_._ranges.begin ();
+        typename range_vector::const_iterator theirs_end_ =
+            rhs_._ranges.end ();
+
+        while (mine_ != mine_end_ && theirs_ != theirs_end_)
+        {
+            // Our current range finishes before theirs starts.
+            if (theirs_->first > mine_->second)
+            {
+                ++mine_;
+                continue;
+            }
+
+            // Their current range finishes before ours starts.
+            if (theirs_->second < mine_->first)
+            {
+                ++theirs_;
+                continue;
+            }
+
+            // Overlap: from the larger start to the smaller end.
+            range common_;
+
+            common_.first = theirs_->first > mine_->first ?
+                theirs_->first : mine_->first;
+            common_.second = theirs_->second < mine_->second ?
+                theirs_->second : mine_->second;
+
+            // Cut the shared part out of this token; adjust () refreshes
+            // mine_ and mine_end_ when it erases or inserts.
+            adjust (common_, *this, mine_, mine_end_);
+        }
+    }
+
+    // Returns a printable representation of ch_:
+    //  - backslash + letter for the characters with a simple escape
+    //    (\0 \a \b \f \n \r \t \v \\ \" \'),
+    //  - "\x1b" for 27,
+    //  - "\x" followed by the (unpadded) hex value for anything else outside
+    //    32..126,
+    //  - the character itself otherwise.
+    static string escape_char (const typename char_traits::index_type ch_)
+    {
+        // Character that follows the backslash in a two character escape;
+        // stays 0 when ch_ has no such escape.
+        char suffix_ = 0;
+
+        switch (ch_)
+        {
+            case '\0': suffix_ = '0'; break;
+            case '\a': suffix_ = 'a'; break;
+            case '\b': suffix_ = 'b'; break;
+            case '\f': suffix_ = 'f'; break;
+            case '\n': suffix_ = 'n'; break;
+            case '\r': suffix_ = 'r'; break;
+            case '\t': suffix_ = 't'; break;
+            case '\v': suffix_ = 'v'; break;
+            case '\\': suffix_ = '\\'; break;
+            case '"': suffix_ = '"'; break;
+            case '\'': suffix_ = '\''; break;
+            default: break;
+        }
+
+        string out_;
+
+        if (suffix_ != 0)
+        {
+            out_ += '\\';
+            out_ += suffix_;
+        }
+        else if (ch_ == 27)
+        {
+            // Escape character: fixed spelling.
+            out_ += '\\';
+            out_ += 'x';
+            out_ += '1';
+            out_ += 'b';
+        }
+        else if (ch_ < 32 || ch_ > 126)
+        {
+            // Not printable: fall back to a hex escape.
+            std::basic_stringstream<char_type> ss_;
+
+            out_ += '\\';
+            out_ += 'x';
+            ss_ << std::hex << static_cast<std::size_t> (ch_);
+            out_ += ss_.str ();
+        }
+        else
+        {
+            out_ += ch_;
+        }
+
+        return out_;
+    }
+
+private:
+    // Cuts range_ out of the range iter_ refers to inside token_.
+    // Depending on where range_ sits, the range is trimmed on the left,
+    // trimmed on the right, split in two, or erased altogether. iter_ and
+    // end_ are refreshed whenever token_'s vector is modified.
+    // NOTE(review): callers pass the overlap between range_ and *iter_, so
+    // range_ is presumably always contained in *iter_ - confirm.
+    void adjust (const range &range_, basic_string_token &token_,
+        typename range_vector::iterator &iter_,
+        typename range_vector::const_iterator &end_)
+    {
+        if (!(range_.first > iter_->first))
+        {
+            // range_ starts at the beginning of *iter_.
+            if (range_.second < iter_->second)
+            {
+                // Something is left on the right: trim the left side.
+                iter_->first = range_.second + 1;
+            }
+            else
+            {
+                // Nothing is left: drop the whole range.
+                iter_ = token_._ranges.erase (iter_);
+                end_ = token_._ranges.end ();
+            }
+
+            return;
+        }
+
+        // range_ starts inside *iter_: keep the part in front of it.
+        const index_type old_second_ = iter_->second;
+
+        iter_->second = range_.first - 1;
+
+        if (range_.second < old_second_)
+        {
+            // There is also a part behind range_: re-add it as a new range.
+            range tail_ (range_.second + 1, old_second_);
+
+            iter_ = token_.insert (tail_);
+            end_ = token_._ranges.end ();
+        }
+    }
+};
+}
+
+#endif
--- a/inc/lexertl/utf_iterators.hpp
+++ b/inc/lexertl/utf_iterators.hpp
@@ -0,0 +1,380 @@
+// utf_iterators.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+// Inspired by http://utfcpp.sourceforge.net/
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_UTF_ITERATORS_HPP
+#define LEXERTL_UTF_ITERATORS_HPP
+
+#include <iterator>
+
+namespace lexertl
+{
+// Input iterator adaptor that decodes UTF-8 read through char_iterator into
+// code points of type char_type.
+//
+// Decoding is eager: the constructor and every increment decode the next
+// character, so the wrapped iterator always sits one position past the
+// character returned by operator *. Consequently an iterator constructed
+// from an end position dereferences that position as well; the underlying
+// sequence must be readable there (e.g. a NUL terminated buffer).
+// Continuation bytes are not validated.
+template<typename char_iterator, typename char_type>
+class basic_utf8_in_iterator :
+    public std::iterator<std::input_iterator_tag, char_type>
+{
+public:
+    // Public on purpose: a private typedef of this name would hide the one
+    // inherited from std::iterator and make std::iterator_traits (and with
+    // it the standard algorithms) unusable with this iterator.
+    typedef typename std::iterator_traits<char_iterator>::
+        difference_type difference_type;
+
+    // Singular iterator: the wrapped iterator is default constructed.
+    basic_utf8_in_iterator () :
+        _char (0)
+    {
+    }
+
+    // Wraps it_ and immediately decodes the character found there.
+    explicit basic_utf8_in_iterator (const char_iterator& it_) :
+        _it (it_),
+        _char (0)
+    {
+        next ();
+    }
+
+    // The most recently decoded code point.
+    char_type operator * () const
+    {
+        return _char;
+    }
+
+    // Iterators compare by the position of the wrapped iterator only.
+    bool operator == (const basic_utf8_in_iterator &rhs_) const
+    {
+        return _it == rhs_._it;
+    }
+
+    bool operator != (const basic_utf8_in_iterator &rhs_) const
+    {
+        return _it != rhs_._it;
+    }
+
+    basic_utf8_in_iterator &operator ++ ()
+    {
+        next ();
+        return *this;
+    }
+
+    basic_utf8_in_iterator operator ++ (int)
+    {
+        basic_utf8_in_iterator temp_ = *this;
+
+        next ();
+        return temp_;
+    }
+
+private:
+    char_iterator _it;
+    char_type _char;
+
+    // Decodes the sequence starting at _it into _char and leaves _it on the
+    // first byte after it. A byte that is not a valid lead byte (len () == 0)
+    // is passed through unchanged as a single character.
+    void next ()
+    {
+        const char len_ = len (_it);
+        char_type ch_ = *_it & 0xff;
+
+        switch (len_)
+        {
+        case 1:
+            break;
+        case 2:
+            // 110xxxxx 10yyyyyy
+            ++_it;
+            ch_ = (ch_ << 6 & 0x7ff) | (*_it & 0x3f);
+            break;
+        case 3:
+            // 1110xxxx 10yyyyyy 10zzzzzz
+            ++_it;
+            ch_ = (ch_ << 12 & 0xffff) | ((*_it & 0xff) << 6 & 0xfff);
+            ++_it;
+            ch_ |= *_it & 0x3f;
+            break;
+        case 4:
+            // 11110xxx 10yyyyyy 10zzzzzz 10wwwwww
+            ++_it;
+            ch_ = (ch_ << 18 & 0x1fffff) | ((*_it & 0xff) << 12 & 0x3ffff);
+            ++_it;
+            ch_ |= (*_it & 0xff) << 6 & 0xfff;
+            ++_it;
+            ch_ |= *_it & 0x3f;
+            break;
+        }
+
+        ++_it;
+        _char = ch_;
+    }
+
+    // Sequence length (1-4) announced by the lead byte at it_, or 0 when the
+    // byte is not a valid lead byte.
+    char len (const char_iterator &it_) const
+    {
+        const unsigned char ch_ = *it_;
+
+        return ch_ < 0x80 ? 1 :
+            ch_ >> 5 == 0x06 ? 2 :
+            ch_ >> 4 == 0x0e ? 3 :
+            ch_ >> 3 == 0x1e ? 4 : 0;
+    }
+};
+
+// Input iterator adaptor that re-encodes the code points read through
+// char_iterator as UTF-8 and hands them out one byte (char) at a time.
+//
+// The code point at the wrapped iterator is fetched and encoded eagerly, so
+// construction from an iterator dereferences it straight away. Equality only
+// looks at the wrapped iterator, not at the byte position inside the
+// current code point.
+template<typename char_iterator>
+class basic_utf8_out_iterator :
+    public std::iterator<std::input_iterator_tag, char>
+{
+public:
+    basic_utf8_out_iterator () :
+        _count (0),
+        _index (0)
+    {
+    }
+
+    explicit basic_utf8_out_iterator (const char_iterator& it_) :
+        _it (it_),
+        _count (0),
+        _index (0)
+    {
+        next ();
+    }
+
+    // Current byte of the encoded code point.
+    char operator * () const
+    {
+        return _bytes[_index];
+    }
+
+    bool operator == (const basic_utf8_out_iterator &rhs_) const
+    {
+        return _it == rhs_._it;
+    }
+
+    bool operator != (const basic_utf8_out_iterator &rhs_) const
+    {
+        return _it != rhs_._it;
+    }
+
+    // Steps to the next byte, encoding the next code point once the bytes
+    // of the current one are used up.
+    basic_utf8_out_iterator &operator ++ ()
+    {
+        if (++_index >= _count)
+        {
+            next ();
+        }
+
+        return *this;
+    }
+
+    basic_utf8_out_iterator operator ++ (int)
+    {
+        basic_utf8_out_iterator prev_ (*this);
+
+        ++*this;
+        return prev_;
+    }
+
+private:
+    char_iterator _it;
+    char _bytes[4];
+    unsigned char _count;
+    unsigned char _index;
+
+    // Reads the code point at _it, stores its UTF-8 encoding in _bytes
+    // (_count bytes) and moves _it on.
+    void next ()
+    {
+        const std::size_t cp_ = *_it;
+
+        _count = len (cp_);
+        _index = 0;
+
+        if (_count == 1)
+        {
+            _bytes[0] = static_cast<char>(cp_);
+        }
+        else if (_count == 2)
+        {
+            _bytes[0] = static_cast<char>((cp_ >> 6) | 0xc0);
+            _bytes[1] = (cp_ & 0x3f) | 0x80;
+        }
+        else if (_count == 3)
+        {
+            _bytes[0] = static_cast<char>((cp_ >> 12) | 0xe0);
+            _bytes[1] = ((cp_ >> 6) & 0x3f) | 0x80;
+            _bytes[2] = (cp_ & 0x3f) | 0x80;
+        }
+        else if (_count == 4)
+        {
+            _bytes[0] = static_cast<char>((cp_ >> 18) | 0xf0);
+            _bytes[1] = ((cp_ >> 12) & 0x3f) | 0x80;
+            _bytes[2] = ((cp_ >> 6) & 0x3f) | 0x80;
+            _bytes[3] = (cp_ & 0x3f) | 0x80;
+        }
+
+        ++_it;
+    }
+
+    // Number of UTF-8 bytes needed for ch_ (anything from 0x10000 upwards
+    // is given 4).
+    char len (const std::size_t ch_) const
+    {
+        if (ch_ < 0x80)
+        {
+            return 1;
+        }
+
+        if (ch_ < 0x800)
+        {
+            return 2;
+        }
+
+        if (ch_ < 0x10000)
+        {
+            return 3;
+        }
+
+        return 4;
+    }
+};
+
+// Input iterator adaptor that decodes UTF-16 read through char_iterator into
+// code points of type char_type, combining surrogate pairs.
+//
+// Decoding is eager: the constructor and every increment decode the next
+// character, so the wrapped iterator always sits one position past the
+// character returned by operator *. Consequently an iterator constructed
+// from an end position dereferences that position as well; the underlying
+// sequence must be readable there (e.g. a NUL terminated buffer).
+template<typename char_iterator, typename char_type>
+class basic_utf16_in_iterator :
+    public std::iterator<std::input_iterator_tag, char_type>
+{
+public:
+    // Public on purpose: a private typedef of this name would hide the one
+    // inherited from std::iterator and make std::iterator_traits (and with
+    // it the standard algorithms) unusable with this iterator.
+    typedef typename std::iterator_traits<char_iterator>::
+        difference_type difference_type;
+
+    // Singular iterator: the wrapped iterator is default constructed.
+    basic_utf16_in_iterator () :
+        _char (0)
+    {
+    }
+
+    // Wraps it_ and immediately decodes the character found there.
+    explicit basic_utf16_in_iterator (const char_iterator &it_) :
+        _it (it_),
+        _char (0)
+    {
+        next ();
+    }
+
+    // The most recently decoded code point.
+    char_type operator * () const
+    {
+        return _char;
+    }
+
+    // Iterators compare by the position of the wrapped iterator only.
+    bool operator == (const basic_utf16_in_iterator &rhs_) const
+    {
+        return _it == rhs_._it;
+    }
+
+    bool operator != (const basic_utf16_in_iterator &rhs_) const
+    {
+        return _it != rhs_._it;
+    }
+
+    basic_utf16_in_iterator &operator ++ ()
+    {
+        next ();
+        return *this;
+    }
+
+    basic_utf16_in_iterator operator ++ (int)
+    {
+        basic_utf16_in_iterator temp_ = *this;
+
+        next ();
+        return temp_;
+    }
+
+private:
+    char_iterator _it;
+    char_type _char;
+
+    // Decodes the unit (or surrogate pair) at _it into _char and leaves _it
+    // on the unit after it. The unit following a lead surrogate is not
+    // checked to be a trail surrogate.
+    void next ()
+    {
+        char_type ch_ = *_it & 0xffff;
+
+        // Lead (high) surrogate: combine it with the following unit.
+        if (ch_ >= 0xd800 && ch_ <= 0xdbff)
+        {
+            const char_type surrogate_ = *++_it & 0xffff;
+
+            ch_ = (((ch_ - 0xd800) << 10) | (surrogate_ - 0xdc00)) + 0x10000;
+        }
+
+        ++_it;
+        _char = ch_;
+    }
+};
+
+// Input iterator adaptor that re-encodes the code points read through
+// char_iterator as UTF-16 and hands them out one unit (wchar_t) at a time;
+// code points above 0xffff are emitted as a surrogate pair.
+//
+// The code point at the wrapped iterator is fetched and encoded eagerly, so
+// construction from an iterator dereferences it straight away. Equality only
+// looks at the wrapped iterator, not at the unit position inside the
+// current code point.
+template<typename char_iterator>
+class basic_utf16_out_iterator :
+    public std::iterator<std::input_iterator_tag, wchar_t>
+{
+public:
+    basic_utf16_out_iterator () :
+        _count (0),
+        _index (0)
+    {
+    }
+
+    explicit basic_utf16_out_iterator (const char_iterator& it_) :
+        _it (it_),
+        _count (0),
+        _index (0)
+    {
+        next ();
+    }
+
+    // Current unit of the encoded code point.
+    wchar_t operator * () const
+    {
+        return _chars[_index];
+    }
+
+    bool operator == (const basic_utf16_out_iterator &rhs_) const
+    {
+        return _it == rhs_._it;
+    }
+
+    bool operator != (const basic_utf16_out_iterator &rhs_) const
+    {
+        return _it != rhs_._it;
+    }
+
+    // Steps to the next unit, encoding the next code point once the units
+    // of the current one are used up.
+    basic_utf16_out_iterator &operator ++ ()
+    {
+        ++_index;
+
+        if (_index >= _count)
+        {
+            next ();
+        }
+
+        return *this;
+    }
+
+    basic_utf16_out_iterator operator ++ (int)
+    {
+        basic_utf16_out_iterator temp_ = *this;
+
+        ++_index;
+
+        if (_index >= _count)
+        {
+            next ();
+        }
+
+        return temp_;
+    }
+
+private:
+    char_iterator _it;
+    wchar_t _chars[2];
+    unsigned char _count;
+    unsigned char _index;
+
+    // Reads the code point at _it, stores its UTF-16 encoding in _chars
+    // (_count units) and moves _it on.
+    void next ()
+    {
+        const std::size_t ch_ = *_it;
+
+        _count = len (ch_);
+        _index = 0;
+
+        switch (_count)
+        {
+        case 1:
+            _chars[0] = static_cast<wchar_t>(ch_);
+            break;
+        case 2:
+            // Lead surrogate: 0xd800 + ((ch_ - 0x10000) >> 10), which is in
+            // the 0xd800-0xdbff range that basic_utf16_in_iterator tests for.
+            _chars[0] = static_cast<wchar_t>((ch_ >> 10) + 0xd800u -
+                (0x10000 >> 10));
+            // Trail surrogate: 0xdc00 + low ten bits.
+            _chars[1] = static_cast<wchar_t>((ch_ & 0x3ff) + 0xdc00u);
+            break;
+        }
+
+        ++_it;
+    }
+
+    // Number of UTF-16 units needed for ch_.
+    char len (const std::size_t ch_) const
+    {
+        return ch_ > 0xffff ? 2 : 1;
+    }
+};
+}
+
+#endif