From f3d3389f54a10fe3be3d841b6a164306841b4540 Mon Sep 17 00:00:00 2001
From: Markus Hauschild <hauschildm@gmx.net>
Date: Fri, 31 May 2013 21:35:44 +0200
Subject: [PATCH] Fix typo in grammar.

Add lexertl.
---
 grammar/grammar.y                             |    5 +-
 inc/lexertl/bool.hpp                          |   22 +
 inc/lexertl/char_traits.hpp                   |   50 +
 inc/lexertl/compile_assert.hpp                |   24 +
 inc/lexertl/containers/bitvector.hpp          |  228 ++
 inc/lexertl/containers/ptr_list.hpp           |   69 +
 inc/lexertl/containers/ptr_map.hpp            |   72 +
 inc/lexertl/containers/ptr_stack.hpp          |   69 +
 inc/lexertl/containers/ptr_vector.hpp         |  106 +
 inc/lexertl/debug.hpp                         |  353 +++
 inc/lexertl/enums.hpp                         |   25 +
 inc/lexertl/generate_cpp.hpp                  | 1122 ++++++++
 inc/lexertl/generator.hpp                     |  829 ++++++
 inc/lexertl/internals.hpp                     |   80 +
 inc/lexertl/is_same.hpp                       |   29 +
 inc/lexertl/licence_1_0.txt                   |   24 +
 inc/lexertl/lookup.hpp                        |  477 ++++
 inc/lexertl/match_results.hpp                 |  150 ++
 inc/lexertl/memory_file.hpp                   |  112 +
 inc/lexertl/old/fast_filebuf.hpp              |   45 +
 inc/lexertl/old/string_token.hpp              |  561 ++++
 inc/lexertl/parser/parser.hpp                 | 1076 ++++++++
 inc/lexertl/parser/tokeniser/re_token.hpp     |  100 +
 inc/lexertl/parser/tokeniser/re_tokeniser.hpp |  829 ++++++
 .../parser/tokeniser/re_tokeniser_helper.hpp  | 2351 +++++++++++++++++
 .../parser/tokeniser/re_tokeniser_state.hpp   |  115 +
 inc/lexertl/parser/tree/end_node.hpp          |  112 +
 inc/lexertl/parser/tree/iteration_node.hpp    |  103 +
 inc/lexertl/parser/tree/leaf_node.hpp         |  114 +
 inc/lexertl/parser/tree/node.hpp              |  241 ++
 inc/lexertl/parser/tree/selection_node.hpp    |  106 +
 inc/lexertl/parser/tree/sequence_node.hpp     |  126 +
 inc/lexertl/partition/charset.hpp             |   73 +
 inc/lexertl/partition/equivset.hpp            |  134 +
 inc/lexertl/rules.hpp                         |  743 ++++++
 inc/lexertl/runtime_error.hpp                 |   23 +
 inc/lexertl/serialise.hpp                     |   28 +
 inc/lexertl/size_t.hpp                        |   12 +
 inc/lexertl/sm_traits.hpp                     |   44 +
 inc/lexertl/state_machine.hpp                 |  525 ++++
 inc/lexertl/stream_shared_iterator.hpp        |  350 +++
 inc/lexertl/string_token.hpp                  |  421 +++
 inc/lexertl/utf_iterators.hpp                 |  380 +++
 src/test.cpp                                  |    9 +
 44 files changed, 12465 insertions(+), 2 deletions(-)
 create mode 100644 inc/lexertl/bool.hpp
 create mode 100644 inc/lexertl/char_traits.hpp
 create mode 100644 inc/lexertl/compile_assert.hpp
 create mode 100644 inc/lexertl/containers/bitvector.hpp
 create mode 100644 inc/lexertl/containers/ptr_list.hpp
 create mode 100644 inc/lexertl/containers/ptr_map.hpp
 create mode 100644 inc/lexertl/containers/ptr_stack.hpp
 create mode 100644 inc/lexertl/containers/ptr_vector.hpp
 create mode 100644 inc/lexertl/debug.hpp
 create mode 100644 inc/lexertl/enums.hpp
 create mode 100644 inc/lexertl/generate_cpp.hpp
 create mode 100644 inc/lexertl/generator.hpp
 create mode 100644 inc/lexertl/internals.hpp
 create mode 100644 inc/lexertl/is_same.hpp
 create mode 100644 inc/lexertl/licence_1_0.txt
 create mode 100644 inc/lexertl/lookup.hpp
 create mode 100644 inc/lexertl/match_results.hpp
 create mode 100644 inc/lexertl/memory_file.hpp
 create mode 100644 inc/lexertl/old/fast_filebuf.hpp
 create mode 100644 inc/lexertl/old/string_token.hpp
 create mode 100644 inc/lexertl/parser/parser.hpp
 create mode 100644 inc/lexertl/parser/tokeniser/re_token.hpp
 create mode 100644 inc/lexertl/parser/tokeniser/re_tokeniser.hpp
 create mode 100644 inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp
 create mode 100644 inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp
 create mode 100644 inc/lexertl/parser/tree/end_node.hpp
 create mode 100644 inc/lexertl/parser/tree/iteration_node.hpp
 create mode 100644 inc/lexertl/parser/tree/leaf_node.hpp
 create mode 100644 inc/lexertl/parser/tree/node.hpp
 create mode 100644 inc/lexertl/parser/tree/selection_node.hpp
 create mode 100644 inc/lexertl/parser/tree/sequence_node.hpp
 create mode 100644 inc/lexertl/partition/charset.hpp
 create mode 100644 inc/lexertl/partition/equivset.hpp
 create mode 100644 inc/lexertl/rules.hpp
 create mode 100644 inc/lexertl/runtime_error.hpp
 create mode 100644 inc/lexertl/serialise.hpp
 create mode 100644 inc/lexertl/size_t.hpp
 create mode 100644 inc/lexertl/sm_traits.hpp
 create mode 100644 inc/lexertl/state_machine.hpp
 create mode 100644 inc/lexertl/stream_shared_iterator.hpp
 create mode 100644 inc/lexertl/string_token.hpp
 create mode 100644 inc/lexertl/utf_iterators.hpp

diff --git a/grammar/grammar.y b/grammar/grammar.y
index e067bb2..6264236 100644
--- a/grammar/grammar.y
+++ b/grammar/grammar.y
@@ -1,5 +1,6 @@
 %include {
 
+#include <cstdio>
 #include <iostream>
 #include <string>
 #include <assert.h>
@@ -20,8 +21,8 @@
 
 programm(A) ::= fundefs(B).							{ A = B; }
 
-fundefs(A) ::= .								{ A = 0: }
-fundefs(A) ::= fundefs fundef(B).						{ A = A + B: }
+fundefs(A) ::= .								{ A = 0; }
+fundefs(A) ::= fundefs fundef(B).						{ A = A + B; }
 
 fundef(A) ::= type(T) T_IDENTIFIER(ID) params(P) T_BEGIN statements(S) T_END.	{ A = T + ID + P + S; }
 
diff --git a/inc/lexertl/bool.hpp b/inc/lexertl/bool.hpp
new file mode 100644
index 0000000..2965d3d
--- /dev/null
+++ b/inc/lexertl/bool.hpp
@@ -0,0 +1,22 @@
+// bool.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_BOOL_H
+#define LEXERTL_BOOL_H
+
+namespace lexertl
+{
+// Named template param for compiler compatibility
+template<bool b>
+struct bool_
+{
+};
+
+typedef bool_<true> true_;
+typedef bool_<false> false_;
+}
+
+#endif
diff --git a/inc/lexertl/char_traits.hpp b/inc/lexertl/char_traits.hpp
new file mode 100644
index 0000000..64315c7
--- /dev/null
+++ b/inc/lexertl/char_traits.hpp
@@ -0,0 +1,50 @@
+// char_traits.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_CHAR_TRAITS_H
+#define LEXERTL_CHAR_TRAITS_H
+
+#include <cstddef>
+
+namespace lexertl
+{
+template<typename ch_type>
+struct basic_char_traits
+{
+    typedef ch_type char_type;
+    typedef ch_type index_type;
+
+    static index_type index (const char_type ch)
+    {
+       return ch;
+    }
+
+    static index_type max_val ()
+    {
+        return sizeof(char_type) > 2 ? 0x10ffff :
+            static_cast<index_type>(~0);
+    }
+};
+
+template<>
+struct basic_char_traits<char>
+{
+    typedef char char_type;
+    typedef unsigned char index_type;
+
+    static index_type index (const char ch)
+    {
+        return static_cast<index_type>(ch);
+    }
+
+    static index_type max_val ()
+    {
+        return static_cast<index_type>(~0);
+    }
+};
+}
+
+#endif
diff --git a/inc/lexertl/compile_assert.hpp b/inc/lexertl/compile_assert.hpp
new file mode 100644
index 0000000..a36a668
--- /dev/null
+++ b/inc/lexertl/compile_assert.hpp
@@ -0,0 +1,24 @@
+// compile_assert.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_COMPILE_ASSERT_H
+#define LEXERTL_COMPILE_ASSERT_H
+
+namespace lexertl
+{
+// Named template param for compiler compatibility
+template<bool b>
+struct compile_assert;
+
+// enum for compiler compatibility
+template<>
+struct compile_assert<true>
+{
+    enum {value = 1};
+};
+}
+
+#endif
diff --git a/inc/lexertl/containers/bitvector.hpp b/inc/lexertl/containers/bitvector.hpp
new file mode 100644
index 0000000..00c2fd4
--- /dev/null
+++ b/inc/lexertl/containers/bitvector.hpp
@@ -0,0 +1,228 @@
+// bitvector.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_BITVECTOR_HPP
+#define LEXERTL_BITVECTOR_HPP
+
+#include <vector>
+
+namespace lexertl
+{
+template<typename T>
+class basic_bitvector
+{
+public:
+    template<typename Ty>
+    class reference
+    {
+    public:
+        reference (Ty &block_, const std::size_t mask_) :
+            _block (block_),
+            _mask (mask_)
+        {
+        }
+
+        operator bool () const
+        {
+            return (_block & _mask) != 0;
+        }
+
+        reference<Ty> &operator = (const bool bit_)
+        {
+            if (bit_)
+            {
+                _block |= _mask;
+            }
+            else
+            {
+                _block &= ~_mask;
+            }
+
+            return *this;
+        }
+
+        // Copies the bit value that rhs_ refers to into the bit this proxy
+        // refers to, then returns *this. Without the return statement the
+        // function would flow off its end, which is undefined behaviour.
+        reference<Ty> &operator = (reference<Ty> &rhs_)
+        {
+            // Read the source bit first, then reuse the bool overload so the
+            // set/clear logic lives in one place.
+            const bool bit_ = rhs_;
+
+            return *this = bit_;
+        }
+
+    private:
+        Ty &_block;
+        const std::size_t _mask;
+    };
+
+    basic_bitvector (const std::size_t size_) :
+        _vec (block (size_) + (bit (size_) ? 1 : 0), 0)
+    {
+    }
+
+    basic_bitvector (const basic_bitvector &rhs_) :
+        _vec (rhs_._vec)
+    {
+    }
+
+    basic_bitvector &operator = (const basic_bitvector &rhs_)
+    {
+        if (&rhs_ != this)
+        {
+            _vec = rhs_._vec;
+        }
+
+        return *this;
+    }
+
+    bool operator [] (const std::size_t index_) const // Is bit index_ set?
+    {   // Mask built as T: int "1 << n" overflows when T is wider than int.
+        return (_vec[block (index_)] & (T (1) << bit (index_))) != 0;
+    }
+
+    reference<T> operator [] (const std::size_t index_) // Writable bit proxy.
+    {   // Mask built as T: int "1 << n" overflows when T is wider than int.
+        return reference<T> (_vec[block (index_)], T (1) << bit (index_));
+    }
+
+    // ORs rhs_ into this vector block by block; when the two vectors hold a
+    // different number of blocks only the common leading blocks are combined.
+    basic_bitvector<T> &operator |= (const basic_bitvector<T> &rhs_)
+    {
+        const t_vector &other_ = rhs_._vec;
+        std::size_t idx_ = 0;
+
+        while (idx_ < _vec.size () && idx_ < other_.size ())
+        {
+            _vec[idx_] |= other_[idx_];
+            ++idx_;
+        }
+
+        return *this;
+    }
+
+    // ANDs rhs_ into this vector block by block; when the two vectors hold a
+    // different number of blocks only the common leading blocks are combined.
+    basic_bitvector<T> &operator &= (const basic_bitvector<T> &rhs_)
+    {
+        const t_vector &other_ = rhs_._vec;
+        std::size_t idx_ = 0;
+
+        while (idx_ < _vec.size () && idx_ < other_.size ())
+        {
+            _vec[idx_] &= other_[idx_];
+            ++idx_;
+        }
+
+        return *this;
+    }
+
+    // Resets every block to zero, i.e. clears all bits; the size is unchanged.
+    void clear ()
+    {
+        const std::size_t blocks_ = _vec.size ();
+
+        for (std::size_t idx_ = 0; idx_ < blocks_; ++idx_)
+        {
+            _vec[idx_] = 0;
+        }
+    }
+
+    // Reports whether at least one bit is set, i.e. whether any block of
+    // the underlying vector is non-zero.
+    bool any () const
+    {
+        for (std::size_t idx_ = 0, blocks_ = _vec.size (); idx_ < blocks_;
+            ++idx_)
+        {
+            if (_vec[idx_]) return true;
+        }
+
+        return false;
+    }
+
+    // Flips every bit by replacing each block with its bitwise complement.
+    void negate ()
+    {
+        const std::size_t blocks_ = _vec.size ();
+
+        for (std::size_t idx_ = 0; idx_ < blocks_; ++idx_)
+        {
+            _vec[idx_] = ~_vec[idx_];
+        }
+    }
+
+    std::size_t find_first () const
+    {
+        return find_next (npos ());
+    }
+
+    // Returns the index of the first set bit strictly after index_, or
+    // npos () when no later bit is set. Passing npos () starts at bit 0.
+    //
+    // The probe mask is built in T because an int "1 << n" overflows when T
+    // is wider than int. An index_ that lies beyond the last block yields
+    // npos () instead of forming an iterator past the end of _vec.
+    std::size_t find_next (const std::size_t index_) const
+    {
+        const std::size_t bits_per_block_ = sizeof(T) * 8;
+        const std::size_t start_ = index_ == npos () ? 0 : index_ + 1;
+        std::size_t bit_ = bit (start_);
+
+        // The size check also covers a start_ beyond the last block.
+        for (std::size_t i_ = block (start_); i_ < _vec.size (); ++i_)
+        {
+            // Ignore the bits below the starting position in this block.
+            const T bits_ = static_cast<T>
+                (_vec[i_] & (static_cast<T>(~0) << bit_));
+
+            if (bits_ != 0)
+            {
+                std::size_t j_ = bit_;
+
+                // bits_ is non-zero, so a set bit at or above bit_ exists.
+                while ((bits_ & (static_cast<T>(1) << j_)) == 0)
+                {
+                    ++j_;
+                }
+
+                return i_ * bits_per_block_ + j_;
+            }
+
+            // Every later block is scanned from its first bit.
+            bit_ = 0;
+        }
+
+        return npos ();
+    }
+
+    std::size_t npos () const
+    {
+        return static_cast<std::size_t>(~0);
+    }
+
+private:
+    typedef std::vector<T> t_vector;
+
+    t_vector _vec;
+
+    std::size_t block (const std::size_t index_) const
+    {
+        return index_ / (sizeof(T) * 8);
+    }
+
+    std::size_t bit (const std::size_t index_) const
+    {
+        return index_ % (sizeof(T) * 8);
+    }
+};
+}
+
+#endif
diff --git a/inc/lexertl/containers/ptr_list.hpp b/inc/lexertl/containers/ptr_list.hpp
new file mode 100644
index 0000000..53630e6
--- /dev/null
+++ b/inc/lexertl/containers/ptr_list.hpp
@@ -0,0 +1,69 @@
+// ptr_list.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_LIST_HPP
+#define LEXERTL_PTR_LIST_HPP
+
+#include <list>
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename ptr_type>
+class ptr_list
+{
+public:
+    typedef std::list<ptr_type *> list;
+
+    ptr_list () :
+        _list ()
+    {
+    }
+
+    ~ptr_list ()
+    {
+        clear ();
+    }
+
+    list *operator -> ()
+    {
+        return &_list;
+    }
+
+    const list *operator -> () const
+    {
+        return &_list;
+    }
+
+    list &operator * ()
+    {
+        return _list;
+    }
+
+    const list &operator * () const
+    {
+        return _list;
+    }
+
+    void clear ()
+    {
+        while (!_list.empty ())
+        {
+            delete _list.front ();
+            _list.pop_front ();
+        }
+    }
+
+private:
+    list _list;
+
+    ptr_list (const ptr_list &); // No copy construction.
+    ptr_list &operator = (const ptr_list &); // No assignment.
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/containers/ptr_map.hpp b/inc/lexertl/containers/ptr_map.hpp
new file mode 100644
index 0000000..28a7aa4
--- /dev/null
+++ b/inc/lexertl/containers/ptr_map.hpp
@@ -0,0 +1,72 @@
+// ptr_map.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_MAP_HPP
+#define LEXERTL_PTR_MAP_HPP
+
+#include <map>
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename key_type, typename ptr_type>
+class ptr_map
+{
+public:
+    typedef std::map<key_type, ptr_type *> map;
+    typedef std::pair<key_type, ptr_type *> pair;
+    typedef std::pair<typename map::iterator, bool> iter_pair;
+
+    ptr_map ()
+    {
+    }
+
+    ~ptr_map ()
+    {
+        clear ();
+    }
+
+    map *operator -> ()
+    {
+        return &_map;
+    }
+
+    const map *operator -> () const
+    {
+        return &_map;
+    }
+
+    map &operator * ()
+    {
+        return _map;
+    }
+
+    const map &operator * () const
+    {
+        return _map;
+    }
+
+    void clear ()
+    {
+        for (typename map::iterator iter_ = _map.begin (), end_ = _map.end ();
+            iter_ != end_; ++iter_)
+        {
+            delete iter_->second;
+        }
+
+        _map.clear ();
+    }
+
+private:
+    map _map;
+
+    ptr_map (const ptr_map &); // No copy construction.
+    ptr_map &operator = (const ptr_map &); // No assignment.
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/containers/ptr_stack.hpp b/inc/lexertl/containers/ptr_stack.hpp
new file mode 100644
index 0000000..291067f
--- /dev/null
+++ b/inc/lexertl/containers/ptr_stack.hpp
@@ -0,0 +1,69 @@
+// ptr_stack.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_STACK_HPP
+#define LEXERTL_PTR_STACK_HPP
+
+#include <stack>
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename ptr_type>
+class ptr_stack
+{
+public:
+    typedef std::stack<ptr_type *> stack;
+
+    ptr_stack () :
+        _stack ()
+    {
+    }
+
+    ~ptr_stack ()
+    {
+        clear ();
+    }
+
+    stack *operator -> ()
+    {
+        return &_stack;
+    }
+
+    const stack *operator -> () const
+    {
+        return &_stack;
+    }
+
+    stack &operator * ()
+    {
+        return _stack;
+    }
+
+    const stack &operator * () const
+    {
+        return _stack;
+    }
+
+    void clear ()
+    {
+        while (!_stack.empty ())
+        {
+            delete _stack.top ();
+            _stack.pop ();
+        }
+    }
+
+private:
+    stack _stack;
+
+    ptr_stack (const ptr_stack &); // No copy construction.
+    ptr_stack &operator = (const ptr_stack &); // No assignment.
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/containers/ptr_vector.hpp b/inc/lexertl/containers/ptr_vector.hpp
new file mode 100644
index 0000000..0108b83
--- /dev/null
+++ b/inc/lexertl/containers/ptr_vector.hpp
@@ -0,0 +1,106 @@
+// ptr_vector.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PTR_VECTOR_HPP
+#define LEXERTL_PTR_VECTOR_HPP
+
+#include "../size_t.hpp"
+#include <vector>
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename ptr_type>
+class ptr_vector
+{
+public:
+    typedef std::vector<ptr_type *> vector;
+
+    ptr_vector () :
+        _vector ()
+    {
+    }
+
+    ~ptr_vector ()
+    {
+        clear ();
+    }
+
+    vector *operator -> ()
+    {
+        return &_vector;
+    }
+
+    const vector *operator -> () const
+    {
+        return &_vector;
+    }
+
+    vector &operator * ()
+    {
+        return _vector;
+    }
+
+    const vector &operator * () const
+    {
+        return _vector;
+    }
+
+    ptr_type * &operator [] (const std::size_t index_)
+    {
+        return _vector[index_];
+    }
+
+    ptr_type * const &operator [] (const std::size_t index_) const
+    {
+        return _vector[index_];
+    }
+
+    // Deep equality: both vectors must hold the same number of pointers and
+    // the pointed-to objects must compare equal pairwise, in order. Every
+    // pointer is dereferenced, so the entries are expected to be non-null.
+    bool operator == (const ptr_vector &rhs_) const
+    {
+        const std::size_t count_ = _vector.size ();
+
+        if (count_ != rhs_._vector.size ()) return false;
+
+        for (std::size_t idx_ = 0; idx_ < count_; ++idx_)
+        {
+            const bool same_ = *_vector[idx_] == *rhs_._vector[idx_];
+
+            if (!same_) return false;
+        }
+
+        return true;
+    }
+
+    // Deletes every owned object and then empties the underlying vector.
+    // Null entries are harmless because deleting a null pointer is a no-op.
+    void clear ()
+    {
+        const std::size_t count_ = _vector.size ();
+
+        for (std::size_t idx_ = 0; idx_ < count_; ++idx_)
+        {
+            ptr_type *owned_ = _vector[idx_];
+
+            delete owned_;
+        }
+
+        _vector.clear ();
+    }
+
+private:
+    vector _vector;
+
+    ptr_vector (const ptr_vector &); // No copy construction.
+    ptr_vector &operator = (const ptr_vector &); // No assignment.
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/debug.hpp b/inc/lexertl/debug.hpp
new file mode 100644
index 0000000..85a61a0
--- /dev/null
+++ b/inc/lexertl/debug.hpp
@@ -0,0 +1,353 @@
+// debug.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_DEBUG_HPP
+#define LEXERTL_DEBUG_HPP
+
+#include <map>
+#include <ostream>
+#include "rules.hpp"
+#include "size_t.hpp"
+#include "state_machine.hpp"
+#include "string_token.hpp"
+#include <vector>
+
+namespace lexertl
+{
+template<typename sm, typename char_type, typename id_type = std::size_t,
+    bool is_dfa = true>
+class basic_debug
+{
+public:
+    typedef lexertl::basic_char_state_machine<char_type, id_type, is_dfa>
+        char_state_machine;
+    typedef std::basic_ostream<char_type> ostream;
+    typedef lexertl::basic_rules<char_type, id_type> rules;
+    typedef std::basic_string<char_type> string;
+
+    static void dump (const sm &sm_, rules &rules_, ostream &stream_)
+    {
+        char_state_machine csm_;
+
+        sm_to_csm (sm_, csm_);
+        dump (csm_, rules_, stream_);
+    }
+
+    static void dump (const sm &sm_, ostream &stream_)
+    {
+        char_state_machine csm_;
+
+        sm_to_csm (sm_, csm_);
+        dump (csm_, stream_);
+    }
+
+    static void dump (const char_state_machine &csm_, rules &rules_,
+        ostream &stream_)
+    {
+        for (std::size_t dfa_ = 0, dfas_ = csm_.size (); dfa_ < dfas_; ++dfa_)
+        {
+            lexer_state (stream_);
+            stream_ << rules_.state (dfa_) << std::endl << std::endl;
+
+            dump_ex (csm_._sm_deque[dfa_], stream_);
+        }
+    }
+
+    static void dump (const char_state_machine &csm_, ostream &stream_)
+    {
+        for (std::size_t dfa_ = 0, dfas_ = csm_.size (); dfa_ < dfas_; ++dfa_)
+        {
+            lexer_state (stream_);
+            stream_ << dfa_ << std::endl << std::endl;
+
+            dump_ex (csm_._sm_deque[dfa_], stream_);
+        }
+    }
+
+protected:
+    typedef typename char_state_machine::state dfa_state;
+    typedef typename dfa_state::string_token string_token;
+    typedef std::basic_stringstream<char_type> stringstream;
+
+    static void sm_to_csm (const sm &sm_, char_state_machine &csm_)
+    {
+        const detail::basic_internals<id_type> &internals_ = sm_.data ();
+        const std::size_t dfas_ = internals_._dfa->size ();
+
+        for (id_type i_ = 0; i_ < dfas_; ++i_)
+        {
+            if (internals_._dfa_alphabet[i_] == 0) continue;
+
+            const std::size_t alphabet_ = internals_._dfa_alphabet[i_] -
+                transitions_index;
+            typename char_state_machine::string_token_vector token_vector_
+                (alphabet_, string_token ());
+            id_type *ptr_ = &internals_._lookup[i_]->front ();
+
+            for (std::size_t c_ = 0; c_ < 256; ++c_, ++ptr_)
+            {
+                if (*ptr_ >= transitions_index)
+                {
+                    string_token &token_ = token_vector_
+                        [*ptr_ - transitions_index];
+
+                    token_.insert (typename string_token::range
+                        (typename string_token::index_type (c_),
+                        typename string_token::index_type (c_)));
+                }
+            }
+
+            csm_.append (token_vector_, internals_, i_);
+        }
+    }
+
+    // Dumps every state of dfa_ to stream_: end data, BOL/EOL and transitions.
+    static void dump_ex (const typename char_state_machine::dfa &dfa_,
+        ostream &stream_)
+    {
+        const std::size_t states_ = dfa_._states.size ();
+        const id_type bol_index_ = dfa_._bol_index;
+        typename dfa_state::id_type_string_token_map::const_iterator iter_;
+        typename dfa_state::id_type_string_token_map::const_iterator end_;
+
+        for (std::size_t i_ = 0; i_ < states_; ++i_)
+        {
+            const dfa_state &state_ = dfa_._states[i_];
+
+            state (stream_);
+            stream_ << i_ << std::endl;
+
+            if (state_._end_state)
+            {
+                end_state (stream_);
+
+                if (state_._push_pop_dfa == dfa_state::push_dfa)
+                {
+                    push (stream_);
+                    stream_ << state_._push_dfa;
+                }
+                else if (state_._push_pop_dfa == dfa_state::pop_dfa)
+                {
+                    pop (stream_);
+                }
+
+                id (stream_);
+                stream_ << static_cast<std::size_t>(state_._id);
+                user_id (stream_);
+                stream_ << static_cast<std::size_t>(state_._user_id);
+                dfa (stream_);
+                stream_ << static_cast<std::size_t>(state_._next_dfa);
+                stream_ << std::endl;
+            }
+
+            if (i_ == 0 && bol_index_ != char_state_machine::npos ())
+            {
+                bol (stream_);
+                stream_ << static_cast<std::size_t>(bol_index_) << std::endl;
+            }
+
+            if (state_._eol_index != char_state_machine::npos ())
+            {
+                eol (stream_);
+                stream_ << static_cast<std::size_t>(state_._eol_index) <<
+                    std::endl;
+            }
+
+            iter_ = state_._transitions.begin ();
+            end_ = state_._transitions.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                string_token token_ = iter_->second;
+
+                open_bracket (stream_);
+
+                if (!iter_->second.any () && iter_->second.negatable ())
+                {
+                    token_.negate ();
+                    negated (stream_);
+                }
+
+                string chars_;
+                typename string_token::range_vector::const_iterator
+                    ranges_iter_ = token_._ranges.begin ();
+                typename string_token::range_vector::const_iterator
+                    ranges_end_ = token_._ranges.end ();
+
+                for (; ranges_iter_ != ranges_end_; ++ranges_iter_)
+                {
+                    if (ranges_iter_->first == '^' ||
+                        ranges_iter_->first == ']')
+                    {
+                        stream_ << '\\';
+                    }
+
+                    chars_ = string_token::escape_char (ranges_iter_->first);
+
+                    if (ranges_iter_->first != ranges_iter_->second)
+                    {
+                        if (ranges_iter_->first + 1 < ranges_iter_->second)
+                        {
+                            chars_ += '-';
+                        }
+
+                        // Append the escape to chars_ rather than stream_:
+                        // chars_ is written later, after the range start.
+                        if (ranges_iter_->second == '^' ||
+                            ranges_iter_->second == ']')
+                        {
+                            chars_ += '\\';
+                        }
+                        chars_ += string_token::escape_char
+                            (ranges_iter_->second);
+                    }
+
+                    stream_ << chars_;
+                }
+
+                close_bracket (stream_);
+                stream_ << static_cast<std::size_t>(iter_->first) << std::endl;
+            }
+
+            stream_ << std::endl;
+        }
+    }
+
+    static void lexer_state (std::ostream &stream_)
+    {
+        stream_ << "Lexer state: ";
+    }
+
+    static void lexer_state (std::wostream &stream_)
+    {
+        stream_ << L"Lexer state: ";
+    }
+
+    static void state (std::ostream &stream_)
+    {
+        stream_ << "State: ";
+    }
+
+    static void state (std::wostream &stream_)
+    {
+        stream_ << L"State: ";
+    }
+
+    static void bol (std::ostream &stream_)
+    {
+        stream_ << "  BOL -> ";
+    }
+
+    static void bol (std::wostream &stream_)
+    {
+        stream_ << L"  BOL -> ";
+    }
+
+    static void eol (std::ostream &stream_)
+    {
+        stream_ << "  EOL -> ";
+    }
+
+    static void eol (std::wostream &stream_)
+    {
+        stream_ << L"  EOL -> ";
+    }
+
+    static void end_state (std::ostream &stream_)
+    {
+        stream_ << "  END STATE";
+    }
+
+    static void end_state (std::wostream &stream_)
+    {
+        stream_ << L"  END STATE";
+    }
+
+    static void id (std::ostream &stream_)
+    {
+        stream_ << ", Id = ";
+    }
+
+    static void id (std::wostream &stream_)
+    {
+        stream_ << L", Id = ";
+    }
+
+    static void push (std::ostream &stream_)
+    {
+        stream_ << ", PUSH ";
+    }
+
+    static void push (std::wostream &stream_)
+    {
+        stream_ << L", PUSH ";
+    }
+
+    static void pop (std::ostream &stream_)
+    {
+        stream_ << ", POP";
+    }
+
+    static void pop (std::wostream &stream_)
+    {
+        stream_ << L", POP";
+    }
+
+    static void user_id (std::ostream &stream_)
+    {
+        stream_ << ", User Id = ";
+    }
+
+    static void user_id (std::wostream &stream_)
+    {
+        stream_ << L", User Id = ";
+    }
+
+    static void open_bracket (std::ostream &stream_)
+    {
+        stream_ << "  [";
+    }
+
+    static void open_bracket (std::wostream &stream_)
+    {
+        stream_ << L"  [";
+    }
+
+    static void negated (std::ostream &stream_)
+    {
+        stream_ << "^";
+    }
+
+    static void negated (std::wostream &stream_)
+    {
+        stream_ << L"^";
+    }
+
+    static void close_bracket (std::ostream &stream_)
+    {
+        stream_ << "] -> ";
+    }
+
+    static void close_bracket (std::wostream &stream_)
+    {
+        stream_ << L"] -> ";
+    }
+
+    static void dfa (std::ostream &stream_)
+    {
+        stream_ << ", dfa = ";
+    }
+
+    static void dfa (std::wostream &stream_)
+    {
+        stream_ << L", dfa = ";
+    }
+};
+
+typedef basic_debug<basic_state_machine<char>, char> debug;
+typedef basic_debug<basic_state_machine<wchar_t>, wchar_t> wdebug;
+}
+
+#endif
diff --git a/inc/lexertl/enums.hpp b/inc/lexertl/enums.hpp
new file mode 100644
index 0000000..bec5ddc
--- /dev/null
+++ b/inc/lexertl/enums.hpp
@@ -0,0 +1,25 @@
+// enums.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_ENUMS_H
+#define LEXERTL_ENUMS_H
+
+namespace lexertl
+{
+    enum regex_flags {icase = 1, dot_not_newline = 2, skip_ws = 4,
+        match_zero_len = 8};
+    // DFA row columns: 0 = end state, 1 = id, 2 = user id, 3 = push dfa,
+    // 4 = next dfa, 5 = eol, 6 = dead state, 7 = first transition column
+    enum {end_state_index, id_index, user_id_index, push_dfa_index,
+        next_dfa_index, eol_index, dead_state_index, transitions_index};
+    // Rule feature flags (bit mask):
+    enum feature_flags {bol_bit = 1, eol_bit = 2, skip_bit = 4, again_bit = 8,
+        multi_state_bit = 16, recursive_bit = 32, advance_bit = 64};
+    // End state flags (bit mask held in the end state column):
+    enum {end_state_bit = 1, pop_dfa_bit = 2};
+}
+
+#endif
diff --git a/inc/lexertl/generate_cpp.hpp b/inc/lexertl/generate_cpp.hpp
new file mode 100644
index 0000000..1c0c330
--- /dev/null
+++ b/inc/lexertl/generate_cpp.hpp
@@ -0,0 +1,1122 @@
+// generate_cpp.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_GENERATE_CPP_HPP
+#define LEXERTL_GENERATE_CPP_HPP
+
+#include "bool.hpp"
+#include "enums.hpp"
+#include <sstream>
+#include "state_machine.hpp"
+
+namespace lexertl
+{
+class table_based_cpp
+{
+public:
+    // Writes to os_ a C++ function template called name_ that finds the next
+    // match using sm_'s tables; pointers_ selects void * rows over id_type.
+    template<typename char_type, typename id_type>
+    static void generate_cpp
+        (const std::string &name_,
+        const basic_state_machine<char_type, id_type> &sm_,
+        const bool pointers_, std::ostream &os_)
+    {
+        typedef basic_state_machine<char_type, id_type> sm;
+        typedef typename sm::internals internals;
+        const internals &internals_ = sm_.data ();
+        std::size_t additional_tabs_ = 0;
+
+        os_ << "template<typename iter_type, typename id_type>\n";
+        os_ << "void " << name_ << " (lexertl::";
+
+        if (internals_._features & recursive_bit)
+        {
+            os_ << "recursive_match_results";
+        }
+        else
+        {
+            os_ << "match_results";
+        }
+
+        os_ << "<iter_type, id_type> &results_)\n";
+        os_ << "{\n";
+        os_ << "    typedef lexertl::";
+
+        if (internals_._features & recursive_bit)
+        {
+            os_ << "recursive_match_results";
+        }
+        else
+        {
+            os_ << "match_results";
+        }
+
+        os_ << "<iter_type, id_type> results;\n";
+        os_ << "    typedef typename results::char_type char_type;\n";
+        os_ << "    typename results::iter_type end_token_ = results_.end;\n";
+
+        if (internals_._features & skip_bit)
+        {
+            os_ << "skip:\n";
+        }
+
+        os_ << "    typename results::iter_type curr_ = results_.end;\n\n";
+        os_ << "    results_.start = curr_;\n\n";
+
+        if (internals_._features & again_bit)
+        {
+            os_ << "again:\n";
+        }
+
+        os_ << "    if (curr_ == results_.eoi)\n";
+        os_ << "    {\n";
+        // We want a decimal number regardless of id_type or os_'s base.
+        os_ << "        results_.id = " << std::dec <<
+            static_cast<std::size_t>(internals_._eoi) << ";\n";
+        os_ << "        results_.user_id = results::npos ();\n";
+        os_ << "        return;\n";
+        os_ << "    }\n\n";
+
+        if (internals_._features & bol_bit)
+        {
+            os_ << "    bool bol_ = results_.bol;\n";
+        }
+
+        dump_tables (sm_, 1, pointers_, os_);
+        // dump_tables may leave os_ in hex; all numbers below are decimal.
+        os_ << std::dec;
+
+        if (internals_._dfa->size () > 1)
+        {
+            os_ << "    const id_type *lookup_ = lookups_[results_.state];\n";
+            os_ << "    const id_type dfa_alphabet_ = dfa_alphabets_"
+                "[results_.state];\n";
+            os_ << "    const ";
+
+            if (pointers_)
+            {
+                os_ << "void * const";
+            }
+            else
+            {
+                os_ << "id_type";
+            }
+
+            os_ << " *dfa_ = dfas_[results_.state];\n";
+        }
+
+        os_ << "    const ";
+
+        if (pointers_)
+        {
+            os_ << "void * const";
+        }
+        else
+        {
+            os_ << "id_type";
+        }
+
+        os_ << " *ptr_ = dfa_ + dfa_alphabet_;\n";
+        os_ << "    bool end_state_ = *ptr_ != 0;\n";
+
+        if (internals_._features & recursive_bit)
+        {
+            os_ << "    bool pop_ = (";
+
+            if (pointers_)
+            {
+                // Done this way for GCC:
+                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+            }
+
+            os_ << "*ptr_";
+
+            if (pointers_)
+            {
+                os_ << ')';
+            }
+
+            os_ <<" & " << pop_dfa_bit;
+
+            if (pointers_)
+            {
+                os_ << ')';
+            }
+
+            os_ << ") != 0;\n";
+        }
+
+        os_ << "    id_type id_ = ";
+
+        if (pointers_)
+        {
+            // Done this way for GCC:
+            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+        }
+
+        os_ << "*(ptr_ + " << id_index << ")";
+
+        if (pointers_)
+        {
+            os_ << "))";
+        }
+
+        os_ << ";\n";
+        os_ << "    id_type uid_ = ";
+
+        if (pointers_)
+        {
+            // Done this way for GCC:
+            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+        }
+
+        os_ << "*(ptr_ + " << user_id_index << ")";
+
+        if (pointers_)
+        {
+            os_ << "))";
+        }
+
+        os_ << ";\n";
+
+        if (internals_._features & recursive_bit)
+        {
+            os_ << "    id_type push_dfa_ = ";
+
+            if (pointers_)
+            {
+                // Done this way for GCC:
+                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+            }
+
+            os_ << "*(ptr_ + " << push_dfa_index << ")";
+
+            if (pointers_)
+            {
+                os_ << "))";
+            }
+
+            os_ << ";\n";
+        }
+
+        if (internals_._dfa->size () > 1)
+        {
+            os_ << "    id_type start_state_ = results_.state;\n";
+        }
+
+        if (internals_._features & bol_bit)
+        {
+            os_ << "    bool end_bol_ = bol_;\n";
+        }
+
+        if (internals_._features & eol_bit)
+        {
+            os_ << "    ";
+
+            if (pointers_)
+            {
+                os_ << "const void * const *";
+            }
+            else
+            {
+                os_ << "id_type ";
+            }
+
+            os_ << "EOL_state_ = 0;\n";
+        }
+
+        os_ << '\n';
+
+        if (internals_._features & bol_bit)
+        {
+            os_ << "    if (bol_)\n";
+            os_ << "    {\n";
+            os_ << "        const ";
+
+            if (pointers_)
+            {
+                os_ << "void *";
+            }
+            else
+            {
+                os_ << "id_type ";
+            }
+
+            os_ << "state_ = *dfa_;\n\n";
+            os_ << "        if (state_)\n";
+            os_ << "        {\n";
+            os_ << "            ptr_ = ";
+
+            if (pointers_)
+            {
+                os_ << "reinterpret_cast<void * const *>(state_);\n";
+            }
+            else
+            {
+                os_ << "&dfa_[state_ * dfa_alphabet_];\n";
+            }
+
+            os_ << "        }\n";
+            os_ << "    }\n\n";
+        }
+
+        os_ << "    while (curr_ != results_.eoi)\n";
+        os_ << "    {\n";
+
+        if (internals_._features & eol_bit)
+        {
+            os_ << "        EOL_state_ = ";
+
+            if (pointers_)
+            {
+                os_ << "reinterpret_cast<const void * const *>(";
+            }
+
+            os_ << "ptr_[" << eol_index << ']';
+
+            if (pointers_)
+            {
+                os_ << ')';
+            }
+
+            os_ << ";\n\n";
+            os_ << "        if (EOL_state_ && *curr_ == '\\n')\n";
+            os_ << "        {\n";
+            os_ << "            ptr_ = ";
+
+            if (pointers_)
+            {
+                os_ << "EOL_state_";
+            }
+            else
+            {
+                os_ << "&dfa_[EOL_state_ * dfa_alphabet_]";
+            }
+
+            os_ << ";\n";
+            os_ << "        }\n";
+            os_ << "        else\n";
+            os_ << "        {\n";
+            ++additional_tabs_;
+        }
+
+        output_char_loop (internals_._features, additional_tabs_, pointers_,
+            os_, bool_<(sizeof (typename sm::traits::input_char_type) > 1)> ());
+
+        if (internals_._features & eol_bit)
+        {
+            output_tabs (additional_tabs_, os_);
+            os_ << "    }\n";
+            --additional_tabs_;
+        }
+
+        os_ << '\n';
+        os_ << "        if (*ptr_)\n";
+        os_ << "        {\n";
+        os_ << "            end_state_ = true;\n";
+
+        if (internals_._features & recursive_bit)
+        {
+            os_ << "            pop_ = (";
+
+            if (pointers_)
+            {
+                // Done this way for GCC:
+                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+            }
+
+            os_ << "*ptr_";
+
+            if (pointers_)
+            {
+                os_ << ')';
+            }
+
+            os_ <<" & " << pop_dfa_bit;
+
+            if (pointers_)
+            {
+                os_ << ')';
+            }
+
+            os_ << ") != 0;\n";
+        }
+
+        os_ << "            id_ = ";
+
+        if (pointers_)
+        {
+            // Done this way for GCC:
+            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+        }
+
+        os_ << "*(ptr_ + " << id_index << ")";
+
+        if (pointers_)
+        {
+            os_ << "))";
+        }
+
+        os_ << ";\n";
+        os_ << "            uid_ = ";
+
+        if (pointers_)
+        {
+            // Done this way for GCC:
+            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+        }
+
+        os_ << "*(ptr_ + " << user_id_index << ")";
+
+        if (pointers_)
+        {
+            os_ << "))";
+        }
+
+        os_ << ";\n";
+
+        if (internals_._features & recursive_bit)
+        {
+            os_ << "            push_dfa_ = ";
+
+            if (pointers_)
+            {
+                // Done this way for GCC:
+                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+            }
+
+            os_ << "*(ptr_ + " << push_dfa_index << ')';
+
+            if (pointers_)
+            {
+                os_ << "))";
+            }
+
+            os_ << ";\n";
+        }
+
+        if (internals_._dfa->size () > 1)
+        {
+            os_ << "            start_state_ = ";
+
+            if (pointers_)
+            {
+                // Done this way for GCC:
+                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+            }
+
+            os_ << "*(ptr_ + " << next_dfa_index << ')';
+
+            if (pointers_)
+            {
+                os_ << "))";
+            }
+
+            os_ << ";\n";
+        }
+
+        if (internals_._features & bol_bit)
+        {
+            os_ << "            end_bol_ = bol_;\n";
+        }
+
+        os_ << "            end_token_ = curr_;\n";
+        os_ << "        }\n";
+        os_ << "    }\n\n";
+        output_quit (os_,
+            bool_<(sizeof (typename sm::traits::input_char_type) > 1)> ());
+
+        if (internals_._features & eol_bit)
+        {
+            os_ << "    if (curr_ == results_.eoi)\n";
+            os_ << "    {\n";
+            os_ << "        EOL_state_ = ";
+
+            if (pointers_)
+            {
+                os_ << "reinterpret_cast<const void * const *>(";
+            }
+
+            os_ << "ptr_[" <<  eol_index << ']';
+
+            if (pointers_)
+            {
+                os_ << ')';
+            }
+
+            os_ << ";\n\n";
+            os_ << "        if (EOL_state_)\n";
+            os_ << "        {\n";
+            os_ << "            ptr_ = ";
+
+            if (pointers_)
+            {
+                os_ << "EOL_state_";
+            }
+            else
+            {
+                os_ << "&dfa_[EOL_state_ * dfa_alphabet_]";
+            }
+
+            os_ << ";\n\n";
+            os_ << "            if (*ptr_)\n";
+            os_ << "            {\n";
+            os_ << "                end_state_ = true;\n";
+
+            if (internals_._features & recursive_bit)
+            {
+                os_ << "                pop_ = (";
+
+                if (pointers_)
+                {
+                    // Done this way for GCC:
+                    os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+                }
+
+                os_ << "*ptr_";
+
+                if (pointers_)
+                {
+                    os_ << ')';
+                }
+
+                os_ <<" & " << pop_dfa_bit;
+
+                if (pointers_)
+                {
+                    os_ << ')';
+                }
+
+                os_ << ") != 0;\n";
+            }
+
+            os_ << "                id_ = ";
+
+            if (pointers_)
+            {
+                // Done this way for GCC:
+                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+            }
+
+            os_ << "*(ptr_ + " << id_index << ")";
+
+            if (pointers_)
+            {
+                os_ << "))";
+            }
+
+            os_ << ";\n";
+            os_ << "                uid_ = ";
+
+            if (pointers_)
+            {
+                // Done this way for GCC:
+                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+            }
+
+            os_ << "*(ptr_ + " << user_id_index << ")";
+
+            if (pointers_)
+            {
+                os_ << "))";
+            }
+
+            os_ <<";\n";
+
+            if (internals_._features & recursive_bit)
+            {
+                os_ << "                push_dfa_ = ";
+
+                if (pointers_)
+                {
+                    // Done this way for GCC:
+                    os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+                }
+
+                os_ << "*(ptr_ + " << push_dfa_index << ')';
+
+                if (pointers_)
+                {
+                    os_ << "))";
+                }
+
+                os_ << ";\n";
+            }
+
+            if (internals_._dfa->size () > 1)
+            {
+                os_ << "                start_state_ = ";
+
+                if (pointers_)
+                {
+                    // Done this way for GCC:
+                    os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
+                }
+
+                os_ << "*(ptr_ + " << next_dfa_index << ')';
+
+                if (pointers_)
+                {
+                    os_ << "))";
+                }
+
+                os_ << ";\n";
+            }
+
+            if (internals_._features & bol_bit)
+            {
+                os_ << "                end_bol_ = bol_;\n";
+            }
+
+            os_ << "                end_token_ = curr_;\n";
+            os_ << "            }\n";
+            os_ << "        }\n";
+            os_ << "    }\n\n";
+        }
+
+        os_ << "    if (end_state_)\n";
+        os_ << "    {\n";
+        os_ << "        // Return longest match\n";
+
+        if (internals_._features & recursive_bit)
+        {
+            os_ << "        if (pop_)\n";
+            os_ << "        {\n";
+            os_ << "            start_state_ =  results_."
+                "stack.top ().first;\n";
+            os_ << "            results_.stack.pop ();\n";
+            os_ << "        }\n";
+            os_ << "        else if (push_dfa_ != results_.npos ())\n";
+            os_ << "        {\n";
+            os_ << "            results_.stack.push (typename results::"
+                "id_type_pair\n";
+            os_ << "                (push_dfa_, id_));\n";
+            os_ << "        }\n\n";
+        }
+
+        if (internals_._dfa->size () > 1)
+        {
+            os_ << "        results_.state = start_state_;\n";
+        }
+
+        if (internals_._features & bol_bit)
+        {
+            os_ << "        results_.bol = end_bol_;\n";
+        }
+
+        os_ << "        results_.end = end_token_;\n";
+
+        if (internals_._features & skip_bit)
+        {
+            os_ << "\n        if (id_ == results_.skip ()) goto skip;\n";
+        }
+
+        if (internals_._features & again_bit)
+        {
+            // We want a number regardless of id_type.
+            os_ << "\n        if (id_ == "
+                << static_cast<std::size_t>(internals_._eoi);
+
+            if (internals_._features & recursive_bit)
+            {
+                os_ << " || (pop_ && !results_.stack.empty () &&\n";
+                // We want a number regardless of id_type.
+                os_ << "            results_.stack.top ().second == "
+                    << static_cast<std::size_t>(internals_._eoi) << ')';
+            }
+
+            os_ << ")\n";
+            os_ << "        {\n";
+            os_ << "            curr_ = end_token_;\n";
+            os_ << "            goto again;\n";
+            os_ << "        }\n";
+        }
+
+        os_ << "    }\n";
+        os_ << "    else\n";
+        os_ << "    {\n";
+        os_ << "        // No match causes char to be skipped\n";
+        os_ << "        results_.end = end_token_;\n";
+
+        if (internals_._features & bol_bit)
+        {
+            os_ << "        results_.bol = *results_.end == '\\n';\n";
+        }
+
+        os_ << "        results_.start = results_.end;\n";
+        os_ << "        ++results_.end;\n";
+        os_ << "        id_ = results::npos ();\n";
+        os_ << "        uid_ = results::npos ();\n";
+        os_ << "    }\n\n";
+        os_ << "    results_.id = id_;\n";
+        os_ << "    results_.user_id = uid_;\n";
+        os_ << "}\n";
+    }
+
+    // Writes sm_'s lookup, dfa_alphabet and dfa tables to os_ as static const
+    // arrays indented by tabs_ levels; os_'s format flags are restored on exit.
+    template<typename char_type, typename id_type>
+    static void dump_tables
+        (const basic_state_machine<char_type, id_type> &sm_,
+        const std::size_t tabs_, const bool pointers_, std::ostream &os_)
+    {
+        const typename detail::basic_internals<id_type> &internals_ =
+            sm_.data ();
+        const std::size_t lookup_divisor_ = 8;
+        // Lookup is always 256 entries long now
+        const std::size_t lookup_quotient_ = 256 / lookup_divisor_;
+        const std::size_t dfas_ = internals_._lookup->size ();
+        std::size_t col_ = 1;
+        std::size_t row_ = 1;
+        const std::ios_base::fmtflags flags_ = os_.flags ();
+
+        output_tabs (tabs_, os_);
+        os_ << "static const id_type lookup";
+
+        if (dfas_ > 1)
+        {
+            os_ << "s_[][" << std::dec << 256;
+        }
+        else
+        {
+            os_ << "_[";
+        }
+
+        os_ << "] = \n";
+        output_tabs (tabs_ + 1, os_);
+
+        if (dfas_ > 1)
+        {
+            os_ << '{';
+        }
+
+        for (std::size_t l_ = 0; l_ < dfas_; ++l_)
+        {
+            const id_type *ptr_ = &internals_._lookup[l_]->front ();
+
+            // Values go out in hex; std::hex sticks, so decimals use std::dec.
+            os_ << "{0x" << std::hex << static_cast<std::size_t>(*ptr_++);
+
+            for (col_ = 1; col_ < lookup_divisor_; ++col_)
+            {
+                os_ << ", 0x" << std::hex << static_cast<std::size_t>(*ptr_++);
+            }
+
+            for (row_ = 1; row_ < lookup_quotient_; ++row_)
+            {
+                os_ << ",\n";
+                output_tabs (tabs_ + 1, os_);
+                os_ << "0x" << std::hex << static_cast<std::size_t>(*ptr_++);
+
+                for (col_ = 1; col_ < lookup_divisor_; ++col_)
+                {
+                    os_ << ", 0x" << std::hex <<
+                        static_cast<std::size_t>(*ptr_++);
+                }
+            }
+
+            os_ << '}';
+
+            if (l_ + 1 < dfas_)
+            {
+                os_ << ",\n";
+                output_tabs (tabs_ + 1, os_);
+            }
+        }
+
+        if (dfas_ > 1)
+        {
+            os_ << '}';
+        }
+
+        os_ << ";\n";
+        output_tabs (tabs_, os_);
+        os_ << "static const id_type dfa_alphabet";
+
+        if (dfas_ > 1)
+        {
+            os_ << "s_[" << std::dec << dfas_ << "] = {";
+        }
+        else
+        {
+            os_ << "_ = ";
+        }
+
+        os_ << "0x" << std::hex << static_cast<std::size_t>
+            (internals_._dfa_alphabet[0]);
+
+        for (col_ = 1; col_ < dfas_; ++col_)
+        {
+            os_ << ", 0x" << std::hex << static_cast<std::size_t>(internals_.
+                _dfa_alphabet[col_]);
+        }
+
+        if (dfas_ > 1)
+        {
+            os_ << '}';
+        }
+
+        os_ << ";\n";
+
+        // DFAs are usually different sizes, so dump separately
+        for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
+        {
+            const id_type dfa_alphabet_ = internals_._dfa_alphabet[dfa_];
+            const std::size_t rows_ = internals_._dfa[dfa_]->size () /
+                dfa_alphabet_;
+            const id_type *ptr_ = &internals_._dfa[dfa_]->front ();
+            std::string dfa_name_ = "dfa";
+
+            output_tabs (tabs_, os_);
+            os_ << "static const ";
+
+            if (pointers_)
+            {
+                os_ << "void *";
+            }
+            else
+            {
+                os_ << "id_type ";
+            }
+
+            os_ << dfa_name_;
+
+            if (dfas_ > 1)
+            {
+                std::ostringstream ss_;
+
+                ss_ << dfa_;
+                dfa_name_ += ss_.str ();
+                os_ << std::dec << dfa_;
+            }
+
+            dfa_name_ += '_';
+            os_ << "_[] = {";
+
+            for (std::size_t row_ = 0; row_ < rows_; ++row_)
+            {
+                dump_row (row_ == 0, ptr_, dfa_name_, dfa_alphabet_,
+                    pointers_, os_);
+
+                if (row_ + 1 < rows_)
+                {
+                    os_ << ",\n";
+                    output_tabs (tabs_ + 1, os_);
+                }
+            }
+
+            os_ << "};\n";
+        }
+
+        if (dfas_ > 1)
+        {
+            output_tabs (tabs_, os_);
+            os_ << "static const ";
+
+            if (pointers_)
+            {
+                os_ << "void * const";
+            }
+            else
+            {
+                os_ << "id_type";
+            }
+
+            os_ << " *dfas_[] = {dfa0_";
+
+            for (col_ = 1; col_ < dfas_; ++col_)
+            {
+                os_ << ", dfa" << std::dec << col_ << '_';
+            }
+
+            os_ << "};\n";
+        }
+
+        os_.flags (flags_);
+    }
+
+protected:
+    // Writes one DFA row (dfa_alphabet_ entries) to os_, advancing ptr_ past
+    // it. With pointers_, entries are emitted as casts or dfa_name_ offsets.
+    template<typename id_type>
+    static void dump_row (const bool first_, const id_type * &ptr_,
+        const std::string &dfa_name_, const id_type dfa_alphabet_,
+        const bool pointers_, std::ostream &os_)
+    {
+        if (pointers_)
+        {
+            bool zero_ = *ptr_ == 0;
+
+            if (first_)
+            {
+                // We want numbers regardless of id_type.
+                os_ << dfa_name_ << " + 0x" << std::hex <<
+                    static_cast<std::size_t>(*ptr_++) * dfa_alphabet_;
+            }
+            else if (!zero_)
+            {
+                os_ << "reinterpret_cast<const void *>(0x"
+                    // We want numbers regardless of id_type.
+                    << std::hex << static_cast<std::size_t>(*ptr_++) << ')';
+            }
+            else
+            {
+                // We want numbers regardless of id_type.
+                os_ << "0x" << std::hex << static_cast<std::size_t>(*ptr_++);
+            }
+
+            for (id_type id_index_ = id_index; id_index_ < transitions_index;
+                ++id_index_, ++ptr_)
+            {
+                os_ << ", ";
+                zero_ = *ptr_ == 0;
+
+                if (!zero_)
+                {
+                    os_ << "reinterpret_cast<const void *>(";
+                }
+
+                // We want numbers regardless of id_type.
+                os_ << "0x" << std::hex << static_cast<std::size_t>(*ptr_);
+
+                if (!zero_)
+                {
+                    os_  << ')';
+                }
+            }
+
+            for (id_type alphabet_ = transitions_index;
+                alphabet_ < dfa_alphabet_; ++alphabet_, ++ptr_)
+            {
+                os_ << ", ";
+
+                if (*ptr_ == 0)
+                {
+                    os_ << 0;
+                }
+                else
+                {
+                    // We want numbers regardless of id_type.
+                    os_ << dfa_name_ + " + 0x" << std::hex <<
+                        static_cast<std::size_t>(*ptr_) * dfa_alphabet_;
+                }
+            }
+        }
+        else
+        {
+            // We want numbers regardless of id_type.
+            os_ << "0x" << std::hex << static_cast<std::size_t>(*ptr_++);
+
+            for (id_type alphabet_ = 1; alphabet_ < dfa_alphabet_;
+                ++alphabet_, ++ptr_)
+            {
+                os_ << ", 0x" << std::hex << static_cast<std::size_t>(*ptr_);
+            }
+        }
+    }
+
+    static void output_tabs (const std::size_t tabs_, std::ostream &os_)
+    {   // Writes tabs_ indents of four spaces each to os_.
+        for (std::size_t i_ = 0; i_ < tabs_; ++i_)
+        {
+            os_ << "    ";
+        }
+    }
+
+    template<typename id_type>
+    static void output_char_loop (const id_type features_,
+        const std::size_t additional_tabs_, const bool pointers_,
+        std::ostream &os_, const false_ &)
+    {   // One-byte input chars: emits a single lookup step per character.
+        output_tabs (additional_tabs_, os_);
+        os_ << "        const typename results::char_type prev_char_ = "
+            "*curr_++;\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        const ";
+
+        if (pointers_)
+        {
+            os_ << "void * const *";
+        }
+        else
+        {
+            os_ << "id_type ";
+        }
+
+        os_ << "state_ = ";
+
+        if (pointers_)
+        {
+            os_ << "reinterpret_cast<void * const *>\n            ";
+            output_tabs (additional_tabs_, os_);
+            os_ << '(';
+        }
+
+        os_ << "ptr_[lookup_";
+
+        if (!pointers_)
+        {
+            os_ << "\n            ";
+            output_tabs (additional_tabs_, os_);
+        }
+
+        os_ << "[static_cast<typename results::index_type>";
+
+        if (pointers_)
+        {
+            os_ << "\n            ";
+            output_tabs (additional_tabs_, os_);
+        }
+
+        os_ << "(prev_char_)]]";
+
+        if (pointers_)
+        {
+            os_ << ')';
+        }
+
+        os_ << ";\n\n";
+
+        if (features_ & bol_bit)
+        {
+            output_tabs (additional_tabs_, os_);
+            os_ << "        bol_ = prev_char_ == '\\n';\n\n";
+        }
+
+        output_tabs (additional_tabs_, os_);
+        os_ << "        if (state_ == 0)\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        {\n";
+
+        if (features_ & eol_bit)
+        {
+            output_tabs (additional_tabs_, os_);
+            os_ << "            EOL_state_ = 0;\n";
+        }
+
+        output_tabs (additional_tabs_, os_);
+        os_ << "            break;\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        }\n\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        ptr_ = ";
+
+        if (pointers_)
+        {
+            os_ << "state_";
+        }
+        else
+        {
+            os_ << "&dfa_[state_ * dfa_alphabet_]";
+        }
+
+        os_ << ";\n";
+    }
+
+    template<typename id_type>
+    static void output_char_loop (const id_type features_,
+        const std::size_t additional_tabs_, const bool pointers_,
+        std::ostream &os_, const true_ &)
+    {   // Multi-byte input chars: emits a loop doing one lookup per byte.
+        output_tabs (additional_tabs_, os_);
+        os_ << "        const std::size_t bytes_ =\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "            sizeof (typename results::char_type) < 3 ?\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "            sizeof (typename results::char_type) : 3;\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        const std::size_t shift_[] = {0, 8, 16};\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        typename results::char_type prev_char_ = "
+            "*curr_++;\n\n";
+
+        if (features_ & bol_bit)
+        {
+            output_tabs (additional_tabs_, os_);
+            os_ << "        bol_ = prev_char_ == '\\n';\n\n";
+        }
+
+        output_tabs (additional_tabs_, os_);
+        os_ << "        for (std::size_t i_ = 0; i_ < bytes_; ++i_)\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        {\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "            const ";
+
+        if (pointers_)
+        {
+            os_ << "void * const *";
+        }
+        else
+        {
+            os_ << "id_type ";
+        }
+
+        os_ << "state_ = ";
+
+        if (pointers_)
+        {
+            os_ << "reinterpret_cast<void * const *>\n                ";
+            output_tabs (additional_tabs_, os_);
+            os_ << '(';
+        }
+
+        os_ << "ptr_[lookup_[static_cast\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "                <unsigned char>((prev_char_ >>\n"
+            "                shift_[bytes_ - 1 - i_]) & 0xff)]]";
+
+        if (pointers_)
+        {
+            os_ << ')';
+        }
+
+        os_ << ";\n\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "            if (state_ == 0)\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "            {\n";
+
+        if (features_ & eol_bit)
+        {
+            output_tabs (additional_tabs_, os_);
+            os_ << "                EOL_state_ = 0;\n";
+        }
+
+        output_tabs (additional_tabs_, os_);
+        os_ << "                goto quit;\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "            }\n\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "            ptr_ = ";
+
+        if (pointers_)
+        {
+            os_ << "state_";
+        }
+        else
+        {
+            os_ << "&dfa_[state_ * dfa_alphabet_]";
+        }
+
+        os_ << ";\n";
+        output_tabs (additional_tabs_, os_);
+        os_ << "        }\n";
+    }
+
+    static void output_quit (std::ostream &, const false_ &)
+    {
+        // Nothing to do: the one-byte char loop emits "break", not "goto quit"
+    }
+
+    static void output_quit (std::ostream &os_, const true_ &)
+    {   // Label targeted by the "goto quit" in the multi-byte char loop.
+        os_ << "quit:\n";
+    }
+};
+}
+
+#endif
diff --git a/inc/lexertl/generator.hpp b/inc/lexertl/generator.hpp
new file mode 100644
index 0000000..f930d23
--- /dev/null
+++ b/inc/lexertl/generator.hpp
@@ -0,0 +1,829 @@
+// generator.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_GENERATOR_HPP
+#define LEXERTL_GENERATOR_HPP
+
+#include <algorithm>
+#include "bool.hpp"
+#include "partition/charset.hpp"
+#include "char_traits.hpp"
+#include "partition/equivset.hpp"
+#include <memory>
+#include "parser/parser.hpp"
+#include "containers/ptr_list.hpp"
+#include "rules.hpp"
+#include "size_t.hpp"
+#include "state_machine.hpp"
+
+namespace lexertl
+{
+template<typename rules, typename sm, typename char_traits = basic_char_traits
+    <typename sm::traits::input_char_type> >
+class basic_generator
+{
+public:
+    typedef typename rules::id_type id_type;
+    typedef typename rules::char_type rules_char_type;
+    typedef typename sm::traits sm_traits;
+    typedef detail::basic_parser<rules_char_type, sm_traits> parser;
+    typedef typename parser::charset_map charset_map;
+    typedef typename parser::node node;
+    typedef typename parser::node_ptr_vector node_ptr_vector;
+
+    static void build (const rules &rules_, sm &sm_)
+    { // Processes every lexer state of rules_ and swaps the result into sm_.
+        const std::size_t size_ = rules_.statemap ().size ();
+        // Strong exception guarantee: sm_ is only touched by the final swap.
+        // http://www.boost.org/community/exception_safety.html
+        internals internals_;
+        sm temp_sm_;
+        node_ptr_vector node_ptr_vector_;
+
+        internals_._eoi = rules_.eoi ();
+        internals_.add_states (size_);
+
+        for (id_type index_ = 0; index_ < size_; ++index_)
+        {
+            if (rules_.regexes ()[index_].empty ())
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Lexer states with no rules are not allowed "
+                    "(lexer state " << index_ << ".)";
+                throw runtime_error (ss_.str ());
+            }
+            else
+            {
+                // Note that the following variables are per DFA.
+                // Map of regex charset tokens (strings) to index
+                charset_map charset_map_;
+                // Used to fix up $ and \n clashes.
+                id_type nl_id_ = sm_traits::npos ();
+                // Regex syntax tree
+                node *root_ = build_tree (rules_, index_, node_ptr_vector_,
+                    charset_map_, nl_id_);
+
+                build_dfa (charset_map_, root_, internals_, temp_sm_, index_,
+                    nl_id_);
+
+                if (internals_._dfa[index_]->size () /
+                    internals_._dfa_alphabet[index_] >= sm_traits::npos ())
+                {
+                    // Overflow: the row count has reached sm_traits::npos ()
+                    throw runtime_error ("The data type you have chosen "
+                        "cannot hold this many DFA rows.");
+                }
+            }
+        }
+
+        // If you get a compile error here the id_type from rules and
+        // state machine do not match.
+        create (internals_, temp_sm_, rules_.features (), lookup ());
+        sm_.swap (temp_sm_);
+    }
+
+    static node *build_tree (const rules &rules_, const std::size_t dfa_,
+        node_ptr_vector &node_ptr_vector_, charset_map &charset_map_,
+        id_type &nl_id_)
+    { // Returns the root of the combined syntax tree for lexer state dfa_.
+        typename parser::macro_map macro_map_;
+        parser parser_ (rules_.locale (), node_ptr_vector_, macro_map_,
+            charset_map_, rules_.eoi ());
+        const typename rules::string_deque_deque &regexes_ =
+            rules_.regexes ();
+        typename rules::string_deque::const_iterator regex_iter_ =
+            regexes_[dfa_].begin ();
+        typename rules::string_deque::const_iterator regex_iter_end_ =
+            regexes_[dfa_].end ();
+        const typename rules::string &regex_ = *regex_iter_;
+        const typename rules::id_vector_deque &ids_ = rules_.ids ();
+        const typename rules::id_vector_deque &user_ids_ =
+            rules_.user_ids ();
+        typename rules::id_vector::const_iterator id_iter_ =
+            ids_[dfa_].begin ();
+        typename rules::id_vector::const_iterator user_id_iter_ =
+            user_ids_[dfa_].begin ();
+        const typename rules::id_vector_deque &next_dfas_ =
+            rules_.next_dfas ();
+        const typename rules::id_vector_deque &pushes_ = rules_.pushes ();
+        const typename rules::bool_vector_deque &pops_ = rules_.pops ();
+        typename rules::id_vector::const_iterator next_dfa_iter_ =
+            next_dfas_[dfa_].begin ();
+        typename rules::id_vector::const_iterator push_dfa_iter_ =
+            pushes_[dfa_].begin ();
+        typename rules::bool_vector::const_iterator pop_dfa_iter_ =
+            pops_[dfa_].begin ();
+        const bool seen_bol_ = (rules_.features ()[dfa_] & bol_bit) != 0;
+        node *root_ = 0;
+
+        // Macros have a different context per lexer state
+        // as equivsets (generally) differ.
+        build_macros (rules_, macro_map_, node_ptr_vector_, charset_map_,
+            nl_id_);
+        root_ = parser_.parse (regex_.c_str (),
+            regex_.c_str () + regex_.size (), *id_iter_, *user_id_iter_,
+            *next_dfa_iter_, *push_dfa_iter_, *pop_dfa_iter_,
+            rules_.flags (), nl_id_, seen_bol_, false);
+        ++regex_iter_;
+        ++id_iter_;
+        ++user_id_iter_;
+        ++next_dfa_iter_;
+        ++push_dfa_iter_;
+        ++pop_dfa_iter_;
+
+        // Parse the remaining regexes, joining each to root_ via a
+        // selection_node. (All six iterators advance in step.)
+        while (regex_iter_ != regex_iter_end_)
+        {
+            // Re-declare regex_: a reference cannot be re-bound by assignment.
+            const typename rules::string &regex_ = *regex_iter_;
+            node *rhs_ = parser_.parse (regex_.c_str (),
+                regex_.c_str () + regex_.size (), *id_iter_, *user_id_iter_,
+                *next_dfa_iter_, *push_dfa_iter_, *pop_dfa_iter_,
+                rules_.flags (), nl_id_,
+                (rules_.features ()[dfa_] & bol_bit) != 0, false);
+            // Add a null slot first, then allocate the node into it.
+            node_ptr_vector_->push_back
+                (static_cast<selection_node *>(0));
+            node_ptr_vector_->back () = new selection_node (root_, rhs_);
+            root_ = node_ptr_vector_->back ();
+
+            ++regex_iter_;
+            ++id_iter_;
+            ++user_id_iter_;
+            ++next_dfa_iter_;
+            ++push_dfa_iter_;
+            ++pop_dfa_iter_;
+        }
+
+        return root_;
+    }
+
+protected:
+    typedef bool_<sm_traits::compressed> compressed;
+    typedef detail::basic_equivset<id_type> equivset;
+    typedef detail::ptr_list<equivset> equivset_list;
+    typedef std::auto_ptr<equivset> equivset_ptr;
+    typedef typename sm_traits::char_type sm_char_type;
+    typedef detail::basic_charset<sm_char_type, id_type> charset;
+    typedef std::auto_ptr<charset> charset_ptr;
+    typedef detail::ptr_list<charset> charset_list;
+    typedef detail::basic_internals<id_type> internals;
+    typedef typename std::set<id_type> id_type_set;
+    typedef typename internals::id_type_vector id_type_vector;
+    typedef typename charset::index_set index_set;
+    typedef std::vector<index_set> index_set_vector;
+    typedef bool_<sm_traits::is_dfa> is_dfa;
+    typedef bool_<sm_traits::lookup> lookup;
+    typedef typename parser::macro_map macro_map;
+    typedef typename macro_map::iterator macro_iter;
+    typedef std::pair<macro_iter, bool> macro_iter_pair;
+    typedef std::set<const node *> node_set;
+    typedef detail::ptr_vector<node_set> node_set_vector;
+    typedef typename node::node_vector node_vector;
+    typedef detail::ptr_vector<node_vector> node_vector_vector;
+    typedef std::pair<typename rules::string, const node *> macro_pair;
+    typedef typename parser::selection_node selection_node;
+    typedef typename std::vector<std::size_t> size_t_vector;
+    typedef typename parser::string_token string_token;
+
+    static void build_macros (const rules &rules_,
+        macro_map &macro_map_, node_ptr_vector &node_ptr_vector_,
+        charset_map &charset_map_, id_type &nl_id_)
+    {
+        // Parse every macro and register its syntax tree under its name.
+        typedef typename rules::string_pair_deque::const_iterator deque_iter;
+        const typename rules::string_pair_deque &macros_ =
+            rules_.macrodeque ();
+        const deque_iter last_ = macros_.end ();
+
+        for (deque_iter macro_ = macros_.begin (); macro_ != last_; ++macro_)
+        {
+            const typename rules::string &text_ = macro_->second;
+            parser parser_ (rules_.locale (), node_ptr_vector_, macro_map_,
+                charset_map_, rules_.eoi ());
+            node *tree_ = parser_.parse (text_.c_str (),
+                text_.c_str () + text_.size (), 0, 0, 0, false, false,
+                rules_.flags (), nl_id_, false, true);
+            macro_iter_pair slot_ = macro_map_.insert (macro_pair
+                (macro_->first, static_cast<const node *>(0)));
+
+            slot_.first->second = tree_;
+        }
+    }
+
+    static void build_dfa (const charset_map &charset_map_, const node *root_,
+        internals &internals_, sm &sm_, const id_type dfa_index_,
+        id_type &nl_id_)
+    { // Fills internals_._dfa[dfa_index_] from the syntax tree at root_.
+        // partitioned charset list
+        charset_list charset_list_;
+        // vector mapping token indexes to partitioned token index sets
+        index_set_vector set_mapping_;
+        typename internals::id_type_vector &dfa_ =
+            *internals_._dfa[dfa_index_];
+        std::size_t dfa_alphabet_ = 0;
+        const node_vector *followpos_ = &root_->firstpos ();
+        node_set_vector seen_sets_;
+        node_vector_vector seen_vectors_;
+        size_t_vector hash_vector_;
+        id_type zero_id_ = sm_traits::npos ();
+        id_type_set eol_set_;
+
+        set_mapping_.resize (charset_map_.size ());
+        partition_charsets (charset_map_, charset_list_, is_dfa ());
+        build_set_mapping (charset_list_, internals_, dfa_index_,
+            set_mapping_);
+        // Replace nl_id_ by the first partitioned index it maps to.
+        if (nl_id_ != sm_traits::npos ())
+        {
+            nl_id_ = *set_mapping_[nl_id_].begin ();
+            zero_id_ = sm_traits::compressed ?
+                *set_mapping_[charset_map_.find (string_token (0, 0))->
+                second].begin () : sm_traits::npos ();
+        }
+        // Row width: one column per charset plus transitions_index more.
+        dfa_alphabet_ = charset_list_->size () + transitions_index +
+            (nl_id_ == sm_traits::npos () ? 0 : 1);
+
+        if (dfa_alphabet_ > sm_traits::npos ())
+        {
+            // Overflow: the row width exceeds sm_traits::npos ()
+            throw runtime_error ("The data type you have chosen cannot hold "
+                "the dfa alphabet.");
+        }
+
+        internals_._dfa_alphabet[dfa_index_] = dfa_alphabet_;
+        // 'jam' state (row 0, all zeros)
+        dfa_.resize (dfa_alphabet_, 0);
+        closure (followpos_, seen_sets_, seen_vectors_, hash_vector_,
+            dfa_alphabet_, dfa_);
+        // seen_vectors_ grows while closure () keeps adding new states.
+        for (id_type index_ = 0; index_ < static_cast<id_type>
+            (seen_vectors_->size ()); ++index_)
+        {
+            equivset_list equiv_list_;
+
+            build_equiv_list (seen_vectors_[index_], set_mapping_,
+                equiv_list_, is_dfa ());
+
+            for (typename equivset_list::list::const_iterator iter_ =
+                equiv_list_->begin (), end_ = equiv_list_->end ();
+                iter_ != end_; ++iter_)
+            {
+                equivset *equivset_ = *iter_;
+                const id_type transition_ = closure
+                    (&equivset_->_followpos, seen_sets_, seen_vectors_,
+                    hash_vector_, dfa_alphabet_, dfa_);
+
+                if (transition_ != sm_traits::npos ())
+                {
+                    id_type *ptr_ = &dfa_.front () + ((index_ + 1) *
+                        dfa_alphabet_);
+
+                    // Prune abstemious transitions from end states.
+                    if (*ptr_ && !equivset_->_greedy) continue;
+
+                    for (typename equivset::index_vector::const_iterator
+                        equiv_iter_ = equivset_->_index_vector.begin (),
+                        equiv_end_ = equivset_->_index_vector.end ();
+                        equiv_iter_ != equiv_end_; ++equiv_iter_)
+                    {
+                        const id_type i_ = *equiv_iter_;
+
+                        if (i_ == parser::bol_token ())
+                        {
+                            dfa_.front () = transition_; // Kept in dfa_[0].
+                        }
+                        else if (i_ == parser:: eol_token ())
+                        {
+                            ptr_[eol_index] = transition_;
+                            eol_set_.insert (index_ + 1);
+                        }
+                        else
+                        {
+                            ptr_[i_ + transitions_index] = transition_;
+                        }
+                    }
+                }
+            }
+        }
+
+        fix_clashes (eol_set_, nl_id_, zero_id_, dfa_, dfa_alphabet_,
+            compressed ());
+        append_dfa (charset_list_, internals_, sm_, dfa_index_, lookup ());
+    }
+
+    // Uncompressed
+    static void fix_clashes (const id_type_set &eol_set_,
+        const id_type nl_id_, const id_type /*zero_id_*/,
+        typename internals::id_type_vector &dfa_,
+        const std::size_t dfa_alphabet_, const false_ &)
+    {
+        typedef typename id_type_set::const_iterator set_iter;
+        const std::size_t nl_col_ = nl_id_ + transitions_index;
+
+        for (set_iter state_ = eol_set_.begin (), last_ = eol_set_.end ();
+            state_ != last_; ++state_)
+        {
+            id_type *row_ = &dfa_.front () + *state_ * dfa_alphabet_;
+            const id_type after_eol_ = row_[eol_index];
+            const id_type after_nl_ = row_[nl_col_];
+
+            if (!after_nl_) continue;
+
+            // Move the newline transition onto the EOL target state,
+            // unless that state already has one of its own.
+            row_[nl_col_] = 0;
+            row_ = &dfa_.front () + after_eol_ * dfa_alphabet_;
+
+            if (row_[nl_col_] == 0)
+            {
+                row_[nl_col_] = after_nl_;
+            }
+        }
+    }
+
+    // Compressed: newline is looked up after following zero_id_ first.
+    static void fix_clashes (const id_type_set &eol_set_,
+        const id_type nl_id_, const id_type zero_id_,
+        typename internals::id_type_vector &dfa_,
+        const std::size_t dfa_alphabet_, const true_ &)
+    {
+        typename id_type_set::const_iterator eol_iter_ =
+            eol_set_.begin ();
+        typename id_type_set::const_iterator eol_end_ =
+            eol_set_.end ();
+        std::size_t i_ = 0;
+
+        for (; eol_iter_ != eol_end_; ++eol_iter_)
+        {
+            id_type *ptr_ = &dfa_.front () + *eol_iter_ * dfa_alphabet_;
+            const id_type eol_state_ = ptr_[eol_index];
+            id_type nl_state_ = 0;
+            // Restart i_ for every EOL state: it is reused further down.
+            for (i_ = 0; i_ < (sm_traits::char_24_bit ? 2 : 1); ++i_)
+            {
+                ptr_ = &dfa_.front () + ptr_[transitions_index + zero_id_] *
+                    dfa_alphabet_;
+            }
+
+            nl_state_ = ptr_[transitions_index + nl_id_];
+
+            if (nl_state_)
+            {
+                ptr_ = &dfa_.front () + eol_state_ * dfa_alphabet_;
+                // An existing zero_id_ transition is left alone.
+                if (ptr_[transitions_index + zero_id_] != 0) continue;
+                // Link the EOL target to a newly appended row via zero_id_.
+                ptr_[transitions_index + zero_id_] = dfa_.size () /
+                    dfa_alphabet_;
+                dfa_.resize (dfa_.size () + dfa_alphabet_, 0);
+                // 24 bit chars chain one more new row through zero_id_.
+                for (i_ = 0; i_ < (sm_traits::char_24_bit ? 1 : 0); ++i_)
+                {
+                    ptr_ = &dfa_.front () + dfa_.size () - dfa_alphabet_;
+                    ptr_[transitions_index + zero_id_] = dfa_.size () /
+                        dfa_alphabet_;
+                    dfa_.resize (dfa_.size () + dfa_alphabet_, 0);
+                }
+                // resize () may reallocate, so ptr_ is re-derived each time.
+                ptr_ = &dfa_.front () + dfa_.size () - dfa_alphabet_;
+                ptr_[transitions_index + nl_id_] = nl_state_;
+            }
+        }
+    }
+
+    // char_state_machine version
+    static void append_dfa (const charset_list &charset_list_,
+        const internals &internals_, sm &sm_, const id_type dfa_index_,
+        const false_ &)
+    {
+        typedef typename charset_list::list::const_iterator list_iter;
+        typename sm::string_token_vector tokens_;
+
+        // Collect the token of every partitioned charset, preserving
+        // the order of charset_list_.
+        tokens_.reserve (charset_list_->size ());
+
+        for (list_iter cs_ = charset_list_->begin (),
+            last_ = charset_list_->end (); cs_ != last_; ++cs_)
+        {
+            tokens_.push_back ((*cs_)->_token);
+        }
+
+        // Hand the tokens to the state machine along with internals_.
+        sm_.append (tokens_, internals_, dfa_index_);
+    }
+
+    // state_machine version (selected by the lookup () tag being true_)
+    static void append_dfa (const charset_list &,
+        const internals &, sm &, const id_type, const true_ &)
+    {
+        // Nothing to do here - create () does the work for this variant.
+    }
+
+    // char_state_machine version (selected by the lookup () tag being false_)
+    static void create (internals &, sm &, const id_type_vector &,
+        const false_ &)
+    {
+        // Nothing to do here - append_dfa () does the work for this variant.
+    }
+
+    // state_machine version
+    static void create (internals &internals_, sm &sm_,
+        const id_type_vector &features_, const true_ &)
+    {
+        const std::size_t dfas_ = internals_._dfa->size ();
+
+        // Fold the feature bits of every lexer state into one mask.
+        for (std::size_t dfa_ = 0; dfa_ != dfas_; ++dfa_)
+        {
+            internals_._features |= features_[dfa_];
+        }
+
+        // More than one lexer state: flag the machine as multi state.
+        if (dfas_ > 1) internals_._features |= multi_state_bit;
+
+        sm_.data ().swap (internals_);
+    }
+
+    // NFA version: one list entry per map_ entry, no intersection pass.
+    static void partition_charsets (const charset_map &map_,
+        charset_list &lhs_, const false_ &)
+    {
+        fill_rhs_list (map_, lhs_);
+    }
+
+    // DFA version: charsets from map_ are intersected pairwise into lhs_.
+    static void partition_charsets (const charset_map &map_,
+        charset_list &lhs_, const true_ &)
+    {
+        charset_list rhs_;
+
+        fill_rhs_list (map_, rhs_);
+
+        if (!rhs_->empty ())
+        {
+            typename charset_list::list::iterator iter_;
+            typename charset_list::list::iterator end_;
+            charset_ptr overlap_ (new charset);
+            // Seed lhs_ with the first charset, removing it from rhs_.
+            lhs_->push_back (static_cast<charset *>(0));
+            lhs_->back () = rhs_->front ();
+            rhs_->pop_front ();
+            // Intersect every remaining charset against all of lhs_.
+            while (!rhs_->empty ())
+            {
+                charset_ptr r_ (rhs_->front ());
+
+                rhs_->pop_front ();
+                iter_ = lhs_->begin ();
+                end_ = lhs_->end ();
+
+                while (!r_->empty () && iter_ != end_)
+                {
+                    typename charset_list::list::iterator l_iter_ = iter_;
+                    // Presumably moves shared chars into overlap_ - confirm.
+                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
+
+                    if (overlap_->empty ()) // Nothing in common.
+                    {
+                        ++iter_;
+                    }
+                    else if ((*l_iter_)->empty ()) // Overlap replaces it.
+                    {
+                        delete *l_iter_;
+                        *l_iter_ = overlap_.release ();
+                        overlap_.reset (new charset);
+                        ++iter_;
+                    }
+                    else if (r_->empty ()) // Carry on with the overlap as r_.
+                    {
+                        delete r_.release ();
+                        r_ = overlap_;
+                        overlap_.reset (new charset);
+                        break;
+                    }
+                    else // Insert the overlap right after *l_iter_.
+                    {
+                        iter_ = lhs_->insert (++iter_,
+                            static_cast<charset *>(0));
+                        *iter_ = overlap_.release ();
+                        overlap_.reset (new charset);
+                        ++iter_;
+                        end_ = lhs_->end ();
+                    }
+                }
+                // Whatever is left in r_ becomes a new lhs_ entry.
+                if (!r_->empty ())
+                {
+                    lhs_->push_back (static_cast<charset *>(0));
+                    lhs_->back () = r_.release ();
+                }
+            }
+        }
+    }
+
+    static void fill_rhs_list (const charset_map &map_,
+        charset_list &list_)
+    {
+        typedef typename charset_map::const_iterator map_iter;
+        for (map_iter entry_ = map_.begin (), last_ = map_.end ();
+            entry_ != last_; ++entry_)
+        {
+            // Add a null slot first, then allocate straight into it.
+            list_->push_back (static_cast<charset *>(0));
+            list_->back () = new charset (entry_->first, entry_->second);
+        }
+    }
+
+    static void build_set_mapping (const charset_list &charset_list_,
+        internals &internals_, const id_type dfa_index_,
+        index_set_vector &set_mapping_)
+    {
+        typedef typename charset_list::list::const_iterator list_iter;
+        typedef typename index_set::const_iterator index_iter;
+        const list_iter list_end_ = charset_list_->end ();
+        id_type partition_ = 0;
+
+        // partition_ is the position of the charset within charset_list_.
+        for (list_iter list_pos_ = charset_list_->begin ();
+            list_pos_ != list_end_; ++list_pos_, ++partition_)
+        {
+            const charset &part_ = **list_pos_;
+
+            fill_lookup (part_._token, internals_._lookup[dfa_index_],
+                partition_, lookup ());
+
+            // Each token index held by this charset maps to partition_.
+            for (index_iter old_ = part_._index_set.begin (),
+                old_end_ = part_._index_set.end (); old_ != old_end_; ++old_)
+            {
+                set_mapping_[*old_].insert (partition_);
+            }
+        }
+    }
+
+    // char_state_machine version (selected by the lookup () tag being false_)
+    static void fill_lookup (const string_token &, id_type_vector *,
+        const id_type, const false_ &)
+    {
+        // Do nothing (the lookup table is not used by this variant)
+    }
+
+    // state_machine version
+    static void fill_lookup (const string_token &charset_,
+        id_type_vector *lookup_, const id_type index_, const true_ &)
+    {
+        typedef typename string_token::range_vector::const_iterator
+            range_iter;
+        typedef typename char_traits::index_type index_type;
+        const id_type column_ = index_ + transitions_index;
+        id_type *table_ = &lookup_->front ();
+
+        for (range_iter range_ = charset_._ranges.begin (),
+            last_ = charset_._ranges.end (); range_ != last_; ++range_)
+        {
+            // Note ch_ must be unsigned
+            index_type ch_ = range_->first;
+            // The loop is exclusive; the upper bound is stored separately
+            // (presumably so ch_ never has to step past range_->second).
+            while (ch_ < range_->second) table_[ch_++] = column_;
+
+            // Note range_->second must be unsigned
+            table_[range_->second] = column_;
+        }
+    }
+
+    static id_type closure (const node_vector *followpos_,
+        node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
+        size_t_vector &hash_vector_, const id_type size_, id_type_vector &dfa_)
+    { // Returns the DFA state for followpos_ (npos () if it is empty).
+        bool end_state_ = false;
+        id_type id_ = 0;
+        id_type user_id_ = sm_traits::npos ();
+        id_type next_dfa_ = 0;
+        id_type push_dfa_ = sm_traits::npos ();
+        bool pop_dfa_ = false;
+        std::size_t hash_ = 0;
+        // An empty followpos_ yields no state at all.
+        if (followpos_->empty ()) return sm_traits::npos ();
+
+        id_type index_ = 0;
+        std::auto_ptr<node_set> set_ptr_ (new node_set);
+        std::auto_ptr<node_vector> vector_ptr_ (new node_vector);
+
+        for (typename node_vector::const_iterator iter_ =
+            followpos_->begin (), end_ = followpos_->end ();
+            iter_ != end_; ++iter_)
+        {
+            closure_ex (*iter_, end_state_, id_, user_id_, next_dfa_,
+                push_dfa_, pop_dfa_, set_ptr_.get (),
+                vector_ptr_.get (), hash_);
+        }
+        // Look for an identical node set among the states seen so far.
+        bool found_ = false;
+        typename size_t_vector::const_iterator hash_iter_ =
+            hash_vector_.begin ();
+        typename size_t_vector::const_iterator hash_end_ =
+            hash_vector_.end ();
+        typename node_set_vector::vector::const_iterator set_iter_ =
+            seen_sets_->begin ();
+
+        for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
+        {
+            found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
+            ++index_;
+
+            if (found_) break;
+        }
+        // Unseen set: record it and append a new zeroed DFA row.
+        if (!found_)
+        {
+            seen_sets_->push_back (static_cast<node_set *>(0));
+            seen_sets_->back () = set_ptr_.release ();
+            seen_vectors_->push_back (static_cast<node_vector *>(0));
+            seen_vectors_->back () = vector_ptr_.release ();
+            hash_vector_.push_back (hash_);
+            // State 0 is the jam state, so real states are numbered from 1.
+            index_ = static_cast<id_type>(seen_sets_->size ());
+
+            const std::size_t old_size_ = dfa_.size ();
+
+            dfa_.resize (old_size_ + size_, 0);
+
+            if (end_state_)
+            {
+                dfa_[old_size_] |= end_state_bit;
+
+                if (pop_dfa_)
+                {
+                    dfa_[old_size_] |= pop_dfa_bit;
+                }
+
+                dfa_[old_size_ + id_index] = id_;
+                dfa_[old_size_ + user_id_index] = user_id_;
+                dfa_[old_size_ + push_dfa_index] = push_dfa_;
+                dfa_[old_size_ + next_dfa_index] = next_dfa_;
+            }
+        }
+        // 1 based index of the matching (or newly added) state.
+        return index_;
+    }
+
+    static void closure_ex (node *node_, bool &end_state_,
+        id_type &id_, id_type &user_id_, id_type &next_dfa_,
+        id_type &push_dfa_, bool &pop_dfa_, node_set *set_ptr_,
+        node_vector *vector_ptr_, std::size_t &hash_)
+    {
+        // Only the first end node seen supplies the accept details;
+        // later end nodes in the same closure are ignored.
+        if (node_->end_state () && !end_state_)
+        {
+            end_state_ = true;
+            id_ = node_->id ();
+            user_id_ = node_->user_id ();
+            next_dfa_ = node_->next_dfa ();
+            push_dfa_ = node_->push_dfa ();
+            pop_dfa_ = node_->pop_dfa ();
+        }
+
+        // Nodes already present in the set contribute nothing more,
+        // so the vector and the hash are left untouched.
+        if (!set_ptr_->insert (node_).second) return;
+
+        // Keep insertion order in the vector and fold the node's
+        // address into the running hash of the set.
+        vector_ptr_->push_back (node_);
+        hash_ += reinterpret_cast<std::size_t> (node_);
+    }
+
+    // NFA version: one list entry per eligible node, no intersection pass.
+    static void build_equiv_list (const node_vector *vector_,
+        const index_set_vector &set_mapping_, equivset_list &lhs_,
+        const false_ &)
+    {
+        fill_rhs_list (vector_, set_mapping_, lhs_);
+    }
+
+    // DFA version: equivsets built from vector_ are intersected into lhs_.
+    static void build_equiv_list (const node_vector *vector_,
+        const index_set_vector &set_mapping_, equivset_list &lhs_,
+        const true_ &)
+    {
+        equivset_list rhs_;
+
+        fill_rhs_list (vector_, set_mapping_, rhs_);
+
+        if (!rhs_->empty ())
+        {
+            typename equivset_list::list::iterator iter_;
+            typename equivset_list::list::iterator end_;
+            equivset_ptr overlap_ (new equivset);
+            // Seed lhs_ with the first equivset, removing it from rhs_.
+            lhs_->push_back (static_cast<equivset *>(0));
+            lhs_->back () = rhs_->front ();
+            rhs_->pop_front ();
+            // Intersect every remaining equivset against all of lhs_.
+            while (!rhs_->empty ())
+            {
+                equivset_ptr r_ (rhs_->front ());
+
+                rhs_->pop_front ();
+                iter_ = lhs_->begin ();
+                end_ = lhs_->end ();
+
+                while (!r_->empty () && iter_ != end_)
+                {
+                    typename equivset_list::list::iterator l_iter_ = iter_;
+                    // Presumably moves the shared part into overlap_ - confirm.
+                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
+
+                    if (overlap_->empty ()) // Nothing in common.
+                    {
+                        ++iter_;
+                    }
+                    else if ((*l_iter_)->empty ()) // Overlap replaces it.
+                    {
+                        delete *l_iter_;
+                        *l_iter_ = overlap_.release ();
+                        overlap_.reset (new equivset);
+                        ++iter_;
+                    }
+                    else if (r_->empty ()) // Carry on with the overlap as r_.
+                    {
+                        delete r_.release ();
+                        r_ = overlap_;
+                        overlap_.reset (new equivset);
+                        break;
+                    }
+                    else // Insert the overlap right after *l_iter_.
+                    {
+                        iter_ = lhs_->insert (++iter_,
+                            static_cast<equivset *>(0));
+                        *iter_ = overlap_.release ();
+                        overlap_.reset (new equivset);
+                        ++iter_;
+                        end_ = lhs_->end ();
+                    }
+                }
+                // Whatever is left in r_ becomes a new lhs_ entry.
+                if (!r_->empty ())
+                {
+                    lhs_->push_back (static_cast<equivset *>(0));
+                    lhs_->back () = r_.release ();
+                }
+            }
+        }
+    }
+
+    static void fill_rhs_list (const node_vector *vector_,
+        const index_set_vector &set_mapping_, equivset_list &list_)
+    {
+        typedef typename node_vector::const_iterator node_iter;
+        for (node_iter pos_ = vector_->begin (), last_ = vector_->end ();
+            pos_ != last_; ++pos_)
+        {
+            const node *current_ = *pos_;
+
+            // End nodes are skipped.
+            if (current_->end_state ()) continue;
+
+            const id_type token_ = current_->token ();
+
+            // So are nodes holding the null token.
+            if (token_ == node::null_token ()) continue;
+
+            // Add a null slot first, then allocate straight into it.
+            list_->push_back (static_cast<equivset *>(0));
+
+            if (token_ == parser::bol_token () ||
+                token_ == parser::eol_token ())
+            {
+                // BOL/EOL tokens are not looked up in set_mapping_:
+                // the index set holds just the token itself.
+                std::set<id_type> own_index_;
+
+                own_index_.insert (token_);
+                list_->back () = new equivset (own_index_,
+                    token_, current_->greedy (), current_->followpos ());
+            }
+            else
+            {
+                list_->back () = new equivset (set_mapping_[token_],
+                    token_, current_->greedy (), current_->followpos ());
+            }
+        }
+    }
+};
+
+typedef basic_generator<rules, state_machine> generator;
+typedef basic_generator<wrules, wstate_machine> wgenerator;
+typedef basic_generator<rules, char_state_machine> char_generator;
+typedef basic_generator<wrules, wchar_state_machine> wchar_generator;
+}
+
+#endif
diff --git a/inc/lexertl/internals.hpp b/inc/lexertl/internals.hpp
new file mode 100644
index 0000000..10335c6
--- /dev/null
+++ b/inc/lexertl/internals.hpp
@@ -0,0 +1,80 @@
+// internals.hpp
+// Copyright (c) 2009-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_INTERNALS_HPP
+#define LEXERTL_INTERNALS_HPP
+
+#include "enums.hpp"
+#include "containers/ptr_vector.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tables making up a state machine; each container has one entry per DFA.
+template<typename id_type>
+struct basic_internals
+{
+    typedef std::vector<id_type> id_type_vector;
+    typedef ptr_vector<id_type_vector> id_type_vector_vector;
+
+    id_type _eoi;
+    id_type_vector_vector _lookup;
+    id_type_vector _dfa_alphabet;
+    id_type _features;
+    id_type_vector_vector _dfa;
+
+    basic_internals () :
+        _eoi (0),
+        _features (0)
+    {
+    }
+
+    // Returns every member to its freshly constructed state.
+    void clear ()
+    {
+        _dfa.clear ();
+        _dfa_alphabet.clear ();
+        _lookup.clear ();
+        _features = 0;
+        _eoi = 0;
+    }
+
+    bool empty () const
+    {
+        return _dfa->empty ();
+    }
+
+    // Appends num_ entries to each of _lookup, _dfa_alphabet and _dfa.
+    void add_states (const std::size_t num_)
+    {
+        for (std::size_t left_ = num_; left_ != 0; --left_)
+        {
+            _lookup->push_back (static_cast<id_type_vector *>(0));
+            // Each lookup table has 256 entries, all dead_state_index.
+            _lookup->back () = new id_type_vector (256, dead_state_index);
+            _dfa_alphabet.push_back (0);
+            _dfa->push_back (static_cast<id_type_vector *>(0));
+            _dfa->back () = new id_type_vector;
+        }
+    }
+
+    void swap (basic_internals &internals_)
+    {
+        _dfa->swap (*internals_._dfa);
+        _lookup->swap (*internals_._lookup);
+        _dfa_alphabet.swap (internals_._dfa_alphabet);
+        std::swap (_features, internals_._features);
+        std::swap (_eoi, internals_._eoi);
+    }
+
+private:
+    basic_internals (const basic_internals &); // No copy construction.
+    basic_internals &operator = (const basic_internals &); // No assignment.
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/is_same.hpp b/inc/lexertl/is_same.hpp
new file mode 100644
index 0000000..42684dc
--- /dev/null
+++ b/inc/lexertl/is_same.hpp
@@ -0,0 +1,29 @@
+// is_same.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_IS_SAME_HPP
+#define LEXERTL_IS_SAME_HPP
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename lhs_type, typename rhs_type>
+struct is_same // Primary template: the two types differ.
+{
+    enum {same = false};
+};
+
+template<typename type>
+struct is_same<type, type> // Specialisation: both arguments are one type.
+{
+    enum {same = true};
+};
+}
+}
+
+#endif
+
diff --git a/inc/lexertl/licence_1_0.txt b/inc/lexertl/licence_1_0.txt
new file mode 100644
index 0000000..d1c4c6c
--- /dev/null
+++ b/inc/lexertl/licence_1_0.txt
@@ -0,0 +1,24 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
diff --git a/inc/lexertl/lookup.hpp b/inc/lexertl/lookup.hpp
new file mode 100644
index 0000000..a54ae42
--- /dev/null
+++ b/inc/lexertl/lookup.hpp
@@ -0,0 +1,477 @@
+// lookup.hpp
+// Copyright (c) 2009-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_LOOKUP_HPP
+#define LEXERTL_LOOKUP_HPP
+
+#include <assert.h>
+#include "bool.hpp"
+#include "match_results.hpp"
+#include "state_machine.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Primary template: used when the bool argument is false; keeps no state.
+template<bool>
+struct bol_state
+{
+    bol_state (const bool) {}
+};
+
+// <true> specialisation: records the BOL flag it is constructed with.
+template<>
+struct bol_state<true>
+{
+    bool _bol;
+    bool _end_bol;
+
+    // Both flags start out equal to start_bol_.
+    bol_state (const bool start_bol_) :
+        _bol (start_bol_), _end_bol (start_bol_)
+    {
+    }
+};
+
+template<typename id_type, bool>
+struct eol_state // Primary template (bool false): keeps no state.
+{
+};
+
+// <true> specialisation: holds an EOL state index, initially 0.
+template<typename id_type>
+struct eol_state<id_type, true>
+{
+    id_type _EOL_state;
+
+    eol_state () : _EOL_state (0)
+    {
+    }
+};
+
+// Primary template: used when the bool argument is false; keeps no state.
+template<typename id_type, bool>
+struct multi_state_state
+{
+    multi_state_state (const id_type) {}
+};
+
+// <true> specialisation: remembers a start state.
+template<typename id_type>
+struct multi_state_state<id_type, true>
+{
+    id_type _start_state;
+
+    multi_state_state (const id_type initial_) :
+        _start_state (initial_)
+    {
+    }
+};
+
+// Primary template: used when the bool argument is false; keeps no state.
+template<typename id_type, bool>
+struct recursive_state
+{
+    recursive_state (const id_type *) {}
+};
+
+// <true> specialisation: caches the pop flag and push DFA of a DFA row.
+template<typename id_type>
+struct recursive_state<id_type, true>
+{
+    bool _pop;
+    id_type _push_dfa;
+
+    recursive_state (const id_type *row_) :
+        _pop ((row_[0] & pop_dfa_bit) != 0),
+        _push_dfa (row_[push_dfa_index])
+    {
+    }
+};
+// Cursor over the DFA tables of one start state, used by next ().
+template<typename id_type, typename index_type, std::size_t flags>
+struct lookup_state
+{
+    typedef basic_internals<id_type> internals;
+    // _ptr addresses one row of _dfa; a row is _dfa_alphabet entries wide.
+    const id_type *_lookup;
+    id_type _dfa_alphabet;
+    const id_type *_dfa;
+    const id_type *_ptr;
+    bool _end_state;
+    id_type _id;
+    id_type _uid;
+    bol_state<(flags & bol_bit) != 0> _bol_state;
+    eol_state<id_type, (flags & eol_bit) != 0> _eol_state;
+    multi_state_state<id_type, (flags & multi_state_bit) != 0>
+        _multi_state_state;
+    recursive_state<id_type, (flags & recursive_bit) != 0> _recursive_state;
+    // Begins at row 1 of the tables that belong to start state state_.
+    lookup_state (const internals &internals_, const bool bol_,
+        const id_type state_) :
+        _lookup (&internals_._lookup[state_]->front ()),
+        _dfa_alphabet (internals_._dfa_alphabet[state_]),
+        _dfa (&internals_._dfa[state_]->front ()),
+        _ptr (_dfa + _dfa_alphabet),
+        _end_state (*_ptr != 0),
+        _id (*(_ptr + id_index)),
+        _uid (*(_ptr + user_id_index)),
+        _bol_state (bol_),
+        _eol_state (),
+        _multi_state_state (state_),
+        _recursive_state (_ptr)
+    {
+    }
+
+    void reset_recursive (const false_ &)
+    {
+        // Do nothing
+    }
+    // Refreshes the pop flag and push DFA from the current row.
+    void reset_recursive (const true_ &)
+    {
+        _recursive_state._pop = (*_ptr & pop_dfa_bit) != 0;
+        _recursive_state._push_dfa = *(_ptr + push_dfa_index);
+    }
+
+    void bol_start_state (const false_ &)
+    {
+        // Do nothing
+    }
+    // When at BOL, moves to the row named by *_dfa if that entry is set.
+    void bol_start_state (const true_ &)
+    {
+        if (_bol_state._bol)
+        {
+            const id_type state_ = *_dfa;
+
+            if (state_)
+            {
+                _ptr = &_dfa[state_ * _dfa_alphabet];
+            }
+        }
+    }
+
+    template<typename char_type>
+    bool eol (const char_type, const false_ &)
+    {
+        return false;
+    }
+    // Follows the row's EOL transition, if any, when curr_ is '\n'.
+    template<typename char_type>
+    bool eol (const char_type curr_, const true_ &)
+    {
+        bool ret_ = false;
+
+        _eol_state._EOL_state = _ptr[eol_index];
+        ret_ = _eol_state._EOL_state && curr_ == '\n';
+
+        if (ret_)
+        {
+            _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet];
+        }
+
+        return ret_;
+    }
+    // Uncompressed step: a single lookup; returns 0 if there is no move.
+    template<typename char_type>
+    id_type next_char (const char_type prev_char_, const false_ &)
+    {
+        const id_type state_= _ptr[_lookup
+            [static_cast<index_type>(prev_char_)]];
+
+        if (state_ != 0)
+        {
+            _ptr = &_dfa[state_ * _dfa_alphabet];
+        }
+
+        return state_;
+    }
+    // Compressed step: up to three bytes of prev_char_, highest first.
+    template<typename char_type>
+    id_type next_char (const char_type prev_char_, const true_ &)
+    {
+        const std::size_t bytes_ = sizeof (char_type) < 3 ?
+            sizeof (char_type) : 3;
+        const std::size_t shift_[] = {0, 8, 16};
+        id_type state_= 0;
+
+        for (std::size_t i_ = 0; i_ < bytes_; ++i_)
+        {
+            state_ = _ptr[_lookup[static_cast<unsigned char>((prev_char_ >>
+                shift_[bytes_ - 1 - i_]) & 0xff)]];
+
+            if (state_ == 0)
+            {
+                break;
+            }
+
+            _ptr = &_dfa[state_ * _dfa_alphabet];
+        }
+
+        return state_;
+    }
+
+    template<typename char_type>
+    void bol (const char_type, const false_ &)
+    {
+        // Do nothing
+    }
+    // Records whether the character just consumed was '\n'.
+    template<typename char_type>
+    void bol (const char_type prev_char_, const true_ &)
+    {
+        _bol_state._bol = prev_char_ == '\n';
+    }
+
+    void eol (const id_type, const false_ &)
+    {
+        // Do nothing
+    }
+    // Overwrites the stored EOL state with err_val_.
+    void eol (const id_type err_val_, const true_ &)
+    {
+        _eol_state._EOL_state = err_val_;
+    }
+
+    void reset_start_state (const false_ &)
+    {
+        // Do nothing
+    }
+    // Reads the next start state from the current row.
+    void reset_start_state (const true_ &)
+    {
+        _multi_state_state._start_state = *(_ptr + next_dfa_index);
+    }
+
+    void reset_end_bol (const false_ &)
+    {
+        // Do nothing
+    }
+    // Copies _bol into _end_bol.
+    void reset_end_bol (const true_ &)
+    {
+        _bol_state._end_bol = _bol_state._bol;
+    }
+    // If the current row is an end state, records its ids and curr_.
+    template<typename iter_type>
+    void end_state (iter_type &end_token_, iter_type &curr_)
+    {
+        if (*_ptr)
+        {
+            _end_state = true;
+            reset_end_bol (bool_<(flags & bol_bit) != 0> ());
+            _id = *(_ptr + id_index);
+            _uid = *(_ptr + user_id_index);
+            reset_recursive (bool_<(flags & recursive_bit) != 0> ());
+            reset_start_state (bool_<(flags & multi_state_bit) != 0> ());
+            end_token_ = curr_;
+        }
+    }
+
+    template<typename iter_type, typename char_type>
+    void check_eol (iter_type &, iter_type &, const id_type,
+        const char_type, const false_ &)
+    {
+        // Do nothing
+    }
+    // With curr_ at eoi_, follows the current row's EOL transition, if any.
+    template<typename iter_type, typename char_type>
+    void check_eol (iter_type &end_token_, iter_type &curr_,
+        const id_type npos, const char_type eoi_, const true_ &)
+    {
+        if (_eol_state._EOL_state != npos && curr_ == eoi_)
+        {
+            _eol_state._EOL_state = _ptr[eol_index];
+
+            if (_eol_state._EOL_state)
+            {
+                _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet];
+                end_state (end_token_, curr_);
+            }
+        }
+    }
+
+    template<typename results>
+    void pop (results &, const false_ &)
+    {
+        // Nothing to do
+    }
+    // Pops the saved start state, or pushes _push_dfa together with _id.
+    template<typename results>
+    void pop (results &results_, const true_ &)
+    {
+        if (_recursive_state._pop)
+        {
+            _multi_state_state._start_state = results_.stack.top ().first;
+            results_.stack.pop ();
+        }
+        else if (_recursive_state._push_dfa != results::npos ())
+        {
+            results_.stack.push (typename results::id_type_pair
+                (_recursive_state._push_dfa, _id));
+        }
+    }
+
+    template<typename results>
+    bool id_eoi (const id_type eoi_, const results &, const false_ &)
+    {
+        return _id == eoi_;
+    }
+    // Also true when popping and the top stack entry's second is eoi_.
+    template<typename results>
+    bool id_eoi (const id_type eoi_, const results &results_, const true_ &)
+    {
+        return _id == eoi_ || (_recursive_state._pop &&
+            !results_.stack.empty () && results_.stack.top ().second == eoi_);
+    }
+
+    void start_state (id_type &, const false_ &)
+    {
+        // Do nothing
+    }
+
+    void start_state (id_type &start_state_, const true_ &)
+    {
+        start_state_ = _multi_state_state._start_state;
+    }
+
+    void bol (bool &, const false_ &)
+    {
+        // Do nothing
+    }
+
+    void bol (bool &end_bol_, const true_ &)
+    {
+        end_bol_ = _bol_state._end_bol;
+    }
+};
+
+template<typename results>
+void inc_end (results &, const false_ &)
+{
+    // Advancing is disabled: the end iterator stays where it is.
+}
+// Advancing is enabled: steps the end iterator over one character.
+template<typename results>
+void inc_end (results &match_, const true_ &)
+{
+    ++match_.end;
+}
+// Scans from results_.end for the longest match and stores it in results_.
+template<typename iter_type, std::size_t flags, typename id_type,
+    typename results, bool compressed, bool recursive>
+void next (const basic_state_machine<typename std::iterator_traits
+    <iter_type>::value_type, id_type> &sm_,
+    results &results_, const bool_<compressed> &compressed_,
+    const bool_<recursive> &recursive_)
+{
+    const basic_internals<id_type> &internals_ = sm_.data ();
+    typename results::iter_type end_token_ = results_.end;
+    // skip: re-entered after matching a token whose id is sm_.skip ().
+skip:
+    typename results::iter_type curr_ = results_.end;
+
+    results_.start = curr_;
+    // again: re-entered when id_eoi () holds; results_.start is kept.
+again:
+    if (curr_ == results_.eoi)
+    {
+        results_.id = internals_._eoi;
+        results_.user_id = results::npos ();
+        return;
+    }
+    // A fresh traversal state for the current start state and BOL flag.
+    lookup_state<id_type, typename results::index_type, flags> lu_state_
+        (internals_, results_.bol, results_.state);
+    lu_state_.bol_start_state (bool_<(flags & bol_bit) != 0> ());
+
+    while (curr_ != results_.eoi)
+    {
+        if (!lu_state_.eol (*curr_, bool_<(flags & eol_bit) != 0> ()))
+        {
+            const typename results::char_type prev_char_ = *curr_++;
+            const id_type state_ = lu_state_.next_char (prev_char_,
+                compressed_);
+
+            lu_state_.bol (prev_char_, bool_<(flags & bol_bit) != 0> ());
+
+            if (state_ == 0)
+            {
+                lu_state_.eol (results::npos (),
+                    bool_<(flags & eol_bit) != 0> ());
+                break;
+            }
+        }
+
+        lu_state_.end_state (end_token_, curr_);
+    }
+
+    lu_state_.check_eol (end_token_, curr_, results::npos (), results_.eoi,
+        bool_<(flags & eol_bit) != 0> ());
+
+    if (lu_state_._end_state)
+    {
+        // Return longest match
+        lu_state_.pop (results_, recursive_);
+
+        lu_state_.start_state (results_.state,
+            bool_<(flags & multi_state_bit) != 0> ());
+        lu_state_.bol (results_.bol, bool_<(flags & bol_bit) != 0> ());
+        results_.end = end_token_;
+
+        if (lu_state_._id == sm_.skip ()) goto skip;
+
+        if (lu_state_.id_eoi (internals_._eoi, results_, recursive_))
+        {
+            curr_ = end_token_;
+            goto again;
+        }
+    }
+    else
+    {
+        results_.end = end_token_;
+        results_.bol = *results_.end == '\n';
+        results_.start = results_.end;
+        // No match causes char to be skipped
+        inc_end (results_, bool_<(flags & advance_bit) != 0> ());
+        lu_state_._id = results::npos ();
+        lu_state_._uid = results::npos ();
+    }
+
+    results_.id = lu_state_._id;
+    results_.user_id = lu_state_._uid;
+}
+}
+
+template<typename iter_type, typename id_type, std::size_t flags>
+void lookup (const basic_state_machine<typename std::iterator_traits
+    <iter_type>::value_type, id_type> &sm_,
+    match_results<iter_type, id_type, flags> &results_)
+{
+    typedef typename std::iterator_traits<iter_type>::value_type char_type;
+
+    // If this asserts, you have either not defined all the correct flags,
+    // or you should be using recursive_match_results, not match_results.
+    assert ((sm_.data ()._features & flags) == sm_.data ()._features);
+    detail::next<iter_type, flags, id_type> (sm_, results_,
+        bool_<(sizeof (char_type) > 1)> (), false_ ());
+}
+
+template<typename iter_type, typename id_type, std::size_t flags>
+void lookup (const basic_state_machine<typename std::iterator_traits
+    <iter_type>::value_type, id_type> &sm_,
+    recursive_match_results<iter_type, id_type, flags> &results_)
+{
+    typedef typename std::iterator_traits<iter_type>::value_type char_type;
+    // If this asserts, you have not defined all the correct flags
+    assert ((sm_.data ()._features & flags) == sm_.data ()._features);
+    detail::next<iter_type, flags | recursive_bit, id_type> (sm_, results_,
+        bool_<(sizeof (char_type) > 1)> (), true_ ());
+}
+}
+
+#endif
diff --git a/inc/lexertl/match_results.hpp b/inc/lexertl/match_results.hpp
new file mode 100644
index 0000000..7a21b9a
--- /dev/null
+++ b/inc/lexertl/match_results.hpp
@@ -0,0 +1,150 @@
+// match_results.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_MATCH_RESULTS_HPP
+#define LEXERTL_MATCH_RESULTS_HPP
+
+#include "char_traits.hpp"
+#include "enums.hpp"
+#include <iterator>
+#include <stack>
+#include <string>
+
+namespace lexertl
+{
+// State shared between lookup () calls: the input range plus the last token.
+template<typename iter, typename id_type = std::size_t,
+    std::size_t flags = bol_bit | eol_bit | skip_bit | again_bit |
+        multi_state_bit | advance_bit>
+struct match_results
+{
+    typedef iter iter_type;
+    typedef typename std::iterator_traits<iter_type>::value_type char_type;
+    typedef typename basic_char_traits<char_type>::index_type index_type;
+    typedef std::basic_string<char_type> string;
+
+    id_type id; // Id of the last token.
+    id_type user_id; // User id of the last token.
+    iter_type start; // Start of the last token.
+    iter_type end; // End of the last token; the next scan begins here.
+    iter_type eoi; // End of the input.
+    bool bol; // Beginning of line flag carried between scans.
+    id_type state; // Start state used for the next scan.
+
+    // All three iterators are value initialised.
+    match_results () :
+        id (0), user_id (npos ()),
+        start (), end (), eoi (),
+        bol (true), state (0)
+    {
+    }
+
+    // Sets up a scan of [start_, end_), beginning at start_.
+    match_results (const iter_type &start_, const iter_type &end_) :
+        id (0), user_id (npos ()),
+        start (start_), end (start_), eoi (end_),
+        bol (true), state (0)
+    {
+    }
+
+    virtual ~match_results ()
+    {
+    }
+
+    // The text of the last token: [start, end).
+    string str () const
+    {
+        return string (start, end);
+    }
+
+    // Moves start and end to eoi and restores the initial flag values.
+    virtual void clear ()
+    {
+        start = eoi;
+        end = eoi;
+        id = 0;
+        user_id = npos ();
+        state = 0;
+        bol = true;
+    }
+
+    // Re-targets the object at [start_, end_), as the constructor does.
+    virtual void reset (const iter_type &start_, const iter_type &end_)
+    {
+        start = start_;
+        end = start_;
+        eoi = end_;
+        id = 0;
+        user_id = npos ();
+        state = 0;
+        bol = true;
+    }
+
+    // Marker for "no id": every bit set.
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~0);
+    }
+
+    // Every bit set except the lowest.
+    static id_type skip ()
+    {
+        return static_cast<id_type>(~1);
+    }
+};
+
+// match_results plus the stack needed when matching recursive rules.
+template<typename iter, typename id_type = std::size_t,
+    std::size_t flags = bol_bit | eol_bit | skip_bit | again_bit |
+        multi_state_bit | recursive_bit | advance_bit>
+struct recursive_match_results : public match_results<iter, id_type, flags>
+{
+    typedef std::pair<id_type, id_type> id_type_pair;
+    // Pairs pushed and popped by lookup_state::pop ().
+    std::stack<id_type_pair> stack;
+
+    recursive_match_results () :
+        match_results<iter, id_type, flags> ()
+    {
+    }
+
+    recursive_match_results (const iter &start_, const iter &end_) :
+        match_results<iter, id_type, flags> (start_, end_)
+    {
+    }
+
+    virtual ~recursive_match_results ()
+    {
+    }
+
+    // Clears the base class state, then empties the stack.
+    virtual void clear ()
+    {
+        match_results<iter, id_type, flags>::clear ();
+        for (; !stack.empty (); stack.pop ()) {}
+    }
+
+    // Resets the base class state, then empties the stack.
+    virtual void reset (const iter &start_, const iter &end_)
+    {
+        match_results<iter, id_type, flags>::reset (start_, end_);
+        for (; !stack.empty (); stack.pop ()) {}
+    }
+};
+// Ready-made result types for std::string and C string input.
+typedef match_results<std::string::const_iterator> smatch;
+typedef match_results<const char *> cmatch;
+typedef match_results<std::wstring::const_iterator> wsmatch;
+typedef match_results<const wchar_t *> wcmatch;
+// The same four inputs, with recursive_match_results.
+typedef recursive_match_results<std::string::const_iterator>
+    srmatch;
+typedef recursive_match_results<const char *> crmatch;
+typedef recursive_match_results<std::wstring::const_iterator>
+    wsrmatch;
+typedef recursive_match_results<const wchar_t *> wcrmatch;
+}
+
+#endif
diff --git a/inc/lexertl/memory_file.hpp b/inc/lexertl/memory_file.hpp
new file mode 100644
index 0000000..2d87b3d
--- /dev/null
+++ b/inc/lexertl/memory_file.hpp
@@ -0,0 +1,112 @@
+// memory_file.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+// Inspired by http://en.wikibooks.org/wiki/Optimizing_C%2B%2B/General_optimization_techniques/Input/Output#Memory-mapped_file
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_MEMORY_FILE_H
+#define LEXERTL_MEMORY_FILE_H
+
+#ifdef __unix__
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#elif defined _WIN32
+#include <windows.h>
+#endif
+
+// Only files small enough to fit into memory are supported.
+namespace lexertl
+{
+// Read-only memory mapping of a whole file. data () and size () stay 0 if
+// the file cannot be opened or mapped; size () is the file size in bytes.
+template<typename CharT>
+class basic_memory_file
+{
+public:
+    basic_memory_file (const char *pathname_) :
+        _data (0),
+        _size (0)
+    {
+#ifdef __unix__
+        _fh = ::open (pathname_, O_RDONLY);
+
+        if (_fh > -1)
+        {
+            struct stat sbuf_;
+
+            if (::fstat (_fh, &sbuf_) > -1)
+            {
+                _data = static_cast<const CharT *>
+                    (::mmap (0, sbuf_.st_size, PROT_READ, MAP_SHARED, _fh, 0));
+
+                // mmap () signals failure with MAP_FAILED, not with 0.
+                if (_data == MAP_FAILED) _data = 0;
+                else _size = sbuf_.st_size;
+            }
+        }
+#elif defined _WIN32
+        _fh = ::CreateFileA (pathname_, GENERIC_READ, FILE_SHARE_READ, 0,
+            OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
+        _fmh = 0;
+
+        if (_fh != INVALID_HANDLE_VALUE)
+        {
+            _fmh = ::CreateFileMapping (_fh, 0, PAGE_READONLY, 0, 0, 0);
+
+            if (_fmh != 0)
+            {
+                _data = static_cast<CharT *>(::MapViewOfFile
+                    (_fmh, FILE_MAP_READ, 0, 0, 0));
+
+                if (_data) _size = ::GetFileSize(_fh, 0);
+            }
+        }
+#endif
+    }
+
+    ~basic_memory_file ()
+    {
+        // Release only what the constructor managed to acquire.
+#if defined(__unix__)
+        if (_data) ::munmap(const_cast<CharT *>(_data), _size);
+        if (_fh > -1) ::close(_fh);
+#elif defined(_WIN32)
+        if (_data) ::UnmapViewOfFile(_data);
+        if (_fmh) ::CloseHandle(_fmh);
+        if (_fh != INVALID_HANDLE_VALUE) ::CloseHandle(_fh);
+#endif
+    }
+
+    const CharT *data () const
+    {
+        return _data;
+    }
+
+    std::size_t size () const
+    {
+        return _size;
+    }
+
+private:
+    const CharT *_data;
+    std::size_t _size;
+#ifdef __unix__
+    int _fh;
+#elif defined _WIN32
+    HANDLE _fh;
+    HANDLE _fmh;
+#else
+    #error Only Posix or Windows are supported.
+#endif
+    basic_memory_file (const basic_memory_file &); // No copy construction.
+    basic_memory_file &operator = (const basic_memory_file &); // No assignment.
+};
+
+typedef basic_memory_file<char> memory_file;
+typedef basic_memory_file<wchar_t> wmemory_file;
+}
+
+#endif
diff --git a/inc/lexertl/old/fast_filebuf.hpp b/inc/lexertl/old/fast_filebuf.hpp
new file mode 100644
index 0000000..f9dd3a9
--- /dev/null
+++ b/inc/lexertl/old/fast_filebuf.hpp
@@ -0,0 +1,45 @@
+// Quick hack...
+// If you find this really is faster than using std::ifstream, let me know
+// as I can always spend some more time to improve it.
+
+namespace lexertl
+{
+template<typename CharT, class Traits>
+class basic_fast_filebuf : public std::basic_streambuf<CharT, Traits>
+{
+public:
+    basic_fast_filebuf (const char *filename_) :
+        _fp (::fopen(filename_, "r")) // 0 if the file cannot be opened.
+    {
+    }
+
+    virtual ~basic_fast_filebuf()
+    {
+        // fclose (0) is undefined behaviour, so only close an open file.
+        if (_fp) ::fclose(_fp);
+    }
+
+protected:
+    FILE *_fp;
+
+    // Reads up to count_ units straight from the FILE; 0 if none is open.
+    virtual std::streamsize xsgetn (CharT *ptr_, std::streamsize count_)
+    {
+        if (!_fp || count_ <= 0) return 0; // No open file or empty request.
+        return ::fread (ptr_, sizeof(CharT),
+            static_cast<std::size_t>(count_), _fp);
+    }
+};
+// Narrow and wide character versions.
+typedef basic_fast_filebuf<char, std::char_traits<char> > fast_filebuf;
+typedef basic_fast_filebuf<wchar_t, std::char_traits<wchar_t> > wfast_filebuf;
+}
+
+// Usage:
+// lexertl::rules rules_;
+// lexertl::state_machine state_machine_;
+// lexertl::fast_filebuf buf ("Unicode/PropList.txt");
+// std::istream if_(&buf);
+// lexertl::stream_shared_iterator iter_ (if_);
+// lexertl::stream_shared_iterator end_;
+// lexertl::match_results<lexertl::stream_shared_iterator>
+//     results_(iter_, end_);
diff --git a/inc/lexertl/old/string_token.hpp b/inc/lexertl/old/string_token.hpp
new file mode 100644
index 0000000..eb75f08
--- /dev/null
+++ b/inc/lexertl/old/string_token.hpp
@@ -0,0 +1,561 @@
+// string_token.hpp
+// Copyright (c) 2005-2010 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_STRING_TOKEN_HPP
+#define LEXERTL_STRING_TOKEN_HPP
+
+#include "../char_traits.hpp"
+#include <iostream>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace lexertl
+{
+template<typename char_type>
+struct basic_string_token
+{
+    typedef std::basic_string<char_type> string;
+
+    bool _negated;
+    string _chars;
+
+    // Starts out not negated and with no characters listed.
+    basic_string_token () : _negated (false), _chars ()
+    {
+    }
+
+    // Takes a copy of chars_ together with its negation flag.
+    basic_string_token (const bool negated_, const string &chars_) :
+        _negated (negated_), _chars (chars_)
+    {
+    }
+
+    void remove_duplicates ()
+    {
+        if (!_chars.empty ())
+        {
+            // Optimisation for very large charsets: sorting via pointers is
+            // much quicker than via iterators. &_chars[0] is used because
+            // the array returned by c_str () must not be written to.
+            char_type *start_ = &_chars[0];
+            std::sort (start_, start_ + _chars.size ());
+        }
+        _chars.erase (std::unique (_chars.begin (), _chars.end ()),
+            _chars.end ());
+    }
+
+    // Switches to the complement when that is the shorter list.
+    void normalise ()
+    {
+        const std::size_t limit_ = sizeof (char_type) == 1 ?
+            num_chars : num_wchar_ts;
+        const std::size_t count_ = _chars.length ();
+
+        if (count_ == limit_)
+        {
+            // Every character is listed, so the complement is empty.
+            _chars.clear ();
+            _negated = !_negated;
+        }
+        else if (count_ > limit_ / 2) negate (); // The complement is shorter.
+    }
+
+    // Replaces _chars with its complement and flips _negated.
+    // NOTE(review): the scan assumes _chars is sorted and free of
+    // duplicates - confirm that callers guarantee this.
+    void negate ()
+    {
+        const std::size_t max_chars_ = sizeof (char_type) == 1 ?
+            num_chars : num_wchar_ts;
+        char_type curr_char_ = std::numeric_limits<char_type>::min ();
+        string temp_;
+        const char_type *curr_ = _chars.c_str ();
+        const char_type *chars_end_ = curr_ + _chars.size ();
+
+        _negated = !_negated;
+        temp_.resize (max_chars_ - _chars.size ());
+        char_type *ptr_ = const_cast<char_type *> (temp_.c_str ());
+        std::size_t i_ = 0;
+        // Emit every value below each listed character, then skip over it.
+        while (curr_ < chars_end_)
+        {
+            while (*curr_ > curr_char_)
+            {
+                *ptr_ = curr_char_;
+                ++ptr_;
+                ++curr_char_;
+                ++i_;
+            }
+            ++curr_char_;
+            ++curr_;
+            ++i_;
+        }
+        // Emit the values that lie above the last listed character.
+        for (; i_ < max_chars_; ++i_)
+        {
+            *ptr_ = curr_char_;
+            ++ptr_;
+            ++curr_char_;
+        }
+        _chars = temp_;
+    }
+
+    bool operator < (const basic_string_token &rhs_) const
+    {
+        if (_negated != rhs_._negated) return rhs_._negated; // false first
+        return _chars < rhs_._chars;
+    }
+
+    bool operator == (const basic_string_token &rhs_) const
+    {
+        return !(_negated != rhs_._negated || _chars != rhs_._chars);
+    }
+
+    bool empty () const
+    {
+        return !_negated && _chars.empty (); // Not negated, nothing listed.
+    }
+
+    bool any () const
+    {
+        return _negated && _chars.empty (); // Negated with nothing listed.
+    }
+
+    void clear ()
+    {
+        _chars.clear ();
+        _negated = false; // Same state as a default constructed token.
+    }
+
+    void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        const bool lhs_any_ = any ();
+        const bool rhs_any_ = rhs_.any ();
+        // "Same type": both are any (), or neither is and the negation
+        // flags agree.
+        const bool same_ = lhs_any_ == rhs_any_ &&
+            (lhs_any_ || _negated == rhs_._negated);
+
+        if (same_) intersect_same_types (rhs_, overlap_);
+        else intersect_diff_types (rhs_, overlap_);
+    }
+
+    void merge (const basic_string_token &rhs_,
+        basic_string_token &merged_) const
+    {
+        const bool lhs_any_ = any ();
+        const bool rhs_any_ = rhs_.any ();
+        // "Same type": both are any (), or neither is and the negation
+        // flags agree.
+        const bool same_ = lhs_any_ == rhs_any_ &&
+            (lhs_any_ || _negated == rhs_._negated);
+
+        if (same_) merge_same_types (rhs_, merged_);
+        else merge_diff_types (rhs_, merged_);
+    }
+
+    // Returns ch_ as text that is safe to print: control characters, quotes
+    // and backslash are escaped, anything else is copied unchanged.
+    static string escape_char (const char_type ch_)
+    {
+        string out_;
+        // Second character of a two character escape; 0 if ch_ has none.
+        char_type simple_ = 0;
+
+        switch (ch_)
+        {
+            case '\0':
+                simple_ = '0';
+                break;
+            case '\a':
+                simple_ = 'a';
+                break;
+            case '\b':
+                simple_ = 'b';
+                break;
+            case '\f':
+                simple_ = 'f';
+                break;
+            case '\n':
+                simple_ = 'n';
+                break;
+            case '\r':
+                simple_ = 'r';
+                break;
+            case '\t':
+                simple_ = 't';
+                break;
+            case '\v':
+                simple_ = 'v';
+                break;
+            case '\\':
+                simple_ = '\\';
+                break;
+            case '"':
+                simple_ = '"';
+                break;
+            case '\'':
+                simple_ = '\'';
+                break;
+            default:
+                break;
+        }
+
+        if (simple_ != 0)
+        {
+            out_ += '\\';
+            out_ += simple_;
+        }
+        else if (ch_ == 27)
+        {
+            // Escape (27) has no letter form, so it is written as \x1b.
+            out_ += '\\';
+            out_ += 'x';
+            out_ += '1';
+            out_ += 'b';
+        }
+        else if (ch_ < 32)
+        {
+            // Any other value below 32 is written as \x followed by its
+            // value in hex.
+            std::basic_stringstream<char_type> ss_;
+
+            out_ += '\\';
+            out_ += 'x';
+            ss_ << std::hex <<
+                static_cast<std::size_t> (ch_);
+            out_ += ss_.str ();
+        }
+        else
+        {
+            out_ += ch_;
+        }
+
+        return out_;
+    }
+
+private:
+    // Intersects two tokens of the same kind, modifying both operands.  If
+    // *this is 'any', both operands are cleared and overlap_ is only marked
+    // negated.  Otherwise the characters common to *this and rhs_ are
+    // appended to overlap_ and erased from each operand.
+    void intersect_same_types (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (any ())
+        {
+            clear ();
+            overlap_._negated = true;
+            rhs_.clear ();
+        }
+        else
+        {
+            typename string::iterator iter_ = _chars.begin ();
+            typename string::iterator end_ = _chars.end ();
+            typename string::iterator rhs_iter_ = rhs_._chars.begin ();
+            typename string::iterator rhs_end_ = rhs_._chars.end ();
+
+            overlap_._negated = _negated;
+
+            // Lock-step walk; presumably both strings are kept sorted.
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    ++iter_;
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    overlap_._chars += *iter_;
+                    iter_ = _chars.erase (iter_);
+                    end_ = _chars.end ();
+                    rhs_iter_ = rhs_._chars.erase (rhs_iter_);
+                    rhs_end_ = rhs_._chars.end ();
+                }
+            }
+
+            if (_negated)
+            {
+                // Both operands are negated: fold what is left of either
+                // side into overlap_ (src, dest), then turn the operands
+                // into plain sets holding each other's left-over characters.
+                merge (_chars, overlap_._chars);
+                merge (rhs_._chars, overlap_._chars);
+                _negated = false;
+                rhs_._negated = false;
+                std::swap (_chars, rhs_._chars);
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+            else if (!overlap_._chars.empty ())
+            {
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+        }
+    }
+
+    // Intersection of two tokens of different kinds: picks the helper that
+    // matches the kind of *this ('any', negated set or plain set).
+    void intersect_diff_types (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (any ())
+        {
+            intersect_any (rhs_, overlap_);
+            return;
+        }
+
+        if (_negated)
+            intersect_negated (rhs_, overlap_);
+        else
+            intersect_charset (rhs_, overlap_);
+    }
+
+    // Used when *this is 'any': rhs_ performs the intersection against *this
+    // with the helper matching rhs_'s own _negated flag.
+    void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        if (!rhs_._negated)
+        {
+            rhs_.intersect_charset (*this, overlap_);
+            return;
+        }
+        rhs_.intersect_negated (*this, overlap_);
+    }
+
+    // Intersects negated *this with rhs_.  Against 'any', overlap_ becomes a
+    // negated copy, rhs_ a plain copy of _chars, and *this is cleared.
+    void intersect_negated (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (!rhs_.any ())
+        {
+            rhs_.intersect_charset (*this, overlap_);
+            return;
+        }
+        overlap_._negated = true;
+        overlap_._chars = _chars;
+        rhs_._negated = false;
+        rhs_._chars = _chars;
+        clear ();
+    }
+
+    // Intersects plain set *this with rhs_, which is 'any' or negated.  The
+    // characters of *this that rhs_ does not exclude move into overlap_;
+    // they are erased from *this and added to the list rhs_ excludes.
+    void intersect_charset (basic_string_token &rhs_,
+        basic_string_token &overlap_)
+    {
+        if (rhs_.any ())
+        {
+            overlap_._chars = _chars;
+            rhs_._negated = true;
+            rhs_._chars = _chars;
+            clear ();
+        }
+        else // rhs_._negated == true
+        {
+            typename string::iterator iter_ = _chars.begin ();
+            typename string::iterator end_ = _chars.end ();
+            typename string::iterator rhs_iter_ = rhs_._chars.begin ();
+            typename string::iterator rhs_end_ = rhs_._chars.end ();
+
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    overlap_._chars += *iter_;
+                    rhs_iter_ = rhs_._chars.insert (rhs_iter_, *iter_);
+                    ++rhs_iter_;
+                    rhs_end_ = rhs_._chars.end ();
+                    iter_ = _chars.erase (iter_);
+                    end_ = _chars.end ();
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    ++iter_;
+                    ++rhs_iter_;
+                }
+            }
+
+            if (iter_ != end_)
+            {
+                // nothing bigger in rhs_ than iter_,
+                // so safe to merge (src, dest) using std lib.
+                string temp_ (iter_, end_);
+                merge (temp_, overlap_._chars);
+                _chars.erase (iter_, end_);
+            }
+            if (!overlap_._chars.empty ())
+            {
+                merge (overlap_._chars, rhs_._chars);
+                // possible duplicates, so check for any and erase.
+                rhs_._chars.erase (std::unique (rhs_._chars.begin (),
+                    rhs_._chars.end ()), rhs_._chars.end ());
+                normalise ();
+                overlap_.normalise ();
+                rhs_.normalise ();
+            }
+        }
+    }
+
+    // Replaces dest_ with the result of std::merge over src_ and dest_.
+    void merge (string &src_, string &dest_)
+    {
+        string combined_ (src_.size () + dest_.size (), 0);
+        std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (),
+            combined_.begin ());
+        dest_.swap (combined_);
+    }
+
+    // Merges two tokens of the same kind into merged_ (presumably empty on
+    // entry - TODO confirm).  'any' stays 'any'; the other two cases are
+    // handled by the lock-step walks below, followed by normalise ().
+    void merge_same_types (const basic_string_token &rhs_,
+        basic_string_token &merged_) const
+    {
+        if (any ())
+        {
+            merged_._negated = true;
+        }
+        else if (_negated)
+        {
+            typename string::const_iterator iter_ = _chars.begin ();
+            typename string::const_iterator end_ = _chars.end ();
+            typename string::const_iterator rhs_iter_ = rhs_._chars.begin ();
+            typename string::const_iterator rhs_end_ = rhs_._chars.end ();
+
+            merged_._negated = _negated;
+            // Negated sets: keep only the characters listed by both operands.
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    ++iter_;
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    merged_._chars += *iter_;
+                    ++iter_;
+                    ++rhs_iter_;
+                }
+            }
+            merged_.normalise ();
+        }
+        else
+        {
+            typename string::const_iterator iter_ = _chars.begin ();
+            typename string::const_iterator end_ = _chars.end ();
+            typename string::const_iterator rhs_iter_ = rhs_._chars.begin ();
+            typename string::const_iterator rhs_end_ = rhs_._chars.end ();
+            // Plain sets: collect the characters of either operand once.
+            while (iter_ != end_ && rhs_iter_ != rhs_end_)
+            {
+                if (*iter_ < *rhs_iter_)
+                {
+                    merged_._chars += *iter_;
+                    ++iter_;
+                }
+                else if (*iter_ > *rhs_iter_)
+                {
+                    merged_._chars += *rhs_iter_;
+                    ++rhs_iter_;
+                }
+                else
+                {
+                    merged_._chars += *iter_;
+                    ++iter_;
+                    ++rhs_iter_;
+                }
+            }
+
+            // Include any trailing chars
+            if (iter_ != end_)
+            {
+                string temp_ (iter_, end_);
+                merged_._chars += temp_;
+            }
+            else if (rhs_iter_ != rhs_end_)
+            {
+                string temp_ (rhs_iter_, rhs_end_);
+                merged_._chars += temp_;
+            }
+
+            merged_.normalise ();
+        }
+    }
+
+    // Union of two tokens of different kinds, stored in merged_.
+    // merge_negated () takes the negated operand first.  A negated *this is
+    // passed second when rhs_ is 'any', so that the result is 'any' rather
+    // than a copy of *this ('any' has no characters to cancel against).
+    void merge_diff_types (const basic_string_token &rhs_,
+        basic_string_token &merged_) const
+    {
+        if (_negated && !rhs_.any ())
+            merge_negated (*this, rhs_, merged_);
+        else
+            merge_negated (rhs_, *this, merged_);
+
+        merged_.normalise ();
+    }
+
+    // Fills merged_ for the union of negated lhs_ with rhs_: the result is
+    // negated and excludes every character of lhs_ that is absent from rhs_.
+    // The lock-step walk compares the two character lists element by element.
+    void merge_negated (const basic_string_token &lhs_,
+        const basic_string_token &rhs_, basic_string_token &merged_) const
+    {
+        typename string::const_iterator neg_ = lhs_._chars.begin ();
+        const typename string::const_iterator neg_end_ = lhs_._chars.end ();
+        typename string::const_iterator pos_ = rhs_._chars.begin ();
+        const typename string::const_iterator pos_end_ = rhs_._chars.end ();
+
+        merged_._negated = true;
+
+        while (neg_ != neg_end_ && pos_ != pos_end_)
+        {
+            if (*neg_ < *pos_)
+            {
+                // Excluded by lhs_ and not supplied by rhs_: stays excluded.
+                merged_._chars += *neg_;
+                ++neg_;
+            }
+            else
+            {
+                // Equal characters cancel out; otherwise only rhs_ advances.
+                if (!(*neg_ > *pos_))
+                {
+                    ++neg_;
+                }
+
+                ++pos_;
+            }
+        }
+
+        // Only interested in any remaining 'negated' chars
+        merged_._chars.append (neg_, neg_end_);
+    }
+};
+}
+
+#endif
diff --git a/inc/lexertl/parser/parser.hpp b/inc/lexertl/parser/parser.hpp
new file mode 100644
index 0000000..06869fa
--- /dev/null
+++ b/inc/lexertl/parser/parser.hpp
@@ -0,0 +1,1076 @@
+// parser.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_PARSER_HPP
+#define LEXERTL_PARSER_HPP
+
+#include <assert.h>
+#include <algorithm>
+#include "../bool.hpp"
+#include "tree/end_node.hpp"
+#include "tree/iteration_node.hpp"
+#include "tree/leaf_node.hpp"
+#include <map>
+#include "../containers/ptr_stack.hpp"
+#include "tokeniser/re_tokeniser.hpp"
+#include "../runtime_error.hpp"
+#include "tree/selection_node.hpp"
+#include "tree/sequence_node.hpp"
+#include "../size_t.hpp"
+#include <vector>
+
+namespace lexertl
+{
+namespace detail
+{
+/*
+    General principles of regex parsing:
+    - Every regex is a sequence of sub-regexes.
+    - Regexes consist of operands and operators
+    - All operators decompose to sequence, selection ('|') and iteration ('*')
+    - Regex tokens are stored on a stack.
+    - When a complete sequence of regex tokens is on the stack it is processed.
+
+Grammar:
+
+<REGEX>      -> <OREXP>
+<OREXP>      -> <SEQUENCE> | <OREXP>'|'<SEQUENCE>
+<SEQUENCE>   -> <SUB>
+<SUB>        -> <EXPRESSION> | <SUB><EXPRESSION>
+<EXPRESSION> -> <REPEAT>
+<REPEAT>     -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE>
+<DUPLICATE>  -> '?' | '??' | '*' | '*?' | '+' | '+?' | '{n[,[m]]}' |
+                '{n[,[m]]}?'
+*/
+
+template<typename rules_char_type, typename sm_traits>
+class basic_parser
+{
+public:
+    enum {char_24_bit = sm_traits::char_24_bit};
+    typedef typename sm_traits::char_type char_type;
+    typedef typename sm_traits::id_type id_type;
+    typedef basic_end_node<id_type> end_node;
+    typedef typename sm_traits::input_char_type input_char_type;
+    typedef basic_string_token<input_char_type> input_string_token;
+    typedef basic_iteration_node<id_type> iteration_node;
+    typedef basic_leaf_node<id_type> leaf_node;
+    typedef basic_re_tokeniser<rules_char_type, input_char_type, id_type>
+        tokeniser;
+    typedef basic_node<id_type> node;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef std::basic_string<rules_char_type> string;
+    typedef basic_string_token<char_type> string_token;
+    typedef std::map<string, const node *> macro_map;
+    typedef basic_selection_node<id_type> selection_node;
+    typedef basic_sequence_node<id_type> sequence_node;
+    typedef std::map<string_token, std::size_t> charset_map;
+    typedef std::pair<string_token, std::size_t> charset_pair;
+    typedef bool_<sm_traits::compressed> compressed;
+
+    // Binds the parser to the caller's locale, node storage, macro table and
+    // charset table, all of which are held by reference, and records eoi_.
+    // The two work stacks are default constructed.
+    basic_parser (const std::locale &locale_,
+        node_ptr_vector &node_ptr_vector_, const macro_map &macro_map_,
+        charset_map &charset_map_, const id_type eoi_) :
+        _locale (locale_), _node_ptr_vector (node_ptr_vector_),
+        _macro_map (macro_map_), _charset_map (charset_map_),
+        _eoi (eoi_),
+        _token_stack (), _tree_node_stack ()
+    {
+    }
+
+    // Parses the regex in [start_, end_) and returns the root of its syntax
+    // tree.  Unless macro_ is set, the tree is the parsed regex followed by
+    // an end_node built from id_, user_id_, next_dfa_, push_dfa_ and pop_dfa_.
+    // nl_id_ is only written when the tokeniser state recorded a newline id.
+    // Throws runtime_error on a syntax error, on an empty rule and, unless
+    // flags_ contains match_zero_len, on a rule that can match zero chars.
+    node *parse (const rules_char_type *start_,
+        const rules_char_type * const end_, const id_type id_,
+        const id_type user_id_, const id_type next_dfa_,
+        const id_type push_dfa_, const bool pop_dfa_,
+        const std::size_t flags_, id_type &nl_id_, const bool seen_bol_,
+        const bool macro_)
+    {
+        node *root_ = 0;
+        state state_ (start_, end_, id_, flags_, _locale, macro_);
+        token *lhs_token_ = 0;
+        std::auto_ptr<token> rhs_token_ (new token);
+        char action_ = 0;
+        // Seed the stack with a default constructed token, read the next one.
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = rhs_token_.release ();
+        rhs_token_.reset (new token);
+        tokeniser::next (_token_stack->top (), state_, rhs_token_.get ());
+        // Precedence driven loop: '<' and '=' push the token, '>' reduces.
+        do
+        {
+            lhs_token_ = _token_stack->top ();
+            action_ = lhs_token_->precedence (rhs_token_->_type);
+
+            switch (action_)
+            {
+                case '<':
+                case '=':
+                    _token_stack->push (static_cast<token *>(0));
+                    _token_stack->top () = rhs_token_.release ();
+                    rhs_token_.reset (new token);
+                    tokeniser::next (_token_stack->top (), state_,
+                        rhs_token_.get ());
+                    break;
+                case '>':
+                    reduce (state_);
+                    break;
+                default:
+                {
+                    std::ostringstream ss_;
+
+                    ss_ << "A syntax error occurred: '" <<
+                        lhs_token_->precedence_string () <<
+                        "' against '" << rhs_token_->precedence_string () <<
+                        "' preceding index " << state_.index () <<
+                        " in rule id " <<
+                        state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                    break;
+                }
+            }
+        } while (!_token_stack->empty ());
+
+        if (_tree_node_stack.empty ())
+        {
+            std::ostringstream ss_;
+            ss_ << "Empty rules are not allowed in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        assert (_tree_node_stack.size () == 1);
+
+        node *lhs_node_ = _tree_node_stack.top ();
+        _tree_node_stack.pop ();
+
+        if (macro_)
+        {
+            // Macros have no end state...
+            root_ = lhs_node_;
+        }
+        else
+        {
+            _node_ptr_vector->push_back (static_cast<end_node *>(0));
+            node *rhs_node_ = new end_node (id_, user_id_, next_dfa_,
+                push_dfa_, pop_dfa_);
+            _node_ptr_vector->back () = rhs_node_;
+            _node_ptr_vector->push_back (static_cast<sequence_node *>(0));
+            _node_ptr_vector->back () = new sequence_node
+                (lhs_node_, rhs_node_);
+            root_ = _node_ptr_vector->back ();
+        }
+
+        if (seen_bol_)
+        {
+            fixup_bol (root_);
+        }
+
+        if (state_._nl_id != static_cast<id_type>(~0))
+        {
+            nl_id_ = state_._nl_id;
+        }
+
+        // An end state among the first positions is reported as an error.
+        if ((flags_ & match_zero_len) == 0)
+        {
+            const typename node::node_vector &firstpos_ = root_->firstpos();
+            typename node::node_vector::const_iterator iter_ =
+                firstpos_.begin ();
+            typename node::node_vector::const_iterator end_ =
+                firstpos_.end ();
+            for (; iter_ != end_; ++iter_)
+            {
+                const node *node_ = *iter_;
+                if (node_->end_state ())
+                {
+                    std::ostringstream ss_;
+                    ss_ << "Rules that match zero characters are not allowed "
+                        "as this can cause an infinite loop in user code. The "
+                        "match_zero_len flag overrides this check. Rule id " <<
+                        state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+            }
+        }
+
+        return root_;
+    }
+
+    static id_type bol_token ()
+    {   // Id given to the leaf node that bol () creates for a BOL token.
+        return static_cast<id_type>(~1);    // every bit set except bit 0
+    }
+
+    static id_type eol_token ()
+    {   // Id given to the leaf node that eol () creates for an EOL token.
+        return static_cast<id_type>(~2);    // every bit set except bit 1
+    }
+
+private:
+    typedef typename input_string_token::range input_range;
+    typedef typename tokeniser::state state;
+    typedef basic_re_token<rules_char_type, input_char_type> token;
+    typedef typename string_token::range range;
+    typedef ptr_vector<string_token> string_token_vector;
+    typedef ptr_stack<token> token_stack;
+    typedef typename node::node_stack tree_node_stack;
+
+    const std::locale &_locale;
+    node_ptr_vector &_node_ptr_vector;
+    const macro_map &_macro_map;
+    charset_map &_charset_map;
+    id_type _eoi;
+    token_stack _token_stack;
+    tree_node_stack _tree_node_stack;
+
+    // Predicate for std::find_if over string_token pointers: true when the
+    // pointee equals the token supplied at construction.
+    struct find_functor
+    {
+        // Pointer to stop warning about cannot create assignment operator.
+        const string_token *_token;
+
+        find_functor (const string_token &token_) : _token (&token_) {}
+
+        bool operator () (const string_token *rhs_)
+        {
+            const string_token &wanted_ = *_token;
+            return wanted_ == *rhs_;
+        }
+    };
+
+    // Moves one handle from the token stack into handle_ and applies the
+    // rule selected by the type of the last token moved (handle_'s top).
+    void reduce (state &state_)
+    {
+        token *lhs_ = 0;
+        token *rhs_ = 0;
+        token_stack handle_;
+        char action_ = 0;
+        // Keep moving tokens while the precedence between neighbours is '='.
+        do
+        {
+            rhs_ = _token_stack->top ();
+            handle_->push (static_cast<token *>(0));
+            _token_stack->pop ();
+            handle_->top () = rhs_;
+
+            if (!_token_stack->empty ())
+            {
+                lhs_ = _token_stack->top ();
+                action_ = lhs_->precedence (rhs_->_type);
+            }
+        } while (!_token_stack->empty () && action_ == '=');
+
+        assert (_token_stack->empty () || action_ == '<');
+
+        switch (rhs_->_type)
+        {
+        case BEGIN:     // finished processing so exit
+            break;
+        case REGEX:     // finished parsing, nothing to do
+            break;
+        case OREXP:
+            orexp (handle_);
+            break;
+        case SEQUENCE:
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (OREXP);
+            break;
+        case SUB:
+            sub (handle_);
+            break;
+        case EXPRESSION:
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (SUB);
+            break;
+        case REPEAT:
+            repeat (handle_);
+            break;
+        case BOL:
+            bol (handle_);
+            break;
+        case EOL:
+            eol (handle_, state_);
+            break;
+        case CHARSET:
+            charset (handle_, compressed ());
+            break;
+        case MACRO:
+            macro (handle_, state_);
+            break;
+        case OPENPAREN:
+            openparen (handle_);
+            break;
+        case OPT:
+        case AOPT:
+            optional (rhs_->_type == OPT);
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (DUP);
+            break;
+        case ZEROORMORE:
+        case AZEROORMORE:
+            zero_or_more (rhs_->_type == ZEROORMORE);
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (DUP);
+            break;
+        case ONEORMORE:
+        case AONEORMORE:
+            one_or_more (rhs_->_type == ONEORMORE);
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (DUP);
+            break;
+        case REPEATN:
+        case AREPEATN:
+            repeatn (rhs_->_type == REPEATN, handle_->top ());
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (DUP);
+            break;
+        default:
+            throw runtime_error
+                ("Internal error in regex_parser::reduce.");
+            break;
+        }
+    }
+
+    // Reduces <OREXP>.  A lone OREXP becomes REGEX.  For OREXP '|' SEQUENCE
+    // the first two tokens are deleted, perform_or () joins the two topmost
+    // tree nodes and a new OREXP is pushed onto the token stack.
+    void orexp (token_stack &handle_)
+    {
+        assert (handle_->top ()->_type == OREXP &&
+            (handle_->size () == 1 || handle_->size () == 3));
+
+        if (handle_->size () == 1)
+        {
+            std::auto_ptr<token> regex_ (new token (REGEX));
+
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = regex_.release ();
+            return;
+        }
+
+        // Discard the OREXP, then the OR; the SEQUENCE stays in handle_.
+        token *spent_ = handle_->top ();
+        handle_->pop ();
+        delete spent_;
+        assert (handle_->top ()->_type == OR);
+        spent_ = handle_->top ();
+        handle_->pop ();
+        delete spent_;
+        assert (handle_->top ()->_type == SEQUENCE);
+        perform_or ();
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (OREXP);
+    }
+
+    // Pops the right operand, then replaces the left operand on top of the
+    // tree node stack with a selection_node joining the two.
+    void perform_or ()
+    {
+        node *right_ = _tree_node_stack.top ();
+        _tree_node_stack.pop ();
+
+        node *left_ = _tree_node_stack.top ();
+
+        _node_ptr_vector->push_back (static_cast<selection_node *>(0));
+        _node_ptr_vector->back () = new selection_node (left_, right_);
+        _tree_node_stack.top () = _node_ptr_vector->back ();
+    }
+
+    // Reduces <SUB>.  A lone SUB becomes SEQUENCE; SUB followed by EXPRESSION
+    // calls sequence () to perform the join and yields a new SUB.
+    void sub (token_stack &handle_)
+    {
+        // The handle always starts with SUB; only its length varies.
+        assert (handle_->top ()->_type == SUB &&
+            (handle_->size () == 1 || handle_->size () == 2));
+
+        if (handle_->size () == 1)
+        {
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (SEQUENCE);
+        }
+        else
+        {
+            token *token_ = handle_->top ();
+            handle_->pop ();
+            delete token_;
+            assert (handle_->top ()->_type == EXPRESSION);
+            sequence ();
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (SUB);
+        }
+    }
+
+    // Reduces <REPEAT>.  On its own it becomes EXPRESSION; otherwise the
+    // REPEAT token is deleted, a DUP is expected beneath it, and a fresh
+    // REPEAT is pushed onto the token stack.
+    void repeat (token_stack &handle_)
+    {
+        assert (handle_->top ()->_type == REPEAT &&
+            handle_->size () >= 1 && handle_->size () <= 3);
+
+        if (handle_->size () == 1)
+        {
+            _token_stack->push (static_cast<token *>(0));
+            _token_stack->top () = new token (EXPRESSION);
+            return;
+        }
+
+        token *spent_ = handle_->top ();
+        handle_->pop ();
+        delete spent_;
+        assert (handle_->top ()->_type == DUP);
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (REPEAT);
+    }
+
+#ifndef NDEBUG
+    void bol (token_stack &handle_)
+#else
+    void bol (token_stack &)
+#endif
+    {   // handle_ is only named when NDEBUG is unset: just the assert reads it.
+        assert (handle_->top ()->_type == BOL &&
+            handle_->size () == 1);
+        // Reduces a BOL token: stores a leaf_node carrying bol_token () in
+        // _node_ptr_vector, pushes it as an operand and emits REPEAT.
+        _node_ptr_vector->push_back (static_cast<leaf_node *>(0));
+        _node_ptr_vector->back () = new leaf_node (bol_token (), true);
+        _tree_node_stack.push (_node_ptr_vector->back ());
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (REPEAT);
+    }
+
+#ifndef NDEBUG
+    void eol (token_stack &handle_, state &state_)
+#else
+    void eol (token_stack &, state &state_)
+#endif
+    {   // handle_ is only named when NDEBUG is unset: just the assert reads it.
+        // Done in two parts for VC6.
+        const string_token nl_ ('\n');
+
+        assert (handle_->top ()->_type == EOL &&
+            handle_->size () == 1);
+        state_._nl_id = lookup (nl_);   // id that lookup () gives to '\n'
+        // Store an eol_token () leaf as an operand and emit REPEAT.
+        _node_ptr_vector->push_back (static_cast<leaf_node *>(0));
+        _node_ptr_vector->back () = new leaf_node (eol_token (), true);
+        _tree_node_stack.push (_node_ptr_vector->back ());
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (REPEAT);
+    }
+
+    // Uncompressed: the CHARSET token becomes a single leaf_node carrying
+    // the id that lookup () returns for its string token; REPEAT is emitted.
+    void charset (token_stack &handle_, const false_ &)
+    {
+        assert (handle_->top ()->_type == CHARSET &&
+            handle_->size () == 1);
+
+        const id_type id_ = lookup (handle_->top ()->_str);
+
+        _node_ptr_vector->push_back (static_cast<leaf_node *>(0));
+        _node_ptr_vector->back () = new leaf_node (id_, true);
+        _tree_node_stack.push (_node_ptr_vector->back ());
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (REPEAT);
+    }
+
+    // Compressed: takes ownership of the CHARSET token, removes it from
+    // handle_ and passes it to create_sequence ().
+    void charset (token_stack &handle_, const true_ &)
+    {
+        assert (handle_->top ()->_type == CHARSET &&
+            handle_->size () == 1);
+
+        std::auto_ptr<token> token_ (handle_->top ());
+        handle_->pop ();
+        create_sequence (token_);
+    }
+
+    // Slice wchar_t into sequence of char: every range of token_'s string
+    // token is cut into per-byte string tokens (data_), which are handed to
+    // push_ranges (); '(' REGEX ')' is then pushed onto the token stack.
+    void create_sequence (std::auto_ptr<token> &token_)
+    {
+        typename token::string_token::range_vector::iterator iter_ =
+            token_->_str._ranges.begin ();
+        typename token::string_token::range_vector::const_iterator end_ =
+            token_->_str._ranges.end ();
+        string_token_vector data_[char_24_bit ? 3 : 2];
+
+        for (; iter_ != end_; ++iter_)
+        {
+            slice_range (*iter_, data_, bool_<char_24_bit> ());
+        }
+        push_ranges (data_, bool_<char_24_bit> ());
+
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (OPENPAREN);
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (REGEX);
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (CLOSEPAREN);
+    }
+
+    // 16 bit unicode.  Splits range_ on its high byte: every high byte (or
+    // run of high bytes) is recorded in data_ together with the low bytes
+    // that it spans.
+    void slice_range (const input_range &range_, string_token_vector data_[2],
+        const false_ &)
+    {
+        const unsigned char from_hi_ = static_cast<unsigned char>
+            ((range_.first >> 8) & 0xff);
+        const unsigned char from_lo_ = static_cast<unsigned char>
+            (range_.first & 0xff);
+        const unsigned char to_hi_ = static_cast<unsigned char>
+            ((range_.second >> 8) & 0xff);
+        const unsigned char to_lo_ = static_cast<unsigned char>
+            (range_.second & 0xff);
+
+        if (from_hi_ == to_hi_)
+        {
+            insert_range (from_hi_, from_hi_, from_lo_, to_lo_, data_);
+            return;
+        }
+        // First (possibly partial) block, whole blocks between, last block.
+        insert_range (from_hi_, from_hi_, from_lo_, 0xff, data_);
+
+        if (to_hi_ > from_hi_ + 1)
+        {
+            insert_range (from_hi_ + 1, to_hi_ - 1, 0, 0xff, data_);
+        }
+
+        insert_range (to_hi_, to_hi_, 0, to_lo_, data_);
+    }
+
+    // 24 bit unicode.  Splits range_ into (high, middle, low byte) range
+    // triples and records them in data_.  When the high bytes differ the
+    // range is cut into up to five pieces: the rest of the first 256
+    // character block, the rest of the first high byte, every complete high
+    // byte in between, the complete blocks of the last high byte and the
+    // partial last block.
+    void slice_range (const input_range &range_, string_token_vector data_[3],
+        const true_ &)
+    {
+        const unsigned char first_msb_ = static_cast<unsigned char>
+            ((range_.first >> 16) & 0xff);
+        const unsigned char first_mid_ = static_cast<unsigned char>
+            ((range_.first >> 8) & 0xff);
+        const unsigned char first_lsb_ = static_cast<unsigned char>
+            (range_.first & 0xff);
+        const unsigned char second_msb_ = static_cast<unsigned char>
+            ((range_.second >> 16) & 0xff);
+        const unsigned char second_mid_ = static_cast<unsigned char>
+            ((range_.second >> 8) & 0xff);
+        const unsigned char second_lsb_ = static_cast<unsigned char>
+            (range_.second & 0xff);
+
+        if (first_msb_ == second_msb_)
+        {
+            string_token_vector data2_[2];
+
+            // Re-use 16 bit slice function
+            slice_range (range_, data2_, false_ ());
+
+            for (std::size_t i_ = 0, size_ = data2_[0]->size ();
+                i_ < size_; ++i_)
+            {
+                insert_range (string_token (first_msb_, first_msb_),
+                    *(*data2_[0])[i_], *(*data2_[1])[i_], data_);
+            }
+        }
+        else
+        {
+            insert_range (first_msb_, first_msb_, first_mid_, first_mid_,
+                first_lsb_, 0xff, data_);
+
+            if (first_mid_ != 0xff)
+            {
+                insert_range (first_msb_, first_msb_, first_mid_ + 1, 0xff,
+                    0, 0xff, data_);
+            }
+
+            // The skipped range is indexed by the high byte, not the middle.
+            if (second_msb_ > first_msb_ + 1)
+            {
+                insert_range (first_msb_ + 1, second_msb_ - 1, 0, 0xff,
+                    0, 0xff, data_);
+            }
+
+            if (second_mid_ != 0)
+            {
+                insert_range (second_msb_, second_msb_, 0, second_mid_ - 1,
+                    0, 0xff, data_);
+                insert_range (second_msb_, second_msb_,
+                    second_mid_, second_mid_, 0, second_lsb_, data_);
+            }
+            else
+            {
+                insert_range (second_msb_, second_msb_, 0, second_mid_,
+                    0, second_lsb_, data_);
+            }
+        }
+    }
+
+    // 16 bit unicode
+    void insert_range (const unsigned char first_, const unsigned char second_,
+        const unsigned char first2_, const unsigned char second2_,
+        string_token_vector data_[2])
+    {
+        // Each pair may arrive in either order; it is stored as (low, high).
+        const bool flip_ = first_ > second_;
+        const bool flip2_ = first2_ > second2_;
+        insert_range (string_token (flip_ ? second_ : first_,
+            flip_ ? first_ : second_), string_token (flip2_ ? second2_ :
+            first2_, flip2_ ? first2_ : second2_), data_);
+    }
+
+    void insert_range (const string_token &token_, const string_token &token2_,
+        string_token_vector data_[2])
+    {
+        // Search the lead tokens for one that find_functor accepts.
+        typename string_token_vector::vector::const_iterator hit_ =
+            std::find_if (data_[0]->begin (), data_[0]->end (),
+            find_functor (token_));
+
+        if (hit_ != data_[0]->end ())
+        {
+            const std::size_t pos_ = hit_ - data_[0]->begin ();
+            (*data_[1])[pos_]->insert (token2_);
+            return;
+        }
+
+        // No such lead token yet: append a new lead/trail pair.
+        data_[0]->push_back (0);
+        data_[0]->back () = new string_token (token_);
+        data_[1]->push_back (0);
+        data_[1]->back () = new string_token (token2_);
+    }
+
+    // 24 bit unicode
+    void insert_range (const unsigned char first_, const unsigned char second_,
+        const unsigned char first2_, const unsigned char second2_,
+        const unsigned char first3_, const unsigned char second3_,
+        string_token_vector data_[3])
+    {
+        // Each pair may arrive in either order; it is stored as (low, high).
+        const bool flip_ = first_ > second_;
+        const bool flip2_ = first2_ > second2_;
+        const bool flip3_ = first3_ > second3_;
+        insert_range (string_token (flip_ ? second_ : first_,
+            flip_ ? first_ : second_), string_token (flip2_ ? second2_ :
+            first2_, flip2_ ? first2_ : second2_), string_token (flip3_ ?
+            second3_ : first3_, flip3_ ? first3_ : second3_), data_);
+    }
+
+    void insert_range (const string_token &token_, const string_token &token2_,
+        const string_token &token3_, string_token_vector data_[3])
+    {
+        typename string_token_vector::vector::const_iterator iter_ =
+            data_[0]->begin ();
+        typename string_token_vector::vector::const_iterator end_ =
+            data_[0]->end ();
+        bool finished_ = false;
+        // Visit every row find_functor matches, not only the first one.
+        do
+        {
+            iter_ = std::find_if (iter_, end_, find_functor (token_));
+            // No (further) match: append the three tokens as a new row.
+            if (iter_ == end_)
+            {
+                data_[0]->push_back (0);
+                data_[0]->back () = new string_token (token_);
+                data_[1]->push_back (0);
+                data_[1]->back () = new string_token (token2_);
+                data_[2]->push_back (0);
+                data_[2]->back () = new string_token (token3_);
+                finished_ = true;
+            }
+            else
+            {
+                const std::size_t index_ = iter_ - data_[0]->begin ();
+                // Merge token3_ only if the row's second token equals token2_.
+                if (*(*data_[1])[index_] == token2_)
+                {
+                    (*data_[2])[index_]->insert (token3_);
+                    finished_ = true;
+                }
+                else
+                {
+                    ++iter_;
+                }
+            }
+        } while (!finished_);
+    }
+
+    // 16 bit unicode
+    void push_ranges (string_token_vector data_[2], const false_ &)
+    {
+        typedef typename string_token_vector::vector::const_iterator iter;
+        iter lead_ = data_[0]->begin ();
+        const iter lead_end_ = data_[0]->end ();
+        iter trail_ = data_[1]->begin ();
+
+        // First pair seeds the expression (dereferenced unconditionally).
+        push_range (*lead_);
+        push_range (*trail_);
+        sequence ();
+
+        // Every further pair is OR-ed onto what has been built so far.
+        for (++lead_, ++trail_; lead_ != lead_end_; ++lead_, ++trail_)
+        {
+            push_range (*lead_);
+            push_range (*trail_);
+            sequence ();
+            perform_or ();
+        }
+    }
+
+    // 24 bit unicode
+    void push_ranges (string_token_vector data_[3], const true_ &)
+    {
+        typedef typename string_token_vector::vector::const_iterator iter;
+        iter lead_ = data_[0]->begin ();
+        const iter lead_end_ = data_[0]->end ();
+        iter mid_ = data_[1]->begin ();
+        iter trail_ = data_[2]->begin ();
+
+        // First triple seeds the expression (dereferenced unconditionally).
+        push_range (*lead_);
+        push_range (*mid_);
+        sequence ();
+        push_range (*trail_);
+        sequence ();
+
+        // Every further triple is OR-ed onto what has been built so far.
+        for (++lead_, ++mid_, ++trail_; lead_ != lead_end_;
+            ++lead_, ++mid_, ++trail_)
+        {
+            push_range (*lead_);
+            push_range (*mid_);
+            sequence ();
+            push_range (*trail_);
+            sequence ();
+            perform_or ();
+        }
+    }
+
+    void push_range (const string_token *token_)
+    {
+        // Map the charset to its id (lookup () throws on id overflow).
+        const id_type charset_id_ = lookup (*token_);
+        _node_ptr_vector->push_back (static_cast<leaf_node *>(0));
+        _node_ptr_vector->back () = new leaf_node (charset_id_, true);
+        _tree_node_stack.push (_node_ptr_vector->back ());
+    }
+
+    id_type lookup (const string_token &charset_)
+    {
+        typename charset_map::const_iterator pos_ =
+            _charset_map.find (charset_);
+        // Held as std::size_t until it is range-checked below.
+        std::size_t wide_id_ = sm_traits::npos ();
+
+        if (pos_ != _charset_map.end ())
+        {
+            wide_id_ = pos_->second;
+        }
+        else
+        {
+            wide_id_ = _charset_map.size ();
+            _charset_map.insert (charset_pair (charset_, wide_id_));
+        }
+
+        const id_type id_ = static_cast<id_type>(wide_id_);
+        if (id_ < wide_id_)
+        {
+            throw runtime_error ("id_type is not large enough "
+                "to hold all ids.");
+        }
+        return id_;
+    }
+
+    void macro (token_stack &handle_, const state &state_)
+    {
+        const token *top_ = handle_->top ();
+        // The handle being reduced must consist of a single MACRO token.
+        assert (top_->_type == MACRO && handle_->size () == 1);
+        // Macros are looked up by the name carried in the token's _extra.
+        typename macro_map::const_iterator iter_ =
+            _macro_map.find (top_->_extra);
+
+        if (iter_ == _macro_map.end ())
+        {
+            const rules_char_type *name_ = top_->_extra.c_str ();
+            std::basic_stringstream<rules_char_type> ss_;
+            std::ostringstream os_;
+            // ss_ only supplies narrow (); its char type must match name_.
+            os_ << "Unknown MACRO name '";
+
+            while (*name_)
+            {
+                os_ << ss_.narrow (*name_++, ' ');
+            }
+
+            os_ << "' in rule id " << state_._id << '.';
+            throw runtime_error (os_.str ());
+        }
+        // Known macro: push a copy of its tree, then a REPEAT token.
+        _tree_node_stack.push (iter_->second->copy (_node_ptr_vector));
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (REPEAT);
+    }
+
+    void openparen (token_stack &handle_)
+    {
+        token *token_ = handle_->top ();
+        // Expected handle, top first: OPENPAREN, REGEX, CLOSEPAREN.
+        assert (token_->_type == OPENPAREN &&
+            handle_->size () == 3);
+        // Pop and delete the first two; CLOSEPAREN is left on handle_.
+        handle_->pop ();
+        delete token_;
+        token_ = handle_->top ();
+        assert (token_->_type == REGEX);
+        handle_->pop ();
+        delete token_;
+        token_ = 0;
+        assert (handle_->top ()->_type == CLOSEPAREN);
+        _token_stack->push (static_cast<token *>(0));
+        _token_stack->top () = new token (REPEAT);
+    }
+
+    void sequence ()
+    {
+        // Right operand is popped; left operand stays as the top entry.
+        node *right_ = _tree_node_stack.top ();
+        _tree_node_stack.pop ();
+        node *left_ = _tree_node_stack.top ();
+
+        // Slot first, then the node; it replaces the left operand on top.
+        _node_ptr_vector->push_back (static_cast<sequence_node *>(0));
+        _node_ptr_vector->back () = new sequence_node (left_, right_);
+        _tree_node_stack.top () = _node_ptr_vector->back ();
+    }
+
+    void optional (const bool greedy_)
+    {
+        // Implements '?': the operand becomes (operand | null_token).
+        node *operand_ = _tree_node_stack.top ();
+        // The operand may be any node type, so go through its firstpos.
+        typename node::node_vector &leaves_ = operand_->firstpos ();
+        typename node::node_vector::iterator leaf_ = leaves_.begin ();
+        typename node::node_vector::iterator last_ = leaves_.end ();
+
+        while (leaf_ != last_)
+        {
+            // Propagate the greediness to each first-position node.
+            (*leaf_)->greedy (greedy_);
+            ++leaf_;
+        }
+
+        _node_ptr_vector->push_back (static_cast<leaf_node *>(0));
+        node *empty_ = new leaf_node (node::null_token (), greedy_);
+        _node_ptr_vector->back () = empty_;
+        _node_ptr_vector->push_back (static_cast<selection_node *>(0));
+        _node_ptr_vector->back () = new selection_node (operand_, empty_);
+        _tree_node_stack.top () = _node_ptr_vector->back ();
+    }
+
+    void zero_or_more (const bool greedy_)
+    {
+        // Implements '*': wrap the top operand in an iteration_node.
+        node *operand_ = _tree_node_stack.top ();
+        // Slot first, then the node; it replaces the operand on top.
+        _node_ptr_vector->push_back (static_cast<iteration_node *>(0));
+        _node_ptr_vector->back () = new iteration_node (operand_, greedy_);
+        _tree_node_stack.top () = _node_ptr_vector->back ();
+    }
+
+    void one_or_more (const bool greedy_)
+    {
+        // Implements '+' as: operand followed by (copy of operand)*.
+        node *operand_ = _tree_node_stack.top ();
+        node *clone_ = operand_->copy (_node_ptr_vector);
+
+        // The starred clone; its owning slot is reserved before allocation.
+        _node_ptr_vector->push_back (static_cast<iteration_node *>(0));
+        node *starred_ = new iteration_node (clone_, greedy_);
+        _node_ptr_vector->back () = starred_;
+        // The sequence replaces the operand on top of the tree stack.
+        _node_ptr_vector->push_back (static_cast<sequence_node *>(0));
+        _node_ptr_vector->back () = new sequence_node (operand_, starred_);
+        _tree_node_stack.top () = _node_ptr_vector->back ();
+    }
+
+    // perform {n[,[m]]} (the counts are parsed from token_->_extra)
+    // Semantic checks have already been performed.
+    // {0,}  = *
+    // {0,1} = ?
+    // {1,}  = +
+    // therefore we do not check for these cases.
+    void repeatn (const bool greedy_, const token *token_)
+    {
+        const rules_char_type *str_ = token_->_extra.c_str ();
+        std::size_t min_ = 0;
+        bool comma_ = false;
+        std::size_t max_ = 0;
+        // Leading digits: n.
+        while (*str_>= '0' && *str_ <= '9')
+        {
+            min_ *= 10;
+            min_ += *str_ - '0';
+            ++str_;
+        }
+
+        comma_ = *str_ == ',';
+
+        if (comma_) ++str_;
+        // Digits after the comma: m (stays 0 when omitted).
+        while (*str_>= '0' && *str_ <= '9')
+        {
+            max_ *= 10;
+            max_ += *str_ - '0';
+            ++str_;
+        }
+        // A plain {1} leaves the operand on the stack untouched.
+        if (!(min_ == 1 && !comma_))
+        {
+            const std::size_t top_ = min_ > 0 ? min_ : max_;
+
+            if (min_ == 0)
+            {
+                optional (greedy_);
+            }
+
+            node *prev_ = _tree_node_stack.top ()->
+                copy (_node_ptr_vector);
+            node *curr_ = 0;
+
+            for (std::size_t i_ = 2; i_ < top_; ++i_)
+            {
+                node *temp_ = prev_->copy (_node_ptr_vector);
+
+                curr_ = temp_;
+                _tree_node_stack.push (static_cast<node *>(0));
+                _tree_node_stack.top () = prev_;
+                sequence ();
+                prev_ = curr_;
+            }
+
+            if (comma_ && min_ > 0)
+            {
+                if (min_ > 1)
+                {
+                    node *temp_ = prev_->copy (_node_ptr_vector);
+
+                    curr_ = temp_;
+                    _tree_node_stack.push (static_cast<node *>(0));
+                    _tree_node_stack.top () = prev_;
+                    sequence ();
+                    prev_ = curr_;
+                }
+
+                if (comma_ && max_)
+                {
+                    _tree_node_stack.push (static_cast<node *>(0));
+                    _tree_node_stack.top () = prev_;
+                    optional (greedy_);
+
+                    node *temp_ = _tree_node_stack.top ();
+
+                    _tree_node_stack.pop ();
+                    prev_ = temp_;
+
+                    const std::size_t count_ = max_ - min_;
+
+                    for (std::size_t i_ = 1; i_ < count_; ++i_)
+                    {
+                        node *temp_ = prev_->copy (_node_ptr_vector);
+
+                        curr_ = temp_;
+                        _tree_node_stack.push (static_cast<node *>(0));
+                        _tree_node_stack.top () = prev_;
+                        sequence ();
+                        prev_ = curr_;
+                    }
+                }
+                else
+                {
+                    _tree_node_stack.push (static_cast<node *>(0));
+                    _tree_node_stack.top () = prev_;
+                    zero_or_more (greedy_);
+
+                    node *temp_ = _tree_node_stack.top ();
+
+                    prev_ = temp_;
+                    _tree_node_stack.pop ();
+                }
+            }
+
+            _tree_node_stack.push (static_cast<node *>(0));
+            _tree_node_stack.top () = prev_;
+            sequence ();
+        }
+    }
+
+    void fixup_bol (node * &root_)const
+    {
+        typename node::node_vector *first_ = &root_->firstpos ();
+        typename node::node_vector::const_iterator iter_ =
+            first_->begin ();
+        typename node::node_vector::const_iterator end_ =
+            first_->end ();
+
+        // Leave the tree untouched if one of root_'s first positions is
+        // already a non-end node carrying bol_token.
+        for (; iter_ != end_; ++iter_)
+        {
+            const node *candidate_ = *iter_;
+
+            if (candidate_->end_state ()) continue;
+            if (candidate_->token () == bol_token ()) return;
+        }
+
+        // Otherwise prefix the tree with an optional bol_token, i.e.
+        // root_ becomes: (bol_token | null_token) followed by old root_.
+        _node_ptr_vector->push_back (static_cast<leaf_node *>(0));
+        _node_ptr_vector->back () = new leaf_node (bol_token (), true);
+
+        node *bol_ = _node_ptr_vector->back ();
+
+        _node_ptr_vector->push_back (static_cast<leaf_node *>(0));
+        _node_ptr_vector->back () = new leaf_node
+            (node::null_token (), true);
+
+        node *null_ = _node_ptr_vector->back ();
+
+        _node_ptr_vector->push_back (static_cast<selection_node *>(0));
+        _node_ptr_vector->back () = new selection_node (bol_, null_);
+
+        node *either_ = _node_ptr_vector->back ();
+
+        _node_ptr_vector->push_back (static_cast<sequence_node *>(0));
+        _node_ptr_vector->back () = new sequence_node (either_, root_);
+        root_ = _node_ptr_vector->back ();
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tokeniser/re_token.hpp b/inc/lexertl/parser/tokeniser/re_token.hpp
new file mode 100644
index 0000000..449ad65
--- /dev/null
+++ b/inc/lexertl/parser/tokeniser/re_token.hpp
@@ -0,0 +1,100 @@
+// re_token.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKEN_HPP
+#define LEXERTL_RE_TOKEN_HPP
+
+#include "../../string_token.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+enum token_type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT,
+    DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT,
+    ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN,
+    END}; // Values index the [END + 1] tables in basic_re_token.
+
+template<typename input_char_type, typename char_type>
+struct basic_re_token
+{
+    typedef basic_string_token<char_type> string_token;
+    typedef std::basic_string<input_char_type> string;
+    // _extra: raw text (macro name, repeat counts); _str: CHARSET contents.
+    token_type _type;
+    string _extra;
+    string_token _str;
+
+    basic_re_token (const token_type type_ = BEGIN) :
+        _type (type_),
+        _extra (),
+        _str ()
+    {
+    }
+    // Back to the default-constructed state (BEGIN, empty fields).
+    void clear ()
+    {
+        _type = BEGIN;
+        _extra.clear ();
+        _str.clear ();
+    }
+
+    basic_re_token &operator = (const basic_re_token &rhs_)
+    {
+        _type = rhs_._type;
+        _extra = rhs_._extra;
+        _str = rhs_._str;
+        return *this;
+    }
+    // Table entry for the pair (this token's _type, type_).
+    char precedence (const token_type type_) const
+    {
+        // Moved in here for Solaris compiler.
+        static const char precedence_table_[END + 1][END + 1] = {
+//        BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP,  | , CHR, BOL, EOL, MCR,  ( ,  ) ,  ? , ?? ,  * , *? ,  + , +? , {n}, {n}?, END
+/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>'},
+/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  |  */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* BOL */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/* EOL */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*  (  */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+/*  )  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'},
+/*  ?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* ??  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  *  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* *?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*  +  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* +?  */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/*{nm}?*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'},
+/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}
+};
+
+        return precedence_table_[_type][type_];
+    }
+    // Printable name of this token's _type.
+    const char *precedence_string () const
+    {
+        // Moved in here for Solaris compiler.
+        static const char *precedence_strings_[END + 1] =
+            {"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION",
+            "REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO", "(", ")",
+            "?", "??", "*", "*?", "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"};
+
+        return precedence_strings_[_type];
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tokeniser/re_tokeniser.hpp b/inc/lexertl/parser/tokeniser/re_tokeniser.hpp
new file mode 100644
index 0000000..394eea4
--- /dev/null
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser.hpp
@@ -0,0 +1,829 @@
+// re_tokeniser.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKENISER_HPP
+#define LEXERTL_RE_TOKENISER_HPP
+
+#include <cstring>
+#include "re_token.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <sstream>
+#include "../../string_token.hpp"
+#include "re_tokeniser_helper.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename rules_char_type, typename char_type, typename id_type>
+class basic_re_tokeniser
+{
+public:
+    typedef basic_re_token<rules_char_type, char_type> re_token;
+    typedef basic_re_tokeniser_helper<rules_char_type, char_type, id_type>
+        tokeniser_helper;
+    typedef typename tokeniser_helper::char_state char_state;
+    typedef typename tokeniser_helper::state state;
+    typedef basic_string_token<char_type> string_token;
+
+    static void next (re_token *lhs_, state &state_, re_token *token_)
+    {
+        rules_char_type ch_ = 0;
+        bool eos_ = state_.next (ch_);
+        bool skipped_ = false;
+        // Reset the output token before it is filled in below.
+        token_->clear ();
+        // Loop until a pass consumes neither a comment nor skippable input.
+        do
+        {
+            // string begin/end: every '"' toggles _in_string
+            while (!eos_ && ch_ == '"')
+            {
+                state_._in_string ^= 1;
+                eos_ = state_.next (ch_);
+            }
+
+            // (?# ...)
+            skipped_ = comment (eos_, ch_, state_);
+            // whitespace and C style comments, only when skip_ws is set
+            skipped_ |= skip (eos_, ch_, state_);
+        } while (skipped_);
+
+        if (eos_)
+        {
+            if (state_._in_string)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (missing '\"') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            if (state_._paren_count)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (missing ')') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            token_->_type = END;
+        }
+        else
+        {
+            if (ch_ == '\\')
+            {
+                // Even if we are in a string, respect escape sequences...
+                token_->_type = CHARSET;
+                escape (state_, token_->_str);
+            }
+            else if (state_._in_string)
+            {
+                // All other meta characters lose their special meaning
+                // inside a string.
+                token_->_type = CHARSET;
+                token_->_str.insert (typename string_token::range (ch_, ch_));
+            }
+            else
+            {
+                // Not an escape sequence and not inside a string, so
+                // check for meta characters.
+                switch (ch_)
+                {
+                    case '(':
+                        token_->_type = OPENPAREN;
+                        ++state_._paren_count;
+                        read_options (state_);
+                        break;
+                    case ')':
+                        --state_._paren_count;
+
+                        if (state_._paren_count < 0)
+                        {
+                            std::ostringstream ss_;
+
+                            ss_ << "Number of open parenthesis < 0 "
+                                "at index " << state_.index () - 1 <<
+                                " in rule id " << state_._id << '.';
+                            throw runtime_error (ss_.str ());
+                        }
+
+                        token_->_type = CLOSEPAREN;
+
+                        if (!state_._flags_stack.empty ())
+                        {
+                            state_._flags = state_._flags_stack.top ();
+                            state_._flags_stack.pop ();
+                        }
+
+                        break;
+                    case '?':
+                        if (!state_.eos () && *state_._curr == '?')
+                        {
+                            token_->_type = AOPT;
+                            state_.increment ();
+                        }
+                        else
+                        {
+                            token_->_type = OPT;
+                        }
+
+                        break;
+                    case '*':
+                        if (!state_.eos () && *state_._curr == '?')
+                        {
+                            token_->_type = AZEROORMORE;
+                            state_.increment ();
+                        }
+                        else
+                        {
+                            token_->_type = ZEROORMORE;
+                        }
+
+                        break;
+                    case '+':
+                        if (!state_.eos () && *state_._curr == '?')
+                        {
+                            token_->_type = AONEORMORE;
+                            state_.increment ();
+                        }
+                        else
+                        {
+                            token_->_type = ONEORMORE;
+                        }
+
+                        break;
+                    case '{':
+                        open_curly (lhs_, state_, token_);
+                        break;
+                    case '|':
+                        token_->_type = OR;
+                        break;
+                    case '^':
+                        if (!state_._macro && state_._curr - 1 == state_._start)
+                        {
+                            token_->_type = BOL;
+                        }
+                        else
+                        {
+                            token_->_type = CHARSET;
+                            token_->_str.insert (typename string_token::range
+                                (ch_, ch_));
+                        }
+
+                        break;
+                    case '$':
+                        if (!state_._macro && state_._curr == state_._end)
+                        {
+                            token_->_type = EOL;
+                        }
+                        else
+                        {
+                            token_->_type = CHARSET;
+                            token_->_str.insert (typename string_token::range
+                                (ch_, ch_));
+                        }
+
+                        break;
+                    case '.':
+                    {
+                        token_->_type = CHARSET;
+
+                        if (state_._flags & dot_not_newline)
+                        {
+                            token_->_str.insert (typename string_token::range
+                                ('\n', '\n'));
+                        }
+
+                        token_->_str.negate ();
+                        break;
+                    }
+                    case '[':
+                    {
+                        token_->_type = CHARSET;
+                        tokeniser_helper::charset (state_, token_->_str);
+                        break;
+                    }
+                    case '/':
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Lookahead ('/') is not supported yet in " <<
+                            "rule id " << state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                        break;
+                    }
+                    default:
+                        token_->_type = CHARSET;
+
+                        if ((state_._flags & icase) &&
+                            (std::isupper (ch_, state_._locale) ||
+                            std::islower (ch_, state_._locale)))
+                        {
+                            char_type upper_ = std::toupper
+                                (ch_, state_._locale);
+                            char_type lower_ = std::tolower
+                                (ch_, state_._locale);
+
+                            token_->_str.insert (typename string_token::range
+                                (upper_, upper_));
+                            token_->_str.insert (typename string_token::range
+                                (lower_, lower_));
+                        }
+                        else
+                        {
+                            token_->_str.insert (typename string_token::range
+                                (ch_, ch_));
+                        }
+
+                        break;
+                }
+            }
+        }
+    }
+
+private:
+    static bool comment (bool &eos_, rules_char_type &ch_, state &state_)
+    {
+        bool skipped_ = false;
+        // Matches "(?#" outside a string: '(' is in ch_, "?#" is still ahead.
+        if (!eos_ && !state_._in_string && ch_ == '(' &&
+            !state_.eos () && *state_._curr == '?' &&
+            state_._curr + 1 < state_._end && *(state_._curr + 1) == '#')
+        {
+            std::size_t paren_count_ = 1;
+            // Step over the "?#".
+            state_.increment ();
+            state_.increment ();
+            // Read on until the ')' that brings paren_count_ back to zero.
+            do
+            {
+                eos_ = state_.next (ch_);
+
+                if (ch_ == '(')
+                {
+                    ++paren_count_;
+                }
+                else if (ch_ == ')')
+                {
+                    --paren_count_;
+                }
+            } while (!eos_ && !(ch_ == ')' && paren_count_ == 0));
+
+            if (eos_)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (unterminated comment) " <<
+                    "in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+            else
+            {
+                eos_ = state_.next (ch_);
+            }
+
+            skipped_ = true;
+        }
+
+        return skipped_;
+    }
+
+    static bool skip (bool &eos_, rules_char_type &ch_, state &state_)
+    {
+        bool skipped_ = false;
+        // Only acts when skip_ws is set and we are not inside a string.
+        if (!eos_ && (state_._flags & skip_ws) && !state_._in_string)
+        {
+            bool c_comment_ = false;
+            bool skip_ws_ = false;
+            // Keep going while a C style comment or whitespace comes next.
+            do
+            {
+                c_comment_ = ch_ == '/' && !state_.eos () &&
+                    *state_._curr == '*';
+                skip_ws_ = !c_comment_ && (ch_ == ' ' || ch_ == '\t' ||
+                    ch_ == '\n' || ch_ == '\r' || ch_ == '\f' || ch_ == '\v');
+
+                if (c_comment_)
+                {
+                    state_.increment ();
+                    eos_ = state_.next (ch_);
+                    // Read on until ch_ is '*' and the next character is '/'.
+                    while (!eos_ && !(ch_ == '*' && !state_.eos () &&
+                        *state_._curr == '/'))
+                    {
+                        eos_ = state_.next (ch_);
+                    }
+
+                    if (eos_)
+                    {
+                        std::ostringstream ss_;
+
+                        // Pointless returning index if at end of string
+                        ss_ << "Unexpected end of regex (unterminated " <<
+                            "C style comment) in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                    else
+                    {
+                        state_.increment ();
+                        eos_ = state_.next (ch_);
+                    }
+
+                    skipped_ = true;
+                }
+                else if (skip_ws_)
+                {
+                    eos_ = state_.next (ch_);
+                    skipped_ = true;
+                }
+            } while (c_comment_ || skip_ws_);
+        }
+
+        return skipped_;
+    }
+
+    // Reads the options of a "?options:" group prefix, if one comes next.
+    // SYNTAX:
+    //   ?<options>: where each option may be preceded by '-'
+    // OPTIONS:
+    //   i - sets icase
+    //   s - clears dot_not_newline
+    //   x - sets skip_ws
+    //   - - the option that follows does the opposite of the above
+    // SEMANTIC RULES:
+    //   The flags in force beforehand are pushed onto _flags_stack first.
+    //   If no '?' comes next, the flags are pushed only when the stack is
+    //   already non-empty.
+    //   An unknown option throws runtime_error.
+    static void read_options (state &state_)
+    {
+        if (state_.eos () || *state_._curr != '?')
+        {
+            if (!state_._flags_stack.empty ())
+            {
+                state_._flags_stack.push (state_._flags);
+            }
+
+            return;
+        }
+
+        rules_char_type ch_ = 0;
+        bool negate_ = false;
+
+        state_.increment ();
+
+        bool eos_ = state_.next (ch_);
+
+        state_._flags_stack.push (state_._flags);
+
+        for (; !eos_ && ch_ != ':'; eos_ = state_.next (ch_))
+        {
+            if (ch_ == '-')
+            {
+                // Toggles; the next option resets it again.
+                negate_ = !negate_;
+            }
+            else if (ch_ == 'i')
+            {
+                state_._flags = negate_ ? state_._flags & ~icase :
+                    state_._flags | icase;
+
+                negate_ = false;
+            }
+            else if (ch_ == 's')
+            {
+                // Inverted sense: plain 's' clears the flag, "-s" sets it.
+                state_._flags = negate_ ? state_._flags | dot_not_newline :
+                    state_._flags & ~dot_not_newline;
+
+                negate_ = false;
+            }
+            else if (ch_ == 'x')
+            {
+                state_._flags = negate_ ? state_._flags & ~skip_ws :
+                    state_._flags | skip_ws;
+
+                negate_ = false;
+            }
+            else
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Unknown option at index " <<
+                    state_.index () - 1 << " in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+        }
+
+        // End of string handler will handle early termination
+    }
+
+    // Decodes one escape sequence into token_: a charset shortcut is parsed
+    // as a nested charset, any other escape is inserted as a single char.
+    static void escape (state &state_, string_token &token_)
+    {
+        std::size_t len_ = 0;
+        char_type single_ = 0;
+        const char *shortcut_ = tokeniser_helper::escape_sequence (state_,
+            single_, len_);
+
+        if (!shortcut_)
+        {
+            token_.insert (typename string_token::range (single_, single_));
+            return;
+        }
+
+        char_state nested_ (shortcut_ + 1, shortcut_ + len_, state_._id,
+            state_._flags, state_._locale, false);
+        tokeniser_helper::charset (nested_, token_);
+    }
+
+    // Dispatches on the next character: digit, '+', '-' or a macro name.
+    static void open_curly (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        if (state_.eos ())
+        {
+            std::ostringstream ss_;
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+        else if (*state_._curr >= '0' && *state_._curr <= '9')
+        {
+            repeat_n (state_, token_);
+        }
+        else if (*state_._curr == '+')
+        {
+            charset_union (lhs_, state_, token_);
+        }
+        else if (*state_._curr == '-')
+        {
+            charset_difference (lhs_, state_, token_);
+        }
+        else
+        {
+            macro (state_, token_);
+        }
+    }
+
+    // Implements "{-}": lhs_ and the token following "{-}" must both be
+    // CHARSETs; the latter is removed from lhs_->_str. The token after that
+    // is then read into token_. Throws runtime_error on any violation.
+    static void charset_difference (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        if (lhs_->_type != CHARSET)
+        {
+            std::ostringstream ss_;
+            ss_ << "CHARSET must precede {-} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        rules_char_type close_ = 0;
+
+        // Consume the '-'; the character after it must be the closing '}'.
+        state_.next (close_);
+
+        if (state_.next (close_))
+        {
+            std::ostringstream ss_;
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        if (close_ != '}')
+        {
+            std::ostringstream ss_;
+            ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        re_token subtrahend_;
+
+        // Read the token that follows "{-}".
+        next (lhs_, state_, &subtrahend_);
+
+        if (subtrahend_._type != CHARSET)
+        {
+            std::ostringstream ss_;
+            ss_ << "CHARSET must follow {-} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        lhs_->_str.remove (subtrahend_._str);
+
+        if (lhs_->_str.empty ())
+        {
+            std::ostringstream ss_;
+            ss_ << "Empty charset created by {-} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        next (lhs_, state_, token_);
+    }
+
+    // Implements "{+}": lhs_ and the token following "{+}" must both be
+    // CHARSETs; the latter is merged into lhs_->_str. The token after that
+    // is then read into token_. Throws runtime_error on any violation.
+    static void charset_union (re_token *lhs_, state &state_,
+        re_token *token_)
+    {
+        if (lhs_->_type != CHARSET)
+        {
+            std::ostringstream ss_;
+            ss_ << "CHARSET must precede {+} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        rules_char_type close_ = 0;
+
+        // Consume the '+'; the character after it must be the closing '}'.
+        state_.next (close_);
+
+        if (state_.next (close_))
+        {
+            std::ostringstream ss_;
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        if (close_ != '}')
+        {
+            std::ostringstream ss_;
+            ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        re_token addend_;
+
+        next (lhs_, state_, &addend_);
+
+        if (addend_._type != CHARSET)
+        {
+            std::ostringstream ss_;
+            ss_ << "CHARSET must follow {+} at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        lhs_->_str.insert (addend_._str);
+        next (lhs_, state_, token_);
+    }
+
+    // SYNTAX:
+    //   {n[,[n]]}
+    // SEMANTIC RULES:
+    //   {0} - INVALID (throw exception)
+    //   {0,} = *
+    //   {0,0} - INVALID (throw exception)
+    //   {0,1} = ?
+    //   {1,} = +
+    //   {min,max} where min == max - {min}
+    //   {min,max} where max < min - INVALID (throw exception)
+    static void repeat_n (state &state_, re_token *token_)
+    {
+        rules_char_type ch_ = 0;
+        bool eos_ = state_.next (ch_);
+        std::size_t min_ = 0;
+        std::size_t max_ = 0;
+        // Read the minimum; each digit is also appended to token_->_extra.
+        while (!eos_ && ch_ >= '0' && ch_ <= '9')
+        {
+            min_ *= 10;
+            min_ += ch_ - '0';
+            token_->_extra += ch_;
+            eos_ = state_.next (ch_);
+        }
+
+        if (eos_)
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+        // min_max_: a differing maximum was given; repeatn_: not *, + or ?.
+        bool min_max_ = false;
+        bool repeatn_ = true;
+
+        if (ch_ == ',')
+        {
+            token_->_extra += ch_;
+            eos_ = state_.next (ch_);
+
+            if (eos_)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (missing '}') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+            // "{n,}": no maximum follows the comma.
+            if (ch_ == '}')
+            {
+                // Small optimisation: Check for '*' equivalency.
+                if (min_ == 0)
+                {
+                    token_->_type = ZEROORMORE;
+                    repeatn_ = false;
+                }
+                // Small optimisation: Check for '+' equivalency.
+                else if (min_ == 1)
+                {
+                    token_->_type = ONEORMORE;
+                    repeatn_ = false;
+                }
+            }
+            else
+            {
+                if (ch_ < '0' || ch_ > '9')
+                {
+                    std::ostringstream ss_;
+
+                    ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                min_max_ = true;
+                // Read the maximum, likewise appending its digits to _extra.
+                do
+                {
+                    max_ *= 10;
+                    max_ += ch_ - '0';
+                    token_->_extra += ch_;
+                    eos_ = state_.next (ch_);
+                } while (!eos_ && ch_ >= '0' && ch_ <= '9');
+
+                if (eos_)
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex (missing '}') "
+                        "in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                // Small optimisation: Check for '?' equivalency.
+                if (min_ == 0 && max_ == 1)
+                {
+                    token_->_type = OPT;
+                    repeatn_ = false;
+                }
+                // Small optimisation: if min == max, then min.
+                else if (min_ == max_)
+                {
+                    token_->_extra.erase (token_->_extra.find (','));
+                    min_max_ = false;
+                    max_ = 0;
+                }
+            }
+        }
+
+        if (ch_ != '}')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+        // A '?' straight after the '}' selects the A-prefixed token type.
+        if (repeatn_)
+        {
+            // SEMANTIC VALIDATION follows:
+            // NOTE: {0,} has already become *
+            // therefore we don't check for a comma.
+            if (min_ == 0 && max_ == 0)
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Cannot have exactly zero repeats preceding index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            if (min_max_ && max_ < min_)
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Max less than min preceding index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+
+            if (!state_.eos () && *state_._curr == '?')
+            {
+                token_->_type = AREPEATN;
+                state_.increment ();
+            }
+            else
+            {
+                token_->_type = REPEATN;
+            }
+        }
+        else if (token_->_type == ZEROORMORE)
+        {
+            if (!state_.eos () && *state_._curr == '?')
+            {
+                token_->_type = AZEROORMORE;
+                state_.increment ();
+            }
+        }
+        else if (token_->_type == ONEORMORE)
+        {
+            if (!state_.eos () && *state_._curr == '?')
+            {
+                token_->_type = AONEORMORE;
+                state_.increment ();
+            }
+        }
+        else if (token_->_type == OPT)
+        {
+            if (!state_.eos () && *state_._curr == '?')
+            {
+                token_->_type = AOPT;
+                state_.increment ();
+            }
+        }
+    }
+
+    // Reads a macro name of the form [_A-Za-z][-_A-Za-z0-9]* into
+    // token_->_extra, requires a closing '}' straight after it and then
+    // marks token_ as a MACRO. Throws runtime_error otherwise.
+    static void macro (state &state_, re_token *token_)
+    {
+        rules_char_type ch_ = 0;
+
+        state_.next (ch_);
+
+        const bool valid_start_ = ch_ == '_' || (ch_ >= 'A' && ch_ <= 'Z') ||
+            (ch_ >= 'a' && ch_ <= 'z');
+
+        if (!valid_start_)
+        {
+            std::ostringstream ss_;
+            ss_ << "Invalid MACRO name at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        do
+        {
+            token_->_extra += ch_;
+
+            if (state_.next (ch_))
+            {
+                std::ostringstream ss_;
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex " <<
+                    "(missing '}') in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+        } while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') ||
+            (ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9'));
+
+        if (ch_ != '}')
+        {
+            std::ostringstream ss_;
+            ss_ << "Missing '}' at index " << state_.index () - 1 <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        token_->_type = MACRO;
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp b/inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp
new file mode 100644
index 0000000..4507ce2
--- /dev/null
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser_helper.hpp
@@ -0,0 +1,2351 @@
+// re_tokeniser_helper.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKENISER_HELPER_H
+#define LEXERTL_RE_TOKENISER_HELPER_H
+
+#include "../../bool.hpp"
+#include "../../char_traits.hpp"
+// strlen()
+#include <cstring>
+#include "../../size_t.hpp"
+#include "re_tokeniser_state.hpp"
+#include "../../runtime_error.hpp"
+#include <sstream>
+#include "../../string_token.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename rules_char_type, typename input_char_type, typename id_type,
+    typename char_traits = basic_char_traits<input_char_type> >
+class basic_re_tokeniser_helper
+{
+public:
+    typedef basic_re_tokeniser_state<char, id_type> char_state;
+    typedef basic_re_tokeniser_state<rules_char_type, id_type> state;
+    typedef basic_string_token<input_char_type> string_token;
+
+    // Tag type; fold () is called with size<sizeof(input_char_type)> ().
+    template<char bytes_>
+    struct size
+    {
+    };
+    typedef size<1> one;
+    typedef size<2> two;
+    typedef size<4> four;
+
+    // Decodes what follows a backslash. Returns the string of a charset
+    // shortcut (its length in str_len_) or, when there is none, 0 after
+    // storing the escaped character in ch_. Throws at end of regex.
+    template<typename state_type, typename char_type>
+    static const char *escape_sequence (state_type &state_,
+        char_type &ch_, std::size_t &str_len_)
+    {
+        if (state_.eos ())
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex following '\\' in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        const char *shortcut_ = charset_shortcut (state_, str_len_);
+
+        if (!shortcut_)
+        {
+            ch_ = chr (state_);
+            return shortcut_;
+        }
+
+        // presumably charset_shortcut () only peeks - consume the character
+        state_.increment ();
+        return shortcut_;
+    }
+
+    // Parses a charset body (after its '[') into token_; can call itself.
+    template<typename state_type>
+    static void charset (state_type &state_, string_token &token_)
+    {
+        bool negated_ = false;
+        typename state_type::char_type ch_ = 0;
+        bool eos_ = state_.next (ch_);
+
+        if (eos_)
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex following '[' in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+        // A leading '^' negates the set; applied once the ']' is reached.
+        negated_ = ch_ == '^';
+
+        if (negated_)
+        {
+            eos_ = state_.next (ch_);
+
+            if (eos_)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex following '^' in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+        }
+        // chset_: last item was a whole set; prev_: last single character.
+        bool chset_ = false;
+        typename string_token::char_type prev_ = 0;
+
+        while (ch_ != ']')
+        {
+            if (ch_ == '\\')
+            {
+                std::size_t str_len_ = 0;
+                const char *str_ = escape_sequence (state_, prev_,
+                    str_len_);
+                // A non-null str_ is a charset shortcut: parse and merge it.
+                chset_ = str_ != 0;
+
+                if (chset_)
+                {
+                    char_state temp_state_ (str_ + 1, str_ + str_len_,
+                        state_._id, state_._flags, state_._locale, false);
+                    string_token temp_token_;
+
+                    charset (temp_state_, temp_token_);
+                    token_.insert (temp_token_);
+                }
+            }
+            else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
+            {
+                state_.increment ();
+                posix (state_, token_);
+                chset_ = true;
+            }
+            else
+            {
+                chset_ = false;
+                prev_ = ch_;
+            }
+
+            eos_ = state_.next (ch_);
+
+            // Covers preceding if, else if and else
+            if (eos_)
+            {
+                std::ostringstream ss_;
+
+                // Pointless returning index if at end of string
+                ss_ << "Unexpected end of regex (missing ']') in rule id " <<
+                    state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+            // On '-', charset_range () takes over with the item just parsed.
+            if (ch_ == '-')
+            {
+                charset_range (chset_, state_, eos_, ch_, prev_,
+                    token_);
+            }
+            else if (!chset_)
+            {
+                token_.insert (typename string_token::range (prev_, prev_));
+                // With icase set, also insert fold ()'s result if different.
+                if (state_._flags & icase)
+                {
+                    const input_char_type folded_ = fold (prev_,
+                        state_._locale, size<sizeof(input_char_type)> ());
+
+                    if (prev_ != folded_)
+                    {
+                        token_.insert (typename string_token::range
+                            (folded_, folded_));
+                    }
+                }
+            }
+        }
+
+        if (negated_)
+        {
+            token_.negate ();
+        }
+
+        if (token_.empty ())
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Empty charsets not allowed preceding index " <<
+                state_.index () << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+private:
+    struct char_pair
+    {
+        input_char_type first;
+        input_char_type second;
+    };
+    // Pairs two char_pairs; presumably a case-folding mapping - TODO confirm.
+    struct fold_pair
+    {
+        char_pair from;
+        char_pair to;
+    };
+
+    // Parses a POSIX class name; _curr is just past "[:" on entry (see the
+    // call in charset ()). An optional leading '^' negates the class. Only
+    // the first letter is examined here; each handler checks the rest.
+    template<typename state_type>
+    static void posix (state_type &state_, string_token &token_)
+    {
+        const bool negate_ = !state_.eos () && *state_._curr == '^';
+
+        if (negate_)
+        {
+            state_.increment ();
+        }
+
+        if (state_.eos ())
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (unterminated POSIX charset) " <<
+                "in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        switch (*state_._curr)
+        {
+            case 'a':
+                // [:alnum:]
+                // [:alpha:]
+                alnum_alpha (state_, token_, negate_);
+                break;
+            case 'b':
+                // [:blank:]
+                blank (state_, token_, negate_);
+                break;
+            case 'c':
+                // [:cntrl:]
+                cntrl (state_, token_, negate_);
+                break;
+            case 'd':
+                // [:digit:]
+                digit (state_, token_, negate_);
+                break;
+            case 'g':
+                // [:graph:]
+                graph (state_, token_, negate_);
+                break;
+            case 'l':
+                // [:lower:]
+                lower (state_, token_, negate_);
+                break;
+            case 'p':
+                // [:print:]
+                // [:punct:]
+                print_punct (state_, token_, negate_);
+                break;
+            case 's':
+                // [:space:]
+                space (state_, token_, negate_);
+                break;
+            case 'u':
+                // [:upper:]
+                upper (state_, token_, negate_);
+                break;
+            case 'x':
+                // [:xdigit:]
+                xdigit (state_, token_, negate_);
+                break;
+            default:
+            {
+                // No handled class name starts with this letter.
+                std::ostringstream ss_;
+
+                ss_ << "Unknown POSIX charset at index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+        }
+    }
+
+    // Handles the POSIX classes [:alnum:] and [:alpha:]; _curr is on the
+    // initial 'a' on entry (posix () dispatched on it). Hands the class
+    // string and negate_ to insert_charset ().
+    //
+    // The name must be spelt out in full: anything else, including a mere
+    // prefix such as "[:al:]" or "[:alph:]", throws runtime_error rather
+    // than being treated as alnum.
+    template<typename state_type>
+    static void alnum_alpha (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        bool alnum_ = true;
+        bool known_ = false;
+
+        // Step over the 'a'.
+        state_.increment ();
+
+        if (!state_.eos () && *state_._curr == 'l')
+        {
+            state_.increment ();
+
+            if (!state_.eos () &&
+                (*state_._curr == 'n' || *state_._curr == 'p'))
+            {
+                // The third letter decides which name is being spelt.
+                alnum_ = *state_._curr == 'n';
+
+                const char *rest_ = alnum_ ? "num" : "pha";
+
+                // rest_ advances only on a match, so reaching its end
+                // proves that the whole name was present.
+                while (!state_.eos () && *rest_ && *state_._curr == *rest_)
+                {
+                    state_.increment ();
+                    ++rest_;
+                }
+
+                known_ = !*rest_;
+            }
+        }
+
+        // NOTE(review): as before, the ':' that should precede the closing
+        // ']' is consumed if present but not insisted upon.
+        if (known_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        // Only a complete name followed by ']' is accepted.
+        if (known_ && !state_.eos () && *state_._curr == ']')
+        {
+            std::string str_;
+
+            state_.increment ();
+
+            // Single byte input enumerates the locale's characters; wider
+            // input uses \p{...} classes instead.
+            if (alnum_)
+            {
+                // alnum
+                str_ = sizeof(input_char_type) == 1 ?
+                    make_alnum (state_._locale) :
+                    std::string ("[\\p{Ll}\\p{Lu}\\p{Nd}]");
+            }
+            else
+            {
+                // alpha
+                str_ = sizeof(input_char_type) == 1 ?
+                    make_alpha (state_._locale) :
+                    std::string ("[\\p{Ll}\\p{Lu}]");
+            }
+
+            insert_charset (str_.c_str (), state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Unknown POSIX charset at index " <<
+                state_.index () << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    // Returns "[...]" holding each char 0-255 that locale_ deems alnum.
+    static std::string make_alnum (std::locale &locale_)
+    {
+        std::string set_ ("[");
+
+        for (int code_ = 0; code_ < 256; ++code_)
+        {
+            const char ch_ = static_cast<char>(code_);
+            if (std::isalnum (ch_, locale_))
+            {
+                set_ += ch_;
+            }
+        }
+
+        return set_ + ']';
+    }
+
+    // Returns "[...]" holding each char 0-255 that locale_ deems alpha.
+    static std::string make_alpha (std::locale &locale_)
+    {
+        std::string set_ ("[");
+
+        for (int code_ = 0; code_ < 256; ++code_)
+        {
+            const char ch_ = static_cast<char>(code_);
+            if (std::isalpha (ch_, locale_))
+            {
+                set_ += ch_;
+            }
+        }
+
+        return set_ + ']';
+    }
+
+    // Handles POSIX class [:blank:]; _curr is on the 'b' on entry. Throws
+    // runtime_error unless the rest of the name and a closing ']' follow.
+    template<typename state_type>
+    static void blank (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        const char *rest_ = "lank";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            const char *str_ = sizeof(input_char_type) == 1 ?
+                "[ \t]" : "[\\p{Zs}\t]";
+            state_.increment ();
+            insert_charset (str_, state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " <<
+                state_.index () << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    // Handles POSIX class [:cntrl:]; _curr is on the 'c' on entry. Throws
+    // runtime_error unless the rest of the name and a closing ']' follow.
+    template<typename state_type>
+    static void cntrl (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        const char *rest_ = "ntrl";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            const char *str_ = sizeof(input_char_type) == 1 ?
+                "[\\x00-\x1f\x7f]" : "[\\p{Cc}]";
+            state_.increment ();
+            insert_charset (str_, state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " <<
+                state_.index () << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    // Handles POSIX class [:digit:]; _curr is on the 'd' on entry. Throws
+    // runtime_error unless the rest of the name and a closing ']' follow.
+    template<typename state_type>
+    static void digit (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        const char *rest_ = "igit";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            const char *str_ = sizeof(input_char_type) == 1 ?
+                "[0-9]" : "[\\p{Nd}]";
+            state_.increment ();
+            insert_charset (str_, state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    // Handles POSIX class [:graph:]; _curr is on the 'g' on entry. Throws
+    // runtime_error unless the rest of the name and a closing ']' follow.
+    template<typename state_type>
+    static void graph (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        const char *rest_ = "raph";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            const char *str_ = sizeof(input_char_type) == 1 ?
+                "[\x21-\x7e]" : "[^\\p{Z}\\p{C}]";
+            state_.increment ();
+            insert_charset (str_, state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    template<typename state_type>
+    static void lower (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        // Handles [:lower:]: adds the locale's lower-case chars (one-byte
+        // input_char_type) or \p{Ll} to token_. Throws runtime_error otherwise.
+        const char *rest_ = "ower";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            std::string str_ = sizeof(input_char_type) == 1 ?
+                create_lower (state_._locale) :
+                std::string ("[\\p{Ll}]");
+            state_.increment ();
+            insert_charset (str_.c_str (), state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    static std::string create_lower (std::locale &locale_)
+    {
+        // Bracket expression listing every char locale_ classes as lower.
+        const std::ctype<char> &facet_ =
+            std::use_facet<std::ctype<char> > (locale_);
+        std::string set_ ("[");
+
+        for (int code_ = 0; code_ < 256; ++code_)
+        {
+            const char ch_ = static_cast<char>(code_);
+            if (facet_.is (std::ctype_base::lower, ch_)) set_.push_back (ch_);
+        }
+
+        set_.push_back (']');
+        return set_;
+    }
+
+    template<typename state_type>
+    static void print_punct (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        // Handles [:print:] and [:punct:]. A flag is set only once the whole
+        // name has matched, so a partial name such as [:pun:] is rejected.
+        bool print_ = false;
+        bool punct_ = false;
+        state_.increment ();
+
+        if (!state_.eos ())
+        {
+            if (*state_._curr == 'r')
+            {
+                state_.increment ();
+
+                if (!state_.eos () && *state_._curr == 'i')
+                {
+                    state_.increment ();
+
+                    if (!state_.eos () && *state_._curr == 'n')
+                    {
+                        state_.increment ();
+
+                        if (!state_.eos () && *state_._curr == 't')
+                        {
+                            state_.increment ();
+                            print_ = true;
+                        }
+                    }
+                }
+            }
+            else if (*state_._curr == 'u')
+            {
+                state_.increment ();
+
+                if (!state_.eos () && *state_._curr == 'n')
+                {
+                    state_.increment ();
+
+                    if (!state_.eos () && *state_._curr == 'c')
+                    {
+                        state_.increment ();
+
+                        if (!state_.eos () && *state_._curr == 't')
+                        {
+                            state_.increment ();
+                            punct_ = true;
+                        }
+                    }
+                }
+            }
+        }
+
+        if ((print_ || punct_) && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if ((print_ || punct_) && !state_.eos () && *state_._curr == ']')
+        {
+            const char *str_ = 0;
+            state_.increment ();
+
+            if (print_)
+            {
+                // print; the wide form is the complement of \p{C}
+                str_ = sizeof(input_char_type) == 1 ?
+                    "[\x20-\x7e]" : "[^\\p{C}]";
+            }
+            else
+            {
+                // punct
+                str_ = sizeof(input_char_type) == 1 ?
+                    "[!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~]" :
+                    "[\\p{P}\\p{S}]";
+            }
+            insert_charset (str_, state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    template<typename state_type>
+    static void space (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        // Handles [:space:]: adds \t \r \n \v \f plus ' ' (one-byte
+        // input_char_type) or \p{Z} to token_. Throws runtime_error otherwise.
+        const char *rest_ = "pace";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            const char *str_ = sizeof(input_char_type) == 1 ?
+                "[ \t\r\n\v\f]" : "[\\p{Z}\t\r\n\v\f]";
+            state_.increment ();
+            insert_charset (str_, state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    template<typename state_type>
+    static void upper (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        // Handles [:upper:]: adds the locale's upper-case chars (one-byte
+        // input_char_type) or \p{Lu} to token_. Throws runtime_error otherwise.
+        const char *rest_ = "pper";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            std::string str_ = sizeof(input_char_type) == 1 ?
+                create_upper (state_._locale) :
+                std::string ("[\\p{Lu}]");
+            state_.increment ();
+            insert_charset (str_.c_str (), state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    static std::string create_upper (std::locale &locale_)
+    {
+        // Bracket expression listing every char locale_ classes as upper.
+        const std::ctype<char> &facet_ =
+            std::use_facet<std::ctype<char> > (locale_);
+        std::string set_ ("[");
+
+        for (int code_ = 0; code_ < 256; ++code_)
+        {
+            const char ch_ = static_cast<char>(code_);
+            if (facet_.is (std::ctype_base::upper, ch_)) set_.push_back (ch_);
+        }
+
+        set_.push_back (']');
+        return set_;
+    }
+
+    template<typename state_type>
+    static void xdigit (state_type &state_, string_token &token_,
+        const bool negate_)
+    {
+        // Handles [:xdigit:]: adds [0-9A-Fa-f] to token_, negated if negate_
+        // is set. Throws runtime_error when the name does not match.
+        const char *rest_ = "digit";
+        state_.increment ();
+
+        // rest_ advances only on a match, so a truncated name is rejected.
+        for (; !state_.eos () && *rest_ && *state_._curr == *rest_; ++rest_)
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ':')
+        {
+            state_.increment ();
+        }
+
+        if (!*rest_ && !state_.eos () && *state_._curr == ']')
+        {
+            const char *str_ = "[0-9A-Fa-f]";
+            state_.increment ();
+            insert_charset (str_, state_, token_, negate_);
+        }
+        else
+        {
+            std::ostringstream ss_;
+            ss_ << "Unknown POSIX charset at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+    }
+
+    template<typename state_type>
+    static void insert_charset (const char *str_, state_type &state_,
+        string_token &token_, const bool negate_)
+    {
+        // strlen is called unqualified: some systems have it in namespace std.
+        using namespace std;
+        // Parse str_ (minus its first character) as a separate charset.
+        const char *first_ = str_ + 1;
+        const char *last_ = str_ + strlen (str_);
+        char_state sub_state_ (first_, last_, state_._id, state_._flags,
+            state_._locale, false);
+        string_token sub_token_;
+
+        charset (sub_state_, sub_token_);
+        if (negate_) sub_token_.negate ();
+        token_.insert (sub_token_);
+    }
+
+    template<typename state_type>
+    static const char *charset_shortcut
+        (state_type &state_, std::size_t &str_len_)
+    {
+        // Maps the escape letter at _curr (d, D, p, s, S, w or W) to the
+        // bracket expression it stands for and stores that string's length
+        // in str_len_. Any other character yields a null pointer and a length
+        // of 0. 'p' is delegated to unicode_escape (), which reads on.
+        const char *expansion_ = 0;
+
+        switch (*state_._curr)
+        {
+            case 'd':
+                expansion_ = "[0-9]";
+                break;
+            case 'D':
+                expansion_ = "[^0-9]";
+                break;
+            case 'p':
+                expansion_ = unicode_escape (state_);
+                break;
+            case 's':
+                expansion_ = "[ \t\n\r\f\v]";
+                break;
+            case 'S':
+                expansion_ = "[^ \t\n\r\f\v]";
+                break;
+            case 'w':
+                expansion_ = "[_0-9A-Za-z]";
+                break;
+            case 'W':
+                expansion_ = "[^_0-9A-Za-z]";
+                break;
+            default:
+                // Not a shortcut letter: expansion_ stays null.
+                break;
+        }
+
+        // Some systems have strlen in namespace std.
+        using namespace std;
+
+        str_len_ = expansion_ ? strlen (expansion_) : 0;
+
+        return expansion_;
+    }
+
+    template<typename state_type>
+    static const char *unicode_escape (state_type &state_)
+    {
+        // Maps the "{X}" or "{Xy}" that follows \p to a charset string and
+        // leaves _curr on the '}'. Throws runtime_error on malformed input.
+        const char *str_ = 0;
+        state_.increment ();
+
+        if (state_.eos ())
+        {
+            std::ostringstream ss_;
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex following \\p in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        if (*state_._curr != '{')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Syntax error following \\p at index " <<
+                state_.index () << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        state_.increment ();
+
+        if (state_.eos ())
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex following \\p{ in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        switch (*state_._curr)
+        {
+            case 'C':
+                state_.increment ();
+
+                if (state_.eos ())
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex following \\p{C "
+                        "in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                switch (*state_._curr)
+                {
+                    case '}':
+                        str_ = "[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cs}]";
+                        break;
+                    case 'c':
+                        str_ = other_control ();
+                        state_.increment ();
+                        break;
+                    case 'f':
+                        str_ = other_format ();
+                        state_.increment ();
+                        break;
+//                    case 'n':
+//                        break;
+                    case 'o':
+                        str_ = other_private ();
+                        state_.increment ();
+                        break;
+                    case 's':
+                        str_ = other_surrogate ();
+                        state_.increment ();
+                        break;
+                    default:
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Syntax error following \\p{C at index " <<
+                            state_.index () << " in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                }
+
+                break;
+            case 'L':
+                state_.increment ();
+
+                if (state_.eos ())
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex following \\p{L "
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                switch (*state_._curr)
+                {
+                    case '}':
+                        str_ = "[\\p{Ll}\\p{Lm}\\p{Lo}\\p{Lt}\\p{Lu}]";
+                        break;
+                    case 'C':
+                        str_ = "[\\p{Ll}\\p{Lt}\\p{Lu}]";
+                        state_.increment ();
+                        break;
+                    case 'l':
+                        str_ = letter_lowercase ();
+                        state_.increment ();
+                        break;
+                    case 'm':
+                        str_ = letter_modifier ();
+                        state_.increment ();
+                        break;
+                    case 'o':
+                        str_ = letter_other ();
+                        state_.increment ();
+                        break;
+                    case 't':
+                        str_ = letter_titlecase ();
+                        state_.increment ();
+                        break;
+                    case 'u':
+                        str_ = letter_uppercase ();
+                        state_.increment ();
+                        break;
+                    default:
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Syntax error following \\p{L at index " <<
+                            state_.index () << " in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                }
+
+                break;
+            case 'M':
+                state_.increment ();
+
+                if (state_.eos ())
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex following \\p{M "
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                switch (*state_._curr)
+                {
+                    case '}':
+                        str_ = "[\\p{Mc}\\p{Me}\\p{Mn}]";
+                        break;
+                    case 'c':
+                        str_ = mark_combining ();
+                        state_.increment ();
+                        break;
+                    case 'e':
+                        str_ = mark_enclosing ();
+                        state_.increment ();
+                        break;
+                    case 'n':
+                        str_ = mark_nonspacing ();
+                        state_.increment ();
+                        break;
+                    default:
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Syntax error following \\p{M at index " <<
+                            state_.index () << " in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                }
+
+                break;
+            case 'N':
+                state_.increment ();
+
+                if (state_.eos ())
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex following \\p{N "
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                switch (*state_._curr)
+                {
+                    case '}':
+                        str_ = "[\\p{Nd}\\p{Nl}\\p{No}]";
+                        break;
+                    case 'd':
+                        str_ = number_decimal ();
+                        state_.increment ();
+                        break;
+                    case 'l':
+                        str_ = number_letter ();
+                        state_.increment ();
+                        break;
+                    case 'o':
+                        str_ = number_other ();
+                        state_.increment ();
+                        break;
+                    default:
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Syntax error following \\p{N at index " <<
+                            state_.index () << " in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                }
+
+                break;
+            case 'P':
+                state_.increment ();
+
+                if (state_.eos ())
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex following \\p{P "
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                switch (*state_._curr)
+                {
+                    case '}':
+                        str_ = "[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}"
+                            "\\p{Ps}]";
+                        break;
+                    case 'c':
+                        str_ = punctuation_connector ();
+                        state_.increment ();
+                        break;
+                    case 'd':
+                        str_ = punctuation_dash ();
+                        state_.increment ();
+                        break;
+                    case 'e':
+                        str_ = punctuation_close ();
+                        state_.increment ();
+                        break;
+                    case 'f':
+                        str_ = punctuation_final ();
+                        state_.increment ();
+                        break;
+                    case 'i':
+                        str_ = punctuation_initial ();
+                        state_.increment ();
+                        break;
+                    case 'o':
+                        str_ = punctuation_other ();
+                        state_.increment ();
+                        break;
+                    case 's':
+                        str_ = punctuation_open ();
+                        state_.increment ();
+                        break;
+                    default:
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Syntax error following \\p{P at index " <<
+                            state_.index () << " in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                }
+
+                break;
+            case 'S':
+                state_.increment ();
+
+                if (state_.eos ())
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex following \\p{S "
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                switch (*state_._curr)
+                {
+                    case '}':
+                        str_ = "[\\p{Sc}\\p{Sk}\\p{Sm}\\p{So}]";
+                        break;
+                    case 'c':
+                        str_ = symbol_currency ();
+                        state_.increment ();
+                        break;
+                    case 'k':
+                        str_ = symbol_modifier ();
+                        state_.increment ();
+                        break;
+                    case 'm':
+                        str_ = symbol_math ();
+                        state_.increment ();
+                        break;
+                    case 'o':
+                        str_ = symbol_other ();
+                        state_.increment ();
+                        break;
+                    default:
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Syntax error following \\p{S at index " <<
+                            state_.index () << " in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                }
+
+                break;
+            case 'Z':
+                state_.increment ();
+
+                if (state_.eos ())
+                {
+                    std::ostringstream ss_;
+
+                    // Pointless returning index if at end of string
+                    ss_ << "Unexpected end of regex following \\p{Z "
+                        " in rule id " << state_._id << '.';
+                    throw runtime_error (ss_.str ());
+                }
+
+                switch (*state_._curr)
+                {
+                    case '}':
+                        str_ = "[\\p{Zl}\\p{Zp}\\p{Zs}]";
+                        break;
+                    case 'l':
+                        str_ = separator_line ();
+                        state_.increment ();
+                        break;
+                    case 'p':
+                        str_ = separator_paragraph ();
+                        state_.increment ();
+                        break;
+                    case 's':
+                        str_ = separator_space ();
+                        state_.increment ();
+                        break;
+                    default:
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Syntax error following \\p{Z at index " <<
+                            state_.index () << " in rule id " <<
+                            state_._id << '.';
+                        throw runtime_error (ss_.str ());
+                    }
+                }
+
+                break;
+            default:
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Syntax error following \\p{ at index " <<
+                    state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+        }
+
+        // The sub-category cases advance _curr, so the end may be reached.
+        if (state_.eos () || *state_._curr != '}')
+        {
+            std::ostringstream ss_;
+            ss_ << "Missing } at index " << state_.index () <<
+                " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        return str_;
+    }
+
+    static const char *letter_uppercase ()
+    { // Charset string that unicode_escape () returns for \p{Lu}.
+        return "[\\x41-\\x5a\\xc0-\\xd6\\xd8-\\xde\\x100\\x102\\x104\\x106"
+            "\\x108\\x10a\\x10c\\x10e\\x110\\x112\\x114\\x116\\x118\\x11a"
+            "\\x11c\\x11e\\x120\\x122\\x124\\x126\\x128\\x12a\\x12c\\x12e"
+            "\\x130\\x132\\x134\\x136\\x139\\x13b\\x13d\\x13f\\x141\\x143"
+            "\\x145\\x147\\x14a\\x14c\\x14e\\x150\\x152\\x154\\x156\\x158"
+            "\\x15a\\x15c\\x15e\\x160\\x162\\x164\\x166\\x168\\x16a\\x16c"
+            "\\x16e\\x170\\x172\\x174\\x176\\x178\\x179\\x17b\\x17d\\x181"
+            "\\x182\\x184\\x186\\x187\\x189-\\x18b\\x18e-\\x191\\x193\\x194"
+            "\\x196-\\x198\\x19c\\x19d\\x19f\\x1a0\\x1a2\\x1a4\\x1a6\\x1a7"
+            "\\x1a9\\x1ac\\x1ae\\x1af\\x1b1-\\x1b3\\x1b5\\x1b7\\x1b8\\x1bc"
+            "\\x1c4\\x1c7\\x1ca\\x1cd\\x1cf\\x1d1\\x1d3\\x1d5\\x1d7\\x1d9"
+            "\\x1db\\x1de\\x1e0\\x1e2\\x1e4\\x1e6\\x1e8\\x1ea\\x1ec\\x1ee"
+            "\\x1f1\\x1f4\\x1f6-\\x1f8\\x1fa\\x1fc\\x1fe\\x200\\x202\\x204"
+            "\\x206\\x208\\x20a\\x20c\\x20e\\x210\\x212\\x214\\x216\\x218"
+            "\\x21a\\x21c\\x21e\\x220\\x222\\x224\\x226\\x228\\x22a\\x22c"
+            "\\x22e\\x230\\x232\\x23a\\x23b\\x23d\\x23e\\x241\\x243-\\x246"
+            "\\x248\\x24a\\x24c\\x24e\\x370\\x372\\x376\\x386\\x388-\\x38a"
+            "\\x38c\\x38e\\x38f\\x391-\\x3a1\\x3a3-\\x3ab\\x3cf\\x3d2-\\x3d4"
+            "\\x3d8\\x3da\\x3dc\\x3de\\x3e0\\x3e2\\x3e4\\x3e6\\x3e8\\x3ea"
+            "\\x3ec\\x3ee\\x3f4\\x3f7\\x3f9\\x3fa\\x3fd-\\x42f\\x460\\x462"
+            "\\x464\\x466\\x468\\x46a\\x46c\\x46e\\x470\\x472\\x474\\x476"
+            "\\x478\\x47a\\x47c\\x47e\\x480\\x48a\\x48c\\x48e\\x490\\x492"
+            "\\x494\\x496\\x498\\x49a\\x49c\\x49e\\x4a0\\x4a2\\x4a4\\x4a6"
+            "\\x4a8\\x4aa\\x4ac\\x4ae\\x4b0\\x4b2\\x4b4\\x4b6\\x4b8\\x4ba"
+            "\\x4bc\\x4be\\x4c0\\x4c1\\x4c3\\x4c5\\x4c7\\x4c9\\x4cb\\x4cd"
+            "\\x4d0\\x4d2\\x4d4\\x4d6\\x4d8\\x4da\\x4dc\\x4de\\x4e0\\x4e2"
+            "\\x4e4\\x4e6\\x4e8\\x4ea\\x4ec\\x4ee\\x4f0\\x4f2\\x4f4\\x4f6"
+            "\\x4f8\\x4fa\\x4fc\\x4fe\\x500\\x502\\x504\\x506\\x508\\x50a"
+            "\\x50c\\x50e\\x510\\x512\\x514\\x516\\x518\\x51a\\x51c\\x51e"
+            "\\x520\\x522\\x524\\x526\\x531-\\x556\\x10a0-\\x10c5\\x1e00"
+            "\\x1e02\\x1e04\\x1e06\\x1e08\\x1e0a\\x1e0c\\x1e0e\\x1e10\\x1e12"
+            "\\x1e14\\x1e16\\x1e18\\x1e1a\\x1e1c\\x1e1e\\x1e20\\x1e22\\x1e24"
+            "\\x1e26\\x1e28\\x1e2a\\x1e2c\\x1e2e\\x1e30\\x1e32\\x1e34\\x1e36"
+            "\\x1e38\\x1e3a\\x1e3c\\x1e3e\\x1e40\\x1e42\\x1e44\\x1e46\\x1e48"
+            "\\x1e4a\\x1e4c\\x1e4e\\x1e50\\x1e52\\x1e54\\x1e56\\x1e58\\x1e5a"
+            "\\x1e5c\\x1e5e\\x1e60\\x1e62\\x1e64\\x1e66\\x1e68\\x1e6a\\x1e6c"
+            "\\x1e6e\\x1e70\\x1e72\\x1e74\\x1e76\\x1e78\\x1e7a\\x1e7c\\x1e7e"
+            "\\x1e80\\x1e82\\x1e84\\x1e86\\x1e88\\x1e8a\\x1e8c\\x1e8e\\x1e90"
+            "\\x1e92\\x1e94\\x1e9e\\x1ea0\\x1ea2\\x1ea4\\x1ea6\\x1ea8\\x1eaa"
+            "\\x1eac\\x1eae\\x1eb0\\x1eb2\\x1eb4\\x1eb6\\x1eb8\\x1eba\\x1ebc"
+            "\\x1ebe\\x1ec0\\x1ec2\\x1ec4\\x1ec6\\x1ec8\\x1eca\\x1ecc\\x1ece"
+            "\\x1ed0\\x1ed2\\x1ed4\\x1ed6\\x1ed8\\x1eda\\x1edc\\x1ede\\x1ee0"
+            "\\x1ee2\\x1ee4\\x1ee6\\x1ee8\\x1eea\\x1eec\\x1eee\\x1ef0\\x1ef2"
+            "\\x1ef4\\x1ef6\\x1ef8\\x1efa\\x1efc\\x1efe\\x1f08-\\x1f0f"
+            "\\x1f18-\\x1f1d\\x1f28-\\x1f2f\\x1f38-\\x1f3f\\x1f48-\\x1f4d"
+            "\\x1f59\\x1f5b\\x1f5d\\x1f5f\\x1f68-\\x1f6f\\x1fb8-\\x1fbb"
+            "\\x1fc8-\\x1fcb\\x1fd8-\\x1fdb\\x1fe8-\\x1fec\\x1ff8-\\x1ffb"
+            "\\x2102\\x2107\\x210b-\\x210d\\x2110-\\x2112\\x2115"
+            "\\x2119-\\x211d\\x2124\\x2126\\x2128\\x212a-\\x212d"
+            "\\x2130-\\x2133\\x213e\\x213f\\x2145\\x2183\\x2c00-\\x2c2e"
+            "\\x2c60\\x2c62-\\x2c64\\x2c67\\x2c69\\x2c6b\\x2c6d-\\x2c70"
+            "\\x2c72\\x2c75\\x2c7e-\\x2c80\\x2c82\\x2c84\\x2c86\\x2c88\\x2c8a"
+            "\\x2c8c\\x2c8e\\x2c90\\x2c92\\x2c94\\x2c96\\x2c98\\x2c9a\\x2c9c"
+            "\\x2c9e\\x2ca0\\x2ca2\\x2ca4\\x2ca6\\x2ca8\\x2caa\\x2cac\\x2cae"
+            "\\x2cb0\\x2cb2\\x2cb4\\x2cb6\\x2cb8\\x2cba\\x2cbc\\x2cbe\\x2cc0"
+            "\\x2cc2\\x2cc4\\x2cc6\\x2cc8\\x2cca\\x2ccc\\x2cce\\x2cd0\\x2cd2"
+            "\\x2cd4\\x2cd6\\x2cd8\\x2cda\\x2cdc\\x2cde\\x2ce0\\x2ce2\\x2ceb"
+            "\\x2ced\\xa640\\xa642\\xa644\\xa646\\xa648\\xa64a\\xa64c\\xa64e"
+            "\\xa650\\xa652\\xa654\\xa656\\xa658\\xa65a\\xa65c\\xa65e\\xa660"
+            "\\xa662\\xa664\\xa666\\xa668\\xa66a\\xa66c\\xa680\\xa682\\xa684"
+            "\\xa686\\xa688\\xa68a\\xa68c\\xa68e\\xa690\\xa692\\xa694\\xa696"
+            "\\xa722\\xa724\\xa726\\xa728\\xa72a\\xa72c\\xa72e\\xa732\\xa734"
+            "\\xa736\\xa738\\xa73a\\xa73c\\xa73e\\xa740\\xa742\\xa744\\xa746"
+            "\\xa748\\xa74a\\xa74c\\xa74e\\xa750\\xa752\\xa754\\xa756\\xa758"
+            "\\xa75a\\xa75c\\xa75e\\xa760\\xa762\\xa764\\xa766\\xa768\\xa76a"
+            "\\xa76c\\xa76e\\xa779\\xa77b\\xa77d\\xa77e\\xa780\\xa782\\xa784"
+            "\\xa786\\xa78b\\xa78d\\xa790\\xa7a0\\xa7a2\\xa7a4\\xa7a6\\xa7a8"
+            "\\xff21-\\xff3a\\x10400-\\x10427\\x1d400-\\x1d419"
+            "\\x1d434-\\x1d44d\\x1d468-\\x1d481\\x1d49c\\x1d49e\\x1d49f"
+            "\\x1d4a2\\x1d4a5\\x1d4a6\\x1d4a9-\\x1d4ac\\x1d4ae-\\x1d4b5"
+            "\\x1d4d0-\\x1d4e9\\x1d504\\x1d505\\x1d507-\\x1d50a"
+            "\\x1d50d-\\x1d514\\x1d516-\\x1d51c\\x1d538\\x1d539"
+            "\\x1d53b-\\x1d53e\\x1d540-\\x1d544\\x1d546\\x1d54a-\\x1d550"
+            "\\x1d56c-\\x1d585\\x1d5a0-\\x1d5b9\\x1d5d4-\\x1d5ed"
+            "\\x1d608-\\x1d621\\x1d63c-\\x1d655\\x1d670-\\x1d689"
+            "\\x1d6a8-\\x1d6c0\\x1d6e2-\\x1d6fa\\x1d71c-\\x1d734"
+            "\\x1d756-\\x1d76e\\x1d790-\\x1d7a8\\x1d7ca]";
+    }
+
+    static const char *letter_lowercase ()  // Bracket expression of lowercase-letter code points (presumably Unicode category Ll; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x61-\\x7a\\xaa\\xb5\\xba\\xdf-\\xf6\\xf8-\\xff\\x101"
+            "\\x103\\x105\\x107\\x109\\x10b\\x10d\\x10f\\x111\\x113\\x115"
+            "\\x117\\x119\\x11b\\x11d\\x11f\\x121\\x123\\x125\\x127\\x129"
+            "\\x12b\\x12d\\x12f\\x131\\x133\\x135\\x137\\x138\\x13a\\x13c"
+            "\\x13e\\x140\\x142\\x144\\x146\\x148\\x149\\x14b\\x14d\\x14f"
+            "\\x151\\x153\\x155\\x157\\x159\\x15b\\x15d\\x15f\\x161\\x163"
+            "\\x165\\x167\\x169\\x16b\\x16d\\x16f\\x171\\x173\\x175\\x177"
+            "\\x17a\\x17c\\x17e-\\x180\\x183\\x185\\x188\\x18c\\x18d\\x192"
+            "\\x195\\x199-\\x19b\\x19e\\x1a1\\x1a3\\x1a5\\x1a8\\x1aa\\x1ab"
+            "\\x1ad\\x1b0\\x1b4\\x1b6\\x1b9\\x1ba\\x1bd-\\x1bf\\x1c6\\x1c9"
+            "\\x1cc\\x1ce\\x1d0\\x1d2\\x1d4\\x1d6\\x1d8\\x1da\\x1dc\\x1dd"
+            "\\x1df\\x1e1\\x1e3\\x1e5\\x1e7\\x1e9\\x1eb\\x1ed\\x1ef\\x1f0"
+            "\\x1f3\\x1f5\\x1f9\\x1fb\\x1fd\\x1ff\\x201\\x203\\x205\\x207"
+            "\\x209\\x20b\\x20d\\x20f\\x211\\x213\\x215\\x217\\x219\\x21b"
+            "\\x21d\\x21f\\x221\\x223\\x225\\x227\\x229\\x22b\\x22d\\x22f"
+            "\\x231\\x233-\\x239\\x23c\\x23f\\x240\\x242\\x247\\x249\\x24b"
+            "\\x24d\\x24f-\\x293\\x295-\\x2af\\x371\\x373\\x377\\x37b-\\x37d"
+            "\\x390\\x3ac-\\x3ce\\x3d0\\x3d1\\x3d5-\\x3d7\\x3d9\\x3db\\x3dd"
+            "\\x3df\\x3e1\\x3e3\\x3e5\\x3e7\\x3e9\\x3eb\\x3ed\\x3ef-\\x3f3"
+            "\\x3f5\\x3f8\\x3fb\\x3fc\\x430-\\x45f\\x461\\x463\\x465\\x467"
+            "\\x469\\x46b\\x46d\\x46f\\x471\\x473\\x475\\x477\\x479\\x47b"
+            "\\x47d\\x47f\\x481\\x48b\\x48d\\x48f\\x491\\x493\\x495\\x497"
+            "\\x499\\x49b\\x49d\\x49f\\x4a1\\x4a3\\x4a5\\x4a7\\x4a9\\x4ab"
+            "\\x4ad\\x4af\\x4b1\\x4b3\\x4b5\\x4b7\\x4b9\\x4bb\\x4bd\\x4bf"
+            "\\x4c2\\x4c4\\x4c6\\x4c8\\x4ca\\x4cc\\x4ce\\x4cf\\x4d1\\x4d3"
+            "\\x4d5\\x4d7\\x4d9\\x4db\\x4dd\\x4df\\x4e1\\x4e3\\x4e5\\x4e7"
+            "\\x4e9\\x4eb\\x4ed\\x4ef\\x4f1\\x4f3\\x4f5\\x4f7\\x4f9\\x4fb"
+            "\\x4fd\\x4ff\\x501\\x503\\x505\\x507\\x509\\x50b\\x50d\\x50f"
+            "\\x511\\x513\\x515\\x517\\x519\\x51b\\x51d\\x51f\\x521\\x523"
+            "\\x525\\x527\\x561-\\x587\\x1d00-\\x1d2b\\x1d62-\\x1d77"
+            "\\x1d79-\\x1d9a\\x1e01\\x1e03\\x1e05\\x1e07\\x1e09\\x1e0b"
+            "\\x1e0d\\x1e0f\\x1e11\\x1e13\\x1e15\\x1e17\\x1e19\\x1e1b"
+            "\\x1e1d\\x1e1f\\x1e21\\x1e23\\x1e25\\x1e27\\x1e29\\x1e2b\\x1e2d"
+            "\\x1e2f\\x1e31\\x1e33\\x1e35\\x1e37\\x1e39\\x1e3b\\x1e3d\\x1e3f"
+            "\\x1e41\\x1e43\\x1e45\\x1e47\\x1e49\\x1e4b\\x1e4d\\x1e4f\\x1e51"
+            "\\x1e53\\x1e55\\x1e57\\x1e59\\x1e5b\\x1e5d\\x1e5f\\x1e61\\x1e63"
+            "\\x1e65\\x1e67\\x1e69\\x1e6b\\x1e6d\\x1e6f\\x1e71\\x1e73\\x1e75"
+            "\\x1e77\\x1e79\\x1e7b\\x1e7d\\x1e7f\\x1e81\\x1e83\\x1e85\\x1e87"
+            "\\x1e89\\x1e8b\\x1e8d\\x1e8f\\x1e91\\x1e93\\x1e95-\\x1e9d\\x1e9f"
+            "\\x1ea1\\x1ea3\\x1ea5\\x1ea7\\x1ea9\\x1eab\\x1ead\\x1eaf\\x1eb1"
+            "\\x1eb3\\x1eb5\\x1eb7\\x1eb9\\x1ebb\\x1ebd\\x1ebf\\x1ec1\\x1ec3"
+            "\\x1ec5\\x1ec7\\x1ec9\\x1ecb\\x1ecd\\x1ecf\\x1ed1\\x1ed3\\x1ed5"
+            "\\x1ed7\\x1ed9\\x1edb\\x1edd\\x1edf\\x1ee1\\x1ee3\\x1ee5\\x1ee7"
+            "\\x1ee9\\x1eeb\\x1eed\\x1eef\\x1ef1\\x1ef3\\x1ef5\\x1ef7\\x1ef9"
+            "\\x1efb\\x1efd\\x1eff-\\x1f07\\x1f10-\\x1f15\\x1f20-\\x1f27"
+            "\\x1f30-\\x1f37\\x1f40-\\x1f45\\x1f50-\\x1f57\\x1f60-\\x1f67"
+            "\\x1f70-\\x1f7d\\x1f80-\\x1f87\\x1f90-\\x1f97\\x1fa0-\\x1fa7"
+            "\\x1fb0-\\x1fb4\\x1fb6\\x1fb7\\x1fbe\\x1fc2-\\x1fc4\\x1fc6"
+            "\\x1fc7\\x1fd0-\\x1fd3\\x1fd6\\x1fd7\\x1fe0-\\x1fe7"
+            "\\x1ff2-\\x1ff4\\x1ff6\\x1ff7\\x210a\\x210e\\x210f\\x2113"
+            "\\x212f\\x2134\\x2139\\x213c\\x213d\\x2146-\\x2149\\x214e"
+            "\\x2184\\x2c30-\\x2c5e\\x2c61\\x2c65\\x2c66\\x2c68\\x2c6a"
+            "\\x2c6c\\x2c71\\x2c73\\x2c74\\x2c76-\\x2c7c\\x2c81\\x2c83"
+            "\\x2c85\\x2c87\\x2c89\\x2c8b\\x2c8d\\x2c8f\\x2c91\\x2c93\\x2c95"
+            "\\x2c97\\x2c99\\x2c9b\\x2c9d\\x2c9f\\x2ca1\\x2ca3\\x2ca5\\x2ca7"
+            "\\x2ca9\\x2cab\\x2cad\\x2caf\\x2cb1\\x2cb3\\x2cb5\\x2cb7\\x2cb9"
+            "\\x2cbb\\x2cbd\\x2cbf\\x2cc1\\x2cc3\\x2cc5\\x2cc7\\x2cc9\\x2ccb"
+            "\\x2ccd\\x2ccf\\x2cd1\\x2cd3\\x2cd5\\x2cd7\\x2cd9\\x2cdb\\x2cdd"
+            "\\x2cdf\\x2ce1\\x2ce3\\x2ce4\\x2cec\\x2cee\\x2d00-\\x2d25\\xa641"
+            "\\xa643\\xa645\\xa647\\xa649\\xa64b\\xa64d\\xa64f\\xa651\\xa653"
+            "\\xa655\\xa657\\xa659\\xa65b\\xa65d\\xa65f\\xa661\\xa663\\xa665"
+            "\\xa667\\xa669\\xa66b\\xa66d\\xa681\\xa683\\xa685\\xa687\\xa689"
+            "\\xa68b\\xa68d\\xa68f\\xa691\\xa693\\xa695\\xa697\\xa723\\xa725"
+            "\\xa727\\xa729\\xa72b\\xa72d\\xa72f-\\xa731\\xa733\\xa735\\xa737"
+            "\\xa739\\xa73b\\xa73d\\xa73f\\xa741\\xa743\\xa745\\xa747\\xa749"
+            "\\xa74b\\xa74d\\xa74f\\xa751\\xa753\\xa755\\xa757\\xa759\\xa75b"
+            "\\xa75d\\xa75f\\xa761\\xa763\\xa765\\xa767\\xa769\\xa76b\\xa76d"
+            "\\xa76f\\xa771-\\xa778\\xa77a\\xa77c\\xa77f\\xa781\\xa783"
+            "\\xa785\\xa787\\xa78c\\xa78e\\xa791\\xa7a1\\xa7a3\\xa7a5\\xa7a7"
+            "\\xa7a9\\xa7fa\\xfb00-\\xfb06\\xfb13-\\xfb17\\xff41-\\xff5a"
+            "\\x10428-\\x1044f\\x1d41a-\\x1d433\\x1d44e-\\x1d454"
+            "\\x1d456-\\x1d467\\x1d482-\\x1d49b\\x1d4b6-\\x1d4b9\\x1d4bb"
+            "\\x1d4bd-\\x1d4c3\\x1d4c5-\\x1d4cf\\x1d4ea-\\x1d503"
+            "\\x1d51e-\\x1d537\\x1d552-\\x1d56b\\x1d586-\\x1d59f"
+            "\\x1d5ba-\\x1d5d3\\x1d5ee-\\x1d607\\x1d622-\\x1d63b"
+            "\\x1d656-\\x1d66f\\x1d68a-\\x1d6a5\\x1d6c2-\\x1d6da"
+            "\\x1d6dc-\\x1d6e1\\x1d6fc-\\x1d714\\x1d716-\\x1d71b"
+            "\\x1d736-\\x1d74e\\x1d750-\\x1d755\\x1d770-\\x1d788"
+            "\\x1d78a-\\x1d78f\\x1d7aa-\\x1d7c2\\x1d7c4-\\x1d7c9\\x1d7cb]";
+    }
+
+    static const char *letter_titlecase ()  // Bracket expression of titlecase-letter code points (presumably Unicode category Lt; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x1c5\\x1c8\\x1cb\\x1f2\\x1f88-\\x1f8f\\x1f98-\\x1f9f"
+            "\\x1fa8-\\x1faf\\x1fbc\\x1fcc\\x1ffc]";
+    }
+
+    static const char *letter_modifier ()  // Bracket expression of modifier-letter code points (presumably Unicode category Lm; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x2b0-\\x2c1\\x2c6-\\x2d1\\x2e0-\\x2e4\\x2ec\\x2ee\\x374"
+            "\\x37a\\x559\\x640\\x6e5\\x6e6\\x7f4\\x7f5\\x7fa\\x81a\\x824"
+            "\\x828\\x971\\xe46\\xec6\\x10fc\\x17d7\\x1843\\x1aa7"
+            "\\x1c78-\\x1c7d\\x1d2c-\\x1d61\\x1d78\\x1d9b-\\x1dbf\\x2071"
+            "\\x207f\\x2090-\\x209c\\x2c7d\\x2d6f\\x2e2f\\x3005"
+            "\\x3031-\\x3035\\x303b\\x309d\\x309e\\x30fc-\\x30fe\\xa015"
+            "\\xa4f8-\\xa4fd\\xa60c\\xa67f\\xa717-\\xa71f\\xa770\\xa788"
+            "\\xa9cf\\xaa70\\xaadd\\xff70\\xff9e\\xff9f]";
+    }
+
+    static const char *letter_other ()  // Bracket expression of "other letter" code points (presumably Unicode category Lo; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x1bb\\x1c0-\\x1c3\\x294\\x5d0-\\x5ea\\x5f0-\\x5f2"
+            "\\x620-\\x63f\\x641-\\x64a\\x66e\\x66f\\x671-\\x6d3\\x6d5\\x6ee"
+            "\\x6ef\\x6fa-\\x6fc\\x6ff\\x710\\x712-\\x72f\\x74d-\\x7a5\\x7b1"
+            "\\x7ca-\\x7ea\\x800-\\x815\\x840-\\x858\\x904-\\x939\\x93d"
+            "\\x950\\x958-\\x961\\x972-\\x977\\x979-\\x97f\\x985-\\x98c\\x98f"
+            "\\x990\\x993-\\x9a8\\x9aa-\\x9b0\\x9b2\\x9b6-\\x9b9\\x9bd\\x9ce"
+            "\\x9dc\\x9dd\\x9df-\\x9e1\\x9f0\\x9f1\\xa05-\\xa0a\\xa0f\\xa10"
+            "\\xa13-\\xa28\\xa2a-\\xa30\\xa32\\xa33\\xa35\\xa36\\xa38\\xa39"
+            "\\xa59-\\xa5c\\xa5e\\xa72-\\xa74\\xa85-\\xa8d\\xa8f-\\xa91"
+            "\\xa93-\\xaa8\\xaaa-\\xab0\\xab2\\xab3\\xab5-\\xab9\\xabd\\xad0"
+            "\\xae0\\xae1\\xb05-\\xb0c\\xb0f\\xb10\\xb13-\\xb28\\xb2a-\\xb30"
+            "\\xb32\\xb33\\xb35-\\xb39\\xb3d\\xb5c\\xb5d\\xb5f-\\xb61\\xb71"
+            "\\xb83\\xb85-\\xb8a\\xb8e-\\xb90\\xb92-\\xb95\\xb99\\xb9a\\xb9c"
+            "\\xb9e\\xb9f\\xba3\\xba4\\xba8-\\xbaa\\xbae-\\xbb9\\xbd0"
+            "\\xc05-\\xc0c\\xc0e-\\xc10\\xc12-\\xc28\\xc2a-\\xc33"
+            "\\xc35-\\xc39\\xc3d\\xc58\\xc59\\xc60\\xc61\\xc85-\\xc8c"
+            "\\xc8e-\\xc90\\xc92-\\xca8\\xcaa-\\xcb3\\xcb5-\\xcb9\\xcbd"
+            "\\xcde\\xce0\\xce1\\xcf1\\xcf2\\xd05-\\xd0c\\xd0e-\\xd10"
+            "\\xd12-\\xd3a\\xd3d\\xd4e\\xd60\\xd61\\xd7a-\\xd7f\\xd85-\\xd96"
+            "\\xd9a-\\xdb1\\xdb3-\\xdbb\\xdbd\\xdc0-\\xdc6\\xe01-\\xe30\\xe32"
+            "\\xe33\\xe40-\\xe45\\xe81\\xe82\\xe84\\xe87\\xe88\\xe8a\\xe8d"
+            "\\xe94-\\xe97\\xe99-\\xe9f\\xea1-\\xea3\\xea5\\xea7\\xeaa\\xeab"
+            "\\xead-\\xeb0\\xeb2\\xeb3\\xebd\\xec0-\\xec4\\xedc\\xedd\\xf00"
+            "\\xf40-\\xf47\\xf49-\\xf6c\\xf88-\\xf8c\\x1000-\\x102a\\x103f"
+            "\\x1050-\\x1055\\x105a-\\x105d\\x1061\\x1065\\x1066"
+            "\\x106e-\\x1070\\x1075-\\x1081\\x108e\\x10d0-\\x10fa"
+            "\\x1100-\\x1248\\x124a-\\x124d\\x1250-\\x1256\\x1258"
+            "\\x125a-\\x125d\\x1260-\\x1288\\x128a-\\x128d\\x1290-\\x12b0"
+            "\\x12b2-\\x12b5\\x12b8-\\x12be\\x12c0\\x12c2-\\x12c5"
+            "\\x12c8-\\x12d6\\x12d8-\\x1310\\x1312-\\x1315\\x1318-\\x135a"
+            "\\x1380-\\x138f\\x13a0-\\x13f4\\x1401-\\x166c\\x166f-\\x167f"
+            "\\x1681-\\x169a\\x16a0-\\x16ea\\x1700-\\x170c\\x170e-\\x1711"
+            "\\x1720-\\x1731\\x1740-\\x1751\\x1760-\\x176c\\x176e-\\x1770"
+            "\\x1780-\\x17b3\\x17dc\\x1820-\\x1842\\x1844-\\x1877"
+            "\\x1880-\\x18a8\\x18aa\\x18b0-\\x18f5\\x1900-\\x191c"
+            "\\x1950-\\x196d\\x1970-\\x1974\\x1980-\\x19ab\\x19c1-\\x19c7"
+            "\\x1a00-\\x1a16\\x1a20-\\x1a54\\x1b05-\\x1b33\\x1b45-\\x1b4b"
+            "\\x1b83-\\x1ba0\\x1bae\\x1baf\\x1bc0-\\x1be5\\x1c00-\\x1c23"
+            "\\x1c4d-\\x1c4f\\x1c5a-\\x1c77\\x1ce9-\\x1cec\\x1cee-\\x1cf1"
+            "\\x2135-\\x2138\\x2d30-\\x2d65\\x2d80-\\x2d96\\x2da0-\\x2da6"
+            "\\x2da8-\\x2dae\\x2db0-\\x2db6\\x2db8-\\x2dbe\\x2dc0-\\x2dc6"
+            "\\x2dc8-\\x2dce\\x2dd0-\\x2dd6\\x2dd8-\\x2dde\\x3006\\x303c"
+            "\\x3041-\\x3096\\x309f\\x30a1-\\x30fa\\x30ff\\x3105-\\x312d"
+            "\\x3131-\\x318e\\x31a0-\\x31ba\\x31f0-\\x31ff\\x3400-\\x4db5"  // 3400-4db5 is a First/Last block: must be a range, not two code points.
+            "\\x4e00-\\x9fcb\\xa000-\\xa014\\xa016-\\xa48c\\xa4d0-\\xa4f7"  // 4e00-9fcb likewise spans the whole block.
+            "\\xa500-\\xa60b\\xa610-\\xa61f\\xa62a\\xa62b\\xa66e"
+            "\\xa6a0-\\xa6e5\\xa7fb-\\xa801\\xa803-\\xa805\\xa807-\\xa80a"
+            "\\xa80c-\\xa822\\xa840-\\xa873\\xa882-\\xa8b3\\xa8f2-\\xa8f7"
+            "\\xa8fb\\xa90a-\\xa925\\xa930-\\xa946\\xa960-\\xa97c"
+            "\\xa984-\\xa9b2\\xaa00-\\xaa28\\xaa40-\\xaa42\\xaa44-\\xaa4b"
+            "\\xaa60-\\xaa6f\\xaa71-\\xaa76\\xaa7a\\xaa80-\\xaaaf\\xaab1"
+            "\\xaab5\\xaab6\\xaab9-\\xaabd\\xaac0\\xaac2\\xaadb\\xaadc"
+            "\\xab01-\\xab06\\xab09-\\xab0e\\xab11-\\xab16\\xab20-\\xab26"
+            "\\xab28-\\xab2e\\xabc0-\\xabe2\\xac00-\\xd7a3\\xd7b0-\\xd7c6"  // ac00-d7a3 is a First/Last block: range, not endpoints only.
+            "\\xd7cb-\\xd7fb\\xf900-\\xfa2d\\xfa30-\\xfa6d\\xfa70-\\xfad9"
+            "\\xfb1d\\xfb1f-\\xfb28\\xfb2a-\\xfb36\\xfb38-\\xfb3c\\xfb3e"
+            "\\xfb40\\xfb41\\xfb43\\xfb44\\xfb46-\\xfbb1\\xfbd3-\\xfd3d"
+            "\\xfd50-\\xfd8f\\xfd92-\\xfdc7\\xfdf0-\\xfdfb\\xfe70-\\xfe74"
+            "\\xfe76-\\xfefc\\xff66-\\xff6f\\xff71-\\xff9d\\xffa0-\\xffbe"
+            "\\xffc2-\\xffc7\\xffca-\\xffcf\\xffd2-\\xffd7\\xffda-\\xffdc"
+            "\\x10000-\\x1000b\\x1000d-\\x10026\\x10028-\\x1003a\\x1003c"
+            "\\x1003d\\x1003f-\\x1004d\\x10050-\\x1005d\\x10080-\\x100fa"
+            "\\x10280-\\x1029c\\x102a0-\\x102d0\\x10300-\\x1031e"
+            "\\x10330-\\x10340\\x10342-\\x10349\\x10380-\\x1039d"
+            "\\x103a0-\\x103c3\\x103c8-\\x103cf\\x10450-\\x1049d"
+            "\\x10800-\\x10805\\x10808\\x1080a-\\x10835\\x10837\\x10838"
+            "\\x1083c\\x1083f-\\x10855\\x10900-\\x10915\\x10920-\\x10939"
+            "\\x10a00\\x10a10-\\x10a13\\x10a15-\\x10a17\\x10a19-\\x10a33"
+            "\\x10a60-\\x10a7c\\x10b00-\\x10b35\\x10b40-\\x10b55"
+            "\\x10b60-\\x10b72\\x10c00-\\x10c48\\x11003-\\x11037"
+            "\\x11083-\\x110af\\x12000-\\x1236e\\x13000-\\x1342e"
+            "\\x16800-\\x16a38\\x1b000\\x1b001\\x20000-\\x2a6d6"  // 20000-2a6d6 is a First/Last block: range.
+            "\\x2a700-\\x2b734\\x2b740-\\x2b81d\\x2f800-\\x2fa1d]";  // 2a700-2b734 and 2b740-2b81d likewise.
+    }
+
+    static const char *mark_nonspacing ()  // Bracket expression of nonspacing-mark code points (presumably Unicode category Mn; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x300-\\x36f\\x483-\\x487\\x591-\\x5bd\\x5bf\\x5c1\\x5c2"
+            "\\x5c4\\x5c5\\x5c7\\x610-\\x61a\\x64b-\\x65f\\x670\\x6d6-\\x6dc"
+            "\\x6df-\\x6e4\\x6e7\\x6e8\\x6ea-\\x6ed\\x711\\x730-\\x74a"
+            "\\x7a6-\\x7b0\\x7eb-\\x7f3\\x816-\\x819\\x81b-\\x823"
+            "\\x825-\\x827\\x829-\\x82d\\x859-\\x85b\\x900-\\x902\\x93a\\x93c"
+            "\\x941-\\x948\\x94d\\x951-\\x957\\x962\\x963\\x981\\x9bc"
+            "\\x9c1-\\x9c4\\x9cd\\x9e2\\x9e3\\xa01\\xa02\\xa3c\\xa41\\xa42"
+            "\\xa47\\xa48\\xa4b-\\xa4d\\xa51\\xa70\\xa71\\xa75\\xa81\\xa82"
+            "\\xabc\\xac1-\\xac5\\xac7\\xac8\\xacd\\xae2\\xae3\\xb01\\xb3c"
+            "\\xb3f\\xb41-\\xb44\\xb4d\\xb56\\xb62\\xb63\\xb82\\xbc0\\xbcd"
+            "\\xc3e-\\xc40\\xc46-\\xc48\\xc4a-\\xc4d\\xc55\\xc56\\xc62\\xc63"
+            "\\xcbc\\xcbf\\xcc6\\xccc\\xccd\\xce2\\xce3\\xd41-\\xd44\\xd4d"
+            "\\xd62\\xd63\\xdca\\xdd2-\\xdd4\\xdd6\\xe31\\xe34-\\xe3a"
+            "\\xe47-\\xe4e\\xeb1\\xeb4-\\xeb9\\xebb\\xebc\\xec8-\\xecd\\xf18"
+            "\\xf19\\xf35\\xf37\\xf39\\xf71-\\xf7e\\xf80-\\xf84\\xf86\\xf87"
+            "\\xf8d-\\xf97\\xf99-\\xfbc\\xfc6\\x102d-\\x1030\\x1032-\\x1037"
+            "\\x1039\\x103a\\x103d\\x103e\\x1058\\x1059\\x105e-\\x1060"
+            "\\x1071-\\x1074\\x1082\\x1085\\x1086\\x108d\\x109d"
+            "\\x135d-\\x135f\\x1712-\\x1714\\x1732-\\x1734\\x1752\\x1753"
+            "\\x1772\\x1773\\x17b7-\\x17bd\\x17c6\\x17c9-\\x17d3\\x17dd"
+            "\\x180b-\\x180d\\x18a9\\x1920-\\x1922\\x1927\\x1928\\x1932"
+            "\\x1939-\\x193b\\x1a17\\x1a18\\x1a56\\x1a58-\\x1a5e\\x1a60"
+            "\\x1a62\\x1a65-\\x1a6c\\x1a73-\\x1a7c\\x1a7f\\x1b00-\\x1b03"
+            "\\x1b34\\x1b36-\\x1b3a\\x1b3c\\x1b42\\x1b6b-\\x1b73\\x1b80"
+            "\\x1b81\\x1ba2-\\x1ba5\\x1ba8\\x1ba9\\x1be6\\x1be8\\x1be9\\x1bed"
+            "\\x1bef-\\x1bf1\\x1c2c-\\x1c33\\x1c36\\x1c37\\x1cd0-\\x1cd2"
+            "\\x1cd4-\\x1ce0\\x1ce2-\\x1ce8\\x1ced\\x1dc0-\\x1de6"
+            "\\x1dfc-\\x1dff\\x20d0-\\x20dc\\x20e1\\x20e5-\\x20f0"
+            "\\x2cef-\\x2cf1\\x2d7f\\x2de0-\\x2dff\\x302a-\\x302f\\x3099"
+            "\\x309a\\xa66f\\xa67c\\xa67d\\xa6f0\\xa6f1\\xa802\\xa806\\xa80b"
+            "\\xa825\\xa826\\xa8c4\\xa8e0-\\xa8f1\\xa926-\\xa92d"
+            "\\xa947-\\xa951\\xa980-\\xa982\\xa9b3\\xa9b6-\\xa9b9\\xa9bc"
+            "\\xaa29-\\xaa2e\\xaa31\\xaa32\\xaa35\\xaa36\\xaa43\\xaa4c\\xaab0"
+            "\\xaab2-\\xaab4\\xaab7\\xaab8\\xaabe\\xaabf\\xaac1\\xabe5\\xabe8"
+            "\\xabed\\xfb1e\\xfe00-\\xfe0f\\xfe20-\\xfe26\\x101fd"
+            "\\x10a01-\\x10a03\\x10a05\\x10a06\\x10a0c-\\x10a0f"
+            "\\x10a38-\\x10a3a\\x10a3f\\x11001\\x11038-\\x11046\\x11080"
+            "\\x11081\\x110b3-\\x110b6\\x110b9\\x110ba\\x1d167-\\x1d169"
+            "\\x1d17b-\\x1d182\\x1d185-\\x1d18b\\x1d1aa-\\x1d1ad"
+            "\\x1d242-\\x1d244\\xe0100-\\xe01ef]";
+    }
+
+    static const char *mark_combining ()  // Bracket expression of combining-mark code points (presumably Unicode category Mc, spacing combining; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x903\\x93b\\x93e-\\x940\\x949-\\x94c\\x94e\\x94f\\x982"
+            "\\x983\\x9be-\\x9c0\\x9c7\\x9c8\\x9cb\\x9cc\\x9d7\\xa03"
+            "\\xa3e-\\xa40\\xa83\\xabe-\\xac0\\xac9\\xacb\\xacc\\xb02\\xb03"
+            "\\xb3e\\xb40\\xb47\\xb48\\xb4b\\xb4c\\xb57\\xbbe\\xbbf\\xbc1"
+            "\\xbc2\\xbc6-\\xbc8\\xbca-\\xbcc\\xbd7\\xc01-\\xc03\\xc41-\\xc44"
+            "\\xc82\\xc83\\xcbe\\xcc0-\\xcc4\\xcc7\\xcc8\\xcca\\xccb\\xcd5"
+            "\\xcd6\\xd02\\xd03\\xd3e-\\xd40\\xd46-\\xd48\\xd4a-\\xd4c\\xd57"
+            "\\xd82\\xd83\\xdcf-\\xdd1\\xdd8-\\xddf\\xdf2\\xdf3\\xf3e\\xf3f"
+            "\\xf7f\\x102b\\x102c\\x1031\\x1038\\x103b\\x103c\\x1056\\x1057"
+            "\\x1062-\\x1064\\x1067-\\x106d\\x1083\\x1084\\x1087-\\x108c"
+            "\\x108f\\x109a-\\x109c\\x17b6\\x17be-\\x17c5\\x17c7\\x17c8"
+            "\\x1923-\\x1926\\x1929-\\x192b\\x1930\\x1931\\x1933-\\x1938"
+            "\\x19b0-\\x19c0\\x19c8\\x19c9\\x1a19-\\x1a1b\\x1a55\\x1a57"
+            "\\x1a61\\x1a63\\x1a64\\x1a6d-\\x1a72\\x1b04\\x1b35\\x1b3b"
+            "\\x1b3d-\\x1b41\\x1b43\\x1b44\\x1b82\\x1ba1\\x1ba6\\x1ba7\\x1baa"
+            "\\x1be7\\x1bea-\\x1bec\\x1bee\\x1bf2\\x1bf3\\x1c24-\\x1c2b"
+            "\\x1c34\\x1c35\\x1ce1\\x1cf2\\xa823\\xa824\\xa827\\xa880\\xa881"
+            "\\xa8b4-\\xa8c3\\xa952\\xa953\\xa983\\xa9b4\\xa9b5\\xa9ba"
+            "\\xa9bb\\xa9bd-\\xa9c0\\xaa2f\\xaa30\\xaa33\\xaa34\\xaa4d\\xaa7b"
+            "\\xabe3\\xabe4\\xabe6\\xabe7\\xabe9\\xabea\\xabec\\x11000"
+            "\\x11002\\x11082\\x110b0-\\x110b2\\x110b7\\x110b8\\x1d165"
+            "\\x1d166\\x1d16d-\\x1d172]";
+    }
+
+    static const char *mark_enclosing ()  // Bracket expression of enclosing-mark code points (presumably Unicode category Me; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x488\\x489\\x20dd-\\x20e0\\x20e2-\\x20e4\\xa670-\\xa672]";
+    }
+
+    static const char *number_decimal ()  // Bracket expression of decimal-digit code points (presumably Unicode category Nd; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x30-\\x39\\x660-\\x669\\x6f0-\\x6f9\\x7c0-\\x7c9"
+            "\\x966-\\x96f\\x9e6-\\x9ef\\xa66-\\xa6f\\xae6-\\xaef"
+            "\\xb66-\\xb6f\\xbe6-\\xbef\\xc66-\\xc6f\\xce6-\\xcef"
+            "\\xd66-\\xd6f\\xe50-\\xe59\\xed0-\\xed9\\xf20-\\xf29"
+            "\\x1040-\\x1049\\x1090-\\x1099\\x17e0-\\x17e9\\x1810-\\x1819"
+            "\\x1946-\\x194f\\x19d0-\\x19d9\\x1a80-\\x1a89\\x1a90-\\x1a99"
+            "\\x1b50-\\x1b59\\x1bb0-\\x1bb9\\x1c40-\\x1c49\\x1c50-\\x1c59"
+            "\\xa620-\\xa629\\xa8d0-\\xa8d9\\xa900-\\xa909\\xa9d0-\\xa9d9"
+            "\\xaa50-\\xaa59\\xabf0-\\xabf9\\xff10-\\xff19\\x104a0-\\x104a9"
+            "\\x11066-\\x1106f\\x1d7ce-\\x1d7ff]";
+    }
+
+    static const char *number_letter ()  // Bracket expression of letter-number code points (presumably Unicode category Nl; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x16ee-\\x16f0\\x2160-\\x2182\\x2185-\\x2188\\x3007"
+            "\\x3021-\\x3029\\x3038-\\x303a\\xa6e6-\\xa6ef\\x10140-\\x10174"
+            "\\x10341\\x1034a\\x103d1-\\x103d5\\x12400-\\x12462]";
+    }
+
+    static const char *number_other ()  // Bracket expression of "other number" code points (presumably Unicode category No; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\xb2\\xb3\\xb9\\xbc-\\xbe\\x9f4-\\x9f9\\xb72-\\xb77"
+            "\\xbf0-\\xbf2\\xc78-\\xc7e\\xd70-\\xd75\\xf2a-\\xf33"
+            "\\x1369-\\x137c\\x17f0-\\x17f9\\x19da\\x2070\\x2074-\\x2079"
+            "\\x2080-\\x2089\\x2150-\\x215f\\x2189\\x2460-\\x249b"
+            "\\x24ea-\\x24ff\\x2776-\\x2793\\x2cfd\\x3192-\\x3195"
+            "\\x3220-\\x3229\\x3251-\\x325f\\x3280-\\x3289\\x32b1-\\x32bf"
+            "\\xa830-\\xa835\\x10107-\\x10133\\x10175-\\x10178\\x1018a"
+            "\\x10320-\\x10323\\x10858-\\x1085f\\x10916-\\x1091b"
+            "\\x10a40-\\x10a47\\x10a7d\\x10a7e\\x10b58-\\x10b5f"
+            "\\x10b78-\\x10b7f\\x10e60-\\x10e7e\\x11052-\\x11065"
+            "\\x1d360-\\x1d371\\x1f100-\\x1f10a]";
+    }
+
+    static const char *punctuation_connector ()  // Bracket expression of connector-punctuation code points (presumably Unicode category Pc; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x5f\\x203f\\x2040\\x2054\\xfe33\\xfe34\\xfe4d-\\xfe4f"
+            "\\xff3f]";
+    }
+
+    static const char *punctuation_dash ()  // Bracket expression of dash-punctuation code points (presumably Unicode category Pd; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x2d\\x58a\\x5be\\x1400\\x1806\\x2010-\\x2015\\x2e17\\x2e1a"
+            "\\x301c\\x3030\\x30a0\\xfe31\\xfe32\\xfe58\\xfe63\\xff0d]";
+    }
+
+    static const char *punctuation_open ()  // Bracket expression of opening-punctuation code points (presumably Unicode category Ps; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; every entry here is a single code point.
+        return "[\\x28\\x5b\\x7b\\xf3a\\xf3c\\x169b\\x201a\\x201e\\x2045"
+            "\\x207d\\x208d\\x2329\\x2768\\x276a\\x276c\\x276e\\x2770\\x2772"
+            "\\x2774\\x27c5\\x27e6\\x27e8\\x27ea\\x27ec\\x27ee\\x2983\\x2985"
+            "\\x2987\\x2989\\x298b\\x298d\\x298f\\x2991\\x2993\\x2995\\x2997"
+            "\\x29d8\\x29da\\x29fc\\x2e22\\x2e24\\x2e26\\x2e28\\x3008\\x300a"
+            "\\x300c\\x300e\\x3010\\x3014\\x3016\\x3018\\x301a\\x301d\\xfd3e"
+            "\\xfe17\\xfe35\\xfe37\\xfe39\\xfe3b\\xfe3d\\xfe3f\\xfe41\\xfe43"
+            "\\xfe47\\xfe59\\xfe5b\\xfe5d\\xff08\\xff3b\\xff5b\\xff5f\\xff62]";
+    }
+
+    static const char *punctuation_close ()  // Bracket expression of closing-punctuation code points (presumably Unicode category Pe; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; every entry here is a single code point.
+        return "[\\x29\\x5d\\x7d\\xf3b\\xf3d\\x169c\\x2046\\x207e\\x208e"
+            "\\x232a\\x2769\\x276b\\x276d\\x276f\\x2771\\x2773\\x2775\\x27c6"
+            "\\x27e7\\x27e9\\x27eb\\x27ed\\x27ef\\x2984\\x2986\\x2988\\x298a"
+            "\\x298c\\x298e\\x2990\\x2992\\x2994\\x2996\\x2998\\x29d9\\x29db"
+            "\\x29fd\\x2e23\\x2e25\\x2e27\\x2e29\\x3009\\x300b\\x300d\\x300f"
+            "\\x3011\\x3015\\x3017\\x3019\\x301b\\x301e\\x301f\\xfd3f\\xfe18"
+            "\\xfe36\\xfe38\\xfe3a\\xfe3c\\xfe3e\\xfe40\\xfe42\\xfe44\\xfe48"
+            "\\xfe5a\\xfe5c\\xfe5e\\xff09\\xff3d\\xff5d\\xff60\\xff63]";
+    }
+
+    static const char *punctuation_initial ()  // Bracket expression of initial-quote punctuation code points (presumably Unicode category Pi; verify against UnicodeData.txt).
+    {   // Hex digits here are upper-case and zero-padded to four digits, unlike most sibling tables; every entry is a single code point.
+        return "[\\x00AB\\x2018\\x201B\\x201C\\x201F\\x2039\\x2E02\\x2E04"
+            "\\x2E09\\x2E0C\\x2E1C\\x2E20]";
+    }
+
+    static const char *punctuation_final ()  // Bracket expression of final-quote punctuation code points (presumably Unicode category Pf; verify against UnicodeData.txt).
+    {   // Hex digits here are upper-case and zero-padded to four digits, unlike most sibling tables; every entry is a single code point.
+        return "[\\x00BB\\x2019\\x201D\\x203A\\x2E03\\x2E05\\x2E0A\\x2E0D"
+            "\\x2E1D\\x2E21]";
+    }
+
+    static const char *punctuation_other ()  // Bracket expression of "other punctuation" code points (presumably Unicode category Po; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x21-\\x23\\x25-\\x27\\x2a\\x2c\\x2e\\x2f\\x3a\\x3b\\x3f"
+            "\\x40\\x5c\\xa1\\xb7\\xbf\\x37e\\x387\\x55a-\\x55f\\x589\\x5c0"
+            "\\x5c3\\x5c6\\x5f3\\x5f4\\x609\\x60a\\x60c\\x60d\\x61b\\x61e"
+            "\\x61f\\x66a-\\x66d\\x6d4\\x700-\\x70d\\x7f7-\\x7f9\\x830-\\x83e"
+            "\\x85e\\x964\\x965\\x970\\xdf4\\xe4f\\xe5a\\xe5b\\xf04-\\xf12"
+            "\\xf85\\xfd0-\\xfd4\\xfd9\\xfda\\x104a-\\x104f\\x10fb"
+            "\\x1361-\\x1368\\x166d\\x166e\\x16eb-\\x16ed\\x1735\\x1736"
+            "\\x17d4-\\x17d6\\x17d8-\\x17da\\x1800-\\x1805\\x1807-\\x180a"
+            "\\x1944\\x1945\\x1a1e\\x1a1f\\x1aa0-\\x1aa6\\x1aa8-\\x1aad"
+            "\\x1b5a-\\x1b60\\x1bfc-\\x1bff\\x1c3b-\\x1c3f\\x1c7e\\x1c7f"
+            "\\x1cd3\\x2016\\x2017\\x2020-\\x2027\\x2030-\\x2038"
+            "\\x203b-\\x203e\\x2041-\\x2043\\x2047-\\x2051\\x2053"
+            "\\x2055-\\x205e\\x2cf9-\\x2cfc\\x2cfe\\x2cff\\x2d70\\x2e00"
+            "\\x2e01\\x2e06-\\x2e08\\x2e0b\\x2e0e-\\x2e16\\x2e18\\x2e19"
+            "\\x2e1b\\x2e1e\\x2e1f\\x2e2a-\\x2e2e\\x2e30\\x2e31"
+            "\\x3001-\\x3003\\x303d\\x30fb\\xa4fe\\xa4ff\\xa60d-\\xa60f"
+            "\\xa673\\xa67e\\xa6f2-\\xa6f7\\xa874-\\xa877\\xa8ce\\xa8cf"
+            "\\xa8f8-\\xa8fa\\xa92e\\xa92f\\xa95f\\xa9c1-\\xa9cd\\xa9de"
+            "\\xa9df\\xaa5c-\\xaa5f\\xaade\\xaadf\\xabeb\\xfe10-\\xfe16"
+            "\\xfe19\\xfe30\\xfe45\\xfe46\\xfe49-\\xfe4c\\xfe50-\\xfe52"
+            "\\xfe54-\\xfe57\\xfe5f-\\xfe61\\xfe68\\xfe6a\\xfe6b"
+            "\\xff01-\\xff03\\xff05-\\xff07\\xff0a\\xff0c\\xff0e\\xff0f"
+            "\\xff1a\\xff1b\\xff1f\\xff20\\xff3c\\xff61\\xff64\\xff65"
+            "\\x10100\\x10101\\x1039f\\x103d0\\x10857\\x1091f\\x1093f"
+            "\\x10a50-\\x10a58\\x10a7f\\x10b39-\\x10b3f\\x11047-\\x1104d"
+            "\\x110bb\\x110bc\\x110be-\\x110c1\\x12470-\\x12473]";
+    }
+
+    static const char *symbol_math ()  // Bracket expression of math-symbol code points (presumably Unicode category Sm; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x2b\\x3c-\\x3e\\x7c\\x7e\\xac\\xb1\\xd7\\xf7\\x3f6"
+            "\\x606-\\x608\\x2044\\x2052\\x207a-\\x207c\\x208a-\\x208c"
+            "\\x2118\\x2140-\\x2144\\x214b\\x2190-\\x2194\\x219a\\x219b"
+            "\\x21a0\\x21a3\\x21a6\\x21ae\\x21ce\\x21cf\\x21d2\\x21d4"
+            "\\x21f4-\\x22ff\\x2308-\\x230b\\x2320\\x2321\\x237c"
+            "\\x239b-\\x23b3\\x23dc-\\x23e1\\x25b7\\x25c1\\x25f8-\\x25ff"
+            "\\x266f\\x27c0-\\x27c4\\x27c7-\\x27ca\\x27cc\\x27ce-\\x27e5"
+            "\\x27f0-\\x27ff\\x2900-\\x2982\\x2999-\\x29d7\\x29dc-\\x29fb"
+            "\\x29fe-\\x2aff\\x2b30-\\x2b44\\x2b47-\\x2b4c\\xfb29\\xfe62"
+            "\\xfe64-\\xfe66\\xff0b\\xff1c-\\xff1e\\xff5c\\xff5e\\xffe2"
+            "\\xffe9-\\xffec\\x1d6c1\\x1d6db\\x1d6fb\\x1d715\\x1d735\\x1d74f"
+            "\\x1d76f\\x1d789\\x1d7a9\\x1d7c3]";
+    }
+
+    static const char *symbol_currency ()  // Bracket expression of currency-symbol code points (presumably Unicode category Sc; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x24\\xa2-\\xa5\\x60b\\x9f2\\x9f3\\x9fb\\xaf1\\xbf9\\xe3f"
+            "\\x17db\\x20a0-\\x20b9\\xa838\\xfdfc\\xfe69\\xff04\\xffe0\\xffe1"
+            "\\xffe5\\xffe6]";
+    }
+
+    static const char *symbol_modifier ()  // Bracket expression of modifier-symbol code points (presumably Unicode category Sk; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x5e\\x60\\xa8\\xaf\\xb4\\xb8\\x2c2-\\x2c5\\x2d2-\\x2df"
+            "\\x2e5-\\x2eb\\x2ed\\x2ef-\\x2ff\\x375\\x384\\x385\\x1fbd"
+            "\\x1fbf-\\x1fc1\\x1fcd-\\x1fcf\\x1fdd-\\x1fdf\\x1fed-\\x1fef"
+            "\\x1ffd\\x1ffe\\x309b\\x309c\\xa700-\\xa716\\xa720\\xa721"
+            "\\xa789\\xa78a\\xfbb2-\\xfbc1\\xff3e\\xff40\\xffe3]";
+    }
+
+    static const char *symbol_other ()  // Bracket expression of "other symbol" code points (presumably Unicode category So; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\xa6\\xa7\\xa9\\xae\\xb0\\xb6\\x482\\x60e\\x60f\\x6de"
+            "\\x6e9\\x6fd\\x6fe\\x7f6\\x9fa\\xb70\\xbf3-\\xbf8\\xbfa\\xc7f"
+            "\\xd79\\xf01-\\xf03\\xf13-\\xf17\\xf1a-\\xf1f\\xf34\\xf36\\xf38"
+            "\\xfbe-\\xfc5\\xfc7-\\xfcc\\xfce\\xfcf\\xfd5-\\xfd8"
+            "\\x109e\\x109f\\x1360\\x1390-\\x1399\\x1940\\x19de-\\x19ff"
+            "\\x1b61-\\x1b6a\\x1b74-\\x1b7c\\x2100\\x2101\\x2103-\\x2106"
+            "\\x2108\\x2109\\x2114\\x2116\\x2117\\x211e-\\x2123\\x2125"
+            "\\x2127\\x2129\\x212e\\x213a\\x213b\\x214a\\x214c\\x214d\\x214f"
+            "\\x2195-\\x2199\\x219c-\\x219f\\x21a1\\x21a2\\x21a4\\x21a5"
+            "\\x21a7-\\x21ad\\x21af-\\x21cd\\x21d0\\x21d1\\x21d3"
+            "\\x21d5-\\x21f3\\x2300-\\x2307\\x230c-\\x231f\\x2322-\\x2328"
+            "\\x232b-\\x237b\\x237d-\\x239a\\x23b4-\\x23db\\x23e2-\\x23f3"
+            "\\x2400-\\x2426\\x2440-\\x244a\\x249c-\\x24e9\\x2500-\\x25b6"
+            "\\x25b8-\\x25c0\\x25c2-\\x25f7\\x2600-\\x266e\\x2670-\\x26ff"
+            "\\x2701-\\x2767\\x2794-\\x27bf\\x2800-\\x28ff\\x2b00-\\x2b2f"
+            "\\x2b45\\x2b46\\x2b50-\\x2b59\\x2ce5-\\x2cea\\x2e80-\\x2e99"
+            "\\x2e9b-\\x2ef3\\x2f00-\\x2fd5\\x2ff0-\\x2ffb\\x3004\\x3012"
+            "\\x3013\\x3020\\x3036\\x3037\\x303e\\x303f\\x3190\\x3191"
+            "\\x3196-\\x319f\\x31c0-\\x31e3\\x3200-\\x321e\\x322a-\\x3250"
+            "\\x3260-\\x327f\\x328a-\\x32b0\\x32c0-\\x32fe\\x3300-\\x33ff"
+            "\\x4dc0-\\x4dff\\xa490-\\xa4c6\\xa828-\\xa82b\\xa836\\xa837"
+            "\\xa839\\xaa77-\\xaa79\\xfdfd\\xffe4\\xffe8\\xffed\\xffee"
+            "\\xfffc\\xfffd\\x10102\\x10137-\\x1013f\\x10179-\\x10189"
+            "\\x10190-\\x1019b\\x101d0-\\x101fc\\x1d000-\\x1d0f5"
+            "\\x1d100-\\x1d126\\x1d129-\\x1d164\\x1d16a-\\x1d16c\\x1d183"
+            "\\x1d184\\x1d18c-\\x1d1a9\\x1d1ae-\\x1d1dd\\x1d200-\\x1d241"
+            "\\x1d245\\x1d300-\\x1d356\\x1f000-\\x1f02b\\x1f030-\\x1f093"
+            "\\x1f0a0-\\x1f0ae\\x1f0b1-\\x1f0be\\x1f0c1-\\x1f0cf"
+            "\\x1f0d1-\\x1f0df\\x1f110-\\x1f12e\\x1f130-\\x1f169"
+            "\\x1f170-\\x1f19a\\x1f1e6-\\x1f202\\x1f210-\\x1f23a"
+            "\\x1f240-\\x1f248\\x1f250\\x1f251\\x1f300-\\x1f320"
+            "\\x1f330-\\x1f335\\x1f337-\\x1f37c\\x1f380-\\x1f393"
+            "\\x1f3a0-\\x1f3c4\\x1f3c6-\\x1f3ca\\x1f3e0-\\x1f3f0"
+            "\\x1f400-\\x1f43e\\x1f440\\x1f442-\\x1f4f7\\x1f4f9-\\x1f4fc"
+            "\\x1f500-\\x1f53d\\x1f550-\\x1f567\\x1f5fb-\\x1f5ff"
+            "\\x1f601-\\x1f610\\x1f612-\\x1f614\\x1f616\\x1f618\\x1f61a"
+            "\\x1f61c-\\x1f61e\\x1f620-\\x1f625\\x1f628-\\x1f62b\\x1f62d"
+            "\\x1f630-\\x1f633\\x1f635-\\x1f640\\x1f645-\\x1f64f"
+            "\\x1f680-\\x1f6c5\\x1f700-\\x1f773]";
+    }
+
+    static const char *separator_space ()  // Bracket expression of space-separator code points (presumably Unicode category Zs; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x20\\xa0\\x1680\\x180e\\x2000-\\x200a\\x202f\\x205f"
+            "\\x3000]";
+    }
+
+    static const char *separator_line ()  // Bracket expression holding the single line-separator code point (presumably Unicode category Zl).
+    {   // The doubled backslash yields the literal text "\x2028" in the returned string.
+        return "[\\x2028]";
+    }
+
+    static const char *separator_paragraph ()  // Bracket expression holding the single paragraph-separator code point (presumably Unicode category Zp).
+    {   // The doubled backslash yields the literal text "\x2029" in the returned string.
+        return "[\\x2029]";
+    }
+
+    static const char *other_control ()  // Bracket expression of control code points 0x0-0x1f and 0x7f-0x9f (presumably Unicode category Cc).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\x0-\\x1f\\x7f-\\x9f]";
+    }
+
+    static const char *other_format ()  // Bracket expression of format-character code points (presumably Unicode category Cf; verify against UnicodeData.txt).
+    {   // Each "\\x" in source is a literal backslash-x in the returned text; "a-b" denotes an inclusive range.
+        return "[\\xad\\x600-\\x603\\x6dd\\x70f\\x17b4\\x17b5\\x200b-\\x200f"
+            "\\x202a-\\x202e\\x2060-\\x2064\\x206a-\\x206f\\xfeff"
+            "\\xfff9-\\xfffb\\x110bd\\x1d173-\\x1d17a\\xe0001"
+            "\\xe0020-\\xe007f]";
+    }
+
+    static const char *other_surrogate ()  // Bracket expression covering the surrogate code points D800-DFFF (presumably Unicode category Cs).
+    {   // The three First/Last pairs are emitted as ranges; listing only the six endpoints would miss every code point in between.
+        return "[\\xD800-\\xDB7F\\xDB80-\\xDBFF\\xDC00-\\xDFFF]";
+    }
+
+    static const char *other_private ()  // Bracket expression covering the private-use ranges E000-F8FF, F0000-FFFFD and 100000-10FFFD (presumably Unicode category Co).
+    {   // The three First/Last pairs are emitted as ranges; listing only the six endpoints would miss every code point in between.
+        return "[\\xE000-\\xF8FF\\xF0000-\\xFFFFD\\x100000-\\x10FFFD]";
+    }
+
+    // Locale-based fold for the 'one' tag overload: yields the uppercase form of char_ when that differs from char_, otherwise its lowercase form.
+    static input_char_type fold (const input_char_type char_,
+        const std::locale &locale_, const one &)
+    {
+        const input_char_type as_upper_ = std::toupper (char_, locale_);
+
+        if (as_upper_ != char_) return as_upper_;
+        // toupper left char_ unchanged, so report whatever tolower gives (possibly char_ itself).
+        return std::tolower (char_, locale_);
+    }
+
+    static input_char_type fold (const input_char_type char_,  // Table-driven fold for the 'two' tag overload: maps char_ through a list of code-point ranges.
+        const std::locale &, const two &)  // The locale argument is unnamed and unused in this overload.
+    {
+        const fold_pair mapping_[] =  // Each entry is {{from.first, from.second}, {to.first, to.second}}; scanned in order, first containing range wins.
+            {{{0x0041, 0x005a}, {0x0061, 0x007a}},
+            {{0x0061, 0x007a}, {0x0041, 0x005a}},
+            {{0x00b5, 0x00b5}, {0x039c, 0x039c}},
+            {{0x00c0, 0x00d6}, {0x00e0, 0x00f6}},
+            {{0x00d8, 0x00de}, {0x00f8, 0x00fe}},
+            {{0x00e0, 0x00f6}, {0x00c0, 0x00d6}},
+            {{0x00f8, 0x0137}, {0x00d8, 0x0117}},  // NOTE(review): with the offset arithmetic below, 0x0100 maps to 0x00e0 here; confirm against CaseFolding.txt.
+            {{0x0139, 0x0148}, {0x013a, 0x0149}},
+            {{0x014a, 0x018c}, {0x014b, 0x018d}},
+            {{0x018e, 0x019a}, {0x01dd, 0x01e9}},
+            {{0x019c, 0x01a9}, {0x026f, 0x027c}},
+            {{0x01ac, 0x01b9}, {0x01ad, 0x01ba}},
+            {{0x01bc, 0x01bd}, {0x01bd, 0x01be}},
+            {{0x01bf, 0x01bf}, {0x01f7, 0x01f7}},
+            {{0x01c4, 0x01c4}, {0x01c6, 0x01c6}},
+            {{0x01c6, 0x01c7}, {0x01c4, 0x01c5}},
+            {{0x01c9, 0x01ca}, {0x01c7, 0x01c8}},
+            {{0x01cc, 0x01ef}, {0x01ca, 0x01ed}},
+            {{0x01f1, 0x01f1}, {0x01f3, 0x01f3}},
+            {{0x01f3, 0x0220}, {0x01f1, 0x021e}},
+            {{0x0222, 0x0233}, {0x0223, 0x0234}},
+            {{0x023a, 0x0254}, {0x2c65, 0x2c7f}},
+            {{0x0256, 0x0257}, {0x0189, 0x018a}},
+            {{0x0259, 0x0259}, {0x018f, 0x018f}},
+            {{0x025b, 0x025b}, {0x0190, 0x0190}},
+            {{0x0260, 0x0260}, {0x0193, 0x0193}},
+            {{0x0263, 0x0263}, {0x0194, 0x0194}},
+            {{0x0265, 0x0265}, {0xa78d, 0xa78d}},
+            {{0x0268, 0x0269}, {0x0197, 0x0198}},
+            {{0x026b, 0x026b}, {0x2c62, 0x2c62}},
+            {{0x026f, 0x026f}, {0x019c, 0x019c}},
+            {{0x0271, 0x0272}, {0x2c6e, 0x2c6f}},
+            {{0x0275, 0x0275}, {0x019f, 0x019f}},
+            {{0x027d, 0x027d}, {0x2c64, 0x2c64}},
+            {{0x0280, 0x0280}, {0x01a6, 0x01a6}},
+            {{0x0283, 0x0283}, {0x01a9, 0x01a9}},
+            {{0x0288, 0x028c}, {0x01ae, 0x01b2}},
+            {{0x0292, 0x0292}, {0x01b7, 0x01b7}},
+            {{0x0370, 0x0373}, {0x0371, 0x0374}},
+            {{0x0376, 0x0377}, {0x0377, 0x0378}},
+            {{0x037b, 0x037d}, {0x03fd, 0x03ff}},
+            {{0x0386, 0x0386}, {0x03ac, 0x03ac}},
+            {{0x0388, 0x038a}, {0x03ad, 0x03af}},
+            {{0x038c, 0x038c}, {0x03cc, 0x03cc}},
+            {{0x038e, 0x038f}, {0x03cd, 0x03ce}},
+            {{0x0391, 0x03a1}, {0x03b1, 0x03c1}},
+            {{0x03a3, 0x03af}, {0x03c3, 0x03cf}},
+            {{0x03b1, 0x03d1}, {0x0391, 0x03b1}},
+            {{0x03d5, 0x03f2}, {0x03a6, 0x03c3}},
+            {{0x03f4, 0x03f5}, {0x03b8, 0x03b9}},
+            {{0x03f7, 0x03fb}, {0x03f8, 0x03fc}},
+            {{0x03fd, 0x0481}, {0x037b, 0x03ff}},
+            {{0x048a, 0x0527}, {0x048b, 0x0528}},
+            {{0x0531, 0x0556}, {0x0561, 0x0586}},
+            {{0x0561, 0x0586}, {0x0531, 0x0556}},
+            {{0x10a0, 0x10c5}, {0x2d00, 0x2d25}},
+            {{0x1d79, 0x1d79}, {0xa77d, 0xa77d}},
+            {{0x1d7d, 0x1d7d}, {0x2c63, 0x2c63}},
+            {{0x1e00, 0x1e95}, {0x1e01, 0x1e96}},
+            {{0x1e9b, 0x1e9b}, {0x1e60, 0x1e60}},
+            {{0x1e9e, 0x1e9e}, {0x00df, 0x00df}},
+            {{0x1ea0, 0x1f15}, {0x1ea1, 0x1f16}},
+            {{0x1f18, 0x1f1d}, {0x1f10, 0x1f15}},
+            {{0x1f20, 0x1f45}, {0x1f28, 0x1f4d}},
+            {{0x1f48, 0x1f4d}, {0x1f40, 0x1f45}},
+            {{0x1f51, 0x1f51}, {0x1f59, 0x1f59}},
+            {{0x1f53, 0x1f53}, {0x1f5b, 0x1f5b}},
+            {{0x1f55, 0x1f55}, {0x1f5d, 0x1f5d}},
+            {{0x1f57, 0x1f57}, {0x1f5f, 0x1f5f}},
+            {{0x1f59, 0x1f59}, {0x1f51, 0x1f51}},
+            {{0x1f5b, 0x1f5b}, {0x1f53, 0x1f53}},
+            {{0x1f5d, 0x1f5d}, {0x1f55, 0x1f55}},
+            {{0x1f5f, 0x1f7d}, {0x1f57, 0x1f75}},
+            {{0x1f80, 0x1f87}, {0x1f88, 0x1f8f}},
+            {{0x1f90, 0x1f97}, {0x1f98, 0x1f9f}},
+            {{0x1fa0, 0x1fa7}, {0x1fa8, 0x1faf}},
+            {{0x1fb0, 0x1fb1}, {0x1fb8, 0x1fb9}},
+            {{0x1fb3, 0x1fb3}, {0x1fbc, 0x1fbc}},
+            {{0x1fb8, 0x1fbb}, {0x1fb0, 0x1fb3}},
+            {{0x1fbe, 0x1fbe}, {0x0399, 0x0399}},
+            {{0x1fc3, 0x1fc3}, {0x1fcc, 0x1fcc}},
+            {{0x1fc8, 0x1fcb}, {0x1f72, 0x1f75}},
+            {{0x1fd0, 0x1fd1}, {0x1fd8, 0x1fd9}},
+            {{0x1fd8, 0x1fdb}, {0x1fd0, 0x1fd3}},
+            {{0x1fe0, 0x1fe1}, {0x1fe8, 0x1fe9}},
+            {{0x1fe5, 0x1fe5}, {0x1fec, 0x1fec}},
+            {{0x1fe8, 0x1fec}, {0x1fe0, 0x1fe4}},
+            {{0x1ff3, 0x1ff3}, {0x1ffc, 0x1ffc}},
+            {{0x1ff8, 0x1ffb}, {0x1f78, 0x1f7b}},
+            {{0x2126, 0x2126}, {0x03c9, 0x03c9}},
+            {{0x212a, 0x212b}, {0x006b, 0x006c}},
+            {{0x2132, 0x2132}, {0x214e, 0x214e}},
+            {{0x214e, 0x214e}, {0x2132, 0x2132}},
+            {{0x2183, 0x2184}, {0x2184, 0x2185}},
+            {{0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}},
+            {{0x2c30, 0x2c5e}, {0x2c00, 0x2c2e}},
+            {{0x2c60, 0x2c70}, {0x2c61, 0x2c71}},
+            {{0x2c72, 0x2c73}, {0x2c73, 0x2c74}},
+            {{0x2c75, 0x2c76}, {0x2c76, 0x2c77}},
+            {{0x2c7e, 0x2ce3}, {0x023f, 0x02a4}},
+            {{0x2ceb, 0x2cee}, {0x2cec, 0x2cef}},
+            {{0x2d00, 0x2d25}, {0x10a0, 0x10c5}},
+            {{0xa640, 0xa66d}, {0xa641, 0xa66e}},
+            {{0xa680, 0xa697}, {0xa681, 0xa698}},
+            {{0xa722, 0xa72f}, {0xa723, 0xa730}},
+            {{0xa732, 0xa76f}, {0xa733, 0xa770}},
+            {{0xa779, 0xa787}, {0xa77a, 0xa788}},
+            {{0xa78b, 0xa78d}, {0xa78c, 0xa78e}},
+            {{0xa790, 0xa791}, {0xa791, 0xa792}},
+            {{0xa7a0, 0xa7a9}, {0xa7a1, 0xa7aa}},
+            {{0xff21, 0xff3a}, {0xff41, 0xff5a}},
+            {{0xff41, 0xff5a}, {0xff21, 0xff3a}},
+            {{0, 0}, {0, 0}}};  // Sentinel entry: from.first == 0 ends the scan and to.first == 0 signals "no mapping".
+        input_char_type ret_ = char_;  // Default result when no range contains char_.
+        const fold_pair *ptr_ = mapping_;
+
+        for (; ptr_->from.first != 0 && (char_ < ptr_->from.first ||  // Empty-bodied loop: advance until the sentinel
+            char_ > ptr_->from.second); ++ptr_);                      // or the first range that contains char_.
+
+        if (ptr_->to.first != 0)  // False only at the sentinel, i.e. when nothing matched.
+        {
+            ret_ = ptr_->to.first + (char_ - ptr_->from.first);  // Same offset into the target range as char_ has in the source range.
+        }
+
+        return ret_;
+    }
+
+    // Maps char_ to its opposite-case counterpart for four byte char types.
+    // Values below 0x10000 go to the two () overload; above that only the
+    // Deseret letters are paired. Unpaired values are returned unchanged.
+    static input_char_type fold (const input_char_type char_,
+        const std::locale &locale_, const four &)
+    {
+        if (char_ < 0x10000)
+        {
+            return fold (char_, locale_, two ());
+        }
+
+        // Deseret: capitals U+10400-U+10427, smalls U+10428-U+1044F (40 each).
+        const fold_pair mapping_[] =
+            {{{0x10400, 0x10427}, {0x10428, 0x1044f}},
+            {{0x10428, 0x1044f}, {0x10400, 0x10427}},
+            {{0, 0}, {0, 0}}};
+        for (const fold_pair *ptr_ = mapping_; ptr_->from.first != 0; ++ptr_)
+        {
+            if (char_ >= ptr_->from.first && char_ <= ptr_->from.second)
+            {
+                return ptr_->to.first + (char_ - ptr_->from.first);
+            }
+        }
+
+        return char_;
+    }
+
+    // Decodes the escape selected by the character at state_._curr and returns
+    // the character it denotes. Octal, control and hex escapes are handed to
+    // their own decoders; every other escape is one character long and is
+    // stepped over here.
+    template<typename state_type>
+    static input_char_type chr (state_type &state_)
+    {
+        // eos_ has already been checked for.
+        const typename state_type::char_type esc_ = *state_._curr;
+        input_char_type decoded_ = 0;
+
+        // The multi character decoders advance state_ themselves.
+        if (esc_ >= '0' && esc_ <= '7')
+        {
+            return decode_octal (state_);
+        }
+
+        if (esc_ == 'c')
+        {
+            return decode_control_char (state_);
+        }
+
+        if (esc_ == 'x')
+        {
+            return decode_hex (state_);
+        }
+
+        // Single character escapes: pick the value, then step over it below.
+        switch (esc_)
+        {
+            case 'a':
+                decoded_ = '\a';
+                break;
+            case 'b':
+                decoded_ = '\b';
+                break;
+            case 'e':
+                decoded_ = 27; // '\e' not recognised by compiler
+                break;
+            case 'f':
+                decoded_ = '\f';
+                break;
+            case 'n':
+                decoded_ = '\n';
+                break;
+            case 'r':
+                decoded_ = '\r';
+                break;
+            case 't':
+                decoded_ = '\t';
+                break;
+            case 'v':
+                decoded_ = '\v';
+                break;
+            default:
+                // Not a recognised escape: the character stands for itself.
+                decoded_ = esc_;
+                break;
+        }
+
+        state_.increment ();
+
+        return decoded_;
+    }
+
+    // Decodes an octal escape of up to three digits beginning at state_._curr
+    // (chr () only calls this when that character is '0'-'7'). Only the digits
+    // used are consumed. Throws runtime_error if the value exceeds
+    // char_traits::max_val ().
+    template<typename state_type>
+    static input_char_type decode_octal (state_type &state_)
+    {
+        std::size_t oct_ = 0;
+        typename state_type::char_type ch_ = *state_._curr;
+        unsigned short count_ = 3;
+
+        for (;;)
+        {
+            oct_ *= 8;
+            oct_ += ch_ - '0';
+            --count_;
+            state_.increment ();
+
+            if (!count_ || state_.eos ()) break;
+
+            ch_ = *state_._curr;
+            // Don't consume invalid chars!
+            if (ch_ < '0' || ch_ > '7') break;
+        }
+
+        if (oct_ > static_cast<std::size_t>(char_traits::max_val ()))
+        {
+            std::ostringstream ss_;
+
+            // std::oct is sticky, so switch back to std::dec before the index;
+            // otherwise the position would be reported in base 8.
+            ss_ << "Escape \\" << std::oct << oct_ <<
+                " is too big for the state machine char type "
+                "preceding index " << std::dec << state_.index () <<
+                " in rule " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        return static_cast<input_char_type> (oct_);
+    }
+
+    // Decodes a \c control escape. state_._curr addresses the 'c' on entry and
+    // the character after it picks the code: 'a'/'A' give 1, 'b'/'B' give 2 and
+    // so on, '@' gives 0. Anything else, or end of input, throws runtime_error.
+    template<typename state_type>
+    static input_char_type decode_control_char (state_type &state_)
+    {
+        typename state_type::char_type ctrl_ = 0;
+
+        // Skip over 'c'
+        state_.increment ();
+
+        if (state_.next (ctrl_))
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex following \\c in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        if (ctrl_ >= 'a' && ctrl_ <= 'z')
+        {
+            ctrl_ -= 'a' - 1;
+            return ctrl_;
+        }
+
+        if (ctrl_ >= 'A' && ctrl_ <= 'Z')
+        {
+            ctrl_ -= 'A' - 1;
+            return ctrl_;
+        }
+
+        if (ctrl_ != '@')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Invalid control char at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        // Apparently...
+        return 0;
+    }
+
+    // Decodes a \x escape. state_._curr addresses the 'x' on entry; every hex
+    // digit that follows is consumed (at least one is required). Throws
+    // runtime_error if the regex ends after the 'x', if no hex digit follows,
+    // or if the value exceeds char_traits::max_val ().
+    template<typename state_type>
+    static input_char_type decode_hex (state_type &state_)
+    {
+        // Skip over 'x'
+        state_.increment ();
+
+        typename state_type::char_type ch_ = 0;
+
+        if (state_.next (ch_))
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex following \\x in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') ||
+            (ch_ >= 'A' && ch_ <= 'F')))
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Illegal char following \\x at index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        std::size_t hex_ = 0;
+
+        // ch_ always holds an already consumed hex digit at the top of the loop.
+        for (;;)
+        {
+            hex_ *= 16;
+            if (ch_ >= '0' && ch_ <= '9')
+            {
+                hex_ += ch_ - '0';
+            }
+            else if (ch_ >= 'a' && ch_ <= 'f')
+            {
+                hex_ += 10 + (ch_ - 'a');
+            }
+            else
+            {
+                hex_ += 10 + (ch_ - 'A');
+            }
+
+            if (state_.eos ()) break;
+
+            ch_ = *state_._curr;
+
+            // Don't consume invalid chars!
+            if (!((ch_ >= '0' && ch_ <= '9') ||
+                (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F')))
+            {
+                break;
+            }
+
+            state_.increment ();
+        }
+
+        if (hex_ > static_cast<std::size_t>(char_traits::max_val ()))
+        {
+            std::ostringstream ss_;
+
+            // std::hex is sticky, so switch back to std::dec before the index;
+            // otherwise the position would be reported in base 16.
+            ss_ << "Escape \\x" << std::hex << hex_ <<
+                " is too big for the state machine char type at index " <<
+                std::dec << state_.index () << " in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        return static_cast<input_char_type> (hex_);
+    }
+
+    template<typename state_type>
+    static void charset_range (const bool chset_, state_type &state_,
+        bool &eos_, typename state_type::char_type &ch_,
+        const input_char_type prev_, string_token &chars_)
+    { // Reads the end of the range that starts at prev_ and adds it to chars_.
+        if (chset_) // the item before the '-' was a charset, not a single char
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Charset cannot form start of range preceding "
+                "index " << state_.index () - 1 << " in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+        // Consume the character that should end the range.
+        eos_ = state_.next (ch_);
+
+        if (eos_)
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex following '-' in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        input_char_type curr_ = 0; // upper bound of the range
+
+        if (ch_ == '\\')
+        {
+            std::size_t str_len_ = 0;
+            // NOTE(review): a true result is treated as "was a charset" - confirm
+            if (escape_sequence (state_, curr_, str_len_))
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Charset cannot form end of range preceding index "
+                    << state_.index () << " in rule id " << state_._id << '.';
+                throw runtime_error (ss_.str ());
+            }
+        }
+        else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "POSIX char class cannot form end of range at "
+                "index " << state_.index () - 1 << " in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+        else
+        {
+            curr_ = ch_;
+        }
+        // ch_ and eos_ are references: the caller gets the char after the range.
+        eos_ = state_.next (ch_);
+
+        // Covers preceding if and else
+        if (eos_)
+        {
+            std::ostringstream ss_;
+
+            // Pointless returning index if at end of string
+            ss_ << "Unexpected end of regex (missing ']') in rule id " <<
+                state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        // Use size_t so that the loop counter below can step one past the
+        // maximum index_type value; index_type would wrap to 0 at max + 1.
+        std::size_t start_ = static_cast<typename char_traits::index_type>
+            (prev_);
+        std::size_t end_ = static_cast<typename char_traits::index_type>
+            (curr_);
+
+        // Semantic check: the range must not run backwards.
+        if (end_ < start_)
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Invalid range in charset preceding index " <<
+                state_.index () - 1 << " in rule id " << state_._id << '.';
+            throw runtime_error (ss_.str ());
+        }
+
+        // Even though ranges are used now, we still need to consider
+        // each character if icase is set.
+        if (state_._flags & icase)
+        {
+            for (; start_ <= end_; ++start_)
+            {
+                const input_char_type ch_ = static_cast<input_char_type>
+                    (start_); // note: shadows the ch_ parameter in this loop
+                const input_char_type folded_ = fold (ch_, state_._locale,
+                    size<sizeof(input_char_type)> ());
+
+                chars_.insert (typename string_token::range (ch_, ch_));
+
+                if (ch_ != folded_)
+                {
+                    chars_.insert (typename string_token::range
+                        (folded_, folded_));
+                }
+            }
+        }
+        else
+        {
+            chars_.insert (typename string_token::range (prev_, curr_));
+        }
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp b/inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp
new file mode 100644
index 0000000..a3548d6
--- /dev/null
+++ b/inc/lexertl/parser/tokeniser/re_tokeniser_state.hpp
@@ -0,0 +1,115 @@
+// re_tokeniser_state.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RE_TOKENISER_STATE_HPP
+#define LEXERTL_RE_TOKENISER_STATE_HPP
+
+#include "../../char_traits.hpp"
+#include "../../enums.hpp"
+#include <locale>
+#include "../../size_t.hpp"
+#include <stack>
+
+namespace lexertl
+{
+namespace detail
+{
+// Read cursor over the regex text [_start, _end) plus the tokeniser's context.
+// _start/_end are not top-level const so that copying and assignment compile.
+template<typename ch_type, typename id_type>
+struct basic_re_tokeniser_state
+{
+    typedef ch_type char_type;
+    typedef typename basic_char_traits<char_type>::index_type index_type;
+
+    const char_type *_start;
+    const char_type *_end;
+    const char_type *_curr;
+    id_type _id;
+    std::size_t _flags;
+    std::stack<std::size_t> _flags_stack;
+    std::locale _locale;
+    bool _macro;
+    long _paren_count;
+    bool _in_string;
+    id_type _nl_id;
+
+    basic_re_tokeniser_state (const char_type *start_,
+        const char_type * const end_, id_type id_, const std::size_t flags_,
+        const std::locale locale_, const bool macro_) :
+        _start (start_),
+        _end (end_),
+        _curr (start_),
+        _id (id_),
+        _flags (flags_),
+        _flags_stack (),
+        _locale (locale_),
+        _macro (macro_),
+        _paren_count (0),
+        _in_string (false),
+        _nl_id (static_cast<id_type>(~0))
+    {
+    }
+
+    basic_re_tokeniser_state (const basic_re_tokeniser_state &rhs_)
+    {
+        assign (rhs_);
+    }
+
+    // prevent VC++ 7.1 warning:
+    const basic_re_tokeniser_state &operator =
+        (const basic_re_tokeniser_state &rhs_)
+    {
+        assign (rhs_);
+        return *this;
+    }
+
+    void assign (const basic_re_tokeniser_state &rhs_)
+    {
+        _start = rhs_._start;
+        _end = rhs_._end;
+        _curr = rhs_._curr;
+        _id = rhs_._id;
+        _flags = rhs_._flags;
+        _flags_stack = rhs_._flags_stack;
+        _locale = rhs_._locale;
+        _macro = rhs_._macro;
+        _paren_count = rhs_._paren_count;
+        _in_string = rhs_._in_string;
+        _nl_id = rhs_._nl_id;
+    }
+
+    // Returns true at end of input (ch_ = 0); else reads one char and advances.
+    inline bool next (char_type &ch_)
+    {
+        if (eos ())
+        {
+            ch_ = 0;
+            return true;
+        }
+        ch_ = *_curr;
+        increment ();
+        return false;
+    }
+
+    inline void increment ()
+    {
+        ++_curr;
+    }
+
+    inline std::size_t index ()
+    {
+        return _curr - _start;
+    }
+
+    inline bool eos ()
+    {
+        return _curr >= _end;
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tree/end_node.hpp b/inc/lexertl/parser/tree/end_node.hpp
new file mode 100644
index 0000000..74cb512
--- /dev/null
+++ b/inc/lexertl/parser/tree/end_node.hpp
@@ -0,0 +1,112 @@
+// end_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_END_NODE_HPP
+#define LEXERTL_END_NODE_HPP
+
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node of type END. It is never nullable, is the sole entry of its own
+// firstpos and lastpos, keeps an always empty followpos and hands back the
+// values given to its constructor through the basic_node accessors.
+template<typename id_type>
+class basic_end_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // Stores the five values and registers this node as the only member of
+    // its own firstpos and lastpos.
+    basic_end_node (const id_type id_, const id_type user_id_,
+        const id_type next_dfa_, const id_type push_dfa_,
+        const bool pop_dfa_) :
+        basic_node<id_type> (false),
+        _id (id_),
+        _user_id (user_id_),
+        _next_dfa (next_dfa_),
+        _push_dfa (push_dfa_),
+        _pop_dfa (pop_dfa_),
+        _followpos ()
+    {
+        node::_firstpos.push_back (this);
+        node::_lastpos.push_back (this);
+    }
+
+    virtual ~basic_end_node ()
+    {
+    }
+
+    // Identifies this node as node::END.
+    virtual node_type what_type () const
+    {
+        return node::END;
+    }
+
+    // Pushes nothing and returns false: there is nothing to descend into.
+    virtual bool traverse (const_node_stack &/*node_stack_*/,
+        bool_stack &/*perform_op_stack_*/) const
+    {
+        return false;
+    }
+
+    virtual const node_vector &followpos () const
+    {
+        // _followpos is always empty..!
+        return _followpos;
+    }
+
+    // Overrides the base class default of false.
+    virtual bool end_state () const
+    {
+        return true;
+    }
+
+    // The id_ passed to the constructor.
+    virtual id_type id () const { return _id; }
+
+    // The user_id_ passed to the constructor.
+    virtual id_type user_id () const { return _user_id; }
+
+    // The next_dfa_ passed to the constructor.
+    virtual id_type next_dfa () const { return _next_dfa; }
+
+    // The push_dfa_ passed to the constructor.
+    virtual id_type push_dfa () const { return _push_dfa; }
+
+    // The pop_dfa_ passed to the constructor.
+    virtual bool pop_dfa () const { return _pop_dfa; }
+
+private:
+    // Set once by the constructor; _followpos is never added to.
+    id_type _id;
+    id_type _user_id;
+    id_type _next_dfa;
+    id_type _push_dfa;
+    bool _pop_dfa;
+    node_vector _followpos;
+
+    // Intentionally a no-op for this node type.
+    virtual void copy_node (node_ptr_vector &/*node_ptr_vector_*/,
+        node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/,
+        bool &/*down_*/) const
+    {
+        // Nothing to do, as end_nodes are not copied.
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tree/iteration_node.hpp b/inc/lexertl/parser/tree/iteration_node.hpp
new file mode 100644
index 0000000..51b852e
--- /dev/null
+++ b/inc/lexertl/parser/tree/iteration_node.hpp
@@ -0,0 +1,103 @@
+// iteration_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_ITERATION_NODE_HPP
+#define LEXERTL_ITERATION_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node of type ITERATION wrapping one child (_next). It is always
+// nullable and takes over the child's firstpos and lastpos; the constructor
+// links each lastpos node to all firstpos nodes and passes greedy_ to those.
+template<typename id_type>
+class basic_iteration_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    basic_iteration_node (basic_node<id_type> *next_, const bool greedy_) :
+        basic_node<id_type> (true),
+        _next (next_),
+        _greedy (greedy_)
+    {
+        typedef typename node_vector::size_type size_type;
+
+        _next->append_firstpos (node::_firstpos);
+        _next->append_lastpos (node::_lastpos);
+
+        for (size_type idx_ = 0; idx_ < node::_lastpos.size (); ++idx_)
+        {
+            node::_lastpos[idx_]->append_followpos (node::_firstpos);
+        }
+
+        for (size_type idx_ = 0; idx_ < node::_firstpos.size (); ++idx_)
+        {
+            node::_firstpos[idx_]->greedy (greedy_);
+        }
+    }
+
+    virtual ~basic_iteration_node ()
+    {
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::ITERATION;
+    }
+
+    // Flags this node for processing and schedules the child for traversal.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+        node_stack_.push (_next);
+        return true;
+    }
+
+private:
+    // Not owner of this pointer...
+    basic_node<id_type> *_next;
+    bool _greedy;
+
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            basic_node<id_type> *child_ = new_node_stack_.top ();
+            node_ptr_vector_->push_back
+                (static_cast<basic_iteration_node<id_type> *>(0));
+            node_ptr_vector_->back () = new basic_iteration_node
+                (child_, _greedy);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_iteration_node (const basic_iteration_node &);
+    // No assignment.
+    const basic_iteration_node &operator = (const basic_iteration_node &);
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tree/leaf_node.hpp b/inc/lexertl/parser/tree/leaf_node.hpp
new file mode 100644
index 0000000..010cbd4
--- /dev/null
+++ b/inc/lexertl/parser/tree/leaf_node.hpp
@@ -0,0 +1,114 @@
+// leaf_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_LEAF_NODE_HPP
+#define LEXERTL_LEAF_NODE_HPP
+
+#include "../../enums.hpp" // null_token
+#include "node.hpp"
+#include "../../size_t.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+// Tree node of type LEAF holding one token id. A leaf built from
+// node::null_token () is nullable and has empty firstpos/lastpos; any other
+// leaf is the sole entry of its own firstpos and lastpos.
+template<typename id_type>
+class basic_leaf_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    basic_leaf_node (const id_type token_, const bool greedy_) :
+        basic_node<id_type> (token_ == node::null_token ()),
+        _token (token_),
+        _set_greedy (!greedy_),
+        _greedy (greedy_),
+        _followpos ()
+    {
+        if (node::_nullable) return;
+
+        node::_firstpos.push_back (this);
+        node::_lastpos.push_back (this);
+    }
+
+    virtual ~basic_leaf_node ()
+    {
+    }
+
+    // Appends every entry of followpos_, in order, to this leaf's followpos.
+    virtual void append_followpos (const node_vector &followpos_)
+    {
+        _followpos.insert (_followpos.end (), followpos_.begin (),
+            followpos_.end ());
+    }
+
+    virtual node_type what_type () const
+    {
+        return node::LEAF;
+    }
+
+    virtual bool traverse (const_node_stack &/*node_stack_*/,
+        bool_stack &/*perform_op_stack_*/) const
+    {
+        return false;
+    }
+
+    virtual id_type token () const
+    {
+        return _token;
+    }
+
+    // Takes effect at most once, and never on a leaf constructed non-greedy.
+    virtual void greedy (const bool greedy_)
+    {
+        if (_set_greedy) return;
+
+        _greedy = greedy_;
+        _set_greedy = true;
+    }
+
+    virtual bool greedy () const
+    {
+        return _greedy;
+    }
+
+    virtual const node_vector &followpos () const
+    {
+        return _followpos;
+    }
+
+    virtual node_vector &followpos ()
+    {
+        return _followpos;
+    }
+
+private:
+    id_type _token;
+    bool _set_greedy;
+    bool _greedy;
+    node_vector _followpos;
+
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/,
+        bool &/*down_*/) const
+    {
+        node_ptr_vector_->push_back (static_cast<basic_leaf_node *>(0));
+        node_ptr_vector_->back () = new basic_leaf_node (_token, _greedy);
+        new_node_stack_.push (node_ptr_vector_->back ());
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tree/node.hpp b/inc/lexertl/parser/tree/node.hpp
new file mode 100644
index 0000000..cb54cd2
--- /dev/null
+++ b/inc/lexertl/parser/tree/node.hpp
@@ -0,0 +1,241 @@
+// node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_NODE_HPP
+#define LEXERTL_NODE_HPP
+
+#include <assert.h>
+#include "../../containers/ptr_vector.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <stack>
+#include <vector>
+
+namespace lexertl
+{
+namespace detail
+{
+// Abstract base of the regex syntax tree nodes. Holds the nullable flag and
+// the firstpos / lastpos sets; accessors a node type does not support throw.
+template<typename id_type>
+class basic_node
+{
+public:
+    enum node_type {LEAF, SEQUENCE, SELECTION, ITERATION, END};
+
+    typedef std::stack<bool> bool_stack;
+    typedef std::stack<basic_node<id_type> *> node_stack;
+    // stack and vector not owner of node pointers
+    typedef std::stack<const basic_node<id_type> *> const_node_stack;
+    typedef std::vector<basic_node<id_type> *> node_vector;
+    typedef ptr_vector<basic_node<id_type> > node_ptr_vector;
+
+    basic_node () :
+        _nullable (false), _firstpos (), _lastpos ()
+    {
+    }
+
+    basic_node (const bool nullable_) :
+        _nullable (nullable_), _firstpos (), _lastpos ()
+    {
+    }
+
+    virtual ~basic_node ()
+    {
+    }
+
+    static id_type null_token ()
+    {
+        return static_cast<id_type>(~0);
+    }
+
+    bool nullable () const
+    {
+        return _nullable;
+    }
+
+    void append_firstpos (node_vector &firstpos_) const
+    {
+        firstpos_.insert (firstpos_.end (),
+            _firstpos.begin (), _firstpos.end ());
+    }
+
+    void append_lastpos (node_vector &lastpos_) const
+    {
+        lastpos_.insert (lastpos_.end (),
+            _lastpos.begin (), _lastpos.end ());
+    }
+
+    virtual void append_followpos (const node_vector &/*followpos_*/)
+    {
+        throw runtime_error ("Internal error node::append_followpos().");
+    }
+
+    // Clones the tree rooted at this node without recursion and returns the
+    // new root; the copy_node () overrides do the per node work.
+    basic_node *copy (node_ptr_vector &node_ptr_vector_) const
+    {
+        const_node_stack node_stack_;
+        bool_stack perform_op_stack_;
+        bool down_ = true;
+        node_stack new_node_stack_;
+
+        node_stack_.push (this);
+
+        while (!node_stack_.empty ())
+        {
+            while (down_)
+            {
+                down_ = node_stack_.top ()->traverse (node_stack_,
+                    perform_op_stack_);
+            }
+
+            while (!down_ && !node_stack_.empty ())
+            {
+                const basic_node *top_ = node_stack_.top ();
+
+                top_->copy_node (node_ptr_vector_, new_node_stack_,
+                    perform_op_stack_, down_);
+
+                if (!down_) node_stack_.pop ();
+            }
+        }
+
+        assert (new_node_stack_.size () == 1);
+        basic_node *new_root_ = new_node_stack_.top ();
+        new_node_stack_.pop ();
+        return new_root_;
+    }
+
+    virtual node_type what_type () const = 0;
+
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const = 0;
+
+    node_vector &firstpos ()
+    {
+        return _firstpos;
+    }
+
+    const node_vector &firstpos () const
+    {
+        return _firstpos;
+    }
+
+    // _lastpos modified externally, so not const &
+    node_vector &lastpos ()
+    {
+        return _lastpos;
+    }
+
+    virtual bool end_state () const
+    {
+        return false;
+    }
+    // The defaults below throw; node types that support a call override it.
+    virtual id_type id () const
+    {
+        throw runtime_error ("Internal error node::id().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type user_id () const
+    {
+        throw runtime_error ("Internal error node::user_id().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type next_dfa () const
+    {
+        throw runtime_error ("Internal error node::next_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual id_type push_dfa () const
+    {
+        throw runtime_error ("Internal error node::push_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual bool pop_dfa () const
+    {
+        throw runtime_error ("Internal error node::pop_dfa().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return false;
+#endif
+    }
+
+    virtual id_type token () const
+    {
+        throw runtime_error ("Internal error node::token().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return id_type ();
+#endif
+    }
+
+    virtual void greedy (const bool /*greedy_*/)
+    {
+        throw runtime_error ("Internal error node::greedy(bool).");
+    }
+
+    virtual bool greedy () const
+    {
+        throw runtime_error ("Internal error node::greedy().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return false;
+#endif
+    }
+
+    virtual const node_vector &followpos () const
+    {
+        throw runtime_error ("Internal error node::followpos().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return _firstpos;
+#endif
+    }
+
+    virtual node_vector &followpos ()
+    {
+        throw runtime_error ("Internal error node::followpos().");
+#ifdef __SUNPRO_CC
+        // Stop bogus Solaris compiler warning
+        return _firstpos;
+#endif
+    }
+
+protected:
+    const bool _nullable;
+    node_vector _firstpos;
+    node_vector _lastpos;
+
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const = 0;
+
+private:
+    basic_node (const basic_node &); // No copy construction.
+    const basic_node &operator = (const basic_node &); // No assignment.
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tree/selection_node.hpp b/inc/lexertl/parser/tree/selection_node.hpp
new file mode 100644
index 0000000..ab0802a
--- /dev/null
+++ b/inc/lexertl/parser/tree/selection_node.hpp
@@ -0,0 +1,106 @@
+// selection_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SELECTION_NODE_HPP
+#define LEXERTL_SELECTION_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename id_type>
+class basic_selection_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+
+    // Nullable when either operand is. Each operand is asked to append to
+    // this node's _firstpos and _lastpos, left operand before right.
+    basic_selection_node (node *left_, node *right_) :
+        basic_node<id_type> (left_->nullable () || right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        _left->append_firstpos (node::_firstpos);
+        _right->append_firstpos (node::_firstpos);
+        _left->append_lastpos (node::_lastpos);
+        _right->append_lastpos (node::_lastpos);
+    }
+
+    virtual ~basic_selection_node () {}
+
+    virtual node_type what_type () const { return node::SELECTION; }
+
+    // Pushes true for this node, then false as well when the right operand
+    // is a SEQUENCE, SELECTION or ITERATION node; finally pushes the right
+    // and then the left operand onto node_stack_. Always returns true.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+
+        const node_type rhs_type_ = _right->what_type ();
+
+        if (rhs_type_ == node::SEQUENCE || rhs_type_ == node::SELECTION ||
+            rhs_type_ == node::ITERATION)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Borrowed operand pointers; this node never deletes them.
+    node *_left;
+    node *_right;
+
+    // When the flag on top of perform_op_stack_ is set, replaces the two
+    // nodes on top of new_node_stack_ with a new selection node built from
+    // them; otherwise only sets down_. The flag is popped either way.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            node *rhs_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            node *lhs_ = new_node_stack_.top ();
+
+            // A null slot is pushed first and then filled in.
+            node_ptr_vector_->push_back
+                (static_cast<basic_selection_node *>(0));
+            node_ptr_vector_->back () = new basic_selection_node (lhs_, rhs_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_selection_node (const basic_selection_node &);
+    // No assignment.
+    const basic_selection_node &operator = (const basic_selection_node &);
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/parser/tree/sequence_node.hpp b/inc/lexertl/parser/tree/sequence_node.hpp
new file mode 100644
index 0000000..6e46f14
--- /dev/null
+++ b/inc/lexertl/parser/tree/sequence_node.hpp
@@ -0,0 +1,126 @@
+// sequence_node.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SEQUENCE_NODE_HPP
+#define LEXERTL_SEQUENCE_NODE_HPP
+
+#include "node.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename id_type>
+class basic_sequence_node : public basic_node<id_type>
+{
+public:
+    typedef basic_node<id_type> node;
+    typedef typename node::bool_stack bool_stack;
+    typedef typename node::const_node_stack const_node_stack;
+    typedef typename node::node_ptr_vector node_ptr_vector;
+    typedef typename node::node_stack node_stack;
+    typedef typename node::node_type node_type;
+    typedef typename node::node_vector node_vector;
+
+    // Nullable only when both operands are. The right operand is asked to
+    // append to _firstpos only if the left is nullable, and the left to
+    // _lastpos only if the right is nullable.
+    basic_sequence_node (node *left_, node *right_) :
+        basic_node<id_type> (left_->nullable () && right_->nullable ()),
+        _left (left_),
+        _right (right_)
+    {
+        _left->append_firstpos (node::_firstpos);
+
+        if (_left->nullable ())
+        {
+            _right->append_firstpos (node::_firstpos);
+        }
+
+        if (_right->nullable ())
+        {
+            _left->append_lastpos (node::_lastpos);
+        }
+
+        _right->append_lastpos (node::_lastpos);
+
+        // Each node in the left lastpos is handed the right firstpos.
+        node_vector &left_last_ = _left->lastpos ();
+        const node_vector &right_first_ = _right->firstpos ();
+        typename node_vector::iterator pos_ = left_last_.begin ();
+        const typename node_vector::iterator stop_ = left_last_.end ();
+
+        while (pos_ != stop_)
+        {
+            (*pos_)->append_followpos (right_first_);
+            ++pos_;
+        }
+    }
+
+    virtual ~basic_sequence_node () {}
+
+    virtual node_type what_type () const { return node::SEQUENCE; }
+
+    // Pushes true, plus false when the right operand is a SEQUENCE,
+    // SELECTION or ITERATION node, then the right and the left operand.
+    virtual bool traverse (const_node_stack &node_stack_,
+        bool_stack &perform_op_stack_) const
+    {
+        perform_op_stack_.push (true);
+
+        const node_type rhs_type_ = _right->what_type ();
+
+        if (rhs_type_ == node::SEQUENCE || rhs_type_ == node::SELECTION ||
+            rhs_type_ == node::ITERATION)
+        {
+            perform_op_stack_.push (false);
+        }
+
+        node_stack_.push (_right);
+        node_stack_.push (_left);
+        return true;
+    }
+
+private:
+    // Borrowed operand pointers; this node never deletes them.
+    node *_left;
+    node *_right;
+
+    // With the top flag set, the two nodes on top of new_node_stack_ are
+    // replaced by a new sequence node; otherwise only down_ is set.
+    virtual void copy_node (node_ptr_vector &node_ptr_vector_,
+        node_stack &new_node_stack_, bool_stack &perform_op_stack_,
+        bool &down_) const
+    {
+        if (!perform_op_stack_.top ())
+        {
+            down_ = true;
+        }
+        else
+        {
+            node *rhs_ = new_node_stack_.top ();
+
+            new_node_stack_.pop ();
+
+            node *lhs_ = new_node_stack_.top ();
+
+            node_ptr_vector_->push_back
+                (static_cast<basic_sequence_node *>(0));
+            node_ptr_vector_->back () = new basic_sequence_node (lhs_, rhs_);
+            new_node_stack_.top () = node_ptr_vector_->back ();
+        }
+
+        perform_op_stack_.pop ();
+    }
+
+    // No copy construction.
+    basic_sequence_node (const basic_sequence_node &);
+    // No assignment.
+    const basic_sequence_node &operator = (const basic_sequence_node &);
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/partition/charset.hpp b/inc/lexertl/partition/charset.hpp
new file mode 100644
index 0000000..d97cf03
--- /dev/null
+++ b/inc/lexertl/partition/charset.hpp
@@ -0,0 +1,73 @@
+// charset.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_CHARSET_HPP
+#define LEXERTL_CHARSET_HPP
+
+#include <algorithm>
+#include <iterator>
+#include <set>
+#include "../size_t.hpp"
+#include "../string_token.hpp"
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename char_type, typename id_type>
+struct basic_charset
+{
+    typedef basic_string_token<char_type> token;
+    typedef std::set<id_type> index_set;
+
+    token _token;
+    index_set _index_set;
+
+    // An empty token paired with an empty index set.
+    basic_charset () : _token (), _index_set () {}
+
+    // A copy of token_ paired with the single index index_.
+    basic_charset (const token &token_, const std::size_t index_) :
+        _token (token_),
+        _index_set ()
+    {
+        _index_set.insert (index_);
+    }
+
+    // True only when both the token and the index set are empty.
+    bool empty () const
+    {
+        return _token.empty () && _index_set.empty ();
+    }
+
+    // Lets _token.intersect () fill overlap_._token (presumably with the
+    // characters common to both tokens - confirm in string_token.hpp).
+    // If that overlap is not empty, overlap_ receives the union of both
+    // index sets, and whichever operand's token ended up empty has its
+    // index set cleared too.
+    void intersect (basic_charset &rhs_, basic_charset &overlap_)
+    {
+        _token.intersect (rhs_._token, overlap_._token);
+
+        if (overlap_._token.empty ())
+        {
+            return;
+        }
+
+        index_set &shared_ = overlap_._index_set;
+
+        shared_.insert (_index_set.begin (), _index_set.end ());
+        shared_.insert (rhs_._index_set.begin (), rhs_._index_set.end ());
+
+        if (_token.empty ()) _index_set.clear ();
+
+        if (rhs_._token.empty ()) rhs_._index_set.clear ();
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/partition/equivset.hpp b/inc/lexertl/partition/equivset.hpp
new file mode 100644
index 0000000..6c25c6b
--- /dev/null
+++ b/inc/lexertl/partition/equivset.hpp
@@ -0,0 +1,134 @@
+// equivset.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_EQUIVSET_HPP
+#define LEXERTL_EQUIVSET_HPP
+
+#include <algorithm>
+#include "../parser/tree/node.hpp"
+#include <set>
+
+namespace lexertl
+{
+namespace detail
+{
+template<typename id_type>
+struct basic_equivset
+{
+    typedef std::set<id_type> index_set;
+    typedef std::vector<id_type> index_vector;
+    // Not owner of nodes:
+    typedef basic_node<id_type> node;
+    typedef std::vector<node *> node_vector;
+
+    index_vector _index_vector;
+    id_type _id;
+    bool _greedy;
+    node_vector _followpos;
+
+    // No indexes, id 0, greedy, no followpos nodes.
+    basic_equivset () :
+        _index_vector (),
+        _id (0),
+        _greedy (true),
+        _followpos ()
+    {
+    }
+
+    // Indexes are copied out of the set, so they start sorted and unique.
+    basic_equivset (const index_set &index_set_, const id_type id_,
+        const bool greedy_, const node_vector &followpos_) :
+        _index_vector (index_set_.begin (), index_set_.end ()),
+        _id (id_),
+        _greedy (greedy_),
+        _followpos (followpos_)
+    {
+    }
+
+    // True only when there are neither indexes nor followpos nodes.
+    bool empty () const
+    {
+        return _index_vector.empty () && _followpos.empty ();
+    }
+
+    // Moves the indexes shared with rhs_ into overlap_. When there are any,
+    // overlap_ takes this object's id and greedy flag and the union of both
+    // followpos lists; an operand left without indexes loses its followpos.
+    void intersect (basic_equivset &rhs_, basic_equivset &overlap_)
+    {
+        intersect_indexes (rhs_._index_vector, overlap_._index_vector);
+
+        if (overlap_._index_vector.empty ())
+        {
+            return;
+        }
+
+        // Note that the LHS takes priority in order to
+        // respect rule ordering priority in the lex spec.
+        overlap_._id = _id;
+        overlap_._greedy = _greedy;
+        overlap_._followpos = _followpos;
+
+        node_vector &merged_ = overlap_._followpos;
+        typename node_vector::const_iterator rhs_iter_ =
+            rhs_._followpos.begin ();
+        const typename node_vector::const_iterator rhs_end_ =
+            rhs_._followpos.end ();
+
+        while (rhs_iter_ != rhs_end_)
+        {
+            node *candidate_ = *rhs_iter_;
+
+            if (std::find (merged_.begin (), merged_.end (), candidate_) ==
+                merged_.end ())
+            {
+                merged_.push_back (candidate_);
+            }
+
+            ++rhs_iter_;
+        }
+
+        if (_index_vector.empty ())
+        {
+            _followpos.clear ();
+        }
+
+        if (rhs_._index_vector.empty ())
+        {
+            rhs_._followpos.clear ();
+        }
+    }
+
+private:
+    // Single pass over both index lists (assumed sorted ascending): every
+    // value present in both is appended to overlap_ and erased from both.
+    void intersect_indexes (index_vector &rhs_, index_vector &overlap_)
+    {
+        typename index_vector::iterator lhs_iter_ = _index_vector.begin ();
+        typename index_vector::iterator rhs_iter_ = rhs_.begin ();
+
+        while (lhs_iter_ != _index_vector.end () && rhs_iter_ != rhs_.end ())
+        {
+            if (*lhs_iter_ < *rhs_iter_)
+            {
+                ++lhs_iter_;
+            }
+            else if (*rhs_iter_ < *lhs_iter_)
+            {
+                ++rhs_iter_;
+            }
+            else
+            {
+                overlap_.push_back (*lhs_iter_);
+                lhs_iter_ = _index_vector.erase (lhs_iter_);
+                rhs_iter_ = rhs_.erase (rhs_iter_);
+            }
+        }
+    }
+};
+}
+}
+
+#endif
diff --git a/inc/lexertl/rules.hpp b/inc/lexertl/rules.hpp
new file mode 100644
index 0000000..713341b
--- /dev/null
+++ b/inc/lexertl/rules.hpp
@@ -0,0 +1,743 @@
+// rules.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RULES_HPP
+#define LEXERTL_RULES_HPP
+
+#include "compile_assert.hpp"
+#include <deque>
+#include "enums.hpp"
+#include "internals.hpp"
+#include <locale>
+#include <map>
+#include "runtime_error.hpp"
+#include <set>
+#include "size_t.hpp"
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace lexertl
+{
+template<typename ch_type, typename id_ty = std::size_t>
+class basic_rules
+{
+public:
+    typedef std::vector<bool> bool_vector;
+    typedef std::deque<bool_vector> bool_vector_deque;
+    typedef ch_type char_type;
+    typedef id_ty id_type;
+    typedef std::vector<id_type> id_vector;
+    typedef std::deque<id_vector> id_vector_deque;
+    typedef std::basic_string<char_type> string;
+    typedef std::deque<string> string_deque;
+    typedef std::deque<string_deque> string_deque_deque;
+    typedef std::set<string> string_set;
+    typedef std::pair<string, string> string_pair;
+    typedef std::deque<string_pair> string_pair_deque;
+    typedef std::map<string, id_type> string_id_type_map;
+    typedef std::pair<string, id_type> string_id_type_pair;
+
+    // If you get a compile error here you have
+    // failed to define an unsigned id type.
+    compile_assert<(static_cast<id_type>(~0) > 0)>
+        _valid_id_type;
+
+    basic_rules (const std::size_t flags_ = dot_not_newline) :
+        _valid_id_type (),
+        _statemap (),
+        _macrodeque (),
+        _macroset (),
+        _regexes (),
+        _features (),
+        _ids (),
+        _user_ids (),
+        _next_dfas (),
+        _pushes (),
+        _pops (),
+        _flags (flags_),
+        _locale (),
+        _lexer_state_names (),
+        _eoi (0)
+    {
+        add_state (initial ()); // The INITIAL state is always registered.
+    }
+
+    void clear () // Drops all states, macros and rules.
+    {
+        _statemap.clear ();
+        _macrodeque.clear ();
+        _macroset.clear ();
+        _regexes.clear ();
+        _features.clear ();
+        _ids.clear ();
+        _user_ids.clear ();
+        _next_dfas.clear ();
+        _pushes.clear ();
+        _pops.clear ();
+        _flags = dot_not_newline; // Same default as the constructor's.
+        _locale = std::locale ();
+        _lexer_state_names.clear ();
+        _eoi = 0;
+        add_state (initial ()); // INITIAL is registered again.
+    }
+
+    void clear (const id_type dfa_)
+    {
+        // Ids with no entry in the per-state tables are ignored.
+        if (_regexes.size () <= dfa_) return;
+
+        _regexes[dfa_].clear ();
+        _features[dfa_] = 0;
+        _ids[dfa_].clear ();
+        _user_ids[dfa_].clear ();
+        _next_dfas[dfa_].clear ();
+        _pushes[dfa_].clear ();
+        _pops[dfa_].clear ();
+    }
+
+    void flags (const std::size_t flags_) // Replaces the stored flags.
+    {
+        _flags = flags_;
+    }
+
+    std::size_t flags () const // Currently stored flags.
+    {
+        return _flags;
+    }
+
+    static id_type skip () // All bits set except bit 0, i.e. npos () - 1.
+    {
+        return static_cast<id_type>(~1);
+    }
+
+    void eoi (const id_type eoi_) // check_for_invalid_id () rejects this id.
+    {
+        _eoi = eoi_;
+    }
+
+    id_type eoi () const // Currently stored eoi id (0 unless changed).
+    {
+        return _eoi;
+    }
+
+    std::locale imbue (const std::locale &locale_) // Returns the old locale.
+    {
+        std::locale loc_ = _locale;
+
+        _locale = locale_;
+        return loc_;
+    }
+
+    const std::locale &locale () const // Locale currently held.
+    {
+        return _locale;
+    }
+
+    // Returns initial () for id 0, the stored name for any other known
+    // id, and a null pointer when index_ is out of range.
+    const char_type *state (const id_type index_) const
+    {
+        if (index_ == 0)
+        {
+            return initial ();
+        }
+
+        // INITIAL is not kept in _lexer_state_names, so the name of
+        // state n is stored at position n - 1.
+        const id_type slot_ = index_ - 1;
+
+        if (slot_ < _lexer_state_names.size ())
+        {
+            return _lexer_state_names[slot_].c_str ();
+        }
+
+        return 0;
+    }
+
+    // Returns the id _statemap holds for name_, or npos () when absent.
+    id_type state (const char_type *name_) const
+    {
+        const typename string_id_type_map::const_iterator found_ =
+            _statemap.find (name_);
+        id_type id_ = npos ();
+
+        if (found_ != _statemap.end ())
+        {
+            id_ = found_->second;
+        }
+
+        return id_;
+    }
+
+    // Registers state name_ (if new) and returns the id held for it.
+    id_type add_state (const char_type *name_)
+    {
+        validate (name_);
+
+        const bool added_ = _statemap.insert (string_id_type_pair (name_,
+            _statemap.size ())).second;
+
+        if (!added_)
+        {
+            return _statemap.find (name_)->second;
+        }
+
+        _regexes.push_back (string_deque ());
+        _features.push_back (0);
+        _ids.push_back (id_vector ());
+        _user_ids.push_back (id_vector ());
+        _next_dfas.push_back (id_vector ());
+        _pushes.push_back (id_vector ());
+        _pops.push_back (bool_vector ());
+
+        if (string (name_) != initial ())
+        {
+            _lexer_state_names.push_back (name_);
+        }
+
+        if (_next_dfas.size () > npos ())
+        {
+            throw runtime_error ("The data type you have chosen cannot hold "
+                "this many lexer start states.");
+        }
+
+        // Initial is not stored, so no need to - 1.
+        return static_cast<id_type>(_lexer_state_names.size ());
+    }
+
+    void add_macro (const char_type *name_, const char_type *regex_)
+    {
+        add_macro (name_, string (regex_)); // regex_ is read up to its NUL.
+    }
+
+    void add_macro (const char_type *name_, const char_type *regex_start_,
+        const char_type *regex_end_)
+    {
+        add_macro (name_, string (regex_start_, regex_end_)); // [start, end)
+    }
+
+    // Registers macro name_ with the given regex. Throws runtime_error
+    // when validate () rejects the name or the macro already exists.
+    void add_macro (const char_type *name_, const string &regex_)
+    {
+        validate (name_);
+
+        if (_macroset.find (name_) != _macroset.end ())
+        {
+            std::basic_stringstream<char_type> ss_;
+            std::ostringstream os_;
+
+            os_ << "Attempt to redefine MACRO '";
+
+            // Narrowed character by character so that wide names can be
+            // reported through a char based exception message.
+            for (; *name_; ++name_)
+            {
+                os_ << ss_.narrow (*name_, static_cast<char_type> (' '));
+            }
+
+            os_ << "'.";
+            throw runtime_error (os_.str ());
+        }
+
+        _macrodeque.push_back (string_pair (name_, regex_));
+        _macroset.insert (name_);
+    }
+
+    // Copies every macro of rules_; add_macro () throws on a duplicate.
+    void add_macros (const basic_rules &rules_)
+    {
+        const string_pair_deque &macros_ = rules_.macrodeque ();
+
+        for (typename string_pair_deque::const_iterator macro_iter_ =
+            macros_.begin (), macro_end_ = macros_.end ();
+            macro_iter_ != macro_end_; ++macro_iter_)
+        {
+            // Hand over the stored string itself: going through c_str ()
+            // would cut the regex short at an embedded NUL character.
+            add_macro (macro_iter_->first.c_str (), macro_iter_->second);
+        }
+    }
+
+    // Copies only those macros of rules_ whose names are not yet defined
+    // in this object; macros that already exist here are left untouched.
+    void merge_macros (const basic_rules &rules_)
+    {
+        const string_pair_deque &macros_ = rules_.macrodeque ();
+        typename string_pair_deque::const_iterator macro_iter_ =
+            macros_.begin ();
+        typename string_pair_deque::const_iterator macro_end_ =
+            macros_.end ();
+
+        for (; macro_iter_ != macro_end_; ++macro_iter_)
+        {
+            if (_macroset.find (macro_iter_->first) == _macroset.end ())
+            {
+                // Pass the string itself so that a regex containing an
+                // embedded NUL character is not truncated by c_str ().
+                add_macro (macro_iter_->first.c_str (),
+                    macro_iter_->second);
+            }
+        }
+    }
+
+    // Add rule to INITIAL (regex_ is read up to its terminating NUL).
+    void add (const char_type *regex_, const id_type id_,
+        const id_type user_id_ = npos ())
+    {
+        add (string (regex_), id_, user_id_);
+    }
+
+    void add (const char_type *regex_start_, const char_type *regex_end_,
+        const id_type id_, const id_type user_id_ = npos ())
+    {
+        add (string (regex_start_, regex_end_), id_, user_id_); // [start, end)
+    }
+
+    // Appends the rule to the first (INITIAL) state and records the
+    // feature bits it needs. Throws if id_ is the eoi id or npos ().
+    void add (const string &regex_, const id_type id_,
+        const id_type user_id_ = npos ())
+    {
+        check_for_invalid_id (id_);
+        _regexes.front ().push_back (regex_);
+
+        const bool bol_ = regex_[0] == '^';
+        const bool eol_ = !regex_.empty () &&
+            regex_[regex_.size () - 1] == '$';
+
+        if (bol_) _features.front () |= bol_bit;
+
+        if (eol_) _features.front () |= eol_bit;
+
+        if (id_ == skip ())
+        {
+            _features.front () |= skip_bit;
+        }
+        else if (id_ == eoi ())
+        {
+            _features.front () |= again_bit;
+        }
+
+        _ids.front ().push_back (id_);
+        _user_ids.front ().push_back (user_id_);
+        _next_dfas.front ().push_back (0);
+        _pushes.front ().push_back (npos ());
+        _pops.front ().push_back (false);
+    }
+
+    // Add rule with no id (it ends up stored with the current _eoi value).
+    void add (const char_type *curr_dfa_,
+        const char_type *regex_, const char_type *new_dfa_)
+    {
+        add (curr_dfa_, string (regex_), new_dfa_);
+    }
+
+    void add (const char_type *curr_dfa_,
+        const char_type *regex_start_, const char_type *regex_end_,
+        const char_type *new_dfa_)
+    {
+        add (curr_dfa_, string (regex_start_, regex_end_), new_dfa_); // No id.
+    }
+
+    void add (const char_type *curr_dfa_, const string &regex_,
+        const char_type *new_dfa_)
+    {
+        add (curr_dfa_, regex_, _eoi, new_dfa_, false); // false: no id check.
+    }
+
+    // Add rule with id (regex_ is read up to its terminating NUL).
+    void add (const char_type *curr_dfa_,
+        const char_type *regex_, const id_type id_,
+        const char_type *new_dfa_, const id_type user_id_ = npos ())
+    {
+        add (curr_dfa_, string (regex_), id_, new_dfa_, user_id_);
+    }
+
+    void add (const char_type *curr_dfa_, const char_type *regex_start_,
+        const char_type *regex_end_, const id_type id_,
+        const char_type *new_dfa_, const id_type user_id_ = npos ())
+    {
+        add (curr_dfa_, string (regex_start_, regex_end_), // [start, end)
+            id_, new_dfa_, user_id_);
+    }
+
+    void add (const char_type *curr_dfa_, const string &regex_,
+        const id_type id_, const char_type *new_dfa_,
+        const id_type user_id_ = npos ())
+    {
+        add (curr_dfa_, regex_, id_, new_dfa_, true, user_id_); // true: check id.
+    }
+
+    const string_id_type_map &statemap () const // State name -> state id.
+    {
+        return _statemap;
+    }
+
+    const string_pair_deque &macrodeque () const // (name, regex) as added.
+    {
+        return _macrodeque;
+    }
+
+    const string_deque_deque &regexes () const // Per state: regexes as added.
+    {
+        return _regexes;
+    }
+
+    const id_vector &features () const // Per state: OR of the *_bit flags.
+    {
+        return _features;
+    }
+
+    const id_vector_deque &ids () const // Per state: id of each rule.
+    {
+        return _ids;
+    }
+
+    const id_vector_deque &user_ids () const // Per state: user id per rule.
+    {
+        return _user_ids;
+    }
+
+    const id_vector_deque &next_dfas () const // Per state: target per rule.
+    {
+        return _next_dfas;
+    }
+
+    const id_vector_deque &pushes () const // Per rule: pushed id or npos ().
+    {
+        return _pushes;
+    }
+
+    const bool_vector_deque &pops () const // Per rule: true for a '<' rule.
+    {
+        return _pops;
+    }
+
+    // True when no state holds any regex.
+    bool empty () const
+    {
+        typename string_deque_deque::const_iterator state_ =
+            _regexes.begin ();
+        const typename string_deque_deque::const_iterator stop_ =
+            _regexes.end ();
+
+        while (state_ != stop_)
+        {
+            if (!state_->empty ()) return false;
+
+            ++state_;
+        }
+
+        return true;
+    }
+
+    static const char_type *initial () // Name of the state the ctor adds.
+    {
+        static const char_type initial_[] =
+            {'I', 'N', 'I', 'T', 'I', 'A', 'L', 0};
+
+        return initial_;
+    }
+
+    static const char_type *dot () // ".": as new_dfa_, stay in the same state.
+    {
+        static const char_type dot_[] = {'.', 0};
+
+        return dot_;
+    }
+
+    static const char_type *all_states () // "*": as curr_dfa_, every state.
+    {
+        static const char_type star_[] = {'*', 0};
+
+        return star_;
+    }
+
+    static id_type npos () // All bits set: the largest id_type value.
+    {
+        return static_cast<id_type>(~0);
+    }
+
+private:
+    string_id_type_map _statemap;
+    string_pair_deque _macrodeque;
+    string_set _macroset;
+    string_deque_deque _regexes;
+    id_vector _features;
+    id_vector_deque _ids;
+    id_vector_deque _user_ids;
+    id_vector_deque _next_dfas;
+    id_vector_deque _pushes;
+    bool_vector_deque _pops;
+    std::size_t _flags;
+    std::locale _locale;
+    string_deque _lexer_state_names;
+    id_type _eoi;
+
+    // Worker for the state-aware add () overloads. curr_dfa_ is "*" or a
+    // comma separated list of state names; new_dfa_ is ".", a state name,
+    // or starts with '>' (push) or '<' (pop). Throws runtime_error.
+    void add (const char_type *curr_dfa_, const string &regex_,
+        const id_type id_, const char_type *new_dfa_,
+        const bool check_, const id_type user_id_ = npos ())
+    {
+        const bool star_ = *curr_dfa_ == '*' && *(curr_dfa_ + 1) == 0;
+        const bool dot_ = *new_dfa_ == '.' && *(new_dfa_ + 1) == 0;
+        const bool push_ = *new_dfa_ == '>';
+        const char_type *push_dfa_ = 0;
+        const bool pop_ = *new_dfa_ == '<';
+
+        if (push_ || pop_) ++new_dfa_; // Step over the '>' or '<' marker.
+
+        if (check_)
+        {
+            check_for_invalid_id (id_);
+        }
+
+        if (!dot_ && !pop_)
+        {
+            const char_type *temp_ = new_dfa_;
+
+            while (*temp_ && *temp_ != ':')
+            {
+                ++temp_;
+            }
+
+            if (*temp_) push_dfa_ = temp_ + 1;
+
+            validate (new_dfa_, *temp_ ? temp_ : 0);
+
+            if (push_dfa_)
+            {
+                validate (push_dfa_);
+            }
+        }
+
+        // npos means pop here
+        id_type new_dfa_id_ = npos ();
+        id_type push_dfa_id_ = npos ();
+        typename string_id_type_map::const_iterator iter_;
+        typename string_id_type_map::const_iterator end_ = _statemap.end ();
+        id_vector next_dfas_;
+
+        if (!dot_ && !pop_)
+        {
+            if (push_dfa_)
+            {
+                iter_ = _statemap.find (string (new_dfa_, push_dfa_ - 1));
+            }
+            else
+            {
+                iter_ = _statemap.find (new_dfa_);
+            }
+
+            if (iter_ == end_)
+            {
+                std::basic_stringstream<char_type> ss_;
+                std::ostringstream os_;
+
+                os_ << "Unknown state name '";
+
+                while (*new_dfa_)
+                {
+                    os_ << ss_.narrow (*new_dfa_++, ' ');
+                }
+
+                os_ << "'.";
+                throw runtime_error (os_.str ());
+            }
+
+            new_dfa_id_ = iter_->second;
+
+            if (push_dfa_)
+            {
+                iter_ = _statemap.find (push_dfa_);
+
+                if (iter_ == end_)
+                {
+                    std::basic_stringstream<char_type> ss_;
+                    std::ostringstream os_;
+
+                    os_ << "Unknown state name '";
+
+                    while (*push_dfa_)
+                    {
+                        os_ << ss_.narrow (*push_dfa_++, ' ');
+                    }
+
+                    os_ << "'.";
+                    throw runtime_error (os_.str ());
+                }
+
+                push_dfa_id_ = iter_->second;
+            }
+        }
+
+        if (star_)
+        {
+            const std::size_t size_ = _statemap.size ();
+
+            for (id_type i_ = 0; i_ < size_; ++i_)
+            {
+                next_dfas_.push_back (i_);
+            }
+        }
+        else
+        {
+            const char_type *start_ = curr_dfa_;
+            string next_dfa_;
+
+            while (*curr_dfa_)
+            {
+                while (*curr_dfa_ && *curr_dfa_ != ',')
+                {
+                    ++curr_dfa_;
+                }
+
+                next_dfa_.assign (start_, curr_dfa_);
+
+                if (*curr_dfa_)
+                {
+                    ++curr_dfa_;
+                    start_ = curr_dfa_;
+                }
+
+                validate (next_dfa_.c_str ());
+                iter_ = _statemap.find (next_dfa_.c_str ());
+
+                if (iter_ == end_)
+                {
+                    std::basic_stringstream<char_type> ss_;
+                    std::ostringstream os_;
+
+                    os_ << "Unknown state name '";
+                    curr_dfa_ = next_dfa_.c_str ();
+
+                    while (*curr_dfa_)
+                    {
+                        os_ << ss_.narrow (*curr_dfa_++, ' ');
+                    }
+
+                    os_ << "'.";
+                    throw runtime_error (os_.str ());
+                }
+
+                next_dfas_.push_back (iter_->second);
+            }
+        }
+
+        for (std::size_t i_ = 0, size_ = next_dfas_.size ();
+            i_ < size_; ++i_)
+        {
+            const id_type curr_ = next_dfas_[i_];
+
+            _regexes[curr_].push_back (regex_);
+
+            if (regex_[0] == '^')
+            {
+                _features[curr_] |= bol_bit;
+            }
+
+            if (regex_.size () > 0 && regex_[regex_.size () - 1] == '$')
+            {
+                _features[curr_] |= eol_bit;
+            }
+
+            if (id_ == skip ())
+            {
+                _features[curr_] |= skip_bit;
+            }
+            else if (id_ == eoi ())
+            {
+                _features[curr_] |= again_bit;
+            }
+
+            if (push_ || pop_)
+            {
+                _features[curr_] |= recursive_bit;
+            }
+
+            _ids[curr_].push_back (id_);
+            _user_ids[curr_].push_back (user_id_);
+            _next_dfas[curr_].push_back (dot_ ? curr_ : new_dfa_id_);
+            _pushes[curr_].push_back (push_ ? (push_dfa_ ?
+                push_dfa_id_ : curr_) : npos ());
+            _pops[curr_].push_back (pop_);
+        }
+    }
+
+    // Checks that name_ (read up to end_ when that is given, otherwise up
+    // to the terminating NUL) starts with a letter or '_' and continues
+    // with letters, digits, '_' or '-' only. Throws runtime_error when it
+    // does not.
+    void validate (const char_type *name_, const char_type *end_ = 0) const
+    {
+        const char_type *curr_ = name_;
+
+        // The first character is always tested, even when name_ == end_,
+        // and it may be neither a digit nor '-'.
+        bool valid_ = *curr_ == '_' ||
+            (*curr_ >= 'A' && *curr_ <= 'Z') ||
+            (*curr_ >= 'a' && *curr_ <= 'z');
+
+        if (valid_)
+        {
+            ++curr_;
+        }
+
+        // Remaining characters; stops at the first offending one.
+        while (valid_ && *curr_ && curr_ != end_)
+        {
+            const char_type ch_ = *curr_;
+
+            valid_ = ch_ == '_' || ch_ == '-' ||
+                (ch_ >= 'A' && ch_ <= 'Z') ||
+                (ch_ >= 'a' && ch_ <= 'z') ||
+                (ch_ >= '0' && ch_ <= '9');
+            ++curr_;
+        }
+
+        if (!valid_)
+        {
+            std::basic_stringstream<char_type> ss_;
+            std::ostringstream os_;
+
+            os_ << "Invalid name '";
+
+            // The message quotes name_ from its start to the NUL, even
+            // when end_ limited the part that was checked.
+            while (*name_)
+            {
+                os_ << ss_.narrow (*name_, ' ');
+                ++name_;
+            }
+
+            os_ << "'.";
+            throw runtime_error (os_.str ());
+        }
+    }
+
+    // Throws runtime_error when id_ is the current _eoi value or npos ().
+    void check_for_invalid_id (const id_type id_) const
+    {
+        if (id_ == _eoi)
+        {
+            throw runtime_error ("Cannot reuse the id for eoi.");
+        }
+
+        if (id_ == npos ())
+        {
+            throw runtime_error ("id npos is reserved for the UNKNOWN token.");
+        }
+    }
+};
+
+typedef basic_rules<char> rules;
+typedef basic_rules<wchar_t> wrules;
+}
+
+#endif
diff --git a/inc/lexertl/runtime_error.hpp b/inc/lexertl/runtime_error.hpp
new file mode 100644
index 0000000..bd525e8
--- /dev/null
+++ b/inc/lexertl/runtime_error.hpp
@@ -0,0 +1,23 @@
+// runtime_error.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_RUNTIME_ERROR_HPP
+#define LEXERTL_RUNTIME_ERROR_HPP
+
+#include <stdexcept>
+
+namespace lexertl
+{
+class runtime_error : public std::runtime_error
+{
+public:
+    runtime_error (const std::string &what_arg_) :
+        std::runtime_error (what_arg_)
+    {
+    }
+};
+}
+
+#endif
diff --git a/inc/lexertl/serialise.hpp b/inc/lexertl/serialise.hpp
new file mode 100644
index 0000000..9fcab9a
--- /dev/null
+++ b/inc/lexertl/serialise.hpp
@@ -0,0 +1,28 @@
+// serialise.hpp
+// Copyright (c) 2007-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SERIALISE_HPP
+#define LEXERTL_SERIALISE_HPP
+
+#include "state_machine.hpp"
+#include <boost/serialization/vector.hpp>
+
+namespace lexertl
+{
+// IMPORTANT! This won't work if you don't enable RTTI!
+template<typename CharT, typename id_type, class Archive>
+void serialise (basic_state_machine<CharT, id_type> &sm_, Archive &ar_)
+{
+    detail::basic_internals<id_type> &internals_ = sm_.data ();
+
+    ar_ & internals_._eoi;
+    ar_ & *internals_._lookup;
+    ar_ & internals_._dfa_alphabet;
+    ar_ & internals_._features;
+    ar_ & *internals_._dfa;
+}
+}
+
+#endif
diff --git a/inc/lexertl/size_t.hpp b/inc/lexertl/size_t.hpp
new file mode 100644
index 0000000..866ba28
--- /dev/null
+++ b/inc/lexertl/size_t.hpp
@@ -0,0 +1,12 @@
+// size_t.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_SIZE_T_H
+#define LEXERTL_SIZE_T_H
+
+#include <stddef.h> // ptrdiff_t
+#include <cstring>
+
+#endif
diff --git a/inc/lexertl/sm_traits.hpp b/inc/lexertl/sm_traits.hpp
new file mode 100644
index 0000000..889a1a9
--- /dev/null
+++ b/inc/lexertl/sm_traits.hpp
@@ -0,0 +1,44 @@
+// sm_traits.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_SM_TRAITS_H
+#define LEXERTL_SM_TRAITS_H
+
+namespace lexertl
+{
+template<typename ch_type, typename sm_type, bool comp, bool look,
+    bool dfa_nfa>
+struct basic_sm_traits
+{
+    enum {char_24_bit = sizeof(ch_type) > 2, compressed = comp, lookup = look,
+        is_dfa = dfa_nfa};
+    typedef ch_type input_char_type;
+    typedef ch_type char_type;
+    typedef sm_type id_type;
+
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~0);
+    }
+};
+
+template<typename ch_type, typename sm_type, bool look, bool dfa_nfa>
+struct basic_sm_traits<ch_type, sm_type, true, look, dfa_nfa>
+{
+    enum {char_24_bit = sizeof(ch_type) > 2, compressed = true, lookup = look,
+        is_dfa = dfa_nfa};
+    typedef ch_type input_char_type;
+    typedef unsigned char char_type;
+    typedef sm_type id_type;
+
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~0);
+    }
+};
+}
+
+#endif
diff --git a/inc/lexertl/state_machine.hpp b/inc/lexertl/state_machine.hpp
new file mode 100644
index 0000000..e14786c
--- /dev/null
+++ b/inc/lexertl/state_machine.hpp
@@ -0,0 +1,525 @@
+// state_machine.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_STATE_MACHINE_HPP
+#define LEXERTL_STATE_MACHINE_HPP
+
+#include "compile_assert.hpp"
+// memcmp()
+#include <cstring>
+#include <deque>
+#include "internals.hpp"
+#include <map>
+#include <set>
+#include "sm_traits.hpp"
+#include "string_token.hpp"
+
+namespace lexertl
+{
+template<typename char_type, typename id_type = std::size_t>
+class basic_state_machine
+{
+public:
+    typedef basic_sm_traits<char_type, id_type,
+        (sizeof (char_type) > 1), true, true> traits;
+    typedef detail::basic_internals<id_type> internals;
+
+    // If you get a compile error here you have
+    // failed to define an unsigned id type.
+    compile_assert<(static_cast<id_type>(~0) > 0)>
+        _valid_id_type;
+
+    basic_state_machine () :
+        _valid_id_type (),
+        _internals ()
+    {
+    } 
+
+    void clear ()
+    {
+        _internals.clear ();
+    }
+
+    internals &data ()
+    {
+        return _internals;
+    }
+
+    const internals &data () const
+    {
+        return _internals;
+    }
+
+    bool empty () const
+    {
+        return _internals.empty ();
+    }
+
+    id_type eoi () const
+    {
+        return _internals._eoi;
+    }
+
+    void minimise ()
+    {
+        const id_type dfas_ = static_cast<id_type>(_internals.
+            _dfa->size ());
+
+        for (id_type i_ = 0; i_ < dfas_; ++i_)
+        {
+            const id_type dfa_alphabet_ = _internals._dfa_alphabet[i_];
+            id_type_vector *dfa_ = _internals._dfa[i_];
+
+            if (dfa_alphabet_ != 0)
+            {
+                std::size_t size_ = 0;
+
+                do
+                {
+                    size_ = dfa_->size ();
+                    minimise_dfa (dfa_alphabet_, *dfa_, size_);
+                } while (dfa_->size () != size_);
+            }
+        }
+    }
+
+    static id_type npos ()
+    {
+        return static_cast<id_type>(~0);
+    }
+
+    static id_type skip ()
+    {
+        return static_cast<id_type>(~1);
+    }
+
+    void swap (basic_state_machine &rhs_)
+    {
+        _internals.swap (rhs_._internals);
+    }
+
+private:
+    typedef typename internals::id_type_vector id_type_vector;
+    typedef std::set<id_type> index_set;
+    internals _internals;
+    // Merges byte-identical rows of dfa_ and renumbers all state references.
+    void minimise_dfa (const id_type dfa_alphabet_,
+        id_type_vector &dfa_, std::size_t size_)
+    {
+        const id_type *first_ = &dfa_.front ();
+        const id_type *end_ = first_ + size_;
+        id_type index_ = 1;
+        id_type new_index_ = 1;
+        id_type_vector lookup_ (size_ / dfa_alphabet_, npos ());
+        id_type *lookup_ptr_ = &lookup_.front ();
+        index_set index_set_;
+        const id_type bol_index_ = dfa_.front ();
+        // lookup_ maps each old row index to its index after merging.
+        *lookup_ptr_ = 0;
+        // Only one 'jam' state, so skip it.
+        first_ += dfa_alphabet_;
+
+        for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
+        {
+            const id_type *second_ = first_ + dfa_alphabet_;
+
+            for (id_type curr_index_ = index_ + 1; second_ < end_;
+                ++curr_index_, second_ += dfa_alphabet_)
+            {
+                if (index_set_.find (curr_index_) != index_set_.end ())
+                {
+                    continue;
+                }
+
+                // Some systems have memcmp in namespace std.
+                using namespace std;
+
+                if (memcmp (first_, second_, sizeof (id_type) *
+                    dfa_alphabet_) == 0)
+                {
+                    index_set_.insert (curr_index_);
+                    lookup_ptr_[curr_index_] = new_index_;
+                }
+            }
+            // First occurrence of this row: give it the next new index.
+            if (lookup_ptr_[index_] == npos ())
+            {
+                lookup_ptr_[index_] = new_index_;
+                ++new_index_;
+            }
+        }
+
+        if (!index_set_.empty ())
+        {
+            const id_type *front_ = &dfa_.front ();
+            id_type_vector new_dfa_ (front_, front_ + dfa_alphabet_);
+            typename index_set::const_iterator set_end_ = index_set_.end ();
+            const id_type *ptr_ = front_ + dfa_alphabet_;
+            id_type *new_ptr_ = 0;
+            // One row is dropped for every duplicate recorded in index_set_.
+            new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
+            new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
+            size_ /= dfa_alphabet_;
+
+            if (bol_index_)
+            {
+                new_dfa_.front () = lookup_ptr_[bol_index_];
+            }
+
+            for (index_ = 1; index_ < size_; ++index_)
+            {
+                if (index_set_.find (index_) != set_end_)
+                {
+                    ptr_ += dfa_alphabet_;
+                    continue;
+                }
+                // Plain fields are copied; state indexes go through lookup_.
+                new_ptr_[end_state_index] = ptr_[end_state_index];
+                new_ptr_[id_index] = ptr_[id_index];
+                new_ptr_[user_id_index] = ptr_[user_id_index];
+                new_ptr_[push_dfa_index] = ptr_[push_dfa_index];
+                new_ptr_[next_dfa_index] = ptr_[next_dfa_index];
+                new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
+                new_ptr_ += transitions_index;
+                ptr_ += transitions_index;
+
+                for (id_type i_ = transitions_index; i_ < dfa_alphabet_; ++i_)
+                {
+                    *new_ptr_++ = lookup_ptr_[*ptr_++];
+                }
+            }
+
+            dfa_.swap (new_dfa_);
+        }
+    }
+};
+
+typedef basic_state_machine<char> state_machine;
+typedef basic_state_machine<wchar_t> wstate_machine;
+
+template<typename char_type, typename id_type = std::size_t,
+    bool is_dfa = true>
+struct basic_char_state_machine
+{
+    typedef basic_sm_traits<char_type, id_type, false, false, is_dfa> traits;
+    typedef detail::basic_internals<id_type> internals;
+    typedef typename internals::id_type_vector id_type_vector;
+
+    struct state
+    {
+        typedef basic_string_token<char_type> string_token;
+        typedef std::map<id_type, string_token> id_type_string_token_map;
+        typedef std::pair<id_type, string_token> id_type_string_token_pair;
+        enum push_pop_dfa {neither, push_dfa, pop_dfa};
+
+        bool _end_state;
+        push_pop_dfa _push_pop_dfa;
+        id_type _id;
+        id_type _user_id;
+        id_type _push_dfa;
+        id_type _next_dfa;
+        id_type _eol_index;
+        id_type_string_token_map _transitions;
+
+        state () :
+            _end_state (false),
+            _push_pop_dfa (neither),
+            _id (0),
+            _user_id (traits::npos ()),
+            _push_dfa (traits::npos ()),
+            _next_dfa (0),
+            _eol_index (traits::npos ()),
+            _transitions ()
+        {
+        }
+
+        // Member-wise equality. rhs_ is taken by const reference so the
+        // transition map is not copied on every comparison.
+        bool operator == (const state &rhs_) const
+        {
+            return _end_state == rhs_._end_state &&
+                _push_pop_dfa == rhs_._push_pop_dfa &&
+                _id == rhs_._id && _user_id == rhs_._user_id &&
+                _push_dfa == rhs_._push_dfa && _next_dfa == rhs_._next_dfa &&
+                _eol_index == rhs_._eol_index &&
+                _transitions == rhs_._transitions;
+        }
+    };
+
+    typedef typename state::string_token string_token;
+    typedef std::vector<state> state_vector;
+    typedef std::vector<string_token> string_token_vector;
+    typedef typename state::id_type_string_token_pair
+        id_type_string_token_pair;
+
+    struct dfa
+    {
+        id_type _bol_index;
+        state_vector _states;
+
+        dfa (const std::size_t size_) :
+            _bol_index (traits::npos ()),
+            _states (state_vector (size_))
+        {
+        }
+
+        std::size_t size () const
+        {
+            return _states.size ();
+        }
+
+        void swap (dfa &rhs_)
+        {
+            std::swap (_bol_index, rhs_._bol_index);
+            _states.swap (rhs_._states);
+        }
+    };
+
+    typedef std::deque<dfa> dfa_deque;
+
+    dfa_deque _sm_deque;
+
+    // If you get a compile error here you have
+    // failed to define an unsigned id type.
+    compile_assert<(static_cast<id_type>(~0) > 0)>
+        _valid_id_type;
+
+    basic_char_state_machine () :
+        _sm_deque (),
+        _valid_id_type ()
+    {
+    }
+
+    void append (const string_token_vector &token_vector_,
+        const internals &internals_, const id_type dfa_index_)
+    {
+        const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_index_];
+        const std::size_t alphabet_ = dfa_alphabet_ - transitions_index;
+        const id_type_vector &source_dfa_ = *internals_._dfa[dfa_index_];
+        const id_type *ptr_ = &source_dfa_.front ();
+        const std::size_t size_ = (source_dfa_.size () - dfa_alphabet_) /
+            dfa_alphabet_;
+        typename state::id_type_string_token_map::iterator trans_iter_;
+
+        _sm_deque.push_back (dfa (size_));
+
+        dfa &dest_dfa_ = _sm_deque.back ();
+
+        if (*ptr_)
+        {
+            dest_dfa_._bol_index = *ptr_ - 1;
+        }
+
+        ptr_ += dfa_alphabet_;
+
+        for (id_type i_ = 0; i_ < size_; ++i_)
+        {
+            state &state_ = dest_dfa_._states[i_];
+
+            state_._end_state = ptr_[end_state_index] != 0;
+
+            if (ptr_[push_dfa_index] != npos ())
+            {
+                state_._push_pop_dfa = state::push_dfa;
+            }
+            else if (ptr_[end_state_index] & pop_dfa_bit)
+            {
+                state_._push_pop_dfa = state::pop_dfa;
+            }
+
+            state_._id = ptr_[id_index];
+            state_._user_id = ptr_[user_id_index];
+            state_._push_dfa = ptr_[push_dfa_index];
+            state_._next_dfa = ptr_[next_dfa_index];
+
+            if (ptr_[eol_index])
+            {
+                state_._eol_index = ptr_[eol_index] - 1;
+            }
+
+            ptr_ += transitions_index;
+
+            for (id_type col_index_ = 0; col_index_ < alphabet_;
+                ++col_index_, ++ptr_)
+            {
+                const id_type next_ = *ptr_;
+
+                if (next_ > 0)
+                {
+                    trans_iter_ = state_._transitions.find (next_ - 1);
+
+                    if (trans_iter_ == state_._transitions.end ())
+                    {
+                        trans_iter_ = state_._transitions.insert
+                            (id_type_string_token_pair (next_ - 1,
+                            token_vector_[col_index_])).first;
+                    }
+                    else
+                    {
+                        trans_iter_->second.insert (token_vector_[col_index_]);
+                    }
+                }
+            }
+        }
+    }
+
+    void clear ()
+    {
+        _sm_deque.clear ();
+    }
+
+    bool empty () const
+    {
+        return _sm_deque.empty ();
+    }
+
+    void minimise ()
+    {
+        const id_type dfas_ = static_cast<id_type>(_sm_deque.size ());
+
+        for (id_type i_ = 0; i_ < dfas_; ++i_)
+        {
+            dfa *dfa_ = &_sm_deque[i_];
+
+            if (dfa_->size () > 0)
+            {
+                std::size_t size_ = 0;
+
+                do
+                {
+                    size_ = dfa_->size ();
+                    minimise_dfa (*dfa_, size_);
+                } while (dfa_->size () != size_);
+            }
+        }
+    }
+
+    static id_type npos ()
+    {
+        return traits::npos ();
+    }
+
+    id_type size () const
+    {
+        return static_cast<id_type>(_sm_deque.size ());
+    }
+
+    static id_type skip ()
+    {
+        return static_cast<id_type>(~1);
+    }
+
+    void swap (basic_char_state_machine &csm_)
+    {
+        _sm_deque.swap (csm_._sm_deque);
+    }
+
+private:
+    typedef std::set<id_type> index_set;
+
+    // Merges equal states among the first size_ states of dfa_ and remaps
+    // every state index (bol, eol, transition targets) accordingly.
+    void minimise_dfa (dfa &dfa_, std::size_t size_)
+    {
+        const state *first_ = &dfa_._states.front ();
+        const state *end_ = first_ + size_;
+        id_type index_ = 0;
+        id_type new_index_ = 0;
+        id_type_vector lookup_ (size_, npos ());
+        id_type *lookup_ptr_ = &lookup_.front ();
+        index_set index_set_;
+        // Pass 1: collect duplicates and build the old -> new index lookup.
+        for (; first_ != end_; ++first_, ++index_)
+        {
+            const state *second_ = first_ + 1;
+
+            for (id_type curr_index_ = index_ + 1; second_ != end_;
+                ++curr_index_, ++second_)
+            {
+                if (index_set_.find (curr_index_) != index_set_.end ())
+                {
+                    continue;
+                }
+
+                if (*first_ == *second_)
+                {
+                    index_set_.insert (curr_index_);
+                    lookup_ptr_[curr_index_] = new_index_;
+                }
+            }
+
+            if (lookup_ptr_[index_] == npos ())
+            {
+                lookup_ptr_[index_] = new_index_;
+                ++new_index_;
+            }
+        }
+        // Pass 2: rebuild the state vector only if something was merged.
+        if (!index_set_.empty ())
+        {
+            dfa new_dfa_ (new_index_);
+            typename index_set::const_iterator set_end_ = index_set_.end ();
+            const state *ptr_ = &dfa_._states.front ();
+            state *new_ptr_ = &new_dfa_._states.front ();
+
+            if (dfa_._bol_index != npos ())
+            {
+                new_dfa_._bol_index = lookup_ptr_[dfa_._bol_index];
+            }
+
+            for (index_ = 0; index_ < size_; ++index_, ++ptr_)
+            {
+                if (index_set_.find (index_) != set_end_)
+                {
+                    continue;
+                }
+                // Copy every plain field, including _id and the push fields.
+                new_ptr_->_end_state = ptr_->_end_state;
+                new_ptr_->_push_pop_dfa = ptr_->_push_pop_dfa;
+                new_ptr_->_id = ptr_->_id;
+                new_ptr_->_user_id = ptr_->_user_id;
+                new_ptr_->_push_dfa = ptr_->_push_dfa;
+                new_ptr_->_next_dfa = ptr_->_next_dfa;
+
+                if (ptr_->_eol_index != npos ())
+                {
+                    new_ptr_->_eol_index = lookup_ptr_[ptr_->_eol_index];
+                }
+
+                typename state::id_type_string_token_map::const_iterator
+                    iter_ = ptr_->_transitions.begin ();
+                typename state::id_type_string_token_map::const_iterator end_ =
+                    ptr_->_transitions.end ();
+                typename state::id_type_string_token_map::iterator find_;
+                // Transitions whose targets were merged end up in one entry.
+                for (; iter_ != end_; ++iter_)
+                {
+                    find_ = new_ptr_->_transitions.find
+                        (lookup_ptr_[iter_->first]);
+
+                    if (find_ == new_ptr_->_transitions.end ())
+                    {
+                        new_ptr_->_transitions.insert
+                            (id_type_string_token_pair
+                            (lookup_ptr_[iter_->first], iter_->second));
+                    }
+                    else
+                    {
+                        find_->second.insert (iter_->second);
+                    }
+                }
+                ++new_ptr_;
+            }
+
+            dfa_.swap (new_dfa_);
+        }
+    }
+};
+
+typedef basic_char_state_machine<char> char_state_machine;
+typedef basic_char_state_machine<wchar_t> wchar_state_machine;
+}
+
+#endif
diff --git a/inc/lexertl/stream_shared_iterator.hpp b/inc/lexertl/stream_shared_iterator.hpp
new file mode 100644
index 0000000..61b529e
--- /dev/null
+++ b/inc/lexertl/stream_shared_iterator.hpp
@@ -0,0 +1,350 @@
+// stream_shared_iterator.hpp
+// Copyright (c) 2010-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LEXERTL_STREAM_SHARED_ITERATOR_H
+#define LEXERTL_STREAM_SHARED_ITERATOR_H
+
+#include <algorithm>
+// memcpy
+#include <cstring>
+#include <iostream>
+#include <list>
+#include <math.h>
+#include "runtime_error.hpp"
+#include "size_t.hpp"
+#include <vector>
+
+namespace lexertl
+{
+template<typename char_type>
+class basic_stream_shared_iterator
+{
+public:
+    typedef std::basic_istream<char_type> istream;
+    typedef std::forward_iterator_tag iterator_category;
+    typedef std::size_t difference_type;
+    typedef char_type value_type;
+    typedef char_type *pointer;
+    typedef char_type &reference;
+
+    basic_stream_shared_iterator () :
+        _master (false),
+        _live (false),
+        _index (shared::npos ()),
+        _shared (0)
+    {
+    }
+
+    basic_stream_shared_iterator (istream &stream_,
+        const std::size_t buff_size_ = 1024,
+        const std::size_t increment_ = 1024) :
+        _master (true),
+        _live (false),
+        _index (shared::npos ()),
+        // For exception safety don't call new yet
+        _shared (0)
+    {
+        // Safe to call potentially throwing new now.
+        _shared = new shared (stream_, buff_size_, increment_);
+        ++_shared->_ref_count;
+        _iter = _shared->_clients.insert (_shared->_clients.end (), this);
+    }
+
+    basic_stream_shared_iterator (const basic_stream_shared_iterator &rhs_) :
+        _master (false),
+        _live (false),
+        _index (rhs_._master ? rhs_._shared->lowest () : rhs_._index),
+        _shared (rhs_._shared)
+    {
+        if (_shared)
+        {
+            // New copy of an iterator.
+            // The assumption is that any copy must be live
+            // even if the rhs is not (otherwise we will never
+            // have a record of the start of the current range!)
+            ++_shared->_ref_count;
+            _iter = _shared->_clients.insert (_shared->_clients.end (), this);
+            _live = true;
+        }
+    }
+
+    ~basic_stream_shared_iterator ()
+    {
+        if (_shared)
+        {
+            --_shared->_ref_count;
+            _shared->erase (this);
+
+            if (_shared->_ref_count == 0)
+            {
+                delete _shared;
+                _shared = 0;
+            }
+        }
+    }
+
+    // Copy assignment: adopts rhs_'s shared state, liveness and position.
+    basic_stream_shared_iterator &operator =
+        (const basic_stream_shared_iterator &rhs_)
+    {
+        if (this != &rhs_)
+        {
+            _master = false;
+            _index  = rhs_._master ? rhs_._shared->lowest () : rhs_._index;
+
+            if (_live && !rhs_._live)
+            {
+                _shared->erase (this);
+                if (!rhs_._shared)
+                {
+                    --_shared->_ref_count;
+                }
+            }
+            else if (!_live && rhs_._live)
+            {
+                // Track our own list node; rhs_._iter must keep rhs_'s node.
+                _iter = rhs_._shared->_clients.insert (rhs_._shared->
+                    _clients.end (), this);
+                if (!_shared)
+                {
+                    ++rhs_._shared->_ref_count;
+                }
+            }
+
+            _live = rhs_._live;
+            _shared = rhs_._shared;
+        }
+
+        return *this;
+    }
+
+    bool operator == (const basic_stream_shared_iterator &rhs_) const
+    {
+        return _index == rhs_._index &&
+            (_shared == rhs_._shared ||
+            (_index == shared::npos () || rhs_._index == shared::npos ()) &&
+            (!_shared || !rhs_._shared));
+    }
+
+    bool operator != (const basic_stream_shared_iterator &rhs_) const
+    {
+        return !(*this == rhs_);
+    }
+
+    const char_type &operator * ()
+    {
+        check_master ();
+        return _shared->_buffer[_index];
+    }
+
+    basic_stream_shared_iterator &operator ++ ()
+    {
+        check_master ();
+        ++_index;
+        update_state ();
+        return *this;
+    }
+
+    basic_stream_shared_iterator operator ++ (int)
+    {
+        basic_stream_shared_iterator iter_ = *this;
+
+        check_master ();
+        ++_index;
+        update_state ();
+        return iter_;
+    }
+
+private:
+    class shared
+    {
+    public:
+        std::size_t _ref_count;
+        typedef std::vector<char_type> char_vector;
+        typedef std::list<basic_stream_shared_iterator *> iter_list;
+        istream &_stream;
+        std::size_t _increment;
+        std::size_t _len;
+        char_vector _buffer;
+        iter_list _clients;
+
+        // Fills the initial buffer from stream_. Initialisers follow the
+        // member declaration order (_ref_count, _stream, _increment).
+        shared (istream &stream_, const std::size_t buff_size_,
+            const std::size_t increment_) :
+            _ref_count (0), _stream (stream_), _increment (increment_)
+        {
+            _buffer.resize (buff_size_);
+            _stream.read (&_buffer.front (), _buffer.size ());
+            _len = static_cast<std::size_t>(_stream.gcount ());
+        }
+
+        // Fetches more input into _buffer; returns false if nothing was read.
+        bool reload_buffer ()
+        {
+            const std::size_t lowest_ = lowest ();
+            std::size_t read_ = 0;
+
+            if (lowest_ == 0)
+            {
+                // No prefix can be discarded, so grow the buffer instead.
+                const std::size_t old_size_ = _buffer.size ();
+
+                _buffer.resize (old_size_ + _increment);
+                _stream.read (&_buffer.front () + old_size_, _increment);
+                read_ = static_cast<std::size_t>(_stream.gcount ());
+
+                if (read_)
+                {
+                    read_ += old_size_;
+                    _len = read_;
+                }
+            }
+            else
+            {
+                // Some systems have memmove in namespace std
+                using namespace std;
+                const size_t start_ = _buffer.size () - lowest_;
+                const size_t len_ = _buffer.size () - start_;
+                // Source and destination overlap whenever lowest_ < start_.
+                memmove (&_buffer.front (), &_buffer[lowest_], start_ *
+                    sizeof (char_type));
+                _stream.read (&_buffer.front () + start_, len_);
+                read_ = static_cast<size_t>(_stream.gcount ());
+                subtract (lowest_);
+
+                if (read_)
+                {
+                    read_ += start_;
+                    _len = read_;
+                }
+                else
+                {
+                    _len = highest ();
+                }
+            }
+
+            return read_ != 0;
+        }
+
+        void erase (basic_stream_shared_iterator *ptr_)
+        {
+            if (ptr_->_iter != _clients.end ())
+            {
+                _clients.erase (ptr_->_iter);
+                ptr_->_iter = _clients.end ();
+            }
+        }
+
+        std::size_t lowest () const
+        {
+            std::size_t lowest_ = npos ();
+            typename iter_list::const_iterator iter_ = _clients.begin ();
+            typename iter_list::const_iterator end_ = _clients.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                const basic_stream_shared_iterator *ptr_ = *iter_;
+
+                if (ptr_->_index < lowest_)
+                {
+                    lowest_ = ptr_->_index;
+                }
+            }
+
+            if (lowest_ == npos ())
+            {
+                lowest_ = 0;
+            }
+
+            return lowest_;
+        }
+
+        std::size_t highest () const
+        {
+            std::size_t highest_ = 0;
+            typename iter_list::const_iterator iter_ = _clients.begin ();
+            typename iter_list::const_iterator end_ = _clients.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                const basic_stream_shared_iterator *ptr_ = *iter_;
+
+                if (ptr_->_index != npos () && ptr_->_index > highest_)
+                {
+                    highest_ = ptr_->_index;
+                }
+            }
+
+            return highest_;
+        }
+
+        void subtract (const std::size_t lowest_)
+        {
+            typename iter_list::iterator iter_ = _clients.begin ();
+            typename iter_list::iterator end_ = _clients.end ();
+
+            for (; iter_ != end_; ++iter_)
+            {
+                basic_stream_shared_iterator *ptr_ = *iter_;
+
+                if (ptr_->_index != npos ())
+                {
+                    ptr_->_index -= lowest_;
+                }
+            }
+        }
+
+        static std::size_t npos ()
+        {
+            return static_cast<std::size_t>(~0);
+        }
+
+    private:
+        shared &operator = (const shared &rhs_);
+    };
+
+    bool _master;
+    bool _live;
+    std::size_t _index;
+    shared *_shared;
+    mutable typename shared::iter_list::iterator _iter;
+
+    void check_master ()
+    {
+        if (!_shared)
+        {
+            throw runtime_error ("Cannot manipulate null (end) "
+                "stream_shared_iterators.");
+        }
+
+        if (_master)
+        {
+            _master = false;
+            _live = true;
+            _index = _shared->lowest ();
+        }
+    }
+
+    void update_state ()
+    {
+        if (_index >= _shared->_len)
+        {
+            if (!_shared->reload_buffer ())
+            {
+                _shared->erase (this);
+                _index = shared::npos ();
+                _live = false;
+            }
+        }
+    }
+};
+
+typedef basic_stream_shared_iterator<char> stream_shared_iterator;
+typedef basic_stream_shared_iterator<wchar_t> wstream_shared_iterator;
+}
+
+#endif
diff --git a/inc/lexertl/string_token.hpp b/inc/lexertl/string_token.hpp
new file mode 100644
index 0000000..0c8c88d
--- /dev/null
+++ b/inc/lexertl/string_token.hpp
@@ -0,0 +1,422 @@
+// string_token.hpp
+// Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_STRING_TOKEN_HPP
+#define LEXERTL_STRING_TOKEN_HPP
+
+#include "char_traits.hpp"
+#include <ios> // Needed by GCC 4.4
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace lexertl
+{
+template<typename ch_type>
+struct basic_string_token
+{
+    typedef ch_type char_type;
+    typedef basic_char_traits<char_type> char_traits;
+    typedef typename char_traits::index_type index_type;
+    typedef std::pair<index_type, index_type> range;
+    typedef std::vector<range> range_vector;
+    typedef std::basic_string<char_type> string;
+    typedef basic_string_token<char_type> string_token;
+
+    range_vector _ranges;
+
+    basic_string_token () : // Builds a token that holds no ranges.
+        _ranges ()
+    {
+    }
+
+    basic_string_token (char_type ch_) : // Token for the one range [ch_, ch_].
+        _ranges ()
+    {
+        insert (range (ch_, ch_));
+    }
+
+    basic_string_token (char_type first_, char_type second_) :
+        _ranges ()
+    {
+        insert (range (first_, second_)); // Starts as [first_, second_].
+    }
+
+    void clear () // Drops every stored range.
+    {
+        _ranges.clear ();
+    }
+
+    bool empty () const // True when no ranges are stored.
+    {
+        return _ranges.empty ();
+    }
+
+    bool any () const // True only for the single range [0, max_val ()].
+    {
+        return _ranges.size () == 1 && _ranges.front ().first == 0 &&
+            _ranges.front ().second == char_traits::max_val ();
+    }
+
+    bool operator < (const basic_string_token &rhs_) const
+    {
+        return _ranges < rhs_._ranges; // std::vector ordering of the ranges.
+    }
+
+    bool operator == (const basic_string_token &rhs_) const
+    {
+        return _ranges == rhs_._ranges; // Equal when the range vectors match.
+    }
+
+    bool negatable () const
+    {
+        // Count the characters covered by every stored range.
+        std::size_t sum_ = 0;
+
+        for (std::size_t i_ = 0, count_ = _ranges.size (); i_ < count_; ++i_)
+        {
+            const std::size_t low_ = _ranges[i_].first;
+            const std::size_t high_ = _ranges[i_].second;
+            sum_ += high_ + 1 - low_;
+        }
+        // True when the count exceeds half of char_traits::max_val ().
+        return sum_ > static_cast<std::size_t>(char_traits::max_val ()) / 2;
+    }
+
+    void swap (basic_string_token &rhs_)
+    {
+        _ranges.swap (rhs_._ranges); // Exchanges the two range vectors.
+    }
+
+    void insert (const basic_string_token &rhs_)
+    {
+        // Feed each of rhs_'s ranges to the single-range overload in order.
+        const range_vector &source_ = rhs_._ranges;
+
+        for (std::size_t idx_ = 0; idx_ != source_.size (); ++idx_)
+        {
+            insert (source_[idx_]);
+        }
+    }
+
+    // Merges rhs_ into _ranges. Deliberately pass by value - may modify
+    typename range_vector::iterator insert (range rhs_)
+    {
+        bool insert_ = true;
+        typename range_vector::iterator iter_ = _ranges.begin ();
+        typename range_vector::const_iterator end_ = _ranges.end ();
+        // Walk the stored items, folding any that touch rhs_ into rhs_.
+        while (iter_ != end_)
+        {
+            // rhs_ follows current item
+            if (rhs_.first > iter_->second)
+            {
+                if (rhs_.first == iter_->second + 1)
+                {
+                    // Adjacent: absorb the item into rhs_ (auto normalise)
+                    rhs_.first = iter_->first;
+                }
+                else
+                {
+                    // No intersection, consider next
+                    ++iter_;
+                    continue;
+                }
+            }
+            // rhs_ precedes current item
+            else if (rhs_.second < iter_->first)
+            {
+                if (rhs_.second == iter_->first - 1)
+                {
+                    // Adjacent: absorb the item into rhs_ (auto normalise)
+                    rhs_.second = iter_->second;
+                }
+                else
+                {
+                    // Gap before this item: insert here
+                    break;
+                }
+            }
+            else
+            {
+                // overlap (under): rhs_ starts before the item
+                if (rhs_.first < iter_->first)
+                {
+                    if (rhs_.second < iter_->second)
+                    {
+                        rhs_.second = iter_->second;
+                    }
+                }
+                // overlap (over): rhs_ ends after the item
+                else if (rhs_.second > iter_->second)
+                {
+                    if (rhs_.first > iter_->first)
+                    {
+                        rhs_.first = iter_->first;
+                    }
+                }
+                // subset: the item already covers rhs_, nothing to add
+                else
+                {
+                    insert_ = false;
+                    iter_ = _ranges.end ();
+                    break;
+                }
+            }
+
+            // Code minimisation: this always applies unless we have already
+            // exited the loop, or "continue" executed.
+            iter_ = _ranges.erase (iter_);
+            end_ = _ranges.end ();
+        }
+
+        if (insert_)
+        {
+            iter_ = _ranges.insert(iter_, rhs_);
+        }
+
+        return iter_; // The inserted item, or end () in the subset case.
+    }
+
+    void negate () // Replaces the ranges with their complement in [0, max_].
+    {
+        index_type next_ = 0; // First value not yet accounted for.
+        const index_type max_ = char_traits::max_val ();
+        string_token temp_;
+        typename range_vector::iterator iter_ = _ranges.begin ();
+        typename range_vector::const_iterator end_ = _ranges.end ();
+        bool finished_ = false;
+
+        for (; iter_ != end_; ++iter_)
+        {
+            if (next_ < iter_->first)
+            {
+                temp_.insert (range (next_, iter_->first - 1)); // Gap before.
+            }
+
+            if (iter_->second < max_)
+            {
+                next_ = iter_->second + 1;
+            }
+            else
+            {
+                finished_ = true; // Item reaches max_: no trailing gap.
+                break;
+            }
+        }
+
+        if (!finished_)
+        {
+            temp_.insert (range (next_, max_)); // Trailing gap up to max_.
+        }
+
+        swap (temp_);
+    }
+
+    void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
+    {
+        typename range_vector::iterator lhs_iter_ = _ranges.begin ();
+        typename range_vector::const_iterator lhs_end_ = _ranges.end ();
+        typename range_vector::iterator rhs_iter_ = rhs_._ranges.begin ();
+        typename range_vector::const_iterator rhs_end_ = rhs_._ranges.end ();
+        // Shared values are cut out of both tokens and added to overlap_.
+        while (lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_)
+        {
+            if (rhs_iter_->first > lhs_iter_->second)
+            {
+                ++lhs_iter_; // lhs item lies wholly before the rhs item.
+            }
+            else if (rhs_iter_->second < lhs_iter_->first)
+            {
+                ++rhs_iter_; // rhs item lies wholly before the lhs item.
+            }
+            else
+            {
+                range range_; // The overlap of the two current items.
+
+                if (rhs_iter_->first > lhs_iter_->first)
+                {
+                    range_.first = rhs_iter_->first;
+                }
+                else
+                {
+                    range_.first = lhs_iter_->first;
+                }
+
+                if (rhs_iter_->second < lhs_iter_->second)
+                {
+                    range_.second = rhs_iter_->second;
+                }
+                else
+                {
+                    range_.second = lhs_iter_->second;
+                }
+
+                adjust (range_, *this, lhs_iter_, lhs_end_);
+                adjust (range_, rhs_, rhs_iter_, rhs_end_);
+                overlap_.insert (range_);
+            }
+        }
+    }
+
+    void remove (basic_string_token &rhs_)
+    {
+        typename range_vector::iterator lhs_iter_ = _ranges.begin ();
+        typename range_vector::const_iterator lhs_end_ = _ranges.end ();
+        typename range_vector::iterator rhs_iter_ = rhs_._ranges.begin ();
+        typename range_vector::const_iterator rhs_end_ = rhs_._ranges.end ();
+        // Values also found in rhs_ are cut out of *this; rhs_ is only read.
+        while (lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_)
+        {
+            if (rhs_iter_->first > lhs_iter_->second)
+            {
+                ++lhs_iter_; // lhs item lies wholly before the rhs item.
+            }
+            else if (rhs_iter_->second < lhs_iter_->first)
+            {
+                ++rhs_iter_; // rhs item lies wholly before the lhs item.
+            }
+            else
+            {
+                range range_; // The overlap of the two current items.
+
+                if (rhs_iter_->first > lhs_iter_->first)
+                {
+                    range_.first = rhs_iter_->first;
+                }
+                else
+                {
+                    range_.first = lhs_iter_->first;
+                }
+
+                if (rhs_iter_->second < lhs_iter_->second)
+                {
+                    range_.second = rhs_iter_->second;
+                }
+                else
+                {
+                    range_.second = lhs_iter_->second;
+                }
+
+                adjust (range_, *this, lhs_iter_, lhs_end_);
+            }
+        }
+    }
+
+    static string escape_char (const typename char_traits::index_type ch_)
+    {
+        string out_; // Escaped spelling of ch_, built up below.
+        // Named escapes come first; the default handles everything else.
+        switch (ch_)
+        {
+            case '\0':
+                out_ += '\\';
+                out_ += '0';
+                break;
+            case '\a':
+                out_ += '\\';
+                out_ += 'a';
+                break;
+            case '\b':
+                out_ += '\\';
+                out_ += 'b';
+                break;
+            case 27: // No named escape is used for 27: emit it in hex form.
+                out_ += '\\';
+                out_ += 'x';
+                out_ += '1';
+                out_ += 'b';
+                break;
+            case '\f':
+                out_ += '\\';
+                out_ += 'f';
+                break;
+            case '\n':
+                out_ += '\\';
+                out_ += 'n';
+                break;
+            case '\r':
+                out_ += '\\';
+                out_ += 'r';
+                break;
+            case '\t':
+                out_ += '\\';
+                out_ += 't';
+                break;
+            case '\v':
+                out_ += '\\';
+                out_ += 'v';
+                break;
+            case '\\':
+                out_ += '\\';
+                out_ += '\\';
+                break;
+            case '"':
+                out_ += '\\';
+                out_ += '"';
+                break;
+            case '\'':
+                out_ += '\\';
+                out_ += '\'';
+                break;
+            default:
+            {
+                if (ch_ < 32 || ch_ > 126) // Outside printable ASCII.
+                {
+                    std::basic_stringstream<char_type> ss_;
+
+                    out_ += '\\';
+                    out_ += 'x';
+                    ss_ << std::hex <<
+                        static_cast<std::size_t> (ch_);
+                    out_ += ss_.str (); // Hex digits, no padding applied.
+                }
+                else
+                {
+                    out_ += ch_; // Printable: appended unchanged.
+                }
+
+                break;
+            }
+        }
+
+        return out_;
+    }
+
+private:
+    void adjust (const range &range_, basic_string_token &token_,
+        typename range_vector::iterator &iter_,
+        typename range_vector::const_iterator &end_)
+    { // Cuts range_ out of the item at iter_; callers pass their overlap.
+        if (range_.first > iter_->first)
+        {
+            const index_type second_ = iter_->second;
+            // Keep the part of the item that lies below range_.
+            iter_->second = range_.first - 1;
+
+            if (range_.second < second_)
+            {
+                range new_range_ (range_.second + 1, second_);
+                // The item also extends above range_: re-add that part.
+                iter_ = token_.insert (new_range_);
+                end_ = token_._ranges.end ();
+            }
+        }
+        else if (range_.second < iter_->second)
+        {
+            iter_->first = range_.second + 1; // Keep only the part above.
+        }
+        else
+        {
+            iter_ = token_._ranges.erase (iter_); // Item wholly removed.
+            end_ = token_._ranges.end ();
+        }
+    }
+};
+}
+
+#endif
diff --git a/inc/lexertl/utf_iterators.hpp b/inc/lexertl/utf_iterators.hpp
new file mode 100644
index 0000000..f4251c5
--- /dev/null
+++ b/inc/lexertl/utf_iterators.hpp
@@ -0,0 +1,380 @@
+// utf_iterators.hpp
+// Copyright (c) 2012 Ben Hanson (http://www.benhanson.net/)
+// Inspired by http://utfcpp.sourceforge.net/
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef LEXERTL_UTF_ITERATORS_HPP
+#define LEXERTL_UTF_ITERATORS_HPP
+
+#include <iterator>
+
+namespace lexertl
+{
+template<typename char_iterator, typename char_type>
+class basic_utf8_in_iterator : // Decodes UTF-8 bytes into char_type values.
+    public std::iterator<std::input_iterator_tag, char_type>
+{
+public:
+    basic_utf8_in_iterator () : // No source position; _char is zero.
+        _char (0)
+    {
+    }
+
+    explicit basic_utf8_in_iterator (const char_iterator& it_) :
+        _it (it_),
+        _char (0)
+    {
+        next (); // Decode the first character straight away (reads *it_).
+    }
+
+    char_type operator * () const
+    {
+        return _char; // The most recently decoded character.
+    }
+
+    bool operator == (const basic_utf8_in_iterator &rhs_) const
+    {
+        return _it == rhs_._it; // Only the source positions are compared.
+    }
+
+    bool operator != (const basic_utf8_in_iterator &rhs_) const
+    {
+        return _it != rhs_._it; // Only the source positions are compared.
+    }
+
+    basic_utf8_in_iterator &operator ++ ()
+    {
+        next ();
+        return *this;
+    }
+
+    basic_utf8_in_iterator operator ++ (int)
+    {
+        basic_utf8_in_iterator temp_ = *this;
+
+        next ();
+        return temp_; // State as it was before advancing.
+    }
+
+private:
+    typedef typename std::iterator_traits<char_iterator>::
+        difference_type difference_type;
+    char_iterator _it; // After next (): one past the decoded sequence.
+    char_type _char;
+
+    void next () // Decodes one sequence into _char and steps _it past it.
+    {
+        const char len_ = len (_it);
+        char_type ch_ = *_it & 0xff;
+
+        switch (len_) // len_ == 0 matches no case: the byte is used as is.
+        {
+        case 1:
+            break;
+        case 2: // Lead byte plus one continuation byte.
+            ++_it;
+            ch_ = (ch_ << 6 & 0x7ff) | (*_it & 0x3f);
+            break;
+        case 3: // Lead byte plus two continuation bytes.
+            ++_it;
+            ch_ = (ch_ << 12 & 0xffff) | ((*_it & 0xff) << 6 & 0xfff);
+            ++_it;
+            ch_ |= *_it & 0x3f;
+            break;
+        case 4: // Lead byte plus three continuation bytes.
+            ++_it;
+            ch_ = (ch_ << 18 & 0x1fffff) | ((*_it & 0xff) << 12 & 0x3ffff);
+            ++_it;
+            ch_ |= (*_it & 0xff) << 6 & 0xfff;
+            ++_it;
+            ch_ |= *_it & 0x3f;
+            break;
+        }
+
+        ++_it;
+        _char = ch_;
+    }
+
+    char len (const char_iterator &it_) const
+    { // Sequence length implied by the lead byte at it_, or 0 if none fits.
+        const unsigned char ch_ = *it_;
+
+        return ch_ < 0x80 ? 1 :
+            ch_ >> 5 == 0x06 ? 2 :
+            ch_ >> 4 == 0x0e ? 3 :
+            ch_ >> 3 == 0x1e ? 4 : 0;
+    }
+};
+
+template<typename char_iterator>
+class basic_utf8_out_iterator : // Yields the UTF-8 bytes of source values.
+    public std::iterator<std::input_iterator_tag, char>
+{
+public:
+    basic_utf8_out_iterator () : // No source position; nothing buffered.
+        _count (0),
+        _index (0)
+    {
+    }
+
+    explicit basic_utf8_out_iterator (const char_iterator& it_) :
+        _it (it_),
+        _count (0),
+        _index (0)
+    {
+        next (); // Encode the first value straight away (reads *it_).
+    }
+
+    char operator * () const
+    {
+        return _bytes[_index]; // Current byte of the buffered encoding.
+    }
+
+    bool operator == (const basic_utf8_out_iterator &rhs_) const
+    {
+        return _it == rhs_._it; // Only the source positions are compared.
+    }
+
+    bool operator != (const basic_utf8_out_iterator &rhs_) const
+    {
+        return _it != rhs_._it; // Only the source positions are compared.
+    }
+
+    basic_utf8_out_iterator &operator ++ ()
+    {
+        ++_index;
+
+        if (_index >= _count) // Buffered bytes used up: encode the next.
+        {
+            next ();
+        }
+
+        return *this;
+    }
+
+    basic_utf8_out_iterator operator ++ (int)
+    {
+        basic_utf8_out_iterator temp_ = *this;
+
+        ++_index;
+
+        if (_index >= _count) // Buffered bytes used up: encode the next.
+        {
+            next ();
+        }
+
+        return temp_; // State as it was before advancing.
+    }
+
+private:
+    char_iterator _it; // After next (): one past the encoded value.
+    char _bytes[4]; // Encoding of the current value; _count bytes are set.
+    unsigned char _count;
+    unsigned char _index;
+
+    void next () // Encodes *_it into _bytes, resets _index, advances _it.
+    {
+        const std::size_t ch_ = *_it;
+
+        _count = len (ch_);
+        _index = 0;
+
+        switch (_count)
+        {
+        case 1: // Below 0x80: the value is its own single byte.
+            _bytes[0] = static_cast<char>(ch_);
+            break;
+        case 2: // 0xc0 lead byte, then one 0x80 continuation byte.
+            _bytes[0] = static_cast<char>((ch_ >> 6) | 0xc0);
+            _bytes[1] = (ch_ & 0x3f) | 0x80;
+            break;
+        case 3: // 0xe0 lead byte, then two 0x80 continuation bytes.
+            _bytes[0] = static_cast<char>((ch_ >> 12) | 0xe0);
+            _bytes[1] = ((ch_ >> 6) & 0x3f) | 0x80;
+            _bytes[2] = (ch_ & 0x3f) | 0x80;
+            break;
+        case 4: // 0xf0 lead byte, then three 0x80 continuation bytes.
+            _bytes[0] = static_cast<char>((ch_ >> 18) | 0xf0);
+            _bytes[1] = ((ch_ >> 12) & 0x3f) | 0x80;
+            _bytes[2] = ((ch_ >> 6) & 0x3f) | 0x80;
+            _bytes[3] = (ch_ & 0x3f) | 0x80;
+            break;
+        }
+
+        ++_it;
+    }
+
+    char len (const std::size_t ch_) const
+    { // Bytes needed for ch_; anything from 0x10000 upwards is given 4.
+        return ch_ < 0x80 ? 1 :
+            ch_ < 0x800 ? 2 :
+            ch_ < 0x10000 ? 3 :
+            4;
+    }
+};
+
+template<typename char_iterator, typename char_type>
+class basic_utf16_in_iterator : // Decodes UTF-16 units into char_type.
+    public std::iterator<std::input_iterator_tag, char_type>
+{
+public:
+    basic_utf16_in_iterator () : // No source position; _char is zero.
+        _char (0)
+    {
+    }
+
+    explicit basic_utf16_in_iterator (const char_iterator &it_) :
+        _it (it_),
+        _char (0)
+    {
+        next (); // Decode the first character straight away (reads *it_).
+    }
+
+    char_type operator * () const
+    {
+        return _char; // The most recently decoded character.
+    }
+
+    bool operator == (const basic_utf16_in_iterator &rhs_) const
+    {
+        return _it == rhs_._it; // Only the source positions are compared.
+    }
+
+    bool operator != (const basic_utf16_in_iterator &rhs_) const
+    {
+        return _it != rhs_._it; // Only the source positions are compared.
+    }
+
+    basic_utf16_in_iterator &operator ++ ()
+    {
+        next ();
+        return *this;
+    }
+
+    basic_utf16_in_iterator operator ++ (int)
+    {
+        basic_utf16_in_iterator temp_ = *this;
+
+        next ();
+        return temp_; // State as it was before advancing.
+    }
+
+private:
+    typedef typename std::iterator_traits<char_iterator>::
+        difference_type difference_type;
+    char_iterator _it; // After next (): one past the decoded unit(s).
+    char_type _char;
+
+    void next () // Decodes one character into _char and steps _it past it.
+    {
+        char_type ch_ = *_it & 0xffff;
+        // 0xd800-0xdbff is a lead unit: combine it with the unit after it.
+        if (ch_ >= 0xd800 && ch_ <= 0xdbff)
+        {
+            const char_type surrogate_ = *++_it & 0xffff;
+
+            ch_ = (((ch_ - 0xd800) << 10) | (surrogate_ - 0xdc00)) + 0x10000;
+        }
+
+        ++_it;
+        _char = ch_;
+    }
+};
+
+template<typename char_iterator>
+class basic_utf16_out_iterator :
+    public std::iterator<std::input_iterator_tag, wchar_t>
+{
+public:
+    basic_utf16_out_iterator () : // No source position; nothing buffered.
+        _count (0),
+        _index (0)
+    {
+    }
+
+    explicit basic_utf16_out_iterator (const char_iterator& it_) :
+        _it (it_),
+        _count (0),
+        _index (0)
+    {
+        next (); // Encode the first value straight away (reads *it_).
+    }
+
+    wchar_t operator * () const
+    {
+        return _chars[_index]; // Current unit of the buffered encoding.
+    }
+
+    bool operator == (const basic_utf16_out_iterator &rhs_) const
+    {
+        return _it == rhs_._it; // Only the source positions are compared.
+    }
+
+    bool operator != (const basic_utf16_out_iterator &rhs_) const
+    {
+        return _it != rhs_._it; // Only the source positions are compared.
+    }
+
+    basic_utf16_out_iterator &operator ++ ()
+    {
+        ++_index;
+
+        if (_index >= _count) // Buffered units used up: encode the next.
+        {
+            next ();
+        }
+
+        return *this;
+    }
+
+    basic_utf16_out_iterator operator ++ (int)
+    {
+        basic_utf16_out_iterator temp_ = *this;
+
+        ++_index;
+
+        if (_index >= _count) // Buffered units used up: encode the next.
+        {
+            next ();
+        }
+
+        return temp_; // State as it was before advancing.
+    }
+
+private:
+    char_iterator _it;
+    wchar_t _chars[2];
+    unsigned char _count;
+    unsigned char _index;
+
+    void next () // Encodes the value at *_it into _chars, then advances _it.
+    {
+        const std::size_t ch_ = *_it;
+
+        _count = len (ch_);
+        _index = 0;
+
+        switch (_count)
+        {
+        case 1: // Up to 0xffff: stored as a single unit.
+            _chars[0] = static_cast<wchar_t>(ch_);
+            break;
+        case 2: // Above 0xffff: lead unit from 0xd800, trail from 0xdc00.
+            _chars[0] = static_cast<wchar_t>((ch_ >> 10) + 0xd800u -
+                (0x10000 >> 10));
+            _chars[1] = static_cast<wchar_t>((ch_ & 0x3ff) + 0xdc00u);
+            break;
+        }
+
+        ++_it;
+    }
+
+    char len (const std::size_t ch_) const
+    {
+        return ch_ > 0xffff ? 2 : 1; // Units needed: 2 above 0xffff, else 1.
+    }
+};
+}
+
+#endif
diff --git a/src/test.cpp b/src/test.cpp
index 7730549..c4c52f7 100644
--- a/src/test.cpp
+++ b/src/test.cpp
@@ -4,6 +4,15 @@
  * Licensed under the GNU GPL v2.
  */
 
+// this file is auto generated from grammar/grammar.y
+// but it does not work yet
+// #include "grammar.h"
+
+#include "lexertl/generator.hpp"
+#include "lexertl/lookup.hpp"
+#include "lexertl/rules.hpp"
+#include "lexertl/state_machine.hpp"
+
 int main() {
     return 0;
 }