// lookup.hpp // Copyright (c) 2009-2012 Ben Hanson (http://www.benhanson.net/) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef LEXERTL_LOOKUP_HPP #define LEXERTL_LOOKUP_HPP #include #include "bool.hpp" #include "match_results.hpp" #include "state_machine.hpp" namespace lexertl { namespace detail { template struct bol_state { bol_state (const bool) { } }; template<> struct bol_state { bool _bol; bool _end_bol; bol_state (const bool bol_) : _bol (bol_), _end_bol (bol_) { } }; template struct eol_state { }; template struct eol_state { id_type _EOL_state; eol_state () : _EOL_state (0) { } }; template struct multi_state_state { multi_state_state (const id_type) { } }; template struct multi_state_state { id_type _start_state; multi_state_state (const id_type state_) : _start_state (state_) { } }; template struct recursive_state { recursive_state (const id_type *) { } }; template struct recursive_state { bool _pop; id_type _push_dfa; recursive_state (const id_type *ptr_) : _pop ((*ptr_ & pop_dfa_bit) != 0), _push_dfa (*(ptr_ + push_dfa_index)) { } }; template struct lookup_state { typedef basic_internals internals; const id_type *_lookup; id_type _dfa_alphabet; const id_type *_dfa; const id_type *_ptr; bool _end_state; id_type _id; id_type _uid; bol_state<(flags & bol_bit) != 0> _bol_state; eol_state _eol_state; multi_state_state _multi_state_state; recursive_state _recursive_state; lookup_state (const internals &internals_, const bool bol_, const id_type state_) : _lookup (&internals_._lookup[state_]->front ()), _dfa_alphabet (internals_._dfa_alphabet[state_]), _dfa (&internals_._dfa[state_]->front ()), _ptr (_dfa + _dfa_alphabet), _end_state (*_ptr != 0), _id (*(_ptr + id_index)), _uid (*(_ptr + user_id_index)), _bol_state (bol_), _eol_state (), _multi_state_state (state_), _recursive_state (_ptr) { } void reset_recursive (const false_ &) { // Do nothing } void reset_recursive (const true_ &) { _recursive_state._pop = (*_ptr & pop_dfa_bit) != 0; _recursive_state._push_dfa = *(_ptr + push_dfa_index); } void bol_start_state (const false_ &) { // Do nothing } void bol_start_state (const true_ &) { if (_bol_state._bol) { const id_type state_ = *_dfa; if (state_) { _ptr = &_dfa[state_ * _dfa_alphabet]; } } } template bool eol (const char_type, const false_ &) { return false; } template bool eol (const char_type curr_, const true_ &) { bool ret_ = false; _eol_state._EOL_state = _ptr[eol_index]; ret_ = _eol_state._EOL_state && curr_ == '\n'; if (ret_) { _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet]; } return ret_; } template id_type next_char (const char_type prev_char_, const false_ &) { const id_type state_= _ptr[_lookup [static_cast(prev_char_)]]; if (state_ != 0) { _ptr = &_dfa[state_ * _dfa_alphabet]; } return state_; } template id_type next_char (const char_type prev_char_, const true_ &) { const std::size_t bytes_ = sizeof (char_type) < 3 ? sizeof (char_type) : 3; const std::size_t shift_[] = {0, 8, 16}; id_type state_= 0; for (std::size_t i_ = 0; i_ < bytes_; ++i_) { state_ = _ptr[_lookup[static_cast((prev_char_ >> shift_[bytes_ - 1 - i_]) & 0xff)]]; if (state_ == 0) { break; } _ptr = &_dfa[state_ * _dfa_alphabet]; } return state_; } template void bol (const char_type, const false_ &) { // Do nothing } template void bol (const char_type prev_char_, const true_ &) { _bol_state._bol = prev_char_ == '\n'; } void eol (const id_type, const false_ &) { // Do nothing } void eol (const id_type err_val_, const true_ &) { _eol_state._EOL_state = err_val_; } void reset_start_state (const false_ &) { // Do nothing } void reset_start_state (const true_ &) { _multi_state_state._start_state = *(_ptr + next_dfa_index); } void reset_end_bol (const false_ &) { // Do nothing } void reset_end_bol (const true_ &) { _bol_state._end_bol = _bol_state._bol; } template void end_state (iter_type &end_token_, iter_type &curr_) { if (*_ptr) { _end_state = true; reset_end_bol (bool_<(flags & bol_bit) != 0> ()); _id = *(_ptr + id_index); _uid = *(_ptr + user_id_index); reset_recursive (bool_<(flags & recursive_bit) != 0> ()); reset_start_state (bool_<(flags & multi_state_bit) != 0> ()); end_token_ = curr_; } } template void check_eol (iter_type &, iter_type &, const id_type, const char_type, const false_ &) { // Do nothing } template void check_eol (iter_type &end_token_, iter_type &curr_, const id_type npos, const char_type eoi_, const true_ &) { if (_eol_state._EOL_state != npos && curr_ == eoi_) { _eol_state._EOL_state = _ptr[eol_index]; if (_eol_state._EOL_state) { _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet]; end_state (end_token_, curr_); } } } template void pop (results &, const false_ &) { // Nothing to do } template void pop (results &results_, const true_ &) { if (_recursive_state._pop) { _multi_state_state._start_state = results_.stack.top ().first; results_.stack.pop (); } else if (_recursive_state._push_dfa != results::npos ()) { results_.stack.push (typename results::id_type_pair (_recursive_state._push_dfa, _id)); } } template bool id_eoi (const id_type eoi_, const results &, const false_ &) { return _id == eoi_; } template bool id_eoi (const id_type eoi_, const results &results_, const true_ &) { return _id == eoi_ || (_recursive_state._pop && !results_.stack.empty () && results_.stack.top ().second == eoi_); } void start_state (id_type &, const false_ &) { // Do nothing } void start_state (id_type &start_state_, const true_ &) { start_state_ = _multi_state_state._start_state; } void bol (bool &, const false_ &) { // Do nothing } void bol (bool &end_bol_, const true_ &) { end_bol_ = _bol_state._end_bol; } }; template void inc_end (results &, const false_ &) { // Do nothing } template void inc_end (results &results_, const true_ &) { ++results_.end; } template void next (const basic_state_machine::value_type, id_type> &sm_, results &results_, const bool_ &compressed_, const bool_ &recursive_) { const basic_internals &internals_ = sm_.data (); typename results::iter_type end_token_ = results_.end; skip: typename results::iter_type curr_ = results_.end; results_.start = curr_; again: if (curr_ == results_.eoi) { results_.id = internals_._eoi; results_.user_id = results::npos (); return; } lookup_state lu_state_ (internals_, results_.bol, results_.state); lu_state_.bol_start_state (bool_<(flags & bol_bit) != 0> ()); while (curr_ != results_.eoi) { if (!lu_state_.eol (*curr_, bool_<(flags & eol_bit) != 0> ())) { const typename results::char_type prev_char_ = *curr_++; const id_type state_ = lu_state_.next_char (prev_char_, compressed_); lu_state_.bol (prev_char_, bool_<(flags & bol_bit) != 0> ()); if (state_ == 0) { lu_state_.eol (results::npos (), bool_<(flags & eol_bit) != 0> ()); break; } } lu_state_.end_state (end_token_, curr_); } lu_state_.check_eol (end_token_, curr_, results::npos (), results_.eoi, bool_<(flags & eol_bit) != 0> ()); if (lu_state_._end_state) { // Return longest match lu_state_.pop (results_, recursive_); lu_state_.start_state (results_.state, bool_<(flags & multi_state_bit) != 0> ()); lu_state_.bol (results_.bol, bool_<(flags & bol_bit) != 0> ()); results_.end = end_token_; if (lu_state_._id == sm_.skip ()) goto skip; if (lu_state_.id_eoi (internals_._eoi, results_, recursive_)) { curr_ = end_token_; goto again; } } else { results_.end = end_token_; results_.bol = *results_.end == '\n'; results_.start = results_.end; // No match causes char to be skipped inc_end (results_, bool_<(flags & advance_bit) != 0> ()); lu_state_._id = results::npos (); lu_state_._uid = results::npos (); } results_.id = lu_state_._id; results_.user_id = lu_state_._uid; } } template void lookup (const basic_state_machine::value_type, id_type> &sm_, match_results &results_) { // If this asserts, you have either not defined all the correct // flags, or you should be using recursive_match_results instead // of match_results. assert ((sm_.data ()._features & flags) == sm_.data ()._features); detail::next (sm_, results_, bool_<(sizeof (typename std::iterator_traits::value_type) > 1)> (), false_ ()); } template void lookup (const basic_state_machine::value_type, id_type> &sm_, recursive_match_results &results_) { // If this asserts, you have not defined all the correct flags assert ((sm_.data ()._features & flags) == sm_.data ()._features); detail::next (sm_, results_, bool_<(sizeof(typename std::iterator_traits:: value_type) > 1)> (), true_ ()); } } #endif