// string_token.hpp // Copyright (c) 2005-2012 Ben Hanson (http://www.benhanson.net/) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef LEXERTL_STRING_TOKEN_HPP #define LEXERTL_STRING_TOKEN_HPP #include "char_traits.hpp" #include // Needed by GCC 4.4 #include #include #include #include namespace lexertl { template struct basic_string_token { typedef ch_type char_type; typedef basic_char_traits char_traits; typedef typename char_traits::index_type index_type; typedef std::pair range; typedef std::vector range_vector; typedef std::basic_string string; typedef basic_string_token string_token; range_vector _ranges; basic_string_token () : _ranges () { } basic_string_token (char_type ch_) : _ranges () { insert (range (ch_, ch_)); } basic_string_token (char_type first_, char_type second_) : _ranges () { insert (range (first_, second_)); } void clear () { _ranges.clear (); } bool empty () const { return _ranges.empty (); } bool any () const { return _ranges.size () == 1 && _ranges.front ().first == 0 && _ranges.front ().second == char_traits::max_val (); } bool operator < (const basic_string_token &rhs_) const { return _ranges < rhs_._ranges; } bool operator == (const basic_string_token &rhs_) const { return _ranges == rhs_._ranges; } bool negatable () const { std::size_t size_ = 0; typename range_vector::const_iterator iter_ = _ranges.begin (); typename range_vector::const_iterator end_ = _ranges.end (); for (; iter_ != end_; ++iter_) { size_ += static_cast(iter_->second) + 1 - static_cast(iter_->first); } return size_ > static_cast(char_traits::max_val ()) / 2; } void swap (basic_string_token &rhs_) { _ranges.swap (rhs_._ranges); } void insert (const basic_string_token &rhs_) { typename range_vector::const_iterator iter_ = rhs_._ranges.begin (); typename range_vector::const_iterator end_ = rhs_._ranges.end (); for (; iter_ != end_; ++iter_) { insert (*iter_); } } // Deliberately pass by value - may modify typename range_vector::iterator insert (range rhs_) { bool insert_ = true; typename range_vector::iterator iter_ = _ranges.begin (); typename range_vector::const_iterator end_ = _ranges.end (); while (iter_ != end_) { // follows current item if (rhs_.first > iter_->second) { if (rhs_.first == iter_->second + 1) { // Auto normalise rhs_.first = iter_->first; } else { // No intersection, consider next ++iter_; continue; } } // Precedes current item else if (rhs_.second < iter_->first) { if (rhs_.second == iter_->first - 1) { // Auto normalise rhs_.second = iter_->second; } else { // insert here break; } } else { // overlap (under) if (rhs_.first < iter_->first) { if (rhs_.second < iter_->second) { rhs_.second = iter_->second; } } // overlap (over) else if (rhs_.second > iter_->second) { if (rhs_.first > iter_->first) { rhs_.first = iter_->first; } } // subset else { insert_ = false; iter_ = _ranges.end (); break; } } // Code minimisation: this always applies unless we have already // exited the loop, or "continue" executed. iter_ = _ranges.erase (iter_); end_ = _ranges.end (); } if (insert_) { iter_ = _ranges.insert(iter_, rhs_); } return iter_; } void negate () { index_type next_ = 0; const index_type max_ = char_traits::max_val (); string_token temp_; typename range_vector::iterator iter_ = _ranges.begin (); typename range_vector::const_iterator end_ = _ranges.end (); bool finished_ = false; for (; iter_ != end_; ++iter_) { if (next_ < iter_->first) { temp_.insert (range (next_, iter_->first - 1)); } if (iter_->second < max_) { next_ = iter_->second + 1; } else { finished_ = true; break; } } if (!finished_) { temp_.insert (range (next_, max_)); } swap (temp_); } void intersect (basic_string_token &rhs_, basic_string_token &overlap_) { typename range_vector::iterator lhs_iter_ = _ranges.begin (); typename range_vector::const_iterator lhs_end_ = _ranges.end (); typename range_vector::iterator rhs_iter_ = rhs_._ranges.begin (); typename range_vector::const_iterator rhs_end_ = rhs_._ranges.end (); while (lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_) { if (rhs_iter_->first > lhs_iter_->second) { ++lhs_iter_; } else if (rhs_iter_->second < lhs_iter_->first) { ++rhs_iter_; } else { range range_; if (rhs_iter_->first > lhs_iter_->first) { range_.first = rhs_iter_->first; } else { range_.first = lhs_iter_->first; } if (rhs_iter_->second < lhs_iter_->second) { range_.second = rhs_iter_->second; } else { range_.second = lhs_iter_->second; } adjust (range_, *this, lhs_iter_, lhs_end_); adjust (range_, rhs_, rhs_iter_, rhs_end_); overlap_.insert (range_); } } } void remove (basic_string_token &rhs_) { typename range_vector::iterator lhs_iter_ = _ranges.begin (); typename range_vector::const_iterator lhs_end_ = _ranges.end (); typename range_vector::iterator rhs_iter_ = rhs_._ranges.begin (); typename range_vector::const_iterator rhs_end_ = rhs_._ranges.end (); while (lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_) { if (rhs_iter_->first > lhs_iter_->second) { ++lhs_iter_; } else if (rhs_iter_->second < lhs_iter_->first) { ++rhs_iter_; } else { range range_; if (rhs_iter_->first > lhs_iter_->first) { range_.first = rhs_iter_->first; } else { range_.first = lhs_iter_->first; } if (rhs_iter_->second < lhs_iter_->second) { range_.second = rhs_iter_->second; } else { range_.second = lhs_iter_->second; } adjust (range_, *this, lhs_iter_, lhs_end_); } } } static string escape_char (const typename char_traits::index_type ch_) { string out_; switch (ch_) { case '\0': out_ += '\\'; out_ += '0'; break; case '\a': out_ += '\\'; out_ += 'a'; break; case '\b': out_ += '\\'; out_ += 'b'; break; case 27: out_ += '\\'; out_ += 'x'; out_ += '1'; out_ += 'b'; break; case '\f': out_ += '\\'; out_ += 'f'; break; case '\n': out_ += '\\'; out_ += 'n'; break; case '\r': out_ += '\\'; out_ += 'r'; break; case '\t': out_ += '\\'; out_ += 't'; break; case '\v': out_ += '\\'; out_ += 'v'; break; case '\\': out_ += '\\'; out_ += '\\'; break; case '"': out_ += '\\'; out_ += '"'; break; case '\'': out_ += '\\'; out_ += '\''; break; default: { if (ch_ < 32 || ch_ > 126) { std::basic_stringstream ss_; out_ += '\\'; out_ += 'x'; ss_ << std::hex << static_cast (ch_); out_ += ss_.str (); } else { out_ += ch_; } break; } } return out_; } private: void adjust (const range &range_, basic_string_token &token_, typename range_vector::iterator &iter_, typename range_vector::const_iterator &end_) { if (range_.first > iter_->first) { const index_type second_ = iter_->second; iter_->second = range_.first - 1; if (range_.second < second_) { range new_range_ (range_.second + 1, second_); iter_ = token_.insert (new_range_); end_ = token_._ranges.end (); } } else if (range_.second < iter_->second) { iter_->first = range_.second + 1; } else { iter_ = token_._ranges.erase (iter_); end_ = token_._ranges.end (); } } }; } #endif