boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM)
#define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/detail/iterator.hpp>
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
#include <vector>
namespace boost { namespace spirit { namespace lex { namespace lexertl
{
///////////////////////////////////////////////////////////////////////////
template<typename Iterator>
class basic_iterator_tokeniser
{
public:
typedef std::vector<std::size_t> size_t_vector;
typedef typename boost::detail::iterator_traits<Iterator>::value_type
char_type;
static std::size_t next (
boost::lexer::basic_state_machine<char_type> const& state_machine_
, std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
, Iterator const& end_, std::size_t& unique_id_)
{
if (start_token_ == end_)
{
unique_id_ = boost::lexer::npos;
return 0;
}
bool bol = bol_;
boost::lexer::detail::internals const& internals_ =
state_machine_.data();
again:
std::size_t const* lookup_ = &internals_._lookup[dfa_state_]->
front ();
std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_];
std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front ();
std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + boost::lexer::id_index);
std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
std::size_t end_start_state_ = dfa_state_;
bool end_bol_ = bol_;
Iterator end_token_ = start_token_;
while (curr_ != end_)
{
std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
if (BOL_state_ && bol)
{
ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
}
else if (EOL_state_ && *curr_ == '\n')
{
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
}
else
{
typedef typename
boost::detail::iterator_traits<Iterator>::value_type
value_type;
typedef typename
boost::lexer::char_traits<value_type>::index_type
index_type;
index_type index =
boost::lexer::char_traits<value_type>::call(*curr_++);
bol = (index == '\n') ? true : false;
std::size_t const state_ = ptr_[
lookup_[static_cast<std::size_t>(index)]];
if (state_ == 0)
{
break;
}
ptr_ = &dfa_[state_ * dfa_alphabet_];
}
if (*ptr_)
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
end_start_state_ = *(ptr_ + boost::lexer::state_index);
end_bol_ = bol;
end_token_ = curr_;
}
}
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
if (EOL_state_ && curr_ == end_)
{
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
if (*ptr_)
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
end_start_state_ = *(ptr_ + boost::lexer::state_index);
end_bol_ = bol;
end_token_ = curr_;
}
}
if (end_state_) {
// return longest match
dfa_state_ = end_start_state_;
start_token_ = end_token_;
if (id_ == 0)
{
bol = end_bol_;
goto again;
}
else
{
bol_ = end_bol_;
}
}
else {
bol_ = (*start_token_ == '\n') ? true : false;
id_ = boost::lexer::npos;
uid_ = boost::lexer::npos;
}
unique_id_ = uid_;
return id_;
}
///////////////////////////////////////////////////////////////////////
static std::size_t next (
boost::lexer::basic_state_machine<char_type> const& state_machine_
, bool& bol_, Iterator &start_token_, Iterator const& end_
, std::size_t& unique_id_)
{
if (start_token_ == end_)
{
unique_id_ = boost::lexer::npos;
return 0;
}
bool bol = bol_;
std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + boost::lexer::id_index);
std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
bool end_bol_ = bol_;
Iterator end_token_ = start_token_;
while (curr_ != end_)
{
std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
if (BOL_state_ && bol)
{
ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
}
else if (EOL_state_ && *curr_ == '\n')
{
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
}
else
{
typedef typename
boost::detail::iterator_traits<Iterator>::value_type
value_type;
typedef typename
boost::lexer::char_traits<value_type>::index_type
index_type;
index_type index =
boost::lexer::char_traits<value_type>::call(*curr_++);
bol = (index == '\n') ? true : false;
std::size_t const state_ = ptr_[
lookup_[static_cast<std::size_t>(index)]];
if (state_ == 0)
{
break;
}
ptr_ = &dfa_[state_ * dfa_alphabet_];
}
if (*ptr_)
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
end_bol_ = bol;
end_token_ = curr_;
}
}
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
if (EOL_state_ && curr_ == end_)
{
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
if (*ptr_)
{
end_state_ = true;
id_ = *(ptr_ + boost::lexer::id_index);
uid_ = *(ptr_ + boost::lexer::unique_id_index);
end_bol_ = bol;
end_token_ = curr_;
}
}
if (end_state_) {
// return longest match
bol_ = end_bol_;
start_token_ = end_token_;
}
else {
bol_ = *start_token_ == '\n';
id_ = boost::lexer::npos;
uid_ = boost::lexer::npos;
}
unique_id_ = uid_;
return id_;
}
};
}}}}
#endif