Boost C++ Libraries

"...one of the most highly regarded and expertly designed C++ library projects in the world." — Herb Sutter and Andrei Alexandrescu, C++ Coding Standards

boost/regex/v4/c_regex_traits.hpp

/*
 *
 * Copyright (c) 2004
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
 
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         c_regex_traits.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
  */

#ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
#define BOOST_C_REGEX_TRAITS_HPP_INCLUDED

#ifndef BOOST_REGEX_CONFIG_HPP
#include <boost/regex/config.hpp>
#endif
#ifndef BOOST_REGEX_WORKAROUND_HPP
#include <boost/regex/v4/regex_workaround.hpp>
#endif

#include <cctype>

#ifdef BOOST_NO_STDC_NAMESPACE
// When BOOST_NO_STDC_NAMESPACE is defined, make the global-namespace
// ::strlen and ::tolower visible as std::strlen and std::tolower, so that
// the std::-qualified calls further down in this header still compile.
namespace std{
   using ::strlen; using ::tolower;
}
#endif

// Include the ABI prefix header (only when BOOST_HAS_ABI_HEADERS is defined).
// Under MSVC, warnings 4103 and 4244 are disabled around the include and the
// previous warning state is restored straight afterwards.
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103 4244)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

namespace boost{

   namespace BOOST_REGEX_DETAIL_NS {

      //
      // Bit flags used to build character-class masks. Most classes own a
      // single bit; char_class_alnum and char_class_graph are unions of
      // other flags and therefore have no bit of their own.
      //
      enum
      {
         char_class_space      = 0x0001,
         char_class_print      = 0x0002,
         char_class_cntrl      = 0x0004,
         char_class_upper      = 0x0008,
         char_class_lower      = 0x0010,
         char_class_alpha      = 0x0020,
         char_class_digit      = 0x0040,
         char_class_punct      = 0x0080,
         char_class_xdigit     = 0x0100,
         // composite classes:
         char_class_alnum      = char_class_alpha | char_class_digit,
         char_class_graph      = char_class_alnum | char_class_punct,
         // further single-bit classes:
         char_class_blank      = 0x0200,
         char_class_word       = 0x0400,
         char_class_unicode    = 0x0800,
         char_class_horizontal = 0x1000,
         char_class_vertical   = 0x2000
      };

   }

// Primary template: declared but not defined here. This header provides
// explicit specializations for char and (unless BOOST_NO_WREGEX is defined)
// wchar_t.
template <typename charT>
struct c_regex_traits;

//
// Narrow-character specialization of c_regex_traits.
// The static members that are only declared here are defined inline
// further down in this header.
//
template<>
struct c_regex_traits<char>
{
   // nested types:
   typedef char              char_type;
   typedef std::size_t       size_type;
   typedef std::string       string_type;
   typedef boost::uint32_t   char_class_type;
   // empty placeholder type; imbue() and getloc() carry no state:
   struct locale_type{};

   c_regex_traits(){}

   // length of the null-terminated string p, via std::strlen:
   static size_type length(const char_type* p) { return (std::strlen)(p); }

   // case-sensitive translation is the identity:
   char translate(char c) const { return c; }

   // case-insensitive translation lower-cases via std::tolower; the
   // character goes through unsigned char before being passed on:
   char translate_nocase(char c) const
   {
      const unsigned char uc = static_cast<unsigned char>(c);
      return static_cast<char>((std::tolower)(uc));
   }

   // sort keys:
   static string_type BOOST_REGEX_CALL transform(const char* p1, const char* p2);
   static string_type BOOST_REGEX_CALL transform_primary(const char* p1, const char* p2);

   // name lookup:
   static char_class_type BOOST_REGEX_CALL lookup_classname(const char* p1, const char* p2);
   static string_type BOOST_REGEX_CALL lookup_collatename(const char* p1, const char* p2);

   // classification and digit value:
   static bool BOOST_REGEX_CALL isctype(char, char_class_type);
   static int BOOST_REGEX_CALL value(char, int);

   // imbue() hands its argument straight back, getloc() returns a
   // default-constructed locale_type:
   locale_type imbue(locale_type l) { return l; }
   locale_type getloc()const { return locale_type(); }

private:
   // copying is disabled: declared private and never defined.
   c_regex_traits(const c_regex_traits&);
   c_regex_traits& operator=(const c_regex_traits&);
};

#ifndef BOOST_NO_WREGEX
//
// Wide-character specialization of c_regex_traits.
// The static members that are only declared here are defined inline
// further down in this header.
//
template<>
struct c_regex_traits<wchar_t>
{
   // nested types:
   typedef wchar_t           char_type;
   typedef std::size_t       size_type;
   typedef std::wstring      string_type;
   typedef boost::uint32_t   char_class_type;
   // empty placeholder type; imbue() and getloc() carry no state:
   struct locale_type{};

   c_regex_traits(){}

   // length of the null-terminated wide string p, via std::wcslen:
   static size_type length(const char_type* p) { return (std::wcslen)(p); }

   // case-sensitive translation is the identity:
   wchar_t translate(wchar_t c) const { return c; }

   // case-insensitive translation lower-cases via std::towlower:
   wchar_t translate_nocase(wchar_t c) const { return (std::towlower)(c); }

   // sort keys:
   static string_type BOOST_REGEX_CALL transform(const wchar_t* p1, const wchar_t* p2);
   static string_type BOOST_REGEX_CALL transform_primary(const wchar_t* p1, const wchar_t* p2);

   // name lookup:
   static char_class_type BOOST_REGEX_CALL lookup_classname(const wchar_t* p1, const wchar_t* p2);
   static string_type BOOST_REGEX_CALL lookup_collatename(const wchar_t* p1, const wchar_t* p2);

   // classification and digit value:
   static bool BOOST_REGEX_CALL isctype(wchar_t, char_class_type);
   static int BOOST_REGEX_CALL value(wchar_t, int);

   // imbue() hands its argument straight back, getloc() returns a
   // default-constructed locale_type:
   locale_type imbue(locale_type l) { return l; }
   locale_type getloc()const { return locale_type(); }

private:
   // copying is disabled: declared private and never defined.
   c_regex_traits(const c_regex_traits&);
   c_regex_traits& operator=(const c_regex_traits&);
};

#endif // BOOST_NO_WREGEX

//
// Returns the sort key that std::strxfrm produces for the characters
// in [p1, p2).
//
// The key is written into a buffer that starts at 10 characters and is
// grown until std::strxfrm reports that the whole key (plus terminator)
// fitted.
//
inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::transform(const char* p1, const char* p2)
{
   std::string result(10, ' ');
   std::size_t s = result.size();
   std::size_t r;
   std::string src(p1, p2);
   //
   // std::strxfrm writes at most s characters *including* the terminating
   // null and returns the key length *excluding* it. If the return value is
   // >= s the buffer contents are indeterminate, so a return value equal to
   // s must also trigger another pass with a larger buffer:
   //
   while (s <= (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
   {
#if defined(_CPPLIB_VER)
      //
      // A bug in VC11 and 12 causes the program to hang if we pass a null-string
      // to std::strxfrm, but only for certain locales :-(
      // Probably affects Intel and Clang or any compiler using the VC std library (Dinkumware).
      //
      if (r == INT_MAX)
      {
         result.erase();
         result.insert(result.begin(), static_cast<char>(0));
         return result;
      }
#endif
      // r >= s here, so this always grows the buffer past the required size:
      result.append(r - s + 3, ' ');
      s = result.size();
   }
   // drop the unused tail of the buffer (including the terminating null):
   result.erase(r);
   return result;
}

//
// Returns a reduced form of the sort key for the characters in [p1, p2).
// How the key from transform() is reduced depends on the sort-key syntax
// reported by find_sort_syntax(), which is queried once and cached in a
// function-local static together with the delimiter / field position.
//
// The returned string is never empty: an empty key is replaced by a string
// holding a single null character.
//
inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
{
   static char s_delim;
   static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
   std::string result;
   //
   // What we do here depends upon the format of the
   // sort key returned by this->transform:
   //
   switch (s_collate_type)
   {
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
      // the best we can do is translate to lower case, then get a regular sort key:
   {
      result.assign(p1, p2);
      for (std::string::size_type i = 0; i < result.size(); ++i)
         result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
      // c_str() is valid for an empty string, whereas dereferencing
      // begin() of an empty string is not:
      const char* first = result.c_str();
      result = transform(first, first + result.size());
      break;
   }
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
   {
      // get a regular sort key, and then truncate it:
      result = transform(p1, p2);
      // s_delim is used as the truncation position here. A key that is
      // already shorter (e.g. for empty input) is left alone, because
      // erase() throws std::out_of_range for a position past the end:
      const std::string::size_type cut = static_cast<std::string::size_type>(s_delim);
      if (cut < result.size())
         result.erase(cut);
      break;
   }
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
   {
      // get a regular sort key, and then truncate everything after the delim:
      result = transform(p1, p2);
      // a key that starts with the delimiter is kept whole:
      if ((!result.empty()) && (result[0] == s_delim))
         break;
      std::size_t i;
      for (i = 0; i < result.size(); ++i)
      {
         if (result[i] == s_delim)
            break;
      }
      result.erase(i);
      break;
   }
   }
   if (result.empty())
      result = std::string(1, char(0));
   return result;
}

//
// Maps the character-class name in [p1, p2) to its class mask.
// The name is looked up as given first; if that fails (negative id) it is
// lower-cased with std::tolower and looked up a second time.
//
inline c_regex_traits<char>::char_class_type BOOST_REGEX_CALL c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
{
   using namespace BOOST_REGEX_DETAIL_NS;
   //
   // Indexed by (class id + 1), so an id of -1 selects the leading 0 entry:
   //
   static const char_class_type masks[] =
   {
      0,
      char_class_alnum, char_class_alpha, char_class_blank, char_class_cntrl,
      char_class_digit, char_class_digit,
      char_class_graph,
      char_class_horizontal,
      char_class_lower, char_class_lower,
      char_class_print, char_class_punct,
      char_class_space, char_class_space,
      char_class_upper, char_class_unicode, char_class_upper,
      char_class_vertical,
      char_class_alnum | char_class_word, char_class_alnum | char_class_word,
      char_class_xdigit,
   };

   int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
   if (idx < 0)
   {
      // no match as written: retry with a lower-cased copy of the name.
      std::string lowered(p1, p2);
      for (std::string::iterator it = lowered.begin(); it != lowered.end(); ++it)
         *it = static_cast<char>((std::tolower)(static_cast<unsigned char>(*it)));
      idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*lowered.begin(), &*lowered.begin() + lowered.size());
   }
   BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
   return masks[idx + 1];
}

//
// Returns true if c belongs to at least one of the character classes whose
// bits are set in mask. The classes are tested in a fixed order and the
// first match wins.
//
inline bool BOOST_REGEX_CALL c_regex_traits<char>::isctype(char c, char_class_type mask)
{
   using namespace BOOST_REGEX_DETAIL_NS;
   // the <cctype> functions are always handed the unsigned char value:
   const unsigned char uc = static_cast<unsigned char>(c);

   if ((mask & char_class_space) && (std::isspace)(uc))
      return true;
   if ((mask & char_class_print) && (std::isprint)(uc))
      return true;
   if ((mask & char_class_cntrl) && (std::iscntrl)(uc))
      return true;
   if ((mask & char_class_upper) && (std::isupper)(uc))
      return true;
   if ((mask & char_class_lower) && (std::islower)(uc))
      return true;
   if ((mask & char_class_alpha) && (std::isalpha)(uc))
      return true;
   if ((mask & char_class_digit) && (std::isdigit)(uc))
      return true;
   if ((mask & char_class_punct) && (std::ispunct)(uc))
      return true;
   if ((mask & char_class_xdigit) && (std::isxdigit)(uc))
      return true;
   // blank: whitespace that is not a separator
   if ((mask & char_class_blank) && (std::isspace)(uc) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
      return true;
   // the word bit on its own only covers the underscore
   if ((mask & char_class_word) && (c == '_'))
      return true;
   // vertical: a separator or a vertical tab
   if ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
      return true;
   // horizontal: whitespace that is neither a separator nor a vertical tab
   return (mask & char_class_horizontal) && (std::isspace)(uc) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v');
}

//
// Resolves the collating-element name in [p1, p2) through
// lookup_default_collate_name(). If that yields nothing and the name is a
// single character, the character itself is returned; otherwise the
// (possibly empty) lookup result is returned.
//
inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
{
   std::string key(p1, p2);
   std::string found = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(key);
   if (!found.empty())
      return found;
   if (p2 - p1 == 1)
      found.assign(1, *p1);
   return found;
}

//
// Returns the numeric value of the single character c when parsed by
// std::strtol in the given radix, or -1 if std::strtol consumed nothing.
//
inline int BOOST_REGEX_CALL c_regex_traits<char>::value(char c, int radix)
{
   // one-character, null-terminated string for strtol:
   char digit[2] = { c, '\0', };
   char* stop;
   const long parsed = std::strtol(digit, &stop, radix);
   // stop == digit means no conversion took place:
   return (stop == digit) ? -1 : static_cast<int>(parsed);
}

#ifndef BOOST_NO_WREGEX

//
// Returns the sort key that std::wcsxfrm produces for the characters
// in [p1, p2).
//
// The key is written into a buffer that starts at 10 characters and is
// grown until std::wcsxfrm reports that the whole key (plus terminator)
// fitted.
//
inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
{
   std::size_t r;
   std::size_t s = 10;
   std::wstring src(p1, p2);
   std::wstring result(s, L' ');
   //
   // std::wcsxfrm writes at most s wide characters *including* the
   // terminating null and returns the key length *excluding* it. If the
   // return value is >= s the buffer contents are indeterminate, so a return
   // value equal to s must also trigger another pass with a larger buffer:
   //
   while (s <= (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
   {
#if defined(_CPPLIB_VER)
      //
      // A bug in VC11 and 12 causes the program to hang if we pass a null-string
      // to std::wcsxfrm, but only for certain locales :-(
      // Probably affects Intel and Clang or any compiler using the VC std library (Dinkumware).
      //
      if (r == INT_MAX)
      {
         result.erase();
         result.insert(result.begin(), static_cast<wchar_t>(0));
         return result;
      }
#endif
      // r >= s here, so this always grows the buffer past the required size:
      result.append(r - s + 3, L' ');
      s = result.size();
   }
   // drop the unused tail of the buffer (including the terminating null):
   result.erase(r);
   return result;
}

//
// Returns a reduced form of the sort key for the characters in [p1, p2).
// How the key from transform() is reduced depends on the sort-key syntax
// reported by find_sort_syntax(), which is queried once and cached in a
// function-local static together with the delimiter / field position.
//
// The returned string is never empty: an empty key is replaced by a string
// holding a single null character.
//
inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
{
   static wchar_t s_delim;
   static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
   std::wstring result;
   //
   // What we do here depends upon the format of the
   // sort key returned by this->transform:
   //
   switch (s_collate_type)
   {
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
      // the best we can do is translate to lower case, then get a regular sort key:
   {
      result.assign(p1, p2);
      for (std::wstring::size_type i = 0; i < result.size(); ++i)
         result[i] = (std::towlower)(result[i]);
      // c_str() is valid for an empty string, whereas dereferencing
      // begin() of an empty string is not:
      const wchar_t* first = result.c_str();
      result = c_regex_traits<wchar_t>::transform(first, first + result.size());
      break;
   }
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
   {
      // get a regular sort key for the *input* range, and then truncate it
      // (result is still empty at this point, so it must not be the source):
      result = c_regex_traits<wchar_t>::transform(p1, p2);
      // s_delim is used as the truncation position here. A key that is
      // already shorter (e.g. for empty input) is left alone, because
      // erase() throws std::out_of_range for a position past the end:
      const std::wstring::size_type cut = static_cast<std::wstring::size_type>(s_delim);
      if (cut < result.size())
         result.erase(cut);
      break;
   }
   case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
   {
      // get a regular sort key for the *input* range, and then truncate
      // everything after the delim:
      result = c_regex_traits<wchar_t>::transform(p1, p2);
      // a key that starts with the delimiter is kept whole:
      if ((!result.empty()) && (result[0] == s_delim))
         break;
      std::size_t i;
      for (i = 0; i < result.size(); ++i)
      {
         if (result[i] == s_delim)
            break;
      }
      result.erase(i);
      break;
   }
   }
   if (result.empty())
      result = std::wstring(1, wchar_t(0));
   return result;
}

//
// Maps the character-class name in [p1, p2) to its class mask.
// The name is looked up as given first; if that fails (negative id) it is
// lower-cased with std::towlower and looked up a second time.
//
inline c_regex_traits<wchar_t>::char_class_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
{
   using namespace BOOST_REGEX_DETAIL_NS;
   //
   // Indexed by (class id + 1), so an id of -1 selects the leading 0 entry:
   //
   static const char_class_type masks[] =
   {
      0,
      char_class_alnum, char_class_alpha, char_class_blank, char_class_cntrl,
      char_class_digit, char_class_digit,
      char_class_graph,
      char_class_horizontal,
      char_class_lower, char_class_lower,
      char_class_print, char_class_punct,
      char_class_space, char_class_space,
      char_class_upper, char_class_unicode, char_class_upper,
      char_class_vertical,
      char_class_alnum | char_class_word, char_class_alnum | char_class_word,
      char_class_xdigit,
   };

   int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
   if (idx < 0)
   {
      // no match as written: retry with a lower-cased copy of the name.
      std::wstring lowered(p1, p2);
      for (std::wstring::iterator it = lowered.begin(); it != lowered.end(); ++it)
         *it = (std::towlower)(*it);
      idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*lowered.begin(), &*lowered.begin() + lowered.size());
   }
   BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
   return masks[idx + 1];
}

//
// Returns true if c belongs to at least one of the character classes whose
// bits are set in mask. The classes are tested in a fixed order and the
// first match wins.
//
inline bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
{
   using namespace BOOST_REGEX_DETAIL_NS;

   if ((mask & char_class_space) && (std::iswspace)(c))
      return true;
   if ((mask & char_class_print) && (std::iswprint)(c))
      return true;
   if ((mask & char_class_cntrl) && (std::iswcntrl)(c))
      return true;
   if ((mask & char_class_upper) && (std::iswupper)(c))
      return true;
   if ((mask & char_class_lower) && (std::iswlower)(c))
      return true;
   if ((mask & char_class_alpha) && (std::iswalpha)(c))
      return true;
   if ((mask & char_class_digit) && (std::iswdigit)(c))
      return true;
   if ((mask & char_class_punct) && (std::iswpunct)(c))
      return true;
   if ((mask & char_class_xdigit) && (std::iswxdigit)(c))
      return true;
   // blank: whitespace that is not a separator
   if ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
      return true;
   // the word bit on its own only covers the underscore
   if ((mask & char_class_word) && (c == '_'))
      return true;
   // unicode: any bit set above the low eight
   if ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
      return true;
   // vertical: a separator or a vertical tab
   if ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
      return true;
   // horizontal: whitespace that is neither a separator nor a vertical tab
   return (mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v');
}

//
// Resolves the collating-element name in [p1, p2). The wide name is first
// copied into a narrow std::string and passed to
// lookup_default_collate_name(); a non-empty answer is widened again and
// returned. Otherwise a single-character name yields that character, and
// anything else yields an empty string.
//
inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
{
   // the wchar_t -> char copy below is intentional; MSVC warning 4244 is
   // disabled around it:
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4244)
#endif
   std::string narrow(p1, p2);
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
   narrow = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(narrow);
   if (narrow.empty())
   {
      // nothing found: fall back to the character itself, if there is just one.
      if (p2 - p1 == 1)
         return string_type(1, *p1);
      return string_type();
   }
   return string_type(narrow.begin(), narrow.end());
}

//
// Returns the numeric value of the single wide character c when parsed by
// std::wcstol in the given radix, or -1 if std::wcstol consumed nothing.
//
inline int BOOST_REGEX_CALL c_regex_traits<wchar_t>::value(wchar_t c, int radix)
{
#ifdef BOOST_BORLANDC
   // workaround for broken wcstol:
   if ((std::iswxdigit)(c) == 0)
      return -1;
#endif
   // one-character, null-terminated wide string for wcstol:
   wchar_t digit[2] = { c, L'\0', };
   wchar_t* stop;
   const long parsed = std::wcstol(digit, &stop, radix);
   // stop == digit means no conversion took place:
   return (stop == digit) ? -1 : static_cast<int>(parsed);
}

#endif

}

// Include the ABI suffix header (only when BOOST_HAS_ABI_HEADERS is defined).
// Under MSVC, warning 4103 is disabled around the include and the previous
// warning state is restored straight afterwards.
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

#endif