Apq
/
ZeroTierOne
зеркало из https://github.com/zerotier/ZeroTierOne.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
							#if !defined(PQXX_ARRAY_COMPOSITE_HXX)
#  define PQXX_ARRAY_COMPOSITE_HXX

#  include <cassert>

#  include "pqxx/strconv.hxx"

namespace pqxx::internal
{
/// Find the end of a double-quoted string.
/** `input[pos]` must be the opening double quote.
 *
 * Inside the string, a backslash escapes the glyph that follows it, and a
 * pair of consecutive double quotes stands for one literal double quote.
 *
 * @param input Buffer holding the text.  Only the first `size` bytes are
 *     considered; the buffer is not assumed to be zero-terminated.
 * @param size Number of bytes in `input`.
 * @param pos Offset of the opening double quote.
 * @param scan Glyph scanning function; given a position it returns the
 *     offset where the next glyph starts.
 * @return The offset of the first position after the closing quote.
 * @throw argument_error If the input ends before a closing quote is found.
 */
inline std::size_t scan_double_quoted_string(
  char const input[], std::size_t size, std::size_t pos,
  pqxx::internal::glyph_scanner_func *scan)
{
  // XXX: find_char<'"', '\\'>().
  // Step over the opening quote.
  auto next{scan(input, size, pos)};
  // True when the previous glyph was a double quote whose meaning (closing
  // quote, or first half of a doubled quote) is not yet known.
  bool at_quote{false};
  for (pos = next, next = scan(input, size, pos); pos < size;
       pos = next, next = scan(input, size, pos))
  {
    if (at_quote)
    {
      if (next - pos == 1 and input[pos] == '"')
      {
        // We just read a pair of double quotes.  Carry on.
        at_quote = false;
      }
      else
      {
        // We just read one double quote, and now we're at a character that's
        // not a second double quote.  Ergo, that last character was the
        // closing double quote and this is the position right after it.
        return pos;
      }
    }
    else if (next - pos == 1)
    {
      switch (input[pos])
      {
      case '\\':
        // Backslash escape.  Skip ahead by one more character.
        pos = next;
        next = scan(input, size, pos);
        break;

      case '"':
        // This is either the closing double quote, or the first of a pair of
        // double quotes.
        at_quote = true;
        break;
      }
    }
    else
    {
      // Multibyte character.  Carry on.
    }
  }
  // Running out of input is only acceptable if the very last glyph was the
  // closing quote.
  if (not at_quote)
    throw argument_error{
      // Bound the copy by `size`: the buffer need not be zero-terminated.
      "Missing closing double-quote: " + std::string{input, size}};
  return pos;
}


/// Strip the quotes and escapes from a double-quoted SQL string.
/** `input[pos]` is the opening double quote, and `end` is the offset just
 * past the closing double quote.
 *
 * @param input Buffer holding the quoted string.
 * @param end Offset one past the closing quote.
 * @param pos Offset of the opening quote.
 * @param scan Glyph scanning function for the text's encoding.
 * @return The string's contents, without quotes or escape characters.
 */
inline std::string parse_double_quoted_string(
  char const input[], std::size_t end, std::size_t pos,
  pqxx::internal::glyph_scanner_func *scan)
{
  std::string result;
  // Upper bound for the result: everything between the two quotes.  Escapes
  // can only make the real size smaller (down to half of this).
  result.reserve(std::size_t(end - pos - 2));

  // Offset of the closing quote; the payload ends right before it.
  auto const closing{end - 1};

  // Start at the first glyph after the opening quote.
  auto cur{scan(input, end, pos)};
  auto after{scan(input, end, cur)};
  while (cur < closing)
  {
    // Within the quotes, both a backslash and a double quote can only be an
    // escape for the glyph that follows.  Drop the escape, keep the glyph.
    bool const is_escape{
      (after - cur == 1) and (input[cur] == '\\' or input[cur] == '"')};
    if (is_escape)
    {
      cur = after;
      after = scan(input, end, cur);
    }
    result.append(input + cur, input + after);

    cur = after;
    after = scan(input, end, cur);
  }
  return result;
}


/// Find the end of an unquoted string in an array or composite-type value.
/** Stops when it gets to the end of the input; or when it sees any of the
 * characters in STOP which has not been escaped.
 *
 * A backslash escapes the glyph that follows it, so a STOP character right
 * after an unescaped backslash does not end the string.  Multibyte glyphs
 * never count as STOP characters.
 *
 * For array values, STOP is a comma, a semicolon, or a closing brace.  For
 * a value of a composite type, STOP is a comma or a closing parenthesis.
 *
 * @param input Buffer holding the text.
 * @param size Number of bytes in `input`.
 * @param pos Offset where the unquoted string starts.
 * @param scan Glyph scanning function for the text's encoding.
 * @return Offset of the first unescaped STOP character, or the offset where
 *     scanning ran out of input.
 */
template<char... STOP>
inline std::size_t scan_unquoted_string(
  char const input[], std::size_t size, std::size_t pos,
  pqxx::internal::glyph_scanner_func *scan)
{
  // True when the glyph at `pos` is preceded by an unescaped backslash, i.e.
  // when the glyph at `pos` is itself escaped.
  bool escaped{false};
  auto next{scan(input, size, pos)};
  while (pos < size)
  {
    bool const single_byte{(next - pos) == 1};
    // Only an unescaped, single-byte glyph can terminate the string.
    if (single_byte and (not escaped) and ((input[pos] == STOP) or ...))
      break;

    // A backslash escapes the next glyph, unless it was escaped itself.
    escaped = ((not escaped) and single_byte and (input[pos] == '\\'));
    pos = next;
    next = scan(input, size, pos);
  }
  return pos;
}


/// Parse an unquoted array entry, or an unquoted field of a composite value.
/** Copies the text from `pos` up to (but not including) `end`, dropping each
 * backslash that acts as an escape and keeping the glyph it escapes.
 *
 * @param input Buffer holding the text.
 * @param end Offset one past the last byte of the string.
 * @param pos Offset where the string starts.
 * @param scan Glyph scanning function for the text's encoding.
 * @return The unescaped string.
 */
inline std::string parse_unquoted_string(
  char const input[], std::size_t end, std::size_t pos,
  pqxx::internal::glyph_scanner_func *scan)
{
  std::string result;
  result.reserve(end - pos);

  // True while the glyph at `pos` is a backslash acting as an escape.
  bool escaping{false};
  auto after{scan(input, end, pos)};
  while (pos < end)
  {
    bool const is_backslash{(after - pos == 1) and (input[pos] == '\\')};
    // A backslash that was itself escaped is a literal backslash.
    escaping = (not escaping) and is_backslash;
    if (not escaping)
      result.append(input + pos, after - pos);

    pos = after;
    after = scan(input, end, pos);
  }
  return result;
}


/// Parse a field of a composite-type value.
/** `T` is the C++ type of the field we're parsing, and `index` is its
 * zero-based number.
 *
 * Strip off the leading parenthesis or bracket yourself before parsing.
 * However, this function will parse the closing parenthesis or bracket.
 *
 * After successfully parsing the last field, `pos` will equal
 * `std::size(input)`.
 *
 * For the purposes of parsing, ranges and arrays count as composite values,
 * so this function supports parsing those.  If you specifically need a closing
 * parenthesis, check afterwards that `input` did not end in a bracket instead.
 *
 * @param index Index of the current field, zero-based.  It will increment for
 *     the next field.
 * @param input Full input text for the entire composite-type value.  It does
 *     not need to be zero-terminated.
 * @param pos Starting position (in `input`) of the field that we're parsing.
 *     After parsing, this will point at the beginning of the next field if
 *     there is one, or one position past the last character otherwise.
 * @param field Destination for the parsed value.
 * @param scan Glyph scanning function for the relevant encoding type.
 * @param last_field Number of the last field in the value (zero-based).  When
 *     parsing the last field, this will equal `index`.
 * @throw conversion_error If the text is malformed, has the wrong number of
 *     fields, or holds a null where `T` does not support nulls.
 */
template<typename T>
inline void parse_composite_field(
  std::size_t &index, std::string_view input, std::size_t &pos, T &field,
  glyph_scanner_func *scan, std::size_t last_field)
{
  assert(index <= last_field);
  // Never index past the end of the text, e.g. when the value is truncated.
  if (pos >= std::size(input))
    throw conversion_error{
      "Unexpected end of composite value: " + std::string{input}};

  auto next{scan(std::data(input), std::size(input), pos)};
  if ((next - pos) != 1)
    throw conversion_error{"Non-ASCII character in composite-type syntax."};

  // Expect a field.
  switch (input[pos])
  {
  case ',':
  case ')':
  case ']':
    // The field is empty, i.e, null.
    if constexpr (nullness<T>::has_null)
      field = nullness<T>::null();
    else
      throw conversion_error{
        "Can't read composite field " + to_string(index) + ": C++ type " +
        type_name<T> + " does not support nulls."};
    break;

  case '"': {
    // Quoted field: find where it ends, then un-quote and un-escape it.
    auto const stop{scan_double_quoted_string(
      std::data(input), std::size(input), pos, scan)};
    auto const text{
      parse_double_quoted_string(std::data(input), stop, pos, scan)};
    field = from_string<T>(text);
    pos = stop;
  }
  break;

  default: {
    // Unquoted field: it runs up to the next separator or closing character.
    auto const stop{scan_unquoted_string<',', ')', ']'>(
      std::data(input), std::size(input), pos, scan)};
    auto const text{parse_unquoted_string(std::data(input), stop, pos, scan)};
    field = from_string<T>(text);
    pos = stop;
  }
  break;
  }

  // Expect a comma or a closing parenthesis.
  if (pos >= std::size(input))
    throw conversion_error{
      "Unexpected end of composite value after field: " + std::string{input}};

  next = scan(std::data(input), std::size(input), pos);

  if ((next - pos) != 1)
    throw conversion_error{
      "Unexpected non-ASCII character after composite field: " +
      std::string{input}};

  if (index < last_field)
  {
    // More fields to come, so there must be a separator here.
    if (input[pos] != ',')
      throw conversion_error{
        "Found '" + std::string{input[pos]} +
        "' in composite value where comma was expected: " +
        std::string{input}};
  }
  else
  {
    // This was the last field.  The value must end right here.
    if (input[pos] == ',')
      throw conversion_error{
        // `last_field` is a zero-based index; the field count is one more.
        "Composite value contained more fields than the expected " +
        to_string(last_field + 1) + ": " + std::string{input}};
    if (input[pos] != ')' and input[pos] != ']')
      throw conversion_error{
        "Composite value has unexpected characters where closing parenthesis "
        "was expected: " +
        std::string{input}};
    if (next != std::size(input))
      throw conversion_error{
        "Composite value has unexpected text after closing parenthesis: " +
        std::string{input}};
  }

  pos = next;
  ++index;
}


/// Give a conservative buffer budget for writing one composite field.
/** The budget covers the field's text plus any quoting and escaping, but not
 * the separator that follows the field.
 *
 * @param field The value that will be written.
 * @return Number of bytes to set aside for the field.
 */
template<typename T>
inline std::size_t size_composite_field_buffer(T const &field)
{
  // The field's own budget, minus the terminating zero which we don't write.
  auto const payload{size_buffer(field) - 1};

  if constexpr (is_unquoted_safe<T>)
  {
    // Copied verbatim: no quotes, no escapes.
    return payload;
  }
  else
  {
    // Worst case every byte needs an escape character in front of it, and
    // then there's the opening quote and the closing quote.
    return 2 * payload + 2;
  }
}


/// Write one field of a composite value, followed by a comma.
/** @param pos Write position.  Advanced past the field and its trailing comma.
 * @param end End of the buffer.
 * @param field The value to write.
 */
template<typename T>
inline void write_composite_field(char *&pos, char *end, T const &field)
{
  if constexpr (is_unquoted_safe<T>)
  {
    // Nothing to quote or escape, so render directly into place.  Step back
    // by one so that the trailing zero gets overwritten.
    pos = string_traits<T>::into_buf(pos, end, field) - 1;
  }
  else
  {
    // Escaping requires a scratch copy of the unescaped text.  Rather than
    // allocate one, borrow the tail end of the buffer we were given.
    auto const budget{size_buffer(field)};
    *pos = '"';
    ++pos;

    auto const raw{string_traits<T>::to_buf(end - budget, end, field)};
    for (char const c : raw)
    {
      // Quotes and backslashes each get a backslash in front.
      bool const needs_escape{(c == '"') or (c == '\\')};
      if (needs_escape)
      {
        *pos = '\\';
        ++pos;
      }
      *pos = c;
      ++pos;
    }

    *pos = '"';
    ++pos;
  }

  // Field separator.
  *pos = ',';
  ++pos;
}
} // namespace pqxx::internal
#endif