| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 |
- #if !defined(PQXX_ARRAY_COMPOSITE_HXX)
- # define PQXX_ARRAY_COMPOSITE_HXX
- # include <cassert>
- # include "pqxx/strconv.hxx"
- namespace pqxx::internal
- {
- // Find the end of a double-quoted string.
- /** `input[pos]` must be the opening double quote.
- *
- * Returns the offset of the first position after the closing quote.
- */
- inline std::size_t scan_double_quoted_string(
- char const input[], std::size_t size, std::size_t pos,
- pqxx::internal::glyph_scanner_func *scan)
- {
- // XXX: find_char<'"', '\\'>().
- auto next{scan(input, size, pos)};
- bool at_quote{false};
- for (pos = next, next = scan(input, size, pos); pos < size;
- pos = next, next = scan(input, size, pos))
- {
- if (at_quote)
- {
- if (next - pos == 1 and input[pos] == '"')
- {
- // We just read a pair of double quotes. Carry on.
- at_quote = false;
- }
- else
- {
- // We just read one double quote, and now we're at a character that's
- // not a second double quote. Ergo, that last character was the
- // closing double quote and this is the position right after it.
- return pos;
- }
- }
- else if (next - pos == 1)
- {
- switch (input[pos])
- {
- case '\\':
- // Backslash escape. Skip ahead by one more character.
- pos = next;
- next = scan(input, size, pos);
- break;
- case '"':
- // This is either the closing double quote, or the first of a pair of
- // double quotes.
- at_quote = true;
- break;
- }
- }
- else
- {
- // Multibyte character. Carry on.
- }
- }
- if (not at_quote)
- throw argument_error{
- "Missing closing double-quote: " + std::string{input}};
- return pos;
- }
- /// Un-quote and un-escape a double-quoted SQL string.
- inline std::string parse_double_quoted_string(
- char const input[], std::size_t end, std::size_t pos,
- pqxx::internal::glyph_scanner_func *scan)
- {
- std::string output;
- // Maximum output size is same as the input size, minus the opening and
- // closing quotes. Or in the extreme opposite case, the real number could be
- // half that. Usually it'll be a pretty close estimate.
- output.reserve(std::size_t(end - pos - 2));
- for (auto here{scan(input, end, pos)}, next{scan(input, end, here)};
- here < end - 1; here = next, next = scan(input, end, here))
- {
- // A backslash here is always an escape. So is a double-quote, since we're
- // inside the double-quoted string. In either case, we can just ignore the
- // escape character and use the next character. This is the one redeeming
- // feature of SQL's escaping system.
- if ((next - here == 1) and (input[here] == '\\' or input[here] == '"'))
- {
- // Skip escape.
- here = next;
- next = scan(input, end, here);
- }
- output.append(input + here, input + next);
- }
- return output;
- }
- /// Find the end of an unquoted string in an array or composite-type value.
- /** Stops when it gets to the end of the input; or when it sees any of the
- * characters in STOP which has not been escaped.
- *
- * For array values, STOP is a comma, a semicolon, or a closing brace. For
- * a value of a composite type, STOP is a comma or a closing parenthesis.
- */
- template<char... STOP>
- inline std::size_t scan_unquoted_string(
- char const input[], std::size_t size, std::size_t pos,
- pqxx::internal::glyph_scanner_func *scan)
- {
- bool at_backslash{false};
- auto next{scan(input, size, pos)};
- while ((pos < size) and
- ((next - pos) > 1 or at_backslash or ((input[pos] != STOP) and ...)))
- {
- pos = next;
- next = scan(input, size, pos);
- at_backslash =
- ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
- }
- return pos;
- }
- /// Parse an unquoted array entry or cfield of a composite-type field.
- inline std::string parse_unquoted_string(
- char const input[], std::size_t end, std::size_t pos,
- pqxx::internal::glyph_scanner_func *scan)
- {
- std::string output;
- bool at_backslash{false};
- output.reserve(end - pos);
- for (auto next{scan(input, end, pos)}; pos < end;
- pos = next, next = scan(input, end, pos))
- {
- at_backslash =
- ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
- if (not at_backslash)
- output.append(input + pos, next - pos);
- }
- return output;
- }
- /// Parse a field of a composite-type value.
- /** `T` is the C++ type of the field we're parsing, and `index` is its
- * zero-based number.
- *
- * Strip off the leading parenthesis or bracket yourself before parsing.
- * However, this function will parse the lcosing parenthesis or bracket.
- *
- * After a successful parse, `pos` will point at `std::end(text)`.
- *
- * For the purposes of parsing, ranges and arrays count as compositve values,
- * so this function supports parsing those. If you specifically need a closing
- * parenthesis, check afterwards that `text` did not end in a bracket instead.
- *
- * @param index Index of the current field, zero-based. It will increment for
- * the next field.
- * @param input Full input text for the entire composite-type value.
- * @param pos Starting position (in `input`) of the field that we're parsing.
- * After parsing, this will point at the beginning of the next field if
- * there is one, or one position past the last character otherwise.
- * @param field Destination for the parsed value.
- * @param scan Glyph scanning function for the relevant encoding type.
- * @param last_field Number of the last field in the value (zero-based). When
- * parsing the last field, this will equal `index`.
- */
- template<typename T>
- inline void parse_composite_field(
- std::size_t &index, std::string_view input, std::size_t &pos, T &field,
- glyph_scanner_func *scan, std::size_t last_field)
- {
- assert(index <= last_field);
- auto next{scan(std::data(input), std::size(input), pos)};
- if ((next - pos) != 1)
- throw conversion_error{"Non-ASCII character in composite-type syntax."};
- // Expect a field.
- switch (input[pos])
- {
- case ',':
- case ')':
- case ']':
- // The field is empty, i.e, null.
- if constexpr (nullness<T>::has_null)
- field = nullness<T>::null();
- else
- throw conversion_error{
- "Can't read composite field " + to_string(index) + ": C++ type " +
- type_name<T> + " does not support nulls."};
- break;
- case '"': {
- auto const stop{scan_double_quoted_string(
- std::data(input), std::size(input), pos, scan)};
- auto const text{
- parse_double_quoted_string(std::data(input), stop, pos, scan)};
- field = from_string<T>(text);
- pos = stop;
- }
- break;
- default: {
- auto const stop{scan_unquoted_string<',', ')', ']'>(
- std::data(input), std::size(input), pos, scan)};
- auto const text{parse_unquoted_string(std::data(input), stop, pos, scan)};
- field = from_string<T>(text);
- pos = stop;
- }
- break;
- }
- // Expect a comma or a closing parenthesis.
- next = scan(std::data(input), std::size(input), pos);
- if ((next - pos) != 1)
- throw conversion_error{
- "Unexpected non-ASCII character after composite field: " +
- std::string{input}};
- if (index < last_field)
- {
- if (input[pos] != ',')
- throw conversion_error{
- "Found '" + std::string{input[pos]} +
- "' in composite value where comma was expected: " + std::data(input)};
- }
- else
- {
- if (input[pos] == ',')
- throw conversion_error{
- "Composite value contained more fields than the expected " +
- to_string(last_field) + ": " + std::data(input)};
- if (input[pos] != ')' and input[pos] != ']')
- throw conversion_error{
- "Composite value has unexpected characters where closing parenthesis "
- "was expected: " +
- std::string{input}};
- if (next != std::size(input))
- throw conversion_error{
- "Composite value has unexpected text after closing parenthesis: " +
- std::string{input}};
- }
- pos = next;
- ++index;
- }
- /// Conservatively estimate buffer size needed for a composite field.
- template<typename T>
- inline std::size_t size_composite_field_buffer(T const &field)
- {
- if constexpr (is_unquoted_safe<T>)
- {
- // Safe to copy, without quotes or escaping. Drop the terminating zero.
- return size_buffer(field) - 1;
- }
- else
- {
- // + Opening quote.
- // + Field budget.
- // - Terminating zero.
- // + Escaping for each byte in the field's string representation.
- // - Escaping for terminating zero.
- // + Closing quote.
- return 1 + 2 * (size_buffer(field) - 1) + 1;
- }
- }
- template<typename T>
- inline void write_composite_field(char *&pos, char *end, T const &field)
- {
- if constexpr (is_unquoted_safe<T>)
- {
- // No need for quoting or escaping. Convert it straight into its final
- // place in the buffer, and "backspace" the trailing zero.
- pos = string_traits<T>::into_buf(pos, end, field) - 1;
- }
- else
- {
- // The field may need escaping, which means we need an intermediate buffer.
- // To avoid allocating that at run time, we use the end of the buffer that
- // we have.
- auto const budget{size_buffer(field)};
- *pos++ = '"';
- // Now escape buf into its final position.
- for (char const c : string_traits<T>::to_buf(end - budget, end, field))
- {
- if ((c == '"') or (c == '\\'))
- *pos++ = '\\';
- *pos++ = c;
- }
- *pos++ = '"';
- }
- *pos++ = ',';
- }
- } // namespace pqxx::internal
- #endif
|