array-composite.hxx 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. #if !defined(PQXX_ARRAY_COMPOSITE_HXX)
  2. # define PQXX_ARRAY_COMPOSITE_HXX
  3. # include <cassert>
  4. # include "pqxx/strconv.hxx"
  5. namespace pqxx::internal
  6. {
  7. // Find the end of a double-quoted string.
  8. /** `input[pos]` must be the opening double quote.
  9. *
  10. * Returns the offset of the first position after the closing quote.
  11. */
  12. inline std::size_t scan_double_quoted_string(
  13. char const input[], std::size_t size, std::size_t pos,
  14. pqxx::internal::glyph_scanner_func *scan)
  15. {
  16. // XXX: find_char<'"', '\\'>().
  17. auto next{scan(input, size, pos)};
  18. bool at_quote{false};
  19. for (pos = next, next = scan(input, size, pos); pos < size;
  20. pos = next, next = scan(input, size, pos))
  21. {
  22. if (at_quote)
  23. {
  24. if (next - pos == 1 and input[pos] == '"')
  25. {
  26. // We just read a pair of double quotes. Carry on.
  27. at_quote = false;
  28. }
  29. else
  30. {
  31. // We just read one double quote, and now we're at a character that's
  32. // not a second double quote. Ergo, that last character was the
  33. // closing double quote and this is the position right after it.
  34. return pos;
  35. }
  36. }
  37. else if (next - pos == 1)
  38. {
  39. switch (input[pos])
  40. {
  41. case '\\':
  42. // Backslash escape. Skip ahead by one more character.
  43. pos = next;
  44. next = scan(input, size, pos);
  45. break;
  46. case '"':
  47. // This is either the closing double quote, or the first of a pair of
  48. // double quotes.
  49. at_quote = true;
  50. break;
  51. }
  52. }
  53. else
  54. {
  55. // Multibyte character. Carry on.
  56. }
  57. }
  58. if (not at_quote)
  59. throw argument_error{
  60. "Missing closing double-quote: " + std::string{input}};
  61. return pos;
  62. }
  63. /// Un-quote and un-escape a double-quoted SQL string.
  64. inline std::string parse_double_quoted_string(
  65. char const input[], std::size_t end, std::size_t pos,
  66. pqxx::internal::glyph_scanner_func *scan)
  67. {
  68. std::string output;
  69. // Maximum output size is same as the input size, minus the opening and
  70. // closing quotes. Or in the extreme opposite case, the real number could be
  71. // half that. Usually it'll be a pretty close estimate.
  72. output.reserve(std::size_t(end - pos - 2));
  73. for (auto here{scan(input, end, pos)}, next{scan(input, end, here)};
  74. here < end - 1; here = next, next = scan(input, end, here))
  75. {
  76. // A backslash here is always an escape. So is a double-quote, since we're
  77. // inside the double-quoted string. In either case, we can just ignore the
  78. // escape character and use the next character. This is the one redeeming
  79. // feature of SQL's escaping system.
  80. if ((next - here == 1) and (input[here] == '\\' or input[here] == '"'))
  81. {
  82. // Skip escape.
  83. here = next;
  84. next = scan(input, end, here);
  85. }
  86. output.append(input + here, input + next);
  87. }
  88. return output;
  89. }
  90. /// Find the end of an unquoted string in an array or composite-type value.
  91. /** Stops when it gets to the end of the input; or when it sees any of the
  92. * characters in STOP which has not been escaped.
  93. *
  94. * For array values, STOP is a comma, a semicolon, or a closing brace. For
  95. * a value of a composite type, STOP is a comma or a closing parenthesis.
  96. */
  97. template<char... STOP>
  98. inline std::size_t scan_unquoted_string(
  99. char const input[], std::size_t size, std::size_t pos,
  100. pqxx::internal::glyph_scanner_func *scan)
  101. {
  102. bool at_backslash{false};
  103. auto next{scan(input, size, pos)};
  104. while ((pos < size) and
  105. ((next - pos) > 1 or at_backslash or ((input[pos] != STOP) and ...)))
  106. {
  107. pos = next;
  108. next = scan(input, size, pos);
  109. at_backslash =
  110. ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
  111. }
  112. return pos;
  113. }
  114. /// Parse an unquoted array entry or cfield of a composite-type field.
  115. inline std::string parse_unquoted_string(
  116. char const input[], std::size_t end, std::size_t pos,
  117. pqxx::internal::glyph_scanner_func *scan)
  118. {
  119. std::string output;
  120. bool at_backslash{false};
  121. output.reserve(end - pos);
  122. for (auto next{scan(input, end, pos)}; pos < end;
  123. pos = next, next = scan(input, end, pos))
  124. {
  125. at_backslash =
  126. ((not at_backslash) and ((next - pos) == 1) and (input[pos] == '\\'));
  127. if (not at_backslash)
  128. output.append(input + pos, next - pos);
  129. }
  130. return output;
  131. }
  132. /// Parse a field of a composite-type value.
  133. /** `T` is the C++ type of the field we're parsing, and `index` is its
  134. * zero-based number.
  135. *
  136. * Strip off the leading parenthesis or bracket yourself before parsing.
  137. * However, this function will parse the lcosing parenthesis or bracket.
  138. *
  139. * After a successful parse, `pos` will point at `std::end(text)`.
  140. *
  141. * For the purposes of parsing, ranges and arrays count as compositve values,
  142. * so this function supports parsing those. If you specifically need a closing
  143. * parenthesis, check afterwards that `text` did not end in a bracket instead.
  144. *
  145. * @param index Index of the current field, zero-based. It will increment for
  146. * the next field.
  147. * @param input Full input text for the entire composite-type value.
  148. * @param pos Starting position (in `input`) of the field that we're parsing.
  149. * After parsing, this will point at the beginning of the next field if
  150. * there is one, or one position past the last character otherwise.
  151. * @param field Destination for the parsed value.
  152. * @param scan Glyph scanning function for the relevant encoding type.
  153. * @param last_field Number of the last field in the value (zero-based). When
  154. * parsing the last field, this will equal `index`.
  155. */
  156. template<typename T>
  157. inline void parse_composite_field(
  158. std::size_t &index, std::string_view input, std::size_t &pos, T &field,
  159. glyph_scanner_func *scan, std::size_t last_field)
  160. {
  161. assert(index <= last_field);
  162. auto next{scan(std::data(input), std::size(input), pos)};
  163. if ((next - pos) != 1)
  164. throw conversion_error{"Non-ASCII character in composite-type syntax."};
  165. // Expect a field.
  166. switch (input[pos])
  167. {
  168. case ',':
  169. case ')':
  170. case ']':
  171. // The field is empty, i.e, null.
  172. if constexpr (nullness<T>::has_null)
  173. field = nullness<T>::null();
  174. else
  175. throw conversion_error{
  176. "Can't read composite field " + to_string(index) + ": C++ type " +
  177. type_name<T> + " does not support nulls."};
  178. break;
  179. case '"': {
  180. auto const stop{scan_double_quoted_string(
  181. std::data(input), std::size(input), pos, scan)};
  182. auto const text{
  183. parse_double_quoted_string(std::data(input), stop, pos, scan)};
  184. field = from_string<T>(text);
  185. pos = stop;
  186. }
  187. break;
  188. default: {
  189. auto const stop{scan_unquoted_string<',', ')', ']'>(
  190. std::data(input), std::size(input), pos, scan)};
  191. auto const text{parse_unquoted_string(std::data(input), stop, pos, scan)};
  192. field = from_string<T>(text);
  193. pos = stop;
  194. }
  195. break;
  196. }
  197. // Expect a comma or a closing parenthesis.
  198. next = scan(std::data(input), std::size(input), pos);
  199. if ((next - pos) != 1)
  200. throw conversion_error{
  201. "Unexpected non-ASCII character after composite field: " +
  202. std::string{input}};
  203. if (index < last_field)
  204. {
  205. if (input[pos] != ',')
  206. throw conversion_error{
  207. "Found '" + std::string{input[pos]} +
  208. "' in composite value where comma was expected: " + std::data(input)};
  209. }
  210. else
  211. {
  212. if (input[pos] == ',')
  213. throw conversion_error{
  214. "Composite value contained more fields than the expected " +
  215. to_string(last_field) + ": " + std::data(input)};
  216. if (input[pos] != ')' and input[pos] != ']')
  217. throw conversion_error{
  218. "Composite value has unexpected characters where closing parenthesis "
  219. "was expected: " +
  220. std::string{input}};
  221. if (next != std::size(input))
  222. throw conversion_error{
  223. "Composite value has unexpected text after closing parenthesis: " +
  224. std::string{input}};
  225. }
  226. pos = next;
  227. ++index;
  228. }
  229. /// Conservatively estimate buffer size needed for a composite field.
  230. template<typename T>
  231. inline std::size_t size_composite_field_buffer(T const &field)
  232. {
  233. if constexpr (is_unquoted_safe<T>)
  234. {
  235. // Safe to copy, without quotes or escaping. Drop the terminating zero.
  236. return size_buffer(field) - 1;
  237. }
  238. else
  239. {
  240. // + Opening quote.
  241. // + Field budget.
  242. // - Terminating zero.
  243. // + Escaping for each byte in the field's string representation.
  244. // - Escaping for terminating zero.
  245. // + Closing quote.
  246. return 1 + 2 * (size_buffer(field) - 1) + 1;
  247. }
  248. }
  249. template<typename T>
  250. inline void write_composite_field(char *&pos, char *end, T const &field)
  251. {
  252. if constexpr (is_unquoted_safe<T>)
  253. {
  254. // No need for quoting or escaping. Convert it straight into its final
  255. // place in the buffer, and "backspace" the trailing zero.
  256. pos = string_traits<T>::into_buf(pos, end, field) - 1;
  257. }
  258. else
  259. {
  260. // The field may need escaping, which means we need an intermediate buffer.
  261. // To avoid allocating that at run time, we use the end of the buffer that
  262. // we have.
  263. auto const budget{size_buffer(field)};
  264. *pos++ = '"';
  265. // Now escape buf into its final position.
  266. for (char const c : string_traits<T>::to_buf(end - budget, end, field))
  267. {
  268. if ((c == '"') or (c == '\\'))
  269. *pos++ = '\\';
  270. *pos++ = c;
  271. }
  272. *pos++ = '"';
  273. }
  274. *pos++ = ',';
  275. }
  276. } // namespace pqxx::internal
  277. #endif