jpcre2.hpp 216 KB


  1. /* *****************************************************************************
  2. * ******************* C++ wrapper for PCRE2 Library ****************************
  3. * *****************************************************************************
  4. * Copyright (c) 2015-2018 Md. Jahidul Hamid
  5. *
  6. * -----------------------------------------------------------------------------
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright notice,
  11. * this list of conditions and the following disclaimer.
  12. *
  13. * * Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * * The names of its contributors may not be used to endorse or promote
  18. * products derived from this software without specific prior written
  19. * permission.
  20. *
  21. * Disclaimer:
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  24. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  27. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  31. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  32. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. * */
  35. /** @file jpcre2.hpp
* @brief Main header file for the JPCRE2 library, to be included by programs that use its functionality.
  37. * It includes the `pcre2.h` header, therefore you shouldn't include `pcre2.h`, neither should you define `PCRE2_CODE_UNIT_WIDTH` before including
  38. * `jpcre2.hpp`.
  39. * If your `pcre2.h` header is not in standard include paths, you may include `pcre2.h` with correct path before including `jpcre2.hpp`
  40. * manually. In this case you will have to define `PCRE2_CODE_UNIT_WIDTH` before including `pcre2.h`.
  41. * Make sure to link required PCRE2 libraries when compiling.
  42. *
  43. * @author [Md Jahidul Hamid](https://github.com/neurobin)
  44. */
  45. #ifndef JPCRE2_HPP
  46. #define JPCRE2_HPP
  47. #ifndef PCRE2_CODE_UNIT_WIDTH
  48. ///@def PCRE2_CODE_UNIT_WIDTH
  49. ///This macro does not have any significance in JPCRE2 context.
  50. ///It is defined as 0 by default. Defining it before including jpcre2.hpp
  51. ///will override the default (discouraged as it will make it harder for you to detect problems),
  52. ///but still it will have no effect in a JPCRE2 perspective.
///Defining it with an invalid value will result in a compile error.
  54. #define PCRE2_CODE_UNIT_WIDTH 0
  55. #endif
  56. //previous inclusion of pcre2.h will be respected and we won't try to include it twice.
  57. //Thus one can pre-include pcre2.h from an arbitrary/non-standard path.
  58. #ifndef PCRE2_MAJOR
  59. #include <pcre2.h> // pcre2 header
  60. #endif
  61. #include <string> // std::string, std::wstring
  62. #include <vector> // std::vector
  63. #include <map> // std::map
  64. #include <cstdio> // std::fprintf
  65. #include <climits> // CHAR_BIT
  66. #include <cstdlib> // std::abort()
  67. #if __cplusplus >= 201103L
  68. #include <utility>
  69. #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  70. #include <functional> // std::function
  71. #endif
  72. #endif
  73. #define JPCRE2_UNUSED(x) ((void)(x))
  74. #if defined(NDEBUG) || defined(JPCRE2_NDEBUG)
  75. #define JPCRE2_ASSERT(cond, msg) ((void)0)
  76. #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) ((void)0)
  77. #else
  78. #define JPCRE2_ASSERT(cond, msg) jpcre2::jassert(cond, msg, __FILE__, __LINE__)
  79. #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) jpcre2::_jvassert(cond, name, __FILE__, __LINE__)
  80. #endif
  81. /** @namespace jpcre2
  82. * Top level namespace of JPCRE2.
  83. *
  84. * All functions, classes/structs, constants, enums that are provided by JPCRE2 belong to this namespace while
  85. * **PCRE2** structs, functions, constants remain outside of its scope.
  86. *
  87. * If you want to use any PCRE2 functions or constants,
  88. * remember that they are in the global scope and should be used as such.
  89. */
  90. namespace jpcre2 {
  91. ///Define for JPCRE2 version.
  92. ///It can be used to support changes in different versions of the lib.
  93. #define JPCRE2_VERSION 103103L
  94. /** @namespace jpcre2::INFO
  95. * Namespace to provide information about JPCRE2 library itself.
  96. * Contains constant Strings with version info.
  97. */
  98. namespace INFO {
  99. static const char NAME[] = "JPCRE2"; ///< Name of the project
  100. static const char FULL_VERSION[] = "10.31.03"; ///< Full version string
  101. static const char VERSION_GENRE[] = "10"; ///< Generation, depends on original PCRE2 version
  102. static const char VERSION_MAJOR[] = "31"; ///< Major version, updated when API change is made
  103. static const char VERSION_MINOR[] = "03"; ///< Minor version, includes bug fix or minor feature upgrade
  104. static const char VERSION_PRE_RELEASE[] = ""; ///< Alpha or beta (testing) release version
  105. }
  106. typedef PCRE2_SIZE SIZE_T; ///< Used for match count and vector size
  107. typedef uint32_t Uint; ///< Used for options (bitwise operation)
  108. typedef uint8_t Ush; ///< 8 bit unsigned integer.
  109. typedef std::vector<SIZE_T> VecOff; ///< vector of size_t.
  110. typedef std::vector<Uint> VecOpt; ///< vector for Uint option values.
  111. /// @namespace jpcre2::ERROR
  112. /// Namespace for error codes.
  113. namespace ERROR {
  114. /** Error numbers for JPCRE2.
  115. * JPCRE2 error numbers are positive integers while
  116. * PCRE2 error numbers are negative integers.
  117. */
  118. enum {
  119. INVALID_MODIFIER = 2, ///< Invalid modifier was detected
  120. INSUFFICIENT_OVECTOR = 3 ///< Ovector was not big enough during a match
  121. };
  122. }
  123. /** These constants provide JPCRE2 options.
  124. */
  125. enum {
  126. NONE = 0x0000000u, ///< Option 0 (zero)
  127. FIND_ALL = 0x0000002u, ///< Find all during match (global match)
  128. JIT_COMPILE = 0x0000004u ///< Perform JIT compilation for optimization
  129. };
  130. //enableif and is_same implementation
  131. template<bool B, typename T = void>
  132. struct EnableIf{};
  133. template<typename T>
  134. struct EnableIf<true, T>{typedef T Type;};
  135. template<typename T1, typename T2>
  136. struct IsSame{ static const bool value = false; };
  137. template<typename T>
  138. struct IsSame<T,T>{ static const bool value = true; };
  139. ///JPCRE2 assert function.
  140. ///Aborts with an error message if condition fails.
  141. ///@param cond boolean condition
  142. ///@param msg message (std::string)
  143. ///@param f file where jassert was called.
  144. ///@param line line number where jassert was called.
  145. static inline void jassert(bool cond, const char* msg, const char* f, size_t line){
  146. if(!cond) {
  147. std::fprintf(stderr,"\n\tE: AssertionFailure\n%s\nAssertion failed in file: %s\t at line: %u\n", msg, f, (unsigned)line);
  148. std::abort();
  149. }
  150. }
  151. static inline void _jvassert(bool cond, char const * name, const char* f, size_t line){
  152. jassert(cond, (std::string("ValueError: \n\
  153. Required data vector of type ")+std::string(name)+" is empty.\n\
  154. Your MatchEvaluator callback function is not\n\
  155. compatible with existing data!!\n\
  156. You are trying to use a vector that does not\n\
  157. have any match data. Either call nreplace() or replace()\n\
  158. with true or perform a match with appropriate\n\
  159. callback function. For more details, refer to\n\
  160. the doc in MatchEvaluator section.").c_str(), f, line);
  161. }
  162. static inline std::string _tostdstring(unsigned x){
  163. char buf[128];
  164. int written = std::sprintf(buf, "%u", x);
  165. return (written > 0) ? std::string(buf, buf + written) : std::string();
  166. }
  167. ////////////////////////// The following are type and function mappings from PCRE2 interface to JPCRE2 interface /////////////////////////
  168. //forward declaration
  169. template<Ush BS> struct Pcre2Type;
  170. template<Ush BS> struct Pcre2Func;
  171. //PCRE2 types
  172. //These templated types will be used in place of actual types
  173. template<Ush BS> struct Pcre2Type {};
  174. template<> struct Pcre2Type<8>{
  175. //typedefs used
  176. typedef PCRE2_UCHAR8 Pcre2Uchar;
  177. typedef PCRE2_SPTR8 Pcre2Sptr;
  178. typedef pcre2_code_8 Pcre2Code;
  179. typedef pcre2_compile_context_8 CompileContext;
  180. typedef pcre2_match_data_8 MatchData;
  181. typedef pcre2_general_context_8 GeneralContext;
  182. typedef pcre2_match_context_8 MatchContext;
  183. typedef pcre2_jit_callback_8 JitCallback;
  184. typedef pcre2_jit_stack_8 JitStack;
  185. };
  186. template<> struct Pcre2Type<16>{
  187. //typedefs used
  188. typedef PCRE2_UCHAR16 Pcre2Uchar;
  189. typedef PCRE2_SPTR16 Pcre2Sptr;
  190. typedef pcre2_code_16 Pcre2Code;
  191. typedef pcre2_compile_context_16 CompileContext;
  192. typedef pcre2_match_data_16 MatchData;
  193. typedef pcre2_general_context_16 GeneralContext;
  194. typedef pcre2_match_context_16 MatchContext;
  195. typedef pcre2_jit_callback_16 JitCallback;
  196. typedef pcre2_jit_stack_16 JitStack;
  197. };
  198. template<> struct Pcre2Type<32>{
  199. //typedefs used
  200. typedef PCRE2_UCHAR32 Pcre2Uchar;
  201. typedef PCRE2_SPTR32 Pcre2Sptr;
  202. typedef pcre2_code_32 Pcre2Code;
  203. typedef pcre2_compile_context_32 CompileContext;
  204. typedef pcre2_match_data_32 MatchData;
  205. typedef pcre2_general_context_32 GeneralContext;
  206. typedef pcre2_match_context_32 MatchContext;
  207. typedef pcre2_jit_callback_32 JitCallback;
  208. typedef pcre2_jit_stack_32 JitStack;
  209. };
  210. //wrappers for PCRE2 functions
  211. template<Ush BS> struct Pcre2Func{};
  212. //8-bit version
  213. template<> struct Pcre2Func<8> {
  214. static Pcre2Type<8>::CompileContext* compile_context_create(Pcre2Type<8>::GeneralContext *gcontext){
  215. return pcre2_compile_context_create_8(gcontext);
  216. }
  217. static void compile_context_free(Pcre2Type<8>::CompileContext *ccontext){
  218. pcre2_compile_context_free_8(ccontext);
  219. }
  220. static Pcre2Type<8>::CompileContext* compile_context_copy(Pcre2Type<8>::CompileContext* ccontext){
  221. return pcre2_compile_context_copy_8(ccontext);
  222. }
  223. static const unsigned char * maketables(Pcre2Type<8>::GeneralContext* gcontext){
  224. return pcre2_maketables_8(gcontext);
  225. }
  226. static int set_character_tables(Pcre2Type<8>::CompileContext * ccontext, const unsigned char * table){
  227. return pcre2_set_character_tables_8(ccontext, table);
  228. }
  229. static Pcre2Type<8>::Pcre2Code * compile(Pcre2Type<8>::Pcre2Sptr pattern,
  230. PCRE2_SIZE length,
  231. uint32_t options,
  232. int *errorcode,
  233. PCRE2_SIZE *erroroffset,
  234. Pcre2Type<8>::CompileContext *ccontext){
  235. return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext);
  236. }
  237. static int jit_compile(Pcre2Type<8>::Pcre2Code *code, uint32_t options){
  238. return pcre2_jit_compile_8(code, options);
  239. }
  240. static int substitute( const Pcre2Type<8>::Pcre2Code *code,
  241. Pcre2Type<8>::Pcre2Sptr subject,
  242. PCRE2_SIZE length,
  243. PCRE2_SIZE startoffset,
  244. uint32_t options,
  245. Pcre2Type<8>::MatchData *match_data,
  246. Pcre2Type<8>::MatchContext *mcontext,
  247. Pcre2Type<8>::Pcre2Sptr replacement,
  248. PCRE2_SIZE rlength,
  249. Pcre2Type<8>::Pcre2Uchar *outputbuffer,
  250. PCRE2_SIZE *outlengthptr){
  251. return pcre2_substitute_8( code, subject, length, startoffset, options, match_data,
  252. mcontext, replacement, rlength, outputbuffer, outlengthptr);
  253. }
  254. //~ static int substring_get_bynumber(Pcre2Type<8>::MatchData *match_data,
  255. //~ uint32_t number,
  256. //~ Pcre2Type<8>::Pcre2Uchar **bufferptr,
  257. //~ PCRE2_SIZE *bufflen){
  258. //~ return pcre2_substring_get_bynumber_8(match_data, number, bufferptr, bufflen);
  259. //~ }
  260. //~ static int substring_get_byname(Pcre2Type<8>::MatchData *match_data,
  261. //~ Pcre2Type<8>::Pcre2Sptr name,
  262. //~ Pcre2Type<8>::Pcre2Uchar **bufferptr,
  263. //~ PCRE2_SIZE *bufflen){
  264. //~ return pcre2_substring_get_byname_8(match_data, name, bufferptr, bufflen);
  265. //~ }
  266. //~ static void substring_free(Pcre2Type<8>::Pcre2Uchar *buffer){
  267. //~ pcre2_substring_free_8(buffer);
  268. //~ }
  269. //~ static Pcre2Type<8>::Pcre2Code * code_copy(const Pcre2Type<8>::Pcre2Code *code){
  270. //~ return pcre2_code_copy_8(code);
  271. //~ }
  272. static void code_free(Pcre2Type<8>::Pcre2Code *code){
  273. pcre2_code_free_8(code);
  274. }
  275. static int get_error_message( int errorcode,
  276. Pcre2Type<8>::Pcre2Uchar *buffer,
  277. PCRE2_SIZE bufflen){
  278. return pcre2_get_error_message_8(errorcode, buffer, bufflen);
  279. }
  280. static Pcre2Type<8>::MatchData * match_data_create_from_pattern(
  281. const Pcre2Type<8>::Pcre2Code *code,
  282. Pcre2Type<8>::GeneralContext *gcontext){
  283. return pcre2_match_data_create_from_pattern_8(code, gcontext);
  284. }
  285. static int match( const Pcre2Type<8>::Pcre2Code *code,
  286. Pcre2Type<8>::Pcre2Sptr subject,
  287. PCRE2_SIZE length,
  288. PCRE2_SIZE startoffset,
  289. uint32_t options,
  290. Pcre2Type<8>::MatchData *match_data,
  291. Pcre2Type<8>::MatchContext *mcontext){
  292. return pcre2_match_8(code, subject, length, startoffset, options, match_data, mcontext);
  293. }
  294. static void match_data_free(Pcre2Type<8>::MatchData *match_data){
  295. pcre2_match_data_free_8(match_data);
  296. }
  297. static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<8>::MatchData *match_data){
  298. return pcre2_get_ovector_pointer_8(match_data);
  299. }
  300. static int pattern_info(const Pcre2Type<8>::Pcre2Code *code, uint32_t what, void *where){
  301. return pcre2_pattern_info_8(code, what, where);
  302. }
  303. static int set_newline(Pcre2Type<8>::CompileContext *ccontext, uint32_t value){
  304. return pcre2_set_newline_8(ccontext, value);
  305. }
  306. //~ static void jit_stack_assign(Pcre2Type<8>::MatchContext *mcontext,
  307. //~ Pcre2Type<8>::JitCallback callback_function,
  308. //~ void *callback_data){
  309. //~ pcre2_jit_stack_assign_8(mcontext, callback_function, callback_data);
  310. //~ }
  311. //~ static Pcre2Type<8>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
  312. //~ Pcre2Type<8>::GeneralContext *gcontext){
  313. //~ return pcre2_jit_stack_create_8(startsize, maxsize, gcontext);
  314. //~ }
  315. //~ static void jit_stack_free(Pcre2Type<8>::JitStack *jit_stack){
  316. //~ pcre2_jit_stack_free_8(jit_stack);
  317. //~ }
  318. //~ static void jit_free_unused_memory(Pcre2Type<8>::GeneralContext *gcontext){
  319. //~ pcre2_jit_free_unused_memory_8(gcontext);
  320. //~ }
  321. //~ static Pcre2Type<8>::MatchContext *match_context_create(Pcre2Type<8>::GeneralContext *gcontext){
  322. //~ return pcre2_match_context_create_8(gcontext);
  323. //~ }
  324. //~ static Pcre2Type<8>::MatchContext *match_context_copy(Pcre2Type<8>::MatchContext *mcontext){
  325. //~ return pcre2_match_context_copy_8(mcontext);
  326. //~ }
  327. //~ static void match_context_free(Pcre2Type<8>::MatchContext *mcontext){
  328. //~ pcre2_match_context_free_8(mcontext);
  329. //~ }
  330. static uint32_t get_ovector_count(Pcre2Type<8>::MatchData *match_data){
  331. return pcre2_get_ovector_count_8(match_data);
  332. }
  333. };
  334. //16-bit version
  335. template<> struct Pcre2Func<16> {
  336. static Pcre2Type<16>::CompileContext* compile_context_create(Pcre2Type<16>::GeneralContext *gcontext){
  337. return pcre2_compile_context_create_16(gcontext);
  338. }
  339. static void compile_context_free(Pcre2Type<16>::CompileContext *ccontext){
  340. pcre2_compile_context_free_16(ccontext);
  341. }
  342. static Pcre2Type<16>::CompileContext* compile_context_copy(Pcre2Type<16>::CompileContext* ccontext){
  343. return pcre2_compile_context_copy_16(ccontext);
  344. }
  345. static const unsigned char * maketables(Pcre2Type<16>::GeneralContext* gcontext){
  346. return pcre2_maketables_16(gcontext);
  347. }
  348. static int set_character_tables(Pcre2Type<16>::CompileContext * ccontext, const unsigned char * table){
  349. return pcre2_set_character_tables_16(ccontext, table);
  350. }
  351. static Pcre2Type<16>::Pcre2Code * compile(Pcre2Type<16>::Pcre2Sptr pattern,
  352. PCRE2_SIZE length,
  353. uint32_t options,
  354. int *errorcode,
  355. PCRE2_SIZE *erroroffset,
  356. Pcre2Type<16>::CompileContext *ccontext){
  357. return pcre2_compile_16(pattern, length, options, errorcode, erroroffset, ccontext);
  358. }
  359. static int jit_compile(Pcre2Type<16>::Pcre2Code *code, uint32_t options){
  360. return pcre2_jit_compile_16(code, options);
  361. }
  362. static int substitute( const Pcre2Type<16>::Pcre2Code *code,
  363. Pcre2Type<16>::Pcre2Sptr subject,
  364. PCRE2_SIZE length,
  365. PCRE2_SIZE startoffset,
  366. uint32_t options,
  367. Pcre2Type<16>::MatchData *match_data,
  368. Pcre2Type<16>::MatchContext *mcontext,
  369. Pcre2Type<16>::Pcre2Sptr replacement,
  370. PCRE2_SIZE rlength,
  371. Pcre2Type<16>::Pcre2Uchar *outputbuffer,
  372. PCRE2_SIZE *outlengthptr){
  373. return pcre2_substitute_16( code, subject, length, startoffset, options, match_data,
  374. mcontext, replacement, rlength, outputbuffer, outlengthptr);
  375. }
  376. //~ static int substring_get_bynumber(Pcre2Type<16>::MatchData *match_data,
  377. //~ uint32_t number,
  378. //~ Pcre2Type<16>::Pcre2Uchar **bufferptr,
  379. //~ PCRE2_SIZE *bufflen){
  380. //~ return pcre2_substring_get_bynumber_16(match_data, number, bufferptr, bufflen);
  381. //~ }
  382. //~ static int substring_get_byname(Pcre2Type<16>::MatchData *match_data,
  383. //~ Pcre2Type<16>::Pcre2Sptr name,
  384. //~ Pcre2Type<16>::Pcre2Uchar **bufferptr,
  385. //~ PCRE2_SIZE *bufflen){
  386. //~ return pcre2_substring_get_byname_16(match_data, name, bufferptr, bufflen);
  387. //~ }
  388. //~ static void substring_free(Pcre2Type<16>::Pcre2Uchar *buffer){
  389. //~ pcre2_substring_free_16(buffer);
  390. //~ }
  391. //~ static Pcre2Type<16>::Pcre2Code * code_copy(const Pcre2Type<16>::Pcre2Code *code){
  392. //~ return pcre2_code_copy_16(code);
  393. //~ }
  394. static void code_free(Pcre2Type<16>::Pcre2Code *code){
  395. pcre2_code_free_16(code);
  396. }
  397. static int get_error_message( int errorcode,
  398. Pcre2Type<16>::Pcre2Uchar *buffer,
  399. PCRE2_SIZE bufflen){
  400. return pcre2_get_error_message_16(errorcode, buffer, bufflen);
  401. }
  402. static Pcre2Type<16>::MatchData * match_data_create_from_pattern(
  403. const Pcre2Type<16>::Pcre2Code *code,
  404. Pcre2Type<16>::GeneralContext *gcontext){
  405. return pcre2_match_data_create_from_pattern_16(code, gcontext);
  406. }
  407. static int match( const Pcre2Type<16>::Pcre2Code *code,
  408. Pcre2Type<16>::Pcre2Sptr subject,
  409. PCRE2_SIZE length,
  410. PCRE2_SIZE startoffset,
  411. uint32_t options,
  412. Pcre2Type<16>::MatchData *match_data,
  413. Pcre2Type<16>::MatchContext *mcontext){
  414. return pcre2_match_16(code, subject, length, startoffset, options, match_data, mcontext);
  415. }
  416. static void match_data_free(Pcre2Type<16>::MatchData *match_data){
  417. pcre2_match_data_free_16(match_data);
  418. }
  419. static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<16>::MatchData *match_data){
  420. return pcre2_get_ovector_pointer_16(match_data);
  421. }
  422. static int pattern_info(const Pcre2Type<16>::Pcre2Code *code, uint32_t what, void *where){
  423. return pcre2_pattern_info_16(code, what, where);
  424. }
  425. static int set_newline(Pcre2Type<16>::CompileContext *ccontext, uint32_t value){
  426. return pcre2_set_newline_16(ccontext, value);
  427. }
  428. //~ static void jit_stack_assign(Pcre2Type<16>::MatchContext *mcontext,
  429. //~ Pcre2Type<16>::JitCallback callback_function,
  430. //~ void *callback_data){
  431. //~ pcre2_jit_stack_assign_16(mcontext, callback_function, callback_data);
  432. //~ }
  433. //~ static Pcre2Type<16>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
  434. //~ Pcre2Type<16>::GeneralContext *gcontext){
  435. //~ return pcre2_jit_stack_create_16(startsize, maxsize, gcontext);
  436. //~ }
  437. //~ static void jit_stack_free(Pcre2Type<16>::JitStack *jit_stack){
  438. //~ pcre2_jit_stack_free_16(jit_stack);
  439. //~ }
  440. //~ static void jit_free_unused_memory(Pcre2Type<16>::GeneralContext *gcontext){
  441. //~ pcre2_jit_free_unused_memory_16(gcontext);
  442. //~ }
  443. //~ static Pcre2Type<16>::MatchContext *match_context_create(Pcre2Type<16>::GeneralContext *gcontext){
  444. //~ return pcre2_match_context_create_16(gcontext);
  445. //~ }
  446. //~ static Pcre2Type<16>::MatchContext *match_context_copy(Pcre2Type<16>::MatchContext *mcontext){
  447. //~ return pcre2_match_context_copy_16(mcontext);
  448. //~ }
  449. //~ static void match_context_free(Pcre2Type<16>::MatchContext *mcontext){
  450. //~ pcre2_match_context_free_16(mcontext);
  451. //~ }
  452. static uint32_t get_ovector_count(Pcre2Type<16>::MatchData *match_data){
  453. return pcre2_get_ovector_count_16(match_data);
  454. }
  455. };
  456. //32-bit version
  457. template<> struct Pcre2Func<32> {
  458. static Pcre2Type<32>::CompileContext* compile_context_create(Pcre2Type<32>::GeneralContext *gcontext){
  459. return pcre2_compile_context_create_32(gcontext);
  460. }
  461. static void compile_context_free(Pcre2Type<32>::CompileContext *ccontext){
  462. pcre2_compile_context_free_32(ccontext);
  463. }
  464. static Pcre2Type<32>::CompileContext* compile_context_copy(Pcre2Type<32>::CompileContext* ccontext){
  465. return pcre2_compile_context_copy_32(ccontext);
  466. }
  467. static const unsigned char * maketables(Pcre2Type<32>::GeneralContext* gcontext){
  468. return pcre2_maketables_32(gcontext);
  469. }
  470. static int set_character_tables(Pcre2Type<32>::CompileContext * ccontext, const unsigned char * table){
  471. return pcre2_set_character_tables_32(ccontext, table);
  472. }
  473. static Pcre2Type<32>::Pcre2Code * compile(Pcre2Type<32>::Pcre2Sptr pattern,
  474. PCRE2_SIZE length,
  475. uint32_t options,
  476. int *errorcode,
  477. PCRE2_SIZE *erroroffset,
  478. Pcre2Type<32>::CompileContext *ccontext){
  479. return pcre2_compile_32(pattern, length, options, errorcode, erroroffset, ccontext);
  480. }
  481. static int jit_compile(Pcre2Type<32>::Pcre2Code *code, uint32_t options){
  482. return pcre2_jit_compile_32(code, options);
  483. }
  484. static int substitute( const Pcre2Type<32>::Pcre2Code *code,
  485. Pcre2Type<32>::Pcre2Sptr subject,
  486. PCRE2_SIZE length,
  487. PCRE2_SIZE startoffset,
  488. uint32_t options,
  489. Pcre2Type<32>::MatchData *match_data,
  490. Pcre2Type<32>::MatchContext *mcontext,
  491. Pcre2Type<32>::Pcre2Sptr replacement,
  492. PCRE2_SIZE rlength,
  493. Pcre2Type<32>::Pcre2Uchar *outputbuffer,
  494. PCRE2_SIZE *outlengthptr){
  495. return pcre2_substitute_32( code, subject, length, startoffset, options, match_data,
  496. mcontext, replacement, rlength, outputbuffer, outlengthptr);
  497. }
  498. //~ static int substring_get_bynumber(Pcre2Type<32>::MatchData *match_data,
  499. //~ uint32_t number,
  500. //~ Pcre2Type<32>::Pcre2Uchar **bufferptr,
  501. //~ PCRE2_SIZE *bufflen){
  502. //~ return pcre2_substring_get_bynumber_32(match_data, number, bufferptr, bufflen);
  503. //~ }
  504. //~ static int substring_get_byname(Pcre2Type<32>::MatchData *match_data,
  505. //~ Pcre2Type<32>::Pcre2Sptr name,
  506. //~ Pcre2Type<32>::Pcre2Uchar **bufferptr,
  507. //~ PCRE2_SIZE *bufflen){
  508. //~ return pcre2_substring_get_byname_32(match_data, name, bufferptr, bufflen);
  509. //~ }
  510. //~ static void substring_free(Pcre2Type<32>::Pcre2Uchar *buffer){
  511. //~ pcre2_substring_free_32(buffer);
  512. //~ }
  513. //~ static Pcre2Type<32>::Pcre2Code * code_copy(const Pcre2Type<32>::Pcre2Code *code){
  514. //~ return pcre2_code_copy_32(code);
  515. //~ }
  516. static void code_free(Pcre2Type<32>::Pcre2Code *code){
  517. pcre2_code_free_32(code);
  518. }
  519. static int get_error_message( int errorcode,
  520. Pcre2Type<32>::Pcre2Uchar *buffer,
  521. PCRE2_SIZE bufflen){
  522. return pcre2_get_error_message_32(errorcode, buffer, bufflen);
  523. }
  524. static Pcre2Type<32>::MatchData * match_data_create_from_pattern(
  525. const Pcre2Type<32>::Pcre2Code *code,
  526. Pcre2Type<32>::GeneralContext *gcontext){
  527. return pcre2_match_data_create_from_pattern_32(code, gcontext);
  528. }
  529. static int match( const Pcre2Type<32>::Pcre2Code *code,
  530. Pcre2Type<32>::Pcre2Sptr subject,
  531. PCRE2_SIZE length,
  532. PCRE2_SIZE startoffset,
  533. uint32_t options,
  534. Pcre2Type<32>::MatchData *match_data,
  535. Pcre2Type<32>::MatchContext *mcontext){
  536. return pcre2_match_32(code, subject, length, startoffset, options, match_data, mcontext);
  537. }
  538. static void match_data_free(Pcre2Type<32>::MatchData *match_data){
  539. pcre2_match_data_free_32(match_data);
  540. }
  541. static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<32>::MatchData *match_data){
  542. return pcre2_get_ovector_pointer_32(match_data);
  543. }
  544. static int pattern_info(const Pcre2Type<32>::Pcre2Code *code, uint32_t what, void *where){
  545. return pcre2_pattern_info_32(code, what, where);
  546. }
  547. static int set_newline(Pcre2Type<32>::CompileContext *ccontext, uint32_t value){
  548. return pcre2_set_newline_32(ccontext, value);
  549. }
  550. //~ static void jit_stack_assign(Pcre2Type<32>::MatchContext *mcontext,
  551. //~ Pcre2Type<32>::JitCallback callback_function,
  552. //~ void *callback_data){
  553. //~ pcre2_jit_stack_assign_32(mcontext, callback_function, callback_data);
  554. //~ }
  555. //~ static Pcre2Type<32>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
  556. //~ Pcre2Type<32>::GeneralContext *gcontext){
  557. //~ return pcre2_jit_stack_create_32(startsize, maxsize, gcontext);
  558. //~ }
  559. //~ static void jit_stack_free(Pcre2Type<32>::JitStack *jit_stack){
  560. //~ pcre2_jit_stack_free_32(jit_stack);
  561. //~ }
  562. //~ static void jit_free_unused_memory(Pcre2Type<32>::GeneralContext *gcontext){
  563. //~ pcre2_jit_free_unused_memory_32(gcontext);
  564. //~ }
  565. //~ static Pcre2Type<32>::MatchContext *match_context_create(Pcre2Type<32>::GeneralContext *gcontext){
  566. //~ return pcre2_match_context_create_32(gcontext);
  567. //~ }
  568. //~ static Pcre2Type<32>::MatchContext *match_context_copy(Pcre2Type<32>::MatchContext *mcontext){
  569. //~ return pcre2_match_context_copy_32(mcontext);
  570. //~ }
  571. //~ static void match_context_free(Pcre2Type<32>::MatchContext *mcontext){
  572. //~ pcre2_match_context_free_32(mcontext);
  573. //~ }
  574. static uint32_t get_ovector_count(Pcre2Type<32>::MatchData *match_data){
  575. return pcre2_get_ovector_count_32(match_data);
  576. }
  577. };
  578. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  579. ///Class to take a std::string modifier value with null safety.
  580. ///You don't need to make an instance of this class to pass modifier,
  581. ///just pass std::string or char const*, whatever seems feasible,
  582. ///implicit conversion will kick in and take care of things for you.
  583. class Modifier{
  584. std::string mod;
  585. public:
  586. ///Default constructor.
  587. Modifier(){}
  588. ///Constructor that takes a std::string.
  589. ///@param x std::string as a reference.
  590. Modifier(std::string const& x):mod(x){}
  591. ///Constructor that takes char const * (null safety is provided by this one)
  592. ///@param x char const *
  593. Modifier(char const *x):mod(x?x:""){}
  594. ///Returns the modifier string
  595. ///@return modifier string (std::string)
  596. std::string str() const { return mod; }
  597. ///Returns the c_str() of modifier string
  598. ///@return char const *
  599. char const * c_str() const { return mod.c_str(); }
  600. ///Returns the length of the modifier string
  601. ///@return length
  602. SIZE_T length() const{ return mod.length(); }
  603. ///operator[] overload to access character by index.
  604. ///@param i index
  605. ///@return character at index i.
  606. char operator[](SIZE_T i) const { return mod[i]; }
  607. };
  608. // Namespace for modifier constants.
  609. // For each modifier constant there is a jpcre2::Uint option value.
  610. // Some modifiers may have multiple values set together (ORed in bitwise operation) and
  611. // thus they may include other modifiers. Such an example is the 'n' modifier. It is combined together with 'u'.
  612. namespace MOD {
  613. // Define modifiers for compile
  614. // String of compile modifier characters for PCRE2 options
  615. static const char C_N[] = "eijmnsuxADJU";
  616. // Array of compile modifier values for PCRE2 options
  617. // Uint is being used in getModifier() in for loop to get the number of element in this array,
  618. // be sure to chnage there if you change here.
  619. static const jpcre2::Uint C_V[12] = { PCRE2_MATCH_UNSET_BACKREF, // Modifier e
  620. PCRE2_CASELESS, // Modifier i
  621. PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF, // Modifier j
  622. PCRE2_MULTILINE, // Modifier m
  623. PCRE2_UTF | PCRE2_UCP, // Modifier n (includes u)
  624. PCRE2_DOTALL, // Modifier s
  625. PCRE2_UTF, // Modifier u
  626. PCRE2_EXTENDED, // Modifier x
  627. PCRE2_ANCHORED, // Modifier A
  628. PCRE2_DOLLAR_ENDONLY, // Modifier D
  629. PCRE2_DUPNAMES, // Modifier J
  630. PCRE2_UNGREEDY // Modifier U
  631. };
  632. // String of compile modifier characters for JPCRE2 options
  633. static const char CJ_N[] = "S";
  634. // Array of compile modifier values for JPCRE2 options
  635. static const jpcre2::Uint CJ_V[1] = { JIT_COMPILE, // Modifier S
  636. };
  637. // Define modifiers for replace
  638. // String of action (replace) modifier characters for PCRE2 options
  639. static const char R_N[] = "eEgx";
  640. // Array of action (replace) modifier values for PCRE2 options
  641. static const jpcre2::Uint R_V[4] = { PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier e
  642. PCRE2_SUBSTITUTE_UNKNOWN_UNSET | PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier E (includes e)
  643. PCRE2_SUBSTITUTE_GLOBAL, // Modifier g
  644. PCRE2_SUBSTITUTE_EXTENDED // Modifier x
  645. };
  646. // String of action (replace) modifier characters for JPCRE2 options
  647. static const char RJ_N[] = "";
  648. // Array of action (replace) modifier values for JPCRE2 options
  649. static const jpcre2::Uint RJ_V[1] = { NONE //placeholder
  650. };
  651. // Define modifiers for match
  652. // String of action (match) modifier characters for PCRE2 options
  653. static const char M_N[] = "A";
  654. // Array of action (match) modifier values for PCRE2 options
  655. static const jpcre2::Uint M_V[1] = { PCRE2_ANCHORED // Modifier A
  656. };
  657. // String of action (match) modifier characters for JPCRE2 options
  658. static const char MJ_N[] = "g";
  659. // Array of action (match) modifier values for JPCRE2 options
  660. static const jpcre2::Uint MJ_V[1] = { FIND_ALL, // Modifier g
  661. };
  662. static inline void toOption(Modifier const& mod, bool x,
  663. Uint const * J_V, char const * J_N, SIZE_T SJ,
  664. Uint const * V, char const * N, SIZE_T S,
  665. Uint* po, Uint* jo,
  666. int* en, SIZE_T* eo
  667. ){
  668. //loop through mod
  669. SIZE_T n = mod.length();
  670. for (SIZE_T i = 0; i < n; ++i) {
  671. //First check for JPCRE2 mods
  672. for(SIZE_T j = 0; j < SJ; ++j){
  673. if(J_N[j] == mod[i]) {
  674. if(x) *jo |= J_V[j];
  675. else *jo &= ~J_V[j];
  676. goto endfor;
  677. }
  678. }
  679. //Now check for PCRE2 mods
  680. for(SIZE_T j = 0; j< S; ++j){
  681. if(N[j] == mod[i]){
  682. if(x) *po |= V[j];
  683. else *po &= ~V[j];
  684. goto endfor;
  685. }
  686. }
  687. //Modifier didn't match, invalid modifier
  688. *en = (int)ERROR::INVALID_MODIFIER;
  689. *eo = (int)mod[i];
  690. endfor:;
  691. }
  692. }
  693. static inline void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
  694. toOption(mod, x,
  695. MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint),
  696. M_V, M_N, sizeof(M_V)/sizeof(Uint),
  697. po, jo, en, eo);
  698. }
  699. static inline void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
  700. toOption(mod, x,
  701. RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint),
  702. R_V, R_N, sizeof(R_V)/sizeof(Uint),
  703. po, jo, en, eo);
  704. }
  705. static inline void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
  706. toOption(mod, x,
  707. CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint),
  708. C_V, C_N, sizeof(C_V)/sizeof(Uint),
  709. po, jo, en, eo);
  710. }
  711. static inline std::string fromOption(Uint const * J_V, char const * J_N, SIZE_T SJ,
  712. Uint const * V, char const * N, SIZE_T S,
  713. Uint po, Uint jo
  714. ){
  715. std::string mod;
  716. //Calculate PCRE2 mod
  717. for(SIZE_T i = 0; i < S; ++i){
  718. if( (V[i] & po) != 0 &&
  719. (V[i] & po) == V[i]) //One option can include other
  720. mod += N[i];
  721. }
  722. //Calculate JPCRE2 mod
  723. for(SIZE_T i = 0; i < SJ; ++i){
  724. if( (J_V[i] & jo) != 0 &&
  725. (J_V[i] & jo) == J_V[i]) //One option can include other
  726. mod += J_N[i];
  727. }
  728. return mod;
  729. }
  730. static inline std::string fromMatchOption(Uint po, Uint jo){
  731. return fromOption(MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint),
  732. M_V, M_N, sizeof(M_V)/sizeof(Uint),
  733. po, jo);
  734. }
  735. static inline std::string fromReplaceOption(Uint po, Uint jo){
  736. return fromOption(RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint),
  737. R_V, R_N, sizeof(R_V)/sizeof(Uint),
  738. po, jo);
  739. }
  740. static inline std::string fromCompileOption(Uint po, Uint jo){
  741. return fromOption(CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint),
  742. C_V, C_N, sizeof(C_V)/sizeof(Uint),
  743. po, jo);
  744. }
  745. } //MOD namespace ends
  746. ///Lets you create custom modifier tables.
  747. ///An instance of this class can be passed to
  748. ///match, replace or compile related class objects.
  749. class ModifierTable{
  750. std::string tabjms;
  751. std::string tabms;
  752. std::string tabjrs;
  753. std::string tabrs;
  754. std::string tabjcs;
  755. std::string tabcs;
  756. VecOpt tabjmv;
  757. VecOpt tabmv;
  758. VecOpt tabjrv;
  759. VecOpt tabrv;
  760. VecOpt tabjcv;
  761. VecOpt tabcv;
  762. void toOption(Modifier const& mod, bool x,
  763. VecOpt const& J_V, std::string const& J_N,
  764. VecOpt const& V, std::string const& N,
  765. Uint* po, Uint* jo, int* en, SIZE_T* eo
  766. ) const{
  767. SIZE_T SJ = J_V.size();
  768. SIZE_T S = V.size();
  769. JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str());
  770. JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str());
  771. MOD::toOption(mod, x,
  772. J_V.empty()?0:&J_V[0], J_N.c_str(), SJ,
  773. V.empty()?0:&V[0], N.c_str(), S,
  774. po, jo, en, eo
  775. );
  776. }
  777. std::string fromOption(VecOpt const& J_V, std::string const& J_N,
  778. VecOpt const& V, std::string const& N,
  779. Uint po, Uint jo) const{
  780. SIZE_T SJ = J_V.size();
  781. SIZE_T S = V.size();
  782. JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str());
  783. JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str());
  784. return MOD::fromOption(J_V.empty()?0:&J_V[0], J_N.c_str(), SJ,
  785. V.empty()?0:&V[0], N.c_str(), S,
  786. po, jo);
  787. }
  788. void parseModifierTable(std::string& tabjs, VecOpt& tabjv,
  789. std::string& tab_s, VecOpt& tab_v,
  790. std::string const& tabs, VecOpt const& tabv);
  791. public:
  792. ///Default constructor that creates an empty modifier table.
  793. ModifierTable(){}
  794. ///@overload
  795. ///@param deflt Initialize with default table if true, otherwise keep empty.
  796. ModifierTable(bool deflt){
  797. if(deflt) setAllToDefault();
  798. }
  799. ///Reset the match modifier table to its initial (empty) state including memory.
  800. ///@return A reference to the calling ModifierTable object.
  801. ModifierTable& resetMatchModifierTable(){
  802. std::string().swap(tabjms);
  803. std::string().swap(tabms);
  804. VecOpt().swap(tabjmv);
  805. VecOpt().swap(tabmv);
  806. return *this;
  807. }
  808. ///Reset the replace modifier table to its initial (empty) state including memory.
  809. ///@return A reference to the calling ModifierTable object.
  810. ModifierTable& resetReplaceModifierTable(){
  811. std::string().swap(tabjrs);
  812. std::string().swap(tabrs);
  813. VecOpt().swap(tabjrv);
  814. VecOpt().swap(tabrv);
  815. return *this;
  816. }
  817. ///Reset the compile modifier table to its initial (empty) state including memory.
  818. ///@return A reference to the calling ModifierTable object.
  819. ModifierTable& resetCompileModifierTable(){
  820. std::string().swap(tabjcs);
  821. std::string().swap(tabcs);
  822. VecOpt().swap(tabjcv);
  823. VecOpt().swap(tabcv);
  824. return *this;
  825. }
  826. ///Reset the modifier tables to their initial (empty) state including memory.
  827. ///@return A reference to the calling ModifierTable object.
  828. ModifierTable& reset(){
  829. resetMatchModifierTable();
  830. resetReplaceModifierTable();
  831. resetCompileModifierTable();
  832. return *this;
  833. }
  834. ///Clear the match modifier table to its initial (empty) state.
  835. ///Memory may retain for further use.
  836. ///@return A reference to the calling ModifierTable object.
  837. ModifierTable& clearMatchModifierTable(){
  838. tabjms.clear();
  839. tabms.clear();
  840. tabjmv.clear();
  841. tabmv.clear();
  842. return *this;
  843. }
  844. ///Clear the replace modifier table to its initial (empty) state.
  845. ///Memory may retain for further use.
  846. ///@return A reference to the calling ModifierTable object.
  847. ModifierTable& clearReplaceModifierTable(){
  848. tabjrs.clear();
  849. tabrs.clear();
  850. tabjrv.clear();
  851. tabrv.clear();
  852. return *this;
  853. }
  854. ///Clear the compile modifier table to its initial (empty) state.
  855. ///Memory may retain for further use.
  856. ///@return A reference to the calling ModifierTable object.
  857. ModifierTable& clearCompileModifierTable(){
  858. tabjcs.clear();
  859. tabcs.clear();
  860. tabjcv.clear();
  861. tabcv.clear();
  862. return *this;
  863. }
  864. ///Clear the modifier tables to their initial (empty) state.
  865. ///Memory may retain for further use.
  866. ///@return A reference to the calling ModifierTable object.
  867. ModifierTable& clear(){
  868. clearMatchModifierTable();
  869. clearReplaceModifierTable();
  870. clearCompileModifierTable();
  871. return *this;
  872. }
  873. ///Modifier parser for match related options.
  874. ///@param mod modifier string
  875. ///@param x whether to add or remove the modifers.
  876. ///@param po pointer to PCRE2 match option that will be modified.
  877. ///@param jo pointer to JPCRE2 match option that will be modified.
  878. ///@param en where to put the error number.
  879. ///@param eo where to put the error offset.
  880. void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
  881. toOption(mod, x,tabjmv,tabjms,tabmv, tabms,po,jo,en,eo);
  882. }
  883. ///Modifier parser for replace related options.
  884. ///@param mod modifier string
  885. ///@param x whether to add or remove the modifers.
  886. ///@param po pointer to PCRE2 replace option that will be modified.
  887. ///@param jo pointer to JPCRE2 replace option that will be modified.
  888. ///@param en where to put the error number.
  889. ///@param eo where to put the error offset.
  890. void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
  891. return toOption(mod, x,tabjrv,tabjrs,tabrv,tabrs,po,jo,en,eo);
  892. }
  893. ///Modifier parser for compile related options.
  894. ///@param mod modifier string
  895. ///@param x whether to add or remove the modifers.
  896. ///@param po pointer to PCRE2 compile option that will be modified.
  897. ///@param jo pointer to JPCRE2 compile option that will be modified.
  898. ///@param en where to put the error number.
  899. ///@param eo where to put the error offset.
  900. void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
  901. return toOption(mod, x,tabjcv,tabjcs,tabcv,tabcs,po,jo,en,eo);
  902. }
  903. ///Take match related option value and convert to modifier string.
  904. ///@param po PCRE2 option.
  905. ///@param jo JPCRE2 option.
  906. ///@return modifier string (std::string)
  907. std::string fromMatchOption(Uint po, Uint jo) const {
  908. return fromOption(tabjmv,tabjms,tabmv,tabms,po,jo);
  909. }
  910. ///Take replace related option value and convert to modifier string.
  911. ///@param po PCRE2 option.
  912. ///@param jo JPCRE2 option.
  913. ///@return modifier string (std::string)
  914. std::string fromReplaceOption(Uint po, Uint jo) const {
  915. return fromOption(tabjrv,tabjrs,tabrv,tabrs,po,jo);
  916. }
  917. ///Take compile related option value and convert to modifier string.
  918. ///@param po PCRE2 option.
  919. ///@param jo JPCRE2 option.
  920. ///@return modifier string (std::string)
  921. std::string fromCompileOption(Uint po, Uint jo) const {
  922. return fromOption(tabjcv,tabjcs,tabcv,tabcs,po,jo);
  923. }
  924. ///Set modifier table for match.
  925. ///Takes a string and a vector of sequential options.
  926. ///@param tabs modifier string (list of modifiers)
  927. ///@param tabv vector of Uint (options).
  928. ///@return A reference to the calling ModifierTable object.
  929. ModifierTable& setMatchModifierTable(std::string const& tabs, VecOpt const& tabv){
  930. parseModifierTable(tabjms, tabjmv, tabms, tabmv, tabs, tabv);
  931. return *this;
  932. }
  933. ///Set modifier table for match.
  934. ///Takes a string and an array of sequential options.
  935. ///@param tabs modifier string (list of modifiers)
  936. ///@param tabvp array of Uint (options). If null, table is set to empty.
  937. ///@return A reference to the calling ModifierTable object.
  938. ModifierTable& setMatchModifierTable(std::string const& tabs, const Uint* tabvp){
  939. if(tabvp) {
  940. VecOpt tabv(tabvp, tabvp + tabs.length());
  941. setMatchModifierTable(tabs, tabv);
  942. } else clearMatchModifierTable();
  943. return *this;
  944. }
  945. ///@overload
  946. ///...
  947. ///This one takes modifier and value by array.
  948. ///If the arrays are not of the same length, the behavior is undefined.
  949. ///If any of the argument is null, the table is set empty.
  950. ///@param tabsp modifier string (list of modifiers).
  951. ///@param tabvp array of Uint (options).
  952. ///@return A reference to the calling ModifierTable object.
  953. ModifierTable& setMatchModifierTable(const char* tabsp, const Uint* tabvp){
  954. if(tabsp && tabvp) {
  955. std::string tabs(tabsp);
  956. VecOpt tabv(tabvp, tabvp + tabs.length());
  957. setMatchModifierTable(tabs, tabv);
  958. } else clearMatchModifierTable();
  959. return *this;
  960. }
  961. ///Set modifier table for replace.
  962. ///Takes a string and a vector of sequential options.
  963. ///@param tabs modifier string (list of modifiers)
  964. ///@param tabv vector of Uint (options).
  965. ///@return A reference to the calling ModifierTable object.
  966. ModifierTable& setReplaceModifierTable(std::string const& tabs, VecOpt const& tabv){
  967. parseModifierTable(tabjrs, tabjrv, tabrs, tabrv, tabs, tabv);
  968. return *this;
  969. }
  970. ///Set modifier table for replace.
  971. ///Takes a string and an array of sequential options.
  972. ///@param tabs modifier string (list of modifiers)
  973. ///@param tabvp array of Uint (options). If null, table is set to empty.
  974. ///@return A reference to the calling ModifierTable object.
  975. ModifierTable& setReplaceModifierTable(std::string const& tabs, const Uint* tabvp){
  976. if(tabvp) {
  977. VecOpt tabv(tabvp, tabvp + tabs.length());
  978. setReplaceModifierTable(tabs, tabv);
  979. } else clearReplaceModifierTable();
  980. return *this;
  981. }
  982. ///@overload
  983. ///...
  984. ///This one takes modifier and value by array.
  985. ///If the arrays are not of the same length, the behavior is undefined.
  986. ///If any of the argument is null, the table is set empty.
  987. ///@param tabsp modifier string (list of modifiers).
  988. ///@param tabvp array of Uint (options).
  989. ///@return A reference to the calling ModifierTable object.
  990. ModifierTable& setReplaceModifierTable(const char* tabsp, const Uint* tabvp){
  991. if(tabsp && tabvp) {
  992. std::string tabs(tabsp);
  993. VecOpt tabv(tabvp, tabvp + tabs.length());
  994. setReplaceModifierTable(tabs, tabv);
  995. } else clearReplaceModifierTable();
  996. return *this;
  997. }
  998. ///Set modifier table for compile.
  999. ///Takes a string and a vector of sequential options.
  1000. ///@param tabs modifier string (list of modifiers)
  1001. ///@param tabv vector of Uint (options).
  1002. ///@return A reference to the calling ModifierTable object.
  1003. ModifierTable& setCompileModifierTable(std::string const& tabs, VecOpt const& tabv){
  1004. parseModifierTable(tabjcs, tabjcv, tabcs, tabcv, tabs, tabv);
  1005. return *this;
  1006. }
  1007. ///Set modifier table for compile.
  1008. ///Takes a string and an array of sequential options.
  1009. ///@param tabs modifier string (list of modifiers)
  1010. ///@param tabvp array of Uint (options). If null, table is set to empty.
  1011. ///@return A reference to the calling ModifierTable object.
  1012. ModifierTable& setCompileModifierTable(std::string const& tabs, const Uint* tabvp){
  1013. if(tabvp) {
  1014. VecOpt tabv(tabvp, tabvp + tabs.length());
  1015. setCompileModifierTable(tabs, tabv);
  1016. } else clearCompileModifierTable();
  1017. return *this;
  1018. }
  1019. ///@overload
  1020. ///...
  1021. ///This one takes modifier and value by array.
  1022. ///If the arrays are not of the same length, the behavior is undefined.
  1023. ///If any of the argument is null, the table is set empty.
  1024. ///@param tabsp modifier string (list of modifiers).
  1025. ///@param tabvp array of Uint (options).
  1026. ///@return A reference to the calling ModifierTable object.
  1027. ModifierTable& setCompileModifierTable(const char* tabsp, const Uint* tabvp){
  1028. if(tabsp && tabvp) {
  1029. std::string tabs(tabsp);
  1030. VecOpt tabv(tabvp, tabvp + tabs.length());
  1031. setCompileModifierTable(tabs, tabv);
  1032. } else clearCompileModifierTable();
  1033. return *this;
  1034. }
  1035. ///Set match modifie table to default
  1036. ///@return A reference to the calling ModifierTable object.
  1037. ModifierTable& setMatchModifierTableToDefault(){
  1038. tabjms = std::string(MOD::MJ_N, MOD::MJ_N + sizeof(MOD::MJ_V)/sizeof(Uint));
  1039. tabms = std::string(MOD::M_N, MOD::M_N + sizeof(MOD::M_V)/sizeof(Uint));
  1040. tabjmv = VecOpt(MOD::MJ_V, MOD::MJ_V + sizeof(MOD::MJ_V)/sizeof(Uint));
  1041. tabmv = VecOpt(MOD::M_V, MOD::M_V + sizeof(MOD::M_V)/sizeof(Uint));
  1042. return *this;
  1043. }
  1044. ///Set replace modifier table to default.
  1045. ///@return A reference to the calling ModifierTable object.
  1046. ModifierTable& setReplaceModifierTableToDefault(){
  1047. tabjrs = std::string(MOD::RJ_N, MOD::RJ_N + sizeof(MOD::RJ_V)/sizeof(Uint));
  1048. tabrs = std::string(MOD::R_N, MOD::R_N + sizeof(MOD::R_V)/sizeof(Uint));
  1049. tabjrv = VecOpt(MOD::RJ_V, MOD::RJ_V + sizeof(MOD::RJ_V)/sizeof(Uint));
  1050. tabrv = VecOpt(MOD::R_V, MOD::R_V + sizeof(MOD::R_V)/sizeof(Uint));
  1051. return *this;
  1052. }
  1053. ///Set compile modifier table to default.
  1054. ///@return A reference to the calling ModifierTable object.
  1055. ModifierTable& setCompileModifierTableToDefault(){
  1056. tabjcs = std::string(MOD::CJ_N, MOD::CJ_N + sizeof(MOD::CJ_V)/sizeof(Uint));
  1057. tabcs = std::string(MOD::C_N, MOD::C_N + sizeof(MOD::C_V)/sizeof(Uint));
  1058. tabjcv = VecOpt(MOD::CJ_V, MOD::CJ_V + sizeof(MOD::CJ_V)/sizeof(Uint));
  1059. tabcv = VecOpt(MOD::C_V, MOD::C_V + sizeof(MOD::C_V)/sizeof(Uint));
  1060. return *this;
  1061. }
  1062. ///Set all tables to default.
  1063. ///@return A reference to the calling ModifierTable object.
  1064. ModifierTable& setAllToDefault(){
  1065. setMatchModifierTableToDefault();
  1066. setReplaceModifierTableToDefault();
  1067. setCompileModifierTableToDefault();
  1068. return *this;
  1069. }
  1070. };
  //Message texts used when building error/warning messages.
  //Only the declarations live in the primary template; every supported
  //character type gets explicit inline specializations below
  //(inline to prevent multiple definition).
  template<typename Char_T> struct MSG{
      static std::basic_string<Char_T> INVALID_MODIFIER(void);
      static std::basic_string<Char_T> INSUFFICIENT_OVECTOR(void);
  };

  //char
  template<> inline std::basic_string<char> MSG<char>::INVALID_MODIFIER(){
      return std::basic_string<char>("Invalid modifier: ");
  }
  template<> inline std::basic_string<char> MSG<char>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<char>("ovector wasn't big enough");
  }

  //wchar_t
  template<> inline std::basic_string<wchar_t> MSG<wchar_t>::INVALID_MODIFIER(){
      return std::basic_string<wchar_t>(L"Invalid modifier: ");
  }
  template<> inline std::basic_string<wchar_t> MSG<wchar_t>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<wchar_t>(L"ovector wasn't big enough");
  }

  #if __cplusplus >= 201103L
  //char16_t
  template<> inline std::basic_string<char16_t> MSG<char16_t>::INVALID_MODIFIER(){
      return std::basic_string<char16_t>(u"Invalid modifier: ");
  }
  template<> inline std::basic_string<char16_t> MSG<char16_t>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<char16_t>(u"ovector wasn't big enough");
  }

  //char32_t
  template<> inline std::basic_string<char32_t> MSG<char32_t>::INVALID_MODIFIER(){
      return std::basic_string<char32_t>(U"Invalid modifier: ");
  }
  template<> inline std::basic_string<char32_t> MSG<char32_t>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<char32_t>(U"ovector wasn't big enough");
  }
  #endif
  1088. ///struct to select the types.
  1089. ///
  1090. ///@tparam Char_T Character type (`char`, `wchar_t`, `char16_t`, `char32_t`)
  1091. ///@tparam Map Optional parameter (Only `>= C++11`) to specify a map container (`std::map`, `std::unordered_map` etc..). Default is `std::map`.
  1092. ///
  1093. ///The character type (`Char_T`) must be in accordance with the PCRE2 library you are linking against.
  1094. ///If not sure which library you need, link against all 3 PCRE2 libraries and they will be used as needed.
  1095. ///
  1096. ///If you want to be specific, then here's the rule:
  1097. ///
  1098. ///1. If `Char_T` is 8 bit, you need 8 bit PCRE2 library
  1099. ///2. If `Char_T` is 16 bit, you need 16 bit PCRE2 library
  1100. ///3. If `Char_T` is 32 bit, you need 32 bit PCRE2 library
  1101. ///4. if `Char_T` is not 8 or 16 or 32 bit, you will get compile error.
  1102. ///
  1103. ///In `>= C++11` you get an additional optional template parameter to specify a map container.
  1104. ///For example, you can use `std::unordered_map` instead of the default `std::map`:
  1105. /// ```cpp
  1106. /// #include <unordered_map>
  1107. /// typedef jpcre2::select<char, std::unordered_map> jp;
  1108. /// ```
  1109. ///
  1110. ///We will use the following typedef throughout this doc:
  1111. ///```cpp
  1112. ///typedef jpcre2::select<Char_T> jp;
  1113. ///```
  1114. #if __cplusplus >= 201103L
  1115. template<typename Char_T, template<typename...> class Map=std::map>
  1116. #else
  1117. template<typename Char_T>
  1118. #endif
  1119. struct select{
  1120. ///Typedef for character (`char`, `wchar_t`, `char16_t`, `char32_t`)
  1121. typedef Char_T Char;
  1122. //typedef Char_T Char;
  1123. ///Typedef for string (`std::string`, `std::wstring`, `std::u16string`, `std::u32string`).
  1124. ///Defined as `std::basic_string<Char_T>`.
  1125. ///May be this list will make more sense:
  1126. ///Character | String
  1127. ///--------- | -------
  1128. ///char | std::string
  1129. ///wchar_t | std::wstring
  1130. ///char16_t | std::u16string (>=C++11)
  1131. ///char32_t | std::u32string (>=C++11)
  1132. typedef typename std::basic_string<Char_T> String;
  1133. #if __cplusplus >= 201103L
  1134. ///Map for Named substrings.
  1135. typedef class Map<String, String> MapNas;
  1136. ///Substring name to Substring number map.
  1137. typedef class Map<String, SIZE_T> MapNtN;
  1138. #else
  1139. ///Map for Named substrings.
  1140. typedef typename std::map<String, String> MapNas;
  1141. ///Substring name to Substring number map.
  1142. typedef typename std::map<String, SIZE_T> MapNtN;
  1143. #endif
  1144. ///Allow spelling mistake of MapNtN as MapNtn.
  1145. typedef MapNtN MapNtn;
  1146. ///Vector for Numbered substrings (Sub container).
  1147. typedef typename std::vector<String> NumSub;
  1148. ///Vector of matches with named substrings.
  1149. typedef typename std::vector<MapNas> VecNas;
  1150. ///Vector of substring name to substring number map.
  1151. typedef typename std::vector<MapNtN> VecNtN;
  1152. ///Allow spelling mistake of VecNtN as VecNtn.
  1153. typedef VecNtN VecNtn;
  1154. ///Vector of matches with numbered substrings.
  1155. typedef typename std::vector<NumSub> VecNum;
  1156. //These are to shorten the code
  1157. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Uchar Pcre2Uchar;
  1158. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Sptr Pcre2Sptr;
  1159. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Code Pcre2Code;
  1160. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::CompileContext CompileContext;
  1161. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::MatchData MatchData;
  1162. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::GeneralContext GeneralContext;
  1163. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::MatchContext MatchContext;
  1164. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::JitCallback JitCallback;
  1165. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::JitStack JitStack;
  1166. template<typename T>
  1167. static String toString(T); //prevent implicit type conversion of T
  1168. ///Converts a Char_T to jpcre2::select::String
  1169. ///@param a Char_T
  1170. ///@return jpcre2::select::String
  1171. static String toString(Char a){
  1172. return a?String(1, a):String();
  1173. }
  1174. ///@overload
  1175. ///...
  1176. ///Converts a Char_T const * to jpcre2::select::String
  1177. ///@param a Char_T const *
  1178. ///@return jpcre2::select::String
  1179. static String toString(Char const *a){
  1180. return a?String(a):String();
  1181. }
  1182. ///@overload
  1183. ///...
  1184. ///Converts a Char_T* to jpcre2::select::String
  1185. ///@param a Char_T const *
  1186. ///@return jpcre2::select::String
  1187. static String toString(Char* a){
  1188. return a?String(a):String();
  1189. }
  1190. ///@overload
  1191. ///...
  1192. ///Converts a PCRE2_UCHAR to String
  1193. ///@param a PCRE2_UCHAR
  1194. ///@return jpcre2::select::String
  1195. static String toString(Pcre2Uchar* a) {
  1196. return a?String((Char*) a):String();
  1197. }
  1198. ///Retruns error message from PCRE2 error number
  1199. ///@param err_num error number (negative)
  1200. ///@return message as jpcre2::select::String.
  1201. static String getPcre2ErrorMessage(int err_num) {
  1202. Pcre2Uchar buffer[sizeof(Char)*CHAR_BIT*1024];
  1203. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_error_message(err_num, buffer, sizeof(buffer));
  1204. return toString((Pcre2Uchar*) buffer);
  1205. }
  1206. ///Returns error message (either JPCRE2 or PCRE2) from error number and error offset
  1207. ///@param err_num error number (negative for PCRE2, positive for JPCRE2)
  1208. ///@param err_off error offset
  1209. ///@return message as jpcre2::select::String.
  1210. static String getErrorMessage(int err_num, int err_off) {
  1211. if(err_num == (int)ERROR::INVALID_MODIFIER){
  1212. return MSG<Char>::INVALID_MODIFIER() + toString((Char)err_off);
  1213. } else if(err_num == (int)ERROR::INSUFFICIENT_OVECTOR){
  1214. return MSG<Char>::INSUFFICIENT_OVECTOR();
  1215. } else if(err_num != 0) {
  1216. return getPcre2ErrorMessage((int) err_num);
  1217. } else return String();
  1218. }
  1219. //forward declaration
  1220. class Regex;
  1221. class RegexMatch;
  1222. class RegexReplace;
  1223. class MatchEvaluator;
  1224. /** Provides public constructors to create RegexMatch objects.
  1225. * Every RegexMatch object should be associated with a Regex object.
  1226. * This class stores a pointer to its' associated Regex object, thus when
  1227. * the content of the associated Regex object is changed, there will be no need to
  1228. * set the pointer again.
  1229. *
  1230. * Examples:
  1231. *
  1232. * ```cpp
  1233. * jp::Regex re;
  1234. * jp::RegexMatch rm;
  1235. * rm.setRegexObject(&re);
  1236. * rm.match("subject", "g"); // 0 match
  1237. * re.compile("\\w");
  1238. * rm.match(); // 7 matches
  1239. * ```
  1240. */
  1241. class RegexMatch {
  1242. private:
  1243. friend class MatchEvaluator;
  1244. Regex const *re;
  1245. String m_subject;
  1246. String const *m_subject_ptr;
  1247. Uint match_opts;
  1248. Uint jpcre2_match_opts;
  1249. MatchContext *mcontext;
  1250. ModifierTable const * modtab;
  1251. MatchData * mdata;
  1252. PCRE2_SIZE _start_offset; //name collision, use _ at start
  1253. VecNum* vec_num;
  1254. VecNas* vec_nas;
  1255. VecNtN* vec_ntn;
  1256. VecOff* vec_soff;
  1257. VecOff* vec_eoff;
  1258. bool getNumberedSubstrings(int, Pcre2Sptr, PCRE2_SIZE*);
  1259. bool getNamedSubstrings(int, int, Pcre2Sptr, Pcre2Sptr, PCRE2_SIZE*);
  1260. void init_vars() {
  1261. re = 0;
  1262. vec_num = 0;
  1263. vec_nas = 0;
  1264. vec_ntn = 0;
  1265. vec_soff = 0;
  1266. vec_eoff = 0;
  1267. match_opts = 0;
  1268. jpcre2_match_opts = 0;
  1269. error_number = 0;
  1270. error_offset = 0;
  1271. _start_offset = 0;
  1272. m_subject_ptr = &m_subject;
  1273. mcontext = 0;
  1274. modtab = 0;
  1275. mdata = 0;
  1276. }
  1277. void onlyCopy(RegexMatch const &rm){
  1278. re = rm.re; //only pointer should be copied
  1279. //pointer to subject may point to m_subject or other user data
  1280. m_subject_ptr = (rm.m_subject_ptr == &rm.m_subject) ? &m_subject //not &rm.m_subject
  1281. : rm.m_subject_ptr;
  1282. //underlying data of vectors are not handled by RegexMatch
  1283. //thus it's safe to just copy the pointers.
  1284. vec_num = rm.vec_num;
  1285. vec_nas = rm.vec_nas;
  1286. vec_ntn = rm.vec_ntn;
  1287. vec_soff = rm.vec_soff;
  1288. vec_eoff = rm.vec_eoff;
  1289. match_opts = rm.match_opts;
  1290. jpcre2_match_opts = rm.jpcre2_match_opts;
  1291. error_number = rm.error_number;
  1292. error_offset = rm.error_offset;
  1293. _start_offset = rm._start_offset;
  1294. mcontext = rm.mcontext;
  1295. modtab = rm.modtab;
  1296. mdata = rm.mdata;
  1297. }
  1298. void deepCopy(RegexMatch const &rm){
  1299. m_subject = rm.m_subject;
  1300. onlyCopy(rm);
  1301. }
  1302. #if __cplusplus >= 201103L
  1303. void deepMove(RegexMatch& rm){
  1304. m_subject = std::move_if_noexcept(rm.m_subject);
  1305. onlyCopy(rm);
  1306. }
  1307. #endif
  1308. friend class Regex;
  1309. protected:
  1310. int error_number;
  1311. PCRE2_SIZE error_offset;
  1312. public:
  1313. ///Default constructor.
  1314. RegexMatch(){
  1315. init_vars();
  1316. }
  1317. ///@overload
  1318. ///...
  1319. ///Creates a RegexMatch object associating a Regex object.
  1320. ///Underlying data is not modified.
  1321. ///@param r pointer to a Regex object
  1322. RegexMatch(Regex const *r) {
  1323. init_vars();
  1324. re = r;
  1325. }
  1326. ///@overload
  1327. ///...
  1328. ///Copy constructor.
  1329. ///@param rm Reference to RegexMatch object
  1330. RegexMatch(RegexMatch const &rm){
  1331. init_vars();
  1332. deepCopy(rm);
  1333. }
  1334. ///Overloaded copy-assignment operator.
  1335. ///@param rm RegexMatch object
  1336. ///@return A reference to the calling RegexMatch object.
  1337. virtual RegexMatch& operator=(RegexMatch const &rm){
  1338. if(this == &rm) return *this;
  1339. deepCopy(rm);
  1340. return *this;
  1341. }
  1342. #if __cplusplus >= 201103L
  1343. ///@overload
  1344. ///...
  1345. ///Move constructor.
  1346. ///This constructor steals resources from the argument.
  1347. ///It leaves the argument in a valid but indeterminate sate.
  1348. ///The indeterminate state can be returned to normal by calling reset() on that object.
  1349. ///@param rm rvalue reference to a RegexMatch object
  1350. RegexMatch(RegexMatch&& rm){
  1351. init_vars();
  1352. deepMove(rm);
  1353. }
  1354. ///@overload
  1355. ///...
  1356. ///Overloaded move-assignment operator.
  1357. ///This constructor steals resources from the argument.
  1358. ///It leaves the argument in a valid but indeterminate sate.
  1359. ///The indeterminate state can be returned to normal by calling reset() on that object.
  1360. ///@param rm rvalue reference to a RegexMatch object
  1361. ///@return A reference to the calling RegexMatch object.
  1362. virtual RegexMatch& operator=(RegexMatch&& rm){
  1363. if(this == &rm) return *this;
  1364. deepMove(rm);
  1365. return *this;
  1366. }
  1367. #endif
  1368. ///Destructor
  1369. ///Frees all internal memories that were used.
  1370. virtual ~RegexMatch() {}
  1371. ///Reset all class variables to its default (initial) state including memory.
  1372. ///Data in the vectors will retain (as it's external)
  1373. ///You will need to pass vector pointers again after calling this function to get match results.
  1374. ///@return Reference to the calling RegexMatch object.
  1375. virtual RegexMatch& reset() {
  1376. String().swap(m_subject); //not ptr , external string won't be modified.
  1377. init_vars();
  1378. return *this;
  1379. }
  1380. ///Clear all class variables (may retain some memory for further use).
  1381. ///Data in the vectors will retain (as it's external)
  1382. ///You will need to pass vector pointers again after calling this function to get match results.
  1383. ///@return Reference to the calling RegexMatch object.
  1384. virtual RegexMatch& clear(){
  1385. m_subject.clear(); //not ptr , external string won't be modified.
  1386. init_vars();
  1387. return *this;
  1388. }
  1389. ///reset match related errors to zero.
  1390. ///If you want to examine the error status of a function call in the method chain,
  1391. ///add this function just before your target function so that the error is set to zero
  1392. ///before that target function is called, and leave everything out after the target
  1393. ///function so that there will be no additional errors from other function calls.
  1394. ///@return A reference to the RegexMatch object
  1395. ///@see Regex::resetErrors()
  1396. ///@see RegexReplace::resetErrors()
  1397. virtual RegexMatch& resetErrors(){
  1398. error_number = 0;
  1399. error_offset = 0;
  1400. return *this;
  1401. }
  1402. /// Returns the last error number
  1403. ///@return Last error number
  1404. virtual int getErrorNumber() const {
  1405. return error_number;
  1406. }
  1407. /// Returns the last error offset
  1408. ///@return Last error offset
  1409. virtual int getErrorOffset() const {
  1410. return (int)error_offset;
  1411. }
  1412. /// Returns the last error message
  1413. ///@return Last error message
  1414. virtual String getErrorMessage() const {
  1415. #if __cplusplus >= 201103L
  1416. return select<Char, Map>::getErrorMessage(error_number, error_offset);
  1417. #else
  1418. return select<Char>::getErrorMessage(error_number, error_offset);
  1419. #endif
  1420. }
  1421. ///Get subject string (by value).
  1422. ///@return subject string
  1423. ///@see RegexReplace::getSubject()
  1424. virtual String getSubject() const {
  1425. return *m_subject_ptr;
  1426. }
  1427. ///Get pointer to subject string.
  1428. ///Data can not be changed with this pointer.
  1429. ///@return constant subject string pointer
  1430. ///@see RegexReplace::getSubjectPointer()
  1431. virtual String const * getSubjectPointer() const {
  1432. return m_subject_ptr;
  1433. }
  1434. /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
  1435. ///
  1436. /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized
  1437. /// as long as you don't do that explicitly. Calling RegexMatch::setModifier() will re-set them.
  1438. ///
  1439. /// **Mixed or combined modifier**.
  1440. ///
  1441. /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
  1442. /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
  1443. /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
  1444. /// get set, and when you remove the 'n' modifier (with `RegexMatch::changeModifier()`), both will get removed.
  1445. ///@return Calculated modifier string (std::string)
  1446. ///@see Regex::getModifier()
  1447. ///@see RegexReplace::getModifier()
  1448. virtual std::string getModifier() const {
  1449. return modtab ? modtab->fromMatchOption(match_opts, jpcre2_match_opts)
  1450. : MOD::fromMatchOption(match_opts, jpcre2_match_opts);
  1451. }
  1452. ///Get the modifier table that is set,
  1453. ///@return pointer to constant ModifierTable.
  1454. virtual ModifierTable const* getModifierTable(){
  1455. return modtab;
  1456. }
  1457. ///Get PCRE2 option
  1458. ///@return PCRE2 option for match operation
  1459. ///@see Regex::getPcre2Option()
  1460. ///@see RegexReplace::getPcre2Option()
  1461. virtual Uint getPcre2Option() const {
  1462. return match_opts;
  1463. }
  1464. /// Get JPCRE2 option
  1465. ///@return JPCRE2 options for match operation
  1466. ///@see Regex::getJpcre2Option()
  1467. ///@see RegexReplace::getJpcre2Option()
  1468. virtual Uint getJpcre2Option() const {
  1469. return jpcre2_match_opts;
  1470. }
  1471. /// Get offset from where match will start in the subject.
  1472. /// @return Start offset
  1473. virtual PCRE2_SIZE getStartOffset() const {
  1474. return _start_offset;
  1475. }
  1476. ///Get pre-set match start offset vector pointer.
  1477. ///The pointer must be set with RegexMatch::setMatchStartOffsetVector() beforehand
  1478. ///for this to work i.e it is just a convenience method to get the pre-set vector pointer.
  1479. ///@return pointer to the const match start offset vector
  1480. virtual VecOff const* getMatchStartOffsetVector() const {
  1481. return vec_soff;
  1482. }
  1483. ///Get pre-set match end offset vector pointer.
  1484. ///The pointer must be set with RegexMatch::setMatchEndOffsetVector() beforehand
  1485. ///for this to work i.e it is just a convenience method to get the pre-set vector pointer.
  1486. ///@return pointer to the const end offset vector
  1487. virtual VecOff const* getMatchEndOffsetVector() const {
  1488. return vec_eoff;
  1489. }
  1490. ///Get a pointer to the associated Regex object.
  1491. ///If no actual Regex object is associated, null is returned.
  1492. ///@return A pointer to the associated constant Regex object or null.
  1493. virtual Regex const * getRegexObject() const {
  1494. return re;
  1495. }
  1496. ///Get pointer to numbered substring vector.
  1497. ///@return Pointer to const numbered substring vector.
  1498. virtual VecNum const* getNumberedSubstringVector() const {
  1499. return vec_num;
  1500. }
  1501. ///Get pointer to named substring vector.
  1502. ///@return Pointer to const named substring vector.
  1503. virtual VecNas const* getNamedSubstringVector() const {
  1504. return vec_nas;
  1505. }
  1506. ///Get pointer to name to number map vector.
  1507. ///@return Pointer to const name to number map vector.
  1508. virtual VecNtN const* getNameToNumberMapVector() const {
  1509. return vec_ntn;
  1510. }
  1511. ///Set the associated regex object.
  1512. ///Null pointer unsets it.
  1513. ///Underlying data is not modified.
  1514. ///@param r Pointer to a Regex object.
  1515. ///@return Reference to the calling RegexMatch object.
  1516. virtual RegexMatch& setRegexObject(Regex const *r){
  1517. re = r;
  1518. return *this;
  1519. }
  1520. /// Set a pointer to the numbered substring vector.
  1521. /// Null pointer unsets it.
  1522. ///
  1523. /// This vector will be filled with numbered (indexed) captured groups.
  1524. /// @param v pointer to the numbered substring vector
  1525. /// @return Reference to the calling RegexMatch object
  1526. virtual RegexMatch& setNumberedSubstringVector(VecNum* v) {
  1527. vec_num = v;
  1528. return *this;
  1529. }
  1530. /// Set a pointer to the named substring vector.
  1531. /// Null pointer unsets it.
  1532. ///
  1533. /// This vector will be populated with named captured groups.
  1534. /// @param v pointer to the named substring vector
  1535. /// @return Reference to the calling RegexMatch object
  1536. virtual RegexMatch& setNamedSubstringVector(VecNas* v) {
  1537. vec_nas = v;
  1538. return *this;
  1539. }
  1540. /// Set a pointer to the name to number map vector.
  1541. /// Null pointer unsets it.
  1542. ///
  1543. /// This vector will be populated with name to number map for captured groups.
  1544. /// @param v pointer to the name to number map vector
  1545. /// @return Reference to the calling RegexMatch object
  1546. virtual RegexMatch& setNameToNumberMapVector(VecNtN* v) {
  1547. vec_ntn = v;
  1548. return *this;
  1549. }
  1550. /// Set the pointer to a vector to store the offsets where matches
  1551. /// start in the subject.
  1552. /// Null pointer unsets it.
  1553. /// @param v Pointer to a jpcre2::VecOff vector (std::vector<size_t>)
  1554. /// @return Reference to the calling RegexMatch object
  1555. virtual RegexMatch& setMatchStartOffsetVector(VecOff* v){
  1556. vec_soff = v;
  1557. return *this;
  1558. }
  1559. /// Set the pointer to a vector to store the offsets where matches
  1560. /// end in the subject.
  1561. /// Null pointer unsets it.
  1562. /// @param v Pointer to a VecOff vector (std::vector<size_t>)
  1563. /// @return Reference to the calling RegexMatch object
  1564. virtual RegexMatch& setMatchEndOffsetVector(VecOff* v){
  1565. vec_eoff = v;
  1566. return *this;
  1567. }
  1568. ///Set the subject string for match.
  1569. ///This makes a copy of the subject string.
  1570. /// @param s Subject string
  1571. /// @return Reference to the calling RegexMatch object
  1572. /// @see RegexReplace::setSubject()
  1573. virtual RegexMatch& setSubject(String const &s) {
  1574. m_subject = s;
  1575. m_subject_ptr = &m_subject; //must overwrite
  1576. return *this;
  1577. }
  1578. ///@overload
  1579. ///...
  1580. /// Works with the original without modifying it. Null pointer unsets the subject.
  1581. /// @param s Pointer to subject string
  1582. /// @return Reference to the calling RegexMatch object
  1583. /// @see RegexReplace::setSubject()
  1584. virtual RegexMatch& setSubject(String const *s) {
  1585. if(s) m_subject_ptr = s;
  1586. else {
  1587. m_subject.clear();
  1588. m_subject_ptr = &m_subject;
  1589. }
  1590. return *this;
  1591. }
  1592. /// Set the modifier (resets all JPCRE2 and PCRE2 options) by calling RegexMatch::changeModifier().
  1593. /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier to set their equivalent options.
  1594. /// @param s Modifier string.
  1595. /// @return Reference to the calling RegexMatch object
  1596. /// @see RegexReplace::setModifier()
  1597. /// @see Regex::setModifier()
  1598. virtual RegexMatch& setModifier(Modifier const& s) {
  1599. match_opts = 0;
  1600. jpcre2_match_opts = 0;
  1601. changeModifier(s, true);
  1602. return *this;
  1603. }
  1604. ///Set a custom modifier table to be used.
  1605. ///@param mdt pointer to ModifierTable object.
  1606. ///@return Reference to the calling RegexMatch object.
  1607. virtual RegexMatch& setModifierTable(ModifierTable const * mdt){
  1608. modtab = mdt;
  1609. return *this;
  1610. }
  1611. /// Set JPCRE2 option for match (resets all)
  1612. /// @param x Option value
  1613. /// @return Reference to the calling RegexMatch object
  1614. /// @see RegexReplace::setJpcre2Option()
  1615. /// @see Regex::setJpcre2Option()
  1616. virtual RegexMatch& setJpcre2Option(Uint x) {
  1617. jpcre2_match_opts = x;
  1618. return *this;
  1619. }
  1620. ///Set PCRE2 option match (overwrite existing option)
  1621. /// @param x Option value
  1622. /// @return Reference to the calling RegexMatch object
  1623. /// @see RegexReplace::setPcre2Option()
  1624. /// @see Regex::setPcre2Option()
  1625. virtual RegexMatch& setPcre2Option(Uint x) {
  1626. match_opts = x;
  1627. return *this;
  1628. }
  1629. /// Set whether to perform global match
  1630. /// @param x True or False
  1631. /// @return Reference to the calling RegexMatch object
  1632. virtual RegexMatch& setFindAll(bool x) {
  1633. jpcre2_match_opts = x?jpcre2_match_opts | FIND_ALL:jpcre2_match_opts & ~FIND_ALL;
  1634. return *this;
  1635. }
  1636. ///@overload
  1637. ///...
  1638. ///This function just calls RegexMatch::setFindAll(bool x) with `true` as the parameter
  1639. ///@return Reference to the calling RegexMatch object
  1640. virtual RegexMatch& setFindAll() {
  1641. return setFindAll(true);
  1642. }
  1643. /// Set offset from where match starts.
  1644. /// When FIND_ALL is set, a global match would not be performed on all positions on the subject,
  1645. /// rather it will be performed from the start offset and onwards.
  1646. /// @param offset Start offset
  1647. /// @return Reference to the calling RegexMatch object
  1648. virtual RegexMatch& setStartOffset(PCRE2_SIZE offset) {
  1649. _start_offset = offset;
  1650. return *this;
  1651. }
  1652. ///Set the match context.
  1653. ///You can create match context using the native PCRE2 API.
  1654. ///The memory is not handled by RegexMatch object and not freed.
  1655. ///User will be responsible for freeing the memory of the match context.
  1656. ///@param match_context Pointer to the match context.
  1657. ///@return Reference to the calling RegexMatch object
  1658. virtual RegexMatch& setMatchContext(MatchContext *match_context){
  1659. mcontext = match_context;
  1660. return *this;
  1661. }
  1662. ///Return pointer to the match context that was previously set with setMatchContext().
  1663. ///Handling memory is the callers' responsibility.
  1664. ///@return pointer to the match context (default: null).
  1665. virtual MatchContext* getMatchContext(){
  1666. return mcontext;
  1667. }
  1668. ///Set the match data block to be used.
  1669. ///The memory is not handled by RegexMatch object and not freed.
  1670. ///User will be responsible for freeing the memory of the match data block.
  1671. ///@param madt Pointer to a match data block.
  1672. ///@return Reference to the calling RegexMatch object
  1673. virtual RegexMatch& setMatchDataBlock(MatchData* madt){
  1674. mdata = madt;
  1675. return *this;
  1676. }
  1677. ///Get the pointer to the match data block that was set previously with setMatchDataBlock().
  1678. ///Handling memory is the callers' responsibility.
  1679. ///@return pointer to the match data (default: null).
  1680. virtual MatchData* getMatchDataBlock(){
  1681. return mdata;
  1682. }
  1683. /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
  1684. /// This function does not initialize or re-initialize options.
  1685. /// If you want to set options from scratch, initialize them to 0 before calling this function.
  1686. /// If invalid modifier is detected, then the error number for the RegexMatch
  1687. /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
  1688. /// You can get the message with RegexMatch::getErrorMessage() function.
  1689. ///
  1690. /// @param mod Modifier string.
  1691. /// @param x Whether to add or remove option
  1692. /// @return Reference to the RegexMatch object
  1693. /// @see Regex::changeModifier()
  1694. /// @see RegexReplace::changeModifier()
  1695. virtual RegexMatch& changeModifier(Modifier const& mod, bool x){
  1696. modtab ? modtab->toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset)
  1697. : MOD::toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset);
  1698. return *this;
  1699. }
  1700. /// Add or remove a JPCRE2 option
  1701. /// @param opt JPCRE2 option value
  1702. /// @param x Add the option if it's true, remove otherwise.
  1703. /// @return Reference to the calling RegexMatch object
  1704. /// @see RegexReplace::changeJpcre2Option()
  1705. /// @see Regex::changeJpcre2Option()
  1706. virtual RegexMatch& changeJpcre2Option(Uint opt, bool x) {
  1707. jpcre2_match_opts = x ? jpcre2_match_opts | opt : jpcre2_match_opts & ~opt;
  1708. return *this;
  1709. }
  1710. /// Add or remove a PCRE2 option
  1711. /// @param opt PCRE2 option value
  1712. /// @param x Add the option if it's true, remove otherwise.
  1713. /// @return Reference to the calling RegexMatch object
  1714. /// @see RegexReplace::changePcre2Option()
  1715. /// @see Regex::changePcre2Option()
  1716. virtual RegexMatch& changePcre2Option(Uint opt, bool x) {
  1717. match_opts = x ? match_opts | opt : match_opts & ~opt;
  1718. return *this;
  1719. }
  1720. /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
  1721. /// This is just a wrapper of the original function RegexMatch::changeModifier()
  1722. /// @param mod Modifier string.
  1723. /// @return Reference to the calling RegexMatch object
  1724. /// @see RegexReplace::addModifier()
  1725. /// @see Regex::addModifier()
  1726. virtual RegexMatch& addModifier(Modifier const& mod){
  1727. return changeModifier(mod, true);
  1728. }
  1729. /// Add option to existing JPCRE2 options for match
  1730. /// @param x Option value
  1731. /// @return Reference to the calling RegexMatch object
  1732. /// @see RegexReplace::addJpcre2Option()
  1733. /// @see Regex::addJpcre2Option()
  1734. virtual RegexMatch& addJpcre2Option(Uint x) {
  1735. jpcre2_match_opts |= x;
  1736. return *this;
  1737. }
  1738. /// Add option to existing PCRE2 options for match
  1739. /// @param x Option value
  1740. /// @return Reference to the calling RegexMatch object
  1741. /// @see RegexReplace::addPcre2Option()
  1742. /// @see Regex::addPcre2Option()
  1743. virtual RegexMatch& addPcre2Option(Uint x) {
  1744. match_opts |= x;
  1745. return *this;
  1746. }
  1747. /// Perform match operation using info from class variables and return the match count and
  1748. /// store the results in specified vectors.
  1749. ///
  1750. /// Note: This function uses pcre2_match() function to do the match.
  1751. ///@return Match count
  1752. virtual SIZE_T match(void);
  1753. };
  1754. ///This class contains a typedef of a function pointer or a templated function wrapper (`std::function`)
  1755. ///to provide callback function to the `MatchEvaluator`.
  1756. ///`std::function` is used when `>=C++11` is being used , otherwise function pointer is used.
  1757. ///You can force using function pointer instead of `std::function` when `>=C++11` is used by defining the macro
  1758. ///`JPCRE2_USE_FUNCTION_POINTER_CALLBACK` before including jpcre2.hpp.
  1759. ///If you are using lambda function with capture, you must use the `std::function` approach.
  1760. ///
  1761. ///The callback function takes exactly three positional arguments:
  1762. ///@tparam T1 The first argument must be `jp::NumSub const &` aka `std::vector<String> const &` (or `void*` if not needed).
  1763. ///@tparam T2 The second argument must be `jp::MapNas const &` aka `std::map<String, String> const &` (or `void*` if not needed).
  1764. ///@tparam T3 The third argument must be `jp::MapNtN const &` aka `std::map<String, size_t> const &` (or `void*` if not needed).
  1765. ///
  1766. /// **Examples:**
  1767. /// ```cpp
  1768. /// typedef jpcre2::select<char> jp;
  1769. /// jp::String myCallback1(jp::NumSub const &m1, void*, void*){
  1770. /// return "("+m1[0]+")";
  1771. /// }
  1772. ///
  1773. /// jp::String myCallback2(jp::NumSub const &m1, jp::MapNas const &m2, void*){
  1774. /// return "("+m1[0]+"/"+m2.at("total")+")";
  1775. /// }
  1776. /// //Now you can pass these functions in MatchEvaluator constructors to create a match evaluator
  1777. /// jp::MatchEvaluator me1(myCallback1);
  1778. ///
  1779. /// //Examples with lambda (>=C++11)
  1780. /// jp::MatchEvaluator me2([](jp::NumSub const &m1, void*, void*)
  1781. /// {
  1782. /// return "("+m1[0]+")";
  1783. /// });
  1784. /// ```
  1785. ///@see MatchEvaluator
  1786. template<typename T1, typename T2, typename T3>
  1787. struct MatchEvaluatorCallback{
  1788. #if !defined JPCRE2_USE_FUNCTION_POINTER_CALLBACK && __cplusplus >= 201103L
  1789. typedef std::function<String (T1,T2,T3)> Callback;
  1790. #else
  1791. typedef String (*Callback)(T1,T2,T3);
  1792. #endif
  1793. };
  1794. ///Provides some default static callback functions.
  1795. ///The primary goal of this class is to provide default
  1796. ///callback function to MatchEvaluator default constructor which is
  1797. ///essentially callback::erase.
  1798. ///This class does not allow object instantiation.
  1799. struct callback{
  1800. ///Callback function that removes the matched part/s in the subject string
  1801. /// and takes all match vectors as argument.
  1802. ///Even though this function itself does not use the vectors, it still takes them
  1803. ///so that the caller can perform a match and populate all the match data to perform
  1804. ///further evaluation of other callback functions without doing the match again.
  1805. ///@param num jp::NumSub vector.
  1806. ///@param nas jp::MapNas map.
  1807. ///@param ntn jp::MapNtN map.
  1808. ///@return empty string.
  1809. static String eraseFill(NumSub const &num, MapNas const &nas, MapNtN const &ntn){
  1810. return String();
  1811. }
  1812. ///Callback function that removes the matched part/s in the subject string
  1813. ///and does not take any match vector.
  1814. ///This is a minimum cost pattern deleting callback function.
  1815. ///
  1816. ///It's the default callback function when you Instantiate
  1817. ///a MatchEvaluator object with its default constructor:
  1818. ///```cpp
  1819. ///MatchEvaluator me;
  1820. ///```
  1821. ///@return empty string.
  1822. static String erase(void*, void*, void*){
  1823. return String();
  1824. }
  1825. ///Callback function for populating match vectors that does not modify the subject string.
  1826. ///It always returns the total matched part and thus the subject string remains the same.
  1827. ///@param num jp::NumSub vector.
  1828. ///@param nas jp::MapNas map.
  1829. ///@param ntn jp::MapNtN map.
  1830. ///@return total match (group 0) of current match.
  1831. static String fill(NumSub const &num, MapNas const &nas, MapNtn const &ntn){
  1832. return num[0];
  1833. }
  1834. private:
  1835. //prevent object instantiation.
  1836. callback();
  1837. callback(callback const &);
  1838. #if __cplusplus >= 201103L
  1839. callback(callback&&);
  1840. #endif
  1841. ~callback();
  1842. };
  1843. ///This class inherits RegexMatch and provides a similar functionality.
  1844. ///All public member functions from RegexMatch class are publicly available except the following:
  1845. ///* setNumberedSubstringVector
  1846. ///* setNamedSubstringVector
  1847. ///* setNameToNumberMapVector
  1848. ///* setMatchStartOffsetVector
  1849. ///* setMatchEndOffsetVector
  1850. ///
  1851. ///The use of above functions is not allowed as the vectors are created according to the callback function you pass.
  1852. ///
  1853. ///Each constructor of this class takes a callback function as argument (see `MatchEvaluatorCallback`).
  1854. ///
  1855. ///It provides a MatchEvaluator::nreplace() function to perform replace operation using native JPCRE2 approach
  1856. ///and `MatchEvaluator::replace()` function for PCRE2 compatible replace operation.
  1857. ///
  1858. ///An instance of this class can also be passed with `RegexReplace::nreplace()` or `RegexReplace::replace()` function to perform replacement
  1859. ///according to this match evaluator.
  1860. ///
  1861. ///Match data is stored in vectors, and the vectors are populated according to the callback functions.
  1862. ///Populated vector data is never deleted but they get overwritten. Vector data can be manually zeroed out
  1863. ///by calling `MatchEvaluator::clearMatchData()`. If the capacities of those match vectors should
  1864. ///be shrunk too instead of just being cleared, use `MatchEvaluator::resetMatchData()` instead.
  1865. ///
  1866. /// # Re-usability of Match Data
  1867. /// A match data populated with a callback function that takes only a jp::NumSub vector is not compatible
  1868. /// with the data created according to callback function with a jp::MapNas vector.
  1869. /// Because, for this later callback, jp::MapNas data is required but is not available (only jp::NumSub is available).
  1870. /// In such cases, previous match data can not be used to perform a new replacement operation with this second callback function.
  1871. ///
  1872. /// To populate the match vectors, one must call the `MatchEvaluator::match()` or `MatchEvaluator::nreplace()` function, they will populate
  1873. /// vectors with match data according to call back function.
  1874. ///
  1875. /// ## Example:
  1876. ///
  1877. /// ```cpp
  1878. /// jp::String callback5(NumSub const &m, void*, MapNtn const &n){
  1879. /// return m[0];
  1880. /// }
  1881. /// jp::String callback4(void*, void*, MapNtn const &n){
  1882. /// return std::to_string(n.at("name")); //position of group 'name'.
  1883. /// }
  1884. /// jp::String callback2(void*, MapNas const &m, void*){
  1885. /// return m.at("name"); //substring by name
  1886. /// }
  1887. ///
  1888. /// jp::MatchEvaluator me;
  1889. /// me.setRegexObject(&re).setSubject("string").setCallback(callback5).nreplace();
  1890. /// //In above, nreplace() populates jp::NumSub and jp::MapNtn with match data.
  1891. ///
  1892. /// me.setCallback(callback4).nreplace(false);
  1893. /// //the above uses previous match result (note the 'false') which is OK,
  1894. /// //because, callback4 requires jp::MapNtn which was made available in the previous operation.
  1895. ///
  1896. /// //but the following is not OK: (assertion failure)
  1897. /// me.setCallback(callback2).nreplace(false);
  1898. /// //because, callback2 requires jp::MapNas data which is not available.
  1899. /// //now, this is OK:
  1900. /// me.setCallback(callback2).nreplace();
  1901. /// //because, it will recreate those match data including this one (jp::MapNas).
  1902. /// ```
  1903. ///
  1904. /// # Replace options
  1905. /// MatchEvaluator can not take replace options.
  1906. /// Replace options are taken directly by the replace functions: `nreplace()` and `replace()`.
  1907. ///
  1908. /// # Using as a match object
  1909. /// As it's just a subclass of RegexMatch, it can do all the things that RegexMatch can do, with some restrictions:
  1910. /// * matching options are modified to strip off bad options according to replacement (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT).
  1911. /// * match depends on the callback function. Only those vectors will be populated that are implemented by the callback functions so far
  1912. /// (multiple callback function will set multiple match data vectors.)
  1913. /// * match vectors are internal to this class, you can not set them manually (without callback function). (you can get pointers to these vectors
  1914. /// with `getNumberedSubstringVector()` and related functions).
  1915. ///
  1916. ///@see MatchEvaluatorCallback
  1917. ///@see RegexReplace::nreplace()
  1918. class MatchEvaluator: virtual public RegexMatch{
  1919. private:
  1920. friend class RegexReplace;
  1921. VecNum vec_num;
  1922. VecNas vec_nas;
  1923. VecNtN vec_ntn;
  1924. VecOff vec_soff;
  1925. VecOff vec_eoff;
  1926. int callbackn;
  1927. typename MatchEvaluatorCallback<void*, void*, void*>::Callback callback0;
  1928. typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback callback1;
  1929. typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback callback2;
  1930. typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback callback3;
  1931. typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback callback4;
  1932. typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback callback5;
  1933. typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback callback6;
  1934. typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback callback7;
  1935. //Q: Why the callback names seem random? is it random?
  1936. //A: No, it's not random, NumSub = 1, MapNas = 2, MapNtn = 4, thus:
  1937. // NumSub + MapNas = 3
  1938. // NumSub + MapNtn = 5
  1939. // MapNas + MapNtn = 6
  1940. // NumSub + MapNas + MapNtn = 7
  1941. //Q: Why is it like this?
  1942. //A: It's historical. Once, there was not this many callback declaration, there was only one (a templated one).
  1943. // The nreplace function itself used to calculate a mode value according to available vectors
  1944. // and determine what kind of callback function needed to be called.
  1945. //Q: Why the history changed?
  1946. //A: We had some compatibility issues with the single templated callback.
  1947. // Also, this approach proved to be more readable and robust.
  1948. PCRE2_SIZE buffer_size;
  1949. void init(){
  1950. callbackn = 0;
  1951. callback0 = callback::erase;
  1952. callback1 = 0;
  1953. callback2 = 0;
  1954. callback3 = 0;
  1955. callback4 = 0;
  1956. callback5 = 0;
  1957. callback6 = 0;
  1958. callback7 = 0;
  1959. setMatchStartOffsetVector(&vec_soff);
  1960. setMatchEndOffsetVector(&vec_eoff);
  1961. buffer_size = 0;
  1962. }
  1963. void setVectorPointersAccordingToCallback(){
  1964. switch(callbackn){
  1965. case 0: break;
  1966. case 1: setNumberedSubstringVector(&vec_num);break;
  1967. case 2: setNamedSubstringVector(&vec_nas);break;
  1968. case 3: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas);break;
  1969. case 4: setNameToNumberMapVector(&vec_ntn);break;
  1970. case 5: setNumberedSubstringVector(&vec_num).setNameToNumberMapVector(&vec_ntn);break;
  1971. case 6: setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break;
  1972. case 7: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break;
  1973. }
  1974. }
  1975. void onlyCopy(MatchEvaluator const &me){
  1976. callbackn = me.callbackn;
  1977. callback0 = me.callback0;
  1978. callback1 = me.callback1;
  1979. callback2 = me.callback2;
  1980. callback3 = me.callback3;
  1981. callback4 = me.callback4;
  1982. callback5 = me.callback5;
  1983. callback6 = me.callback6;
  1984. callback7 = me.callback7;
  1985. //must update the pointers to point to this class vectors.
  1986. setVectorPointersAccordingToCallback();
  1987. buffer_size = me.buffer_size;
  1988. }
  1989. void deepCopy(MatchEvaluator const &me) {
  1990. vec_num = me.vec_num;
  1991. vec_nas = me.vec_nas;
  1992. vec_ntn = me.vec_ntn;
  1993. vec_soff = me.vec_soff;
  1994. vec_eoff = me.vec_eoff;
  1995. onlyCopy(me);
  1996. }
  1997. #if __cplusplus >= 201103L
  1998. void deepMove(MatchEvaluator& me){
  1999. vec_num = std::move_if_noexcept(me.vec_num);
  2000. vec_nas = std::move_if_noexcept(me.vec_nas);
  2001. vec_ntn = std::move_if_noexcept(me.vec_ntn);
  2002. vec_soff = std::move_if_noexcept(me.vec_soff);
  2003. vec_eoff = std::move_if_noexcept(me.vec_eoff);
  2004. onlyCopy(me);
  2005. }
  2006. #endif
  2007. //prevent public access to some functions
  2008. MatchEvaluator& setNumberedSubstringVector(VecNum* v){
  2009. RegexMatch::setNumberedSubstringVector(v);
  2010. return *this;
  2011. }
  2012. MatchEvaluator& setNamedSubstringVector(VecNas* v){
  2013. RegexMatch::setNamedSubstringVector(v);
  2014. return *this;
  2015. }
  2016. MatchEvaluator& setNameToNumberMapVector(VecNtN* v){
  2017. RegexMatch::setNameToNumberMapVector(v);
  2018. return *this;
  2019. }
  2020. MatchEvaluator& setMatchStartOffsetVector(VecOff* v){
  2021. RegexMatch::setMatchStartOffsetVector(v);
  2022. return *this;
  2023. }
  2024. MatchEvaluator& setMatchEndOffsetVector(VecOff* v){
  2025. RegexMatch::setMatchEndOffsetVector(v);
  2026. return *this;
  2027. }
  2028. public:
  2029. ///Default constructor.
  2030. ///Sets callback::erase as the callback function.
  2031. ///Removes matched part/s from the subject string if the callback is not
  2032. ///changed.
  2033. /// ```cpp
  2034. /// jp::Regex re("\\s*string");
  2035. /// jp::MatchEvaluator me;
  2036. /// std::cout<<
  2037. /// me.setRegexObject(&re)
  2038. /// .setSubject("I am a string")
  2039. /// .nreplace();
  2040. /// //The above will delete ' string' from the subject
  2041. /// //thus the result will be 'I am a'
  2042. /// ```
  2043. explicit
  2044. MatchEvaluator():RegexMatch(){
  2045. init();
  2046. }
  2047. ///@overload
  2048. ///...
  2049. ///Constructor taking a Regex object pointer.
  2050. ///It sets the associated Regex object and
  2051. ///initializes the MatchEvaluator object with
  2052. ///callback::erase callback function.
  2053. ///Underlying data is not modified.
  2054. ///@param r constant Regex pointer.
  2055. explicit
  2056. MatchEvaluator(Regex const *r):RegexMatch(r){
  2057. init();
  2058. }
  2059. ///@overload
  2060. ///...
  2061. ///Constructor taking a callback function.
  2062. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2063. ///@param mef Callback function.
  2064. explicit
  2065. MatchEvaluator(typename MatchEvaluatorCallback<void*, void*, void*>::Callback mef): RegexMatch(){
  2066. init();
  2067. setCallback(mef);
  2068. }
  2069. ///@overload
  2070. /// ...
  2071. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2072. ///@param mef Callback function.
  2073. explicit
  2074. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback mef): RegexMatch(){
  2075. init();
  2076. setCallback(mef);
  2077. }
  2078. ///@overload
  2079. /// ...
  2080. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2081. ///@param mef Callback function.
  2082. explicit
  2083. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback mef): RegexMatch(){
  2084. init();
  2085. setCallback(mef);
  2086. }
  2087. ///@overload
  2088. /// ...
  2089. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2090. ///@param mef Callback function.
  2091. explicit
  2092. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback mef): RegexMatch(){
  2093. init();
  2094. setCallback(mef);
  2095. }
  2096. ///@overload
  2097. /// ...
  2098. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2099. ///@param mef Callback function.
  2100. explicit
  2101. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback mef): RegexMatch(){
  2102. init();
  2103. setCallback(mef);
  2104. }
  2105. ///@overload
  2106. /// ...
  2107. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2108. ///@param mef Callback function.
  2109. explicit
  2110. MatchEvaluator(typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback mef): RegexMatch(){
  2111. init();
  2112. setCallback(mef);
  2113. }
  2114. ///@overload
  2115. /// ...
  2116. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2117. ///@param mef Callback function.
  2118. explicit
  2119. MatchEvaluator(typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback mef): RegexMatch(){
  2120. init();
  2121. setCallback(mef);
  2122. }
  2123. ///@overload
  2124. /// ...
  2125. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2126. ///@param mef Callback function.
  2127. explicit
  2128. MatchEvaluator(typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback mef): RegexMatch(){
  2129. init();
  2130. setCallback(mef);
  2131. }
  2132. ///@overload
  2133. /// ...
  2134. ///Copy constructor.
  2135. ///@param me Reference to MatchEvaluator object
  2136. MatchEvaluator(MatchEvaluator const &me): RegexMatch(me){
  2137. init();
  2138. deepCopy(me);
  2139. }
  2140. ///Overloaded copy-assignment operator
  2141. ///@param me MatchEvaluator object
  2142. ///@return A reference to the calling MatchEvaluator object.
  2143. MatchEvaluator& operator=(MatchEvaluator const &me){
  2144. if(this == &me) return *this;
  2145. RegexMatch::operator=(me);
  2146. deepCopy(me);
  2147. return *this;
  2148. }
  2149. #if __cplusplus >= 201103L
  2150. ///@overload
  2151. /// ...
  2152. ///Move constructor.
  2153. ///This constructor steals resources from the argument.
  2154. ///It leaves the argument in a valid but indeterminate sate.
  2155. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2156. ///@param me rvalue reference to a MatchEvaluator object
  2157. MatchEvaluator(MatchEvaluator&& me): RegexMatch(me){
  2158. init();
  2159. deepMove(me);
  2160. }
  2161. ///@overload
  2162. ///...
  2163. ///Overloaded move-assignment operator.
  2164. ///It steals resources from the argument.
  2165. ///It leaves the argument in a valid but indeterminate sate.
  2166. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2167. ///@param me rvalue reference to a MatchEvaluator object
  2168. ///@return A reference to the calling MatchEvaluator object.
  2169. ///@see MatchEvaluator(MatchEvaluator&& me)
  2170. MatchEvaluator& operator=(MatchEvaluator&& me){
  2171. if(this == &me) return *this;
  2172. RegexMatch::operator=(me);
  2173. deepMove(me);
  2174. return *this;
  2175. }
  2176. #endif
  2177. virtual ~MatchEvaluator(){}
  2178. ///Member function to set a callback function with no vector reference.
  2179. ///Callback function is always overwritten. The implemented vectors are set to be filled with match data.
  2180. ///Other vectors that were set previously, are not unset and thus they will be filled with match data too
  2181. ///when `match()` or `nreplace()` is called.
  2182. ///@param mef Callback function.
  2183. ///@return A reference to the calling MatchEvaluator object.
  2184. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, void*, void*>::Callback mef){
  2185. callback0 = mef;
  2186. callbackn = 0;
  2187. return *this;
  2188. }
  2189. ///@overload
  2190. /// ...
  2191. ///Sets a callback function with a jp::NumSub vector.
  2192. ///You will be working with a reference to the constant vector.
  2193. ///@param mef Callback function.
  2194. ///@return A reference to the calling MatchEvaluator object.
  2195. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback mef){
  2196. callback1 = mef;
  2197. callbackn = 1;
  2198. setNumberedSubstringVector(&vec_num);
  2199. return *this;
  2200. }
  2201. ///@overload
  2202. /// ...
  2203. ///Sets a callback function with a jp::NumSub and jp::MapNas.
  2204. ///You will be working with references of the constant vectors.
  2205. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2206. ///```cpp
  2207. ///map_nas["word"]; //wrong
  2208. ///map_nas.at("word"); //ok
  2209. ///```
  2210. ///If you want to use `[]` operator with maps, make a copy:
  2211. ///```cpp
  2212. ///jp::MapNas mn = map_nas;
  2213. ///mn["word"]; //ok
  2214. ///```
  2215. ///@param mef Callback function.
  2216. ///@return A reference to the calling MatchEvaluator object.
  2217. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback mef){
  2218. callback3 = mef;
  2219. callbackn = 3;
  2220. setNumberedSubstringVector(&vec_num);
  2221. setNamedSubstringVector(&vec_nas);
  2222. return *this;
  2223. }
  2224. ///@overload
  2225. /// ...
  2226. ///Sets a callback function with a jp::NumSub and jp::MapNtN.
  2227. ///You will be working with references of the constant vectors.
  2228. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2229. ///```cpp
  2230. ///map_ntn["word"]; //wrong
  2231. ///map_ntn.at("word"); //ok
  2232. ///```
  2233. ///If you want to use `[]` operator with maps, make a copy:
  2234. ///```cpp
  2235. ///jp::MapNtN mn = map_ntn;
  2236. ///mn["word"]; //ok
  2237. ///```
  2238. ///@param mef Callback function.
  2239. ///@return A reference to the calling MatchEvaluator object.
  2240. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback mef){
  2241. callback5 = mef;
  2242. callbackn = 5;
  2243. setNumberedSubstringVector(&vec_num);
  2244. setNameToNumberMapVector(&vec_ntn);
  2245. return *this;
  2246. }
  2247. ///@overload
  2248. /// ...
  2249. ///Sets a callback function with a jp::NumSub, jp::MapNas, jp::MapNtN.
  2250. ///You will be working with references of the constant vectors.
  2251. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2252. ///```cpp
  2253. ///map_nas["word"]; //wrong
  2254. ///map_nas.at("word"); //ok
  2255. ///```
  2256. ///If you want to use `[]` operator with maps, make a copy:
  2257. ///```cpp
  2258. ///jp::MapNas mn = map_nas;
  2259. ///mn["word"]; //ok
  2260. ///```
  2261. ///@param mef Callback function.
  2262. ///@return A reference to the calling MatchEvaluator object.
  2263. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback mef){
  2264. callback7 = mef;
  2265. callbackn = 7;
  2266. setNumberedSubstringVector(&vec_num);
  2267. setNamedSubstringVector(&vec_nas);
  2268. setNameToNumberMapVector(&vec_ntn);
  2269. return *this;
  2270. }
  2271. ///@overload
  2272. /// ...
  2273. ///Sets a callback function with a jp::MapNas.
  2274. ///You will be working with reference of the constant vector.
  2275. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2276. ///```cpp
  2277. ///map_nas["word"]; //wrong
  2278. ///map_nas.at("word"); //ok
  2279. ///```
  2280. ///If you want to use `[]` operator with maps, make a copy:
  2281. ///```cpp
  2282. ///jp::MapNas mn = map_nas;
  2283. ///mn["word"]; //ok
  2284. ///```
  2285. ///@param mef Callback function.
  2286. ///@return A reference to the calling MatchEvaluator object.
  2287. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback mef){
  2288. callback2 = mef;
  2289. callbackn = 2;
  2290. setNamedSubstringVector(&vec_nas);
  2291. return *this;
  2292. }
  2293. ///@overload
  2294. /// ...
  2295. ///Sets a callback function with a jp::MapNas, jp::MapNtN.
  2296. ///You will be working with reference of the constant vector.
  2297. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2298. ///```cpp
  2299. ///map_nas["word"]; //wrong
  2300. ///map_nas.at("word"); //ok
  2301. ///```
  2302. ///If you want to use `[]` operator with maps, make a copy:
  2303. ///```cpp
  2304. ///jp::MapNas mn = map_nas;
  2305. ///mn["word"]; //ok
  2306. ///```
  2307. ///@param mef Callback function.
  2308. ///@return A reference to the calling MatchEvaluator object.
  2309. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback mef){
  2310. callback6 = mef;
  2311. callbackn = 6;
  2312. setNamedSubstringVector(&vec_nas);
  2313. setNameToNumberMapVector(&vec_ntn);
  2314. return *this;
  2315. }
  2316. ///@overload
  2317. /// ...
  2318. ///Sets a callback function with a jp::MapNtN.
  2319. ///You will be working with references of the constant vectors.
  2320. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2321. ///```cpp
  2322. ///map_ntn["word"]; //wrong
  2323. ///map_ntn.at("word"); //ok
  2324. ///```
  2325. ///If you want to use `[]` operator with maps, make a copy:
  2326. ///```cpp
  2327. ///jp::MapNtN mn = map_ntn;
  2328. ///mn["word"]; //ok
  2329. ///```
  2330. ///@param mef Callback function.
  2331. ///@return A reference to the calling MatchEvaluator object.
  2332. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback mef){
  2333. callback4 = mef;
  2334. callbackn = 4;
  2335. setNameToNumberMapVector(&vec_ntn);
  2336. return *this;
  2337. }
  2338. ///Clear match data.
  2339. ///It clears all match data from all vectors (without shrinking).
  2340. ///For shrinking the vectors, use `resetMatchData()`
  2341. ///A call to `match()` or nreplace() will be required to produce match data again.
  2342. ///@return A reference to the calling MatchEvaluator object.
  2343. MatchEvaluator& clearMatchData(){
  2344. vec_num.clear();
  2345. vec_nas.clear();
  2346. vec_ntn.clear();
  2347. vec_soff.clear();
  2348. vec_eoff.clear();
  2349. return *this;
  2350. }
  2351. ///Reset match data to initial state.
  2352. ///It deletes all match data from all vectors shrinking their capacity.
  2353. ///A call to `match()` or nreplace() will be required to produce match data again.
  2354. ///@return A reference to the calling MatchEvaluator object.
  2355. MatchEvaluator& resetMatchData(){
  2356. VecNum().swap(vec_num);
  2357. VecNas().swap(vec_nas);
  2358. VecNtN().swap(vec_ntn);
  2359. VecOff().swap(vec_soff);
  2360. VecOff().swap(vec_eoff);
  2361. return *this;
  2362. }
  2363. ///Reset MatchEvaluator to initial state including memory.
  2364. ///@return A reference to the calling MatchEvaluator object.
  2365. MatchEvaluator& reset(){
  2366. RegexMatch::reset();
  2367. resetMatchData();
  2368. init();
  2369. return *this;
  2370. }
  2371. ///Clears MatchEvaluator.
  2372. ///Returns everything to initial state (some memory may retain for further and faster use).
  2373. ///@return A reference to the calling MatchEvaluator object.
  2374. MatchEvaluator& clear(){
  2375. RegexMatch::clear();
  2376. clearMatchData();
  2377. init();
  2378. return *this;
  2379. }
  2380. ///Call RegexMatch::resetErrors().
  2381. ///@return A reference to the calling MatchEvaluator object.
  2382. MatchEvaluator& resetErrors(){
  2383. RegexMatch::resetErrors();
  2384. return *this;
  2385. }
  2386. ///Call RegexMatch::setRegexObject(r).
  2387. ///@param r constant Regex object pointer
  2388. ///@return A reference to the calling MatchEvaluator object.
  2389. MatchEvaluator& setRegexObject (Regex const *r){
  2390. RegexMatch::setRegexObject(r);
  2391. return *this;
  2392. }
  2393. ///Call RegexMatch::setSubject(String const &s).
  2394. ///@param s subject string.
  2395. ///@return A reference to the calling MatchEvaluator object.
  2396. MatchEvaluator& setSubject (String const &s){
  2397. RegexMatch::setSubject(s);
  2398. return *this;
  2399. }
  2400. ///@overload
  2401. ///@param s constant subject string by pointer
  2402. ///@return A reference to the calling MatchEvaluator object.
  2403. MatchEvaluator& setSubject (String const *s){
  2404. RegexMatch::setSubject(s);
  2405. return *this;
  2406. }
  2407. ///Call RegexMatch::setModifier(Modifier const& s).
  2408. ///@param s modifier string.
  2409. ///@return A reference to the calling MatchEvaluator object.
  2410. MatchEvaluator& setModifier (Modifier const& s){
  2411. RegexMatch::setModifier(s);
  2412. return *this;
  2413. }
  2414. ///Call RegexMatch::setModifierTable(ModifierTable const * s).
  2415. ///@param mdt pointer to ModifierTable object.
  2416. ///@return A reference to the calling MatchEvaluator object.
  2417. MatchEvaluator& setModifierTable (ModifierTable const * mdt){
  2418. RegexMatch::setModifierTable(mdt);
  2419. return *this;
  2420. }
  2421. ///Call RegexMatch::setJpcre2Option(Uint x).
  2422. ///@param x JPCRE2 option value.
  2423. ///@return A reference to the calling MatchEvaluator object.
  2424. MatchEvaluator& setJpcre2Option (Uint x){
  2425. RegexMatch::setJpcre2Option(x);
  2426. return *this;
  2427. }
  2428. ///Call RegexMatch::setPcre2Option (Uint x).
  2429. ///@param x PCRE2 option value.
  2430. ///@return A reference to the calling MatchEvaluator object.
  2431. MatchEvaluator& setPcre2Option (Uint x){
  2432. RegexMatch::setPcre2Option(x);
  2433. return *this;
  2434. }
  2435. ///Call RegexMatch::setFindAll(bool x).
  2436. ///@param x true if global match, false otherwise.
  2437. ///@return A reference to the calling MatchEvaluator object.
  2438. MatchEvaluator& setFindAll (bool x){
  2439. RegexMatch::setFindAll(x);
  2440. return *this;
  2441. }
  2442. ///Call RegexMatch::setFindAll().
  2443. ///@return A reference to the calling MatchEvaluator object.
  2444. MatchEvaluator& setFindAll(){
  2445. RegexMatch::setFindAll();
  2446. return *this;
  2447. }
  2448. ///Call RegexMatch::setStartOffset (PCRE2_SIZE offset).
  2449. ///@param offset match start offset in the subject.
  2450. ///@return A reference to the calling MatchEvaluator object.
  2451. MatchEvaluator& setStartOffset (PCRE2_SIZE offset){
  2452. RegexMatch::setStartOffset(offset);
  2453. return *this;
  2454. }
  2455. ///Call RegexMatch::setMatchContext(MatchContext *match_context).
  2456. ///@param match_context pointer to match context.
  2457. ///@return A reference to the calling MatchEvaluator object.
  2458. MatchEvaluator& setMatchContext (MatchContext *match_context){
  2459. RegexMatch::setMatchContext(match_context);
  2460. return *this;
  2461. }
  2462. ///Call RegexMatch::setMatchDataBlock(MatchContext * mdt);
  2463. ///@param mdt pointer to match data block
  2464. ///@return A reference to the calling MatchEvaluator object.
  2465. MatchEvaluator& setMatchDataBlock(MatchData* mdt){
  2466. RegexMatch::setMatchDataBlock(mdt);
  2467. return *this;
  2468. }
  2469. ///Set the buffer size that will be used by pcre2_substitute (replace()).
  2470. ///If buffer size proves to be enough to fit the resultant string
  2471. ///from each match (not the total resultant string), it will yield one less call
  2472. ///to pcre2_substitute for each match.
  2473. ///@param x buffer size.
  2474. ///@return A reference to the calling MatchEvaluator object.
  2475. MatchEvaluator& setBufferSize(PCRE2_SIZE x){
  2476. buffer_size = x;
  2477. return *this;
  2478. }
  2479. ///Get the initial buffer size that is being used by internal function pcre2_substitute
  2480. ///@return buffer_size
  2481. PCRE2_SIZE getBufferSize(){
  2482. return buffer_size;
  2483. }
  2484. ///Call RegexMatch::changeModifier(Modifier const& mod, bool x).
  2485. ///@param mod modifier string.
  2486. ///@param x true (add) or false (remove).
  2487. ///@return A reference to the calling MatchEvaluator object.
  2488. MatchEvaluator& changeModifier (Modifier const& mod, bool x){
  2489. RegexMatch::changeModifier(mod, x);
  2490. return *this;
  2491. }
  2492. ///Call RegexMatch::changeJpcre2Option(Uint opt, bool x).
  2493. ///@param opt JPCRE2 option
  2494. ///@param x true (add) or false (remove).
  2495. ///@return A reference to the calling MatchEvaluator object.
  2496. MatchEvaluator& changeJpcre2Option (Uint opt, bool x){
  2497. RegexMatch::changeJpcre2Option(opt, x);
  2498. return *this;
  2499. }
  2500. ///Call RegexMatch::changePcre2Option(Uint opt, bool x).
  2501. ///@param opt PCRE2 option.
  2502. ///@param x true (add) or false (remove).
  2503. ///@return A reference to the calling MatchEvaluator object.
  2504. MatchEvaluator& changePcre2Option (Uint opt, bool x){
  2505. RegexMatch::changePcre2Option(opt, x);
  2506. return *this;
  2507. }
  2508. ///Call RegexMatch::addModifier(Modifier const& mod).
  2509. ///@param mod modifier string.
  2510. ///@return A reference to the calling MatchEvaluator object.
  2511. MatchEvaluator& addModifier (Modifier const& mod){
  2512. RegexMatch::addModifier(mod);
  2513. return *this;
  2514. }
  2515. ///Call RegexMatch::addJpcre2Option(Uint x).
  2516. ///@param x JPCRE2 option.
  2517. ///@return A reference to the calling MatchEvaluator object.
  2518. MatchEvaluator& addJpcre2Option (Uint x){
  2519. RegexMatch::addJpcre2Option(x);
  2520. return *this;
  2521. }
  2522. ///Call RegexMatch::addPcre2Option(Uint x).
  2523. ///@param x PCRE2 option.
  2524. ///@return A reference to the calling MatchEvaluator object.
  2525. MatchEvaluator& addPcre2Option (Uint x){
  2526. RegexMatch::addPcre2Option(x);
  2527. return *this;
  2528. }
  2529. ///Perform match and return the match count.
  2530. ///This function strips off matching options (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT) that are considered
  2531. ///bad options for replacement operation and then calls
  2532. ///RegexMatch::match() to perform the match.
  2533. ///@return match count.
  2534. SIZE_T match(void){
  2535. //remove bad matching options
  2536. RegexMatch::changePcre2Option(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT, false);
  2537. return RegexMatch::match();
  2538. }
  2539. ///Perform regex replace with this match evaluator.
  2540. ///This is a JPCRE2 native replace function (thus the name nreplace).
  2541. ///It uses the `MatchEvaluatorCallback` function that was set with a constructor or `MatchEvaluator::setCallback()` function
  2542. ///to generate the replacement strings on the fly.
  2543. ///The string returned by the callback function will be treated as literal and will
  2544. ///not go through any further processing.
  2545. ///
  2546. ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument.
  2547. ///To use existing match data that was created by a previous `MatchEvaluator::nreplace()` or `MatchEvaluator::match()`, call this
  2548. ///function with boolean `false` as the first argument.
  2549. ///
  2550. ///## Complexity
  2551. /// 1. Changes in replace related option takes effect without a re-match.
  2552. /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect.
  2553. /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error.
  2554. /// 4. If the associated Regex object or subject string changes, a new match must be performed,
  2555. /// trying to use the existing match data in such cases is undefined behavior.
  2556. ///
  2557. ///@param do_match Perform a new matching operation if true, otherwise use existing match data.
  2558. ///@param jro JPCRE2 replace options.
  2559. ///@param counter Pointer to a counter to store the number of replacement done.
  2560. ///@return resultant string after replace.
  2561. ///@see MatchEvaluator.
  2562. ///@see MatchEvaluatorCallback.
  2563. String nreplace(bool do_match=true, Uint jro=0, SIZE_T* counter=0);
  2564. ///PCRE2 compatible replace function that uses this MatchEvaluator.
  2565. ///Performs regex replace with pcre2_substitute function
  2566. ///by generating the replacement strings dynamically with MatchEvaluator callback.
  2567. ///The string returned by callback function is processed by internal pcre2_substitute, thus allowing
  2568. ///all options that are provided by PCRE2 itself.
  2569. ///
  2570. ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument.
  2571. ///
  2572. ///## Complexity
  2573. /// 1. Changes in replace related option takes effect without a re-match.
  2574. /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect.
  2575. /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error.
  2576. /// 4. If the associated Regex object or subject string changes, a new match must be performed,
  2577. /// trying to use the existing match data in such cases is undefined behavior.
  2578. ///
  2579. ///@param do_match perform a new match if true, otherwise use existing data.
  2580. ///@param ro replace related PCRE2 options.
  2581. ///@param counter Pointer to a counter to store the number of replacement done.
  2582. ///@return resultant string after replacement.
  2583. String replace(bool do_match=true, Uint ro=0, SIZE_T* counter=0);
  2584. };
  2585. /** Provides public constructors to create RegexReplace objects.
  2586. * Every RegexReplace object should be associated with a Regex object.
  2587. * This class stores a pointer to its associated Regex object, thus when
  2588. * the content of the associated Regex object is changed, there's no need to
  2589. * set the pointer again.
  2590. *
  2591. * Examples:
  2592. *
  2593. * ```cpp
  2594. * jp::Regex re;
  2595. * jp::RegexReplace rr;
  2596. * rr.setRegexObject(&re);
  2597. * rr.replace("subject", "me"); // returns 'subject'
  2598. * re.compile("\\w+");
  2599. * rr.replace(); // replaces 'subject' with 'me' i.e returns 'me'
  2600. * ```
  2601. */
  2602. class RegexReplace {
  2603. private:
  2604. friend class Regex;
  2605. Regex const *re;
  2606. String r_subject;
  2607. String *r_subject_ptr; //preplace method modifies it in-place
  2608. String r_replw;
  2609. String const *r_replw_ptr;
  2610. Uint replace_opts;
  2611. Uint jpcre2_replace_opts;
  2612. PCRE2_SIZE buffer_size;
  2613. PCRE2_SIZE _start_offset;
  2614. MatchData *mdata;
  2615. MatchContext *mcontext;
  2616. ModifierTable const * modtab;
  2617. SIZE_T last_replace_count;
  2618. SIZE_T* last_replace_counter;
  2619. void init_vars() {
  2620. re = 0;
  2621. r_subject_ptr = &r_subject;
  2622. r_replw_ptr = &r_replw;
  2623. replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
  2624. jpcre2_replace_opts = 0;
  2625. buffer_size = 0;
  2626. error_number = 0;
  2627. error_offset = 0;
  2628. _start_offset = 0;
  2629. mdata = 0;
  2630. mcontext = 0;
  2631. modtab = 0;
  2632. last_replace_count = 0;
  2633. last_replace_counter = &last_replace_count;
  2634. }
  2635. void onlyCopy(RegexReplace const &rr){
  2636. re = rr.re; //only pointer should be copied.
  2637. //rr.r_subject_ptr may point to rr.r_subject or other user data
  2638. r_subject_ptr = (rr.r_subject_ptr == &rr.r_subject) ? &r_subject //not rr.r_subject
  2639. : rr.r_subject_ptr; //other user data
  2640. r_replw = rr.r_replw;
  2641. //rr.r_replw_ptr may point to rr.r_replw or other user data
  2642. r_replw_ptr = (rr.r_replw_ptr == &rr.r_replw) ? &r_replw //not rr.r_replw
  2643. : rr.r_replw_ptr; //other user data
  2644. replace_opts = rr.replace_opts;
  2645. jpcre2_replace_opts = rr.jpcre2_replace_opts;
  2646. buffer_size = rr.buffer_size;
  2647. error_number = rr.error_number;
  2648. error_offset = rr.error_offset;
  2649. _start_offset = rr._start_offset;
  2650. mdata = rr.mdata;
  2651. mcontext = rr.mcontext;
  2652. modtab = rr.modtab;
  2653. last_replace_count = rr.last_replace_count;
  2654. last_replace_counter = (rr.last_replace_counter == &rr.last_replace_count) ? &last_replace_count
  2655. : rr.last_replace_counter;
  2656. }
  2657. void deepCopy(RegexReplace const &rr){
  2658. r_subject = rr.r_subject;
  2659. onlyCopy(rr);
  2660. }
  2661. #if __cplusplus >= 201103L
  2662. void deepMove(RegexReplace& rr){
  2663. r_subject = std::move_if_noexcept(rr.r_subject);
  2664. onlyCopy(rr);
  2665. }
  2666. #endif
  2667. protected:
  2668. int error_number;
  2669. PCRE2_SIZE error_offset;
  2670. public:
  2671. ///Default constructor
  2672. RegexReplace(){
  2673. init_vars();
  2674. }
  2675. ///@overload
  2676. /// ...
  2677. ///Creates a RegexReplace object associating a Regex object.
  2678. ///Regex object is not modified.
  2679. ///@param r pointer to a Regex object
  2680. RegexReplace(Regex const *r) {
  2681. init_vars();
  2682. re = r;
  2683. }
  2684. ///@overload
  2685. ///...
  2686. ///Copy constructor.
  2687. ///@param rr RegexReplace object reference
  2688. RegexReplace(RegexReplace const &rr){
  2689. init_vars();
  2690. deepCopy(rr);
  2691. }
  2692. ///Overloaded Copy assignment operator.
  2693. ///@param rr RegexReplace object reference
  2694. ///@return A reference to the calling RegexReplace object
  2695. RegexReplace& operator=(RegexReplace const &rr){
  2696. if(this == &rr) return *this;
  2697. deepCopy(rr);
  2698. return *this;
  2699. }
  2700. #if __cplusplus >= 201103L
  2701. ///@overload
  2702. ///...
  2703. ///Move constructor.
  2704. ///This constructor steals resources from the argument.
  2705. ///It leaves the argument in a valid but indeterminate sate.
  2706. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2707. ///@param rr rvalue reference to a RegexReplace object reference
  2708. RegexReplace(RegexReplace&& rr){
  2709. init_vars();
  2710. deepMove(rr);
  2711. }
  2712. ///@overload
  2713. ///...
  2714. ///Overloaded move assignment operator.
  2715. ///This constructor steals resources from the argument.
  2716. ///It leaves the argument in a valid but indeterminate sate.
  2717. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2718. ///@param rr rvalue reference to a RegexReplace object reference
  2719. ///@return A reference to the calling RegexReplace object
  2720. RegexReplace& operator=(RegexReplace&& rr){
  2721. if(this == &rr) return *this;
  2722. deepMove(rr);
  2723. return *this;
  2724. }
  2725. #endif
  2726. virtual ~RegexReplace() {}
  2727. ///Reset all class variables to its default (initial) state including memory.
  2728. ///@return Reference to the calling RegexReplace object.
  2729. RegexReplace& reset() {
  2730. String().swap(r_subject);
  2731. String().swap(r_replw);
  2732. init_vars();
  2733. return *this;
  2734. }
  2735. ///Clear all class variables to its default (initial) state (some memory may retain for further use).
  2736. ///@return Reference to the calling RegexReplace object.
  2737. RegexReplace& clear() {
  2738. r_subject.clear();
  2739. r_replw.clear();
  2740. init_vars();
  2741. return *this;
  2742. }
  2743. ///Reset replace related errors to zero.
  2744. ///@return Reference to the calling RegexReplace object
  2745. ///@see Regex::resetErrors()
  2746. ///@see RegexMatch::resetErrors()
  2747. RegexReplace& resetErrors(){
  2748. error_number = 0;
  2749. error_offset = 0;
  2750. return *this;
  2751. }
  2752. /// Returns the last error number
  2753. ///@return Last error number
  2754. int getErrorNumber() const {
  2755. return error_number;
  2756. }
  2757. /// Returns the last error offset
  2758. ///@return Last error offset
  2759. int getErrorOffset() const {
  2760. return (int)error_offset;
  2761. }
  2762. /// Returns the last error message
  2763. ///@return Last error message
  2764. String getErrorMessage() const {
  2765. #if __cplusplus >= 201103L
  2766. return select<Char, Map>::getErrorMessage(error_number, error_offset);
  2767. #else
  2768. return select<Char>::getErrorMessage(error_number, error_offset);
  2769. #endif
  2770. }
  2771. /// Get replacement string
  2772. ///@return replacement string
  2773. String getReplaceWith() const {
  2774. return *r_replw_ptr;
  2775. }
  2776. /// Get pointer to replacement string
  2777. ///@return pointer to replacement string
  2778. String const * getReplaceWithPointer() const {
  2779. return r_replw_ptr;
  2780. }
  2781. /// Get subject string
  2782. ///@return subject string
  2783. ///@see RegexMatch::getSubject()
  2784. String getSubject() const {
  2785. return *r_subject_ptr;
  2786. }
  2787. /// Get pointer to subject string
  2788. ///@return Pointer to constant subject string
  2789. ///@see RegexMatch::getSubjectPointer()
  2790. String const * getSubjectPointer() const {
  2791. return r_subject_ptr;
  2792. }
  2793. /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
  2794. ///
  2795. /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized
  2796. /// as long as you don't do that explicitly. Calling RegexReplace::setModifier() will re-set them.
  2797. ///
  2798. /// **Mixed or combined modifier**.
  2799. ///
  2800. /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
  2801. /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
  2802. /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
  2803. /// get set, and when you remove the 'n' modifier (with `RegexReplace::changeModifier()`), both will get removed.
  2804. /// @return Calculated modifier string (std::string)
  2805. ///@see RegexMatch::getModifier()
  2806. ///@see Regex::getModifier()
  2807. std::string getModifier() const {
  2808. return modtab ? modtab->fromReplaceOption(replace_opts, jpcre2_replace_opts)
  2809. : MOD::fromReplaceOption(replace_opts, jpcre2_replace_opts);
  2810. }
  2811. ///Get the modifier table that is set,
  2812. ///@return constant ModifierTable pointer.
  2813. ModifierTable const* getModifierTable(){
  2814. return modtab;
  2815. }
  2816. ///Get start offset.
  2817. ///@return the start offset where matching starts for replace operation
  2818. PCRE2_SIZE getStartOffset() const {
  2819. return _start_offset;
  2820. }
  2821. /// Get PCRE2 option
  2822. ///@return PCRE2 option for replace
  2823. ///@see Regex::getPcre2Option()
  2824. ///@see RegexMatch::getPcre2Option()
  2825. Uint getPcre2Option() const {
  2826. return replace_opts;
  2827. }
  2828. /// Get JPCRE2 option
  2829. ///@return JPCRE2 option for replace
  2830. ///@see Regex::getJpcre2Option()
  2831. ///@see RegexMatch::getJpcre2Option()
  2832. Uint getJpcre2Option() const {
  2833. return jpcre2_replace_opts;
  2834. }
  2835. ///Get a pointer to the associated Regex object.
  2836. ///If no actual Regex object is associated, null is returned
  2837. ///@return A pointer to the associated constant Regex object or null
  2838. Regex const * getRegexObject() const {
  2839. return re;
  2840. }
  2841. ///Return pointer to the match context that was previously set with setMatchContext().
  2842. ///Handling memory is the callers' responsibility.
  2843. ///@return pointer to the match context (default: null).
  2844. MatchContext* getMatchContext(){
  2845. return mcontext;
  2846. }
  2847. ///Get the pointer to the match data block that was set previously with setMatchData()
  2848. ///Handling memory is the callers' responsibility.
  2849. ///@return pointer to the match data (default: null).
  2850. virtual MatchData* getMatchDataBlock(){
  2851. return mdata;
  2852. }
  2853. ///Get the initial buffer size that is being used by internal function pcre2_substitute
  2854. ///@return buffer_size
  2855. PCRE2_SIZE getBufferSize(){
  2856. return buffer_size;
  2857. }
  2858. ///Get the number of replacement in last replace operation.
  2859. ///If you set an external counter with RegexReplace::setReplaceCounter(),
  2860. ///a call to this getter method will dereference the pointer to the external counter
  2861. ///and return the value.
  2862. ///@return Last replace count
  2863. SIZE_T getLastReplaceCount(){
  2864. return *last_replace_counter;
  2865. }
  2866. ///Set an external counter variable to store the replacement count.
  2867. ///This counter will be updated after each replacement operation on this object.
  2868. ///A call to this method will reset the internal counter to 0, thus when you reset the counter
  2869. ///to internal counter (by giving null as param), the previous replace count won't be available.
  2870. ///@param counter Pointer to a counter variable. Null sets the counter to default internal counter.
  2871. ///@return Reference to the calling RegexReplace object.
  2872. RegexReplace& setReplaceCounter(SIZE_T* counter){
  2873. last_replace_count = 0;
  2874. last_replace_counter = counter ? counter : &last_replace_count;
  2875. return *this;
  2876. }
  2877. ///Set the associated Regex object.
  2878. ///Regex object is not modified.
  2879. ///@param r Pointer to a Regex object.
  2880. ///@return Reference to the calling RegexReplace object.
  2881. RegexReplace& setRegexObject(Regex const *r){
  2882. re = r;
  2883. return *this;
  2884. }
  2885. /// Set the subject string for replace.
  2886. ///This makes a copy of the string. If no copy is desired or you are working
  2887. ///with big text, consider passing by pointer.
  2888. ///@param s Subject string
  2889. ///@return Reference to the calling RegexReplace object
  2890. ///@see RegexMatch::setSubject()
  2891. RegexReplace& setSubject(String const &s) {
  2892. r_subject = s;
  2893. r_subject_ptr = &r_subject; //must overwrite
  2894. return *this;
  2895. }
  2896. ///@overload
  2897. ///...
  2898. /// Set pointer to the subject string for replace, null pointer unsets it.
  2899. /// The underlined data is not modified unless RegexReplace::preplace() method is used.
  2900. ///@param s Pointer to subject string
  2901. ///@return Reference to the calling RegexReplace object
  2902. ///@see RegexMatch::setSubject()
  2903. RegexReplace& setSubject(String *s) {
  2904. if(s) r_subject_ptr = s;
  2905. else {
  2906. r_subject.clear();
  2907. r_subject_ptr = &r_subject;
  2908. }
  2909. return *this;
  2910. }
  2911. /// Set the replacement string.
  2912. ///`$` is a special character which implies captured group.
  2913. ///
  2914. ///1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
  2915. ///2. A named substring can be referenced with `${name}`, where 'name' is the group name.
  2916. ///3. A literal `$` can be given as `$$`.
  2917. ///
  2918. ///**Note:** This function makes a copy of the string. If no copy is desired or
  2919. ///you are working with big text, consider passing the string with pointer.
  2920. ///
  2921. ///@param s String to replace with
  2922. ///@return Reference to the calling RegexReplace object
  2923. RegexReplace& setReplaceWith(String const &s) {
  2924. r_replw = s;
  2925. r_replw_ptr = &r_replw; //must overwrite
  2926. return *this;
  2927. }
  2928. ///@overload
  2929. ///...
  2930. ///@param s Pointer to the string to replace with, null pointer unsets it.
  2931. ///@return Reference to the calling RegexReplace object
  2932. RegexReplace& setReplaceWith(String const *s) {
  2933. if(s) r_replw_ptr = s;
  2934. else {
  2935. r_replw.clear();
  2936. r_replw_ptr = &r_replw;
  2937. }
  2938. return *this;
  2939. }
  2940. /// Set the modifier string (resets all JPCRE2 and PCRE2 options) by calling RegexReplace::changeModifier().
  2941. ///@param s Modifier string.
  2942. ///@return Reference to the calling RegexReplace object
  2943. ///@see RegexMatch::setModifier()
  2944. ///@see Regex::setModifier()
  2945. RegexReplace& setModifier(Modifier const& s) {
  2946. replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* must not be initialized to 0 */
  2947. jpcre2_replace_opts = 0;
  2948. return changeModifier(s, true);
  2949. }
  2950. ///Set a custom modifier table to be used.
  2951. ///@param mdt pointer to ModifierTable object.
  2952. /// @return Reference to the calling RegexReplace object.
  2953. RegexReplace& setModifierTable(ModifierTable const * mdt){
  2954. modtab = mdt;
  2955. return *this;
  2956. }
  2957. /// Set the initial buffer size to be allocated for replaced string (used by PCRE2)
  2958. ///@param x Buffer size
  2959. ///@return Reference to the calling RegexReplace object
  2960. RegexReplace& setBufferSize(PCRE2_SIZE x) {
  2961. buffer_size = x;
  2962. return *this;
  2963. }
  2964. ///Set start offset.
  2965. ///Set the offset where matching starts for replace operation
  2966. ///@param start_offset The offset where matching starts for replace operation
  2967. ///@return Reference to the calling RegexReplace object
  2968. RegexReplace& setStartOffset(PCRE2_SIZE start_offset){
  2969. _start_offset = start_offset;
  2970. return *this;
  2971. }
  2972. /// Set JPCRE2 option for replace (overwrite existing option)
  2973. ///@param x Option value
  2974. ///@return Reference to the calling RegexReplace object
  2975. ///@see RegexMatch::setJpcre2Option()
  2976. ///@see Regex::setJpcre2Option()
  2977. RegexReplace& setJpcre2Option(Uint x) {
  2978. jpcre2_replace_opts = x;
  2979. return *this;
  2980. }
  2981. /// Set PCRE2 option replace (overwrite existing option)
  2982. ///@param x Option value
  2983. ///@return Reference to the calling RegexReplace object
  2984. ///@see RegexMatch::setPcre2Option()
  2985. ///@see Regex::setPcre2Option()
  2986. RegexReplace& setPcre2Option(Uint x) {
  2987. replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | x;
  2988. return *this;
  2989. }
  2990. ///Set the match context to be used.
  2991. ///Native PCRE2 API may be used to create match context.
  2992. ///The memory of the match context is not handled by RegexReplace object and not freed.
  2993. ///User will be responsible for freeing memory.
  2994. ///@param match_context Pointer to match context.
  2995. ///@return Reference to the calling RegexReplace object.
  2996. RegexReplace& setMatchContext(MatchContext * match_context){
  2997. mcontext = match_context;
  2998. return *this;
  2999. }
  3000. ///Set the match data block to be used.
  3001. ///Native PCRE2 API may be used to create match data block.
  3002. ///The memory of the match data is not handled by RegexReplace object and not freed.
  3003. ///User will be responsible for creating/freeing memory.
  3004. ///@param match_data Pointer to match data.
  3005. ///@return Reference to the calling RegexReplace object.
  3006. RegexReplace& setMatchDataBlock(MatchData *match_data){
  3007. mdata = match_data;
  3008. return *this;
  3009. }
  3010. /// After a call to this function PCRE2 and JPCRE2 options will be properly set.
  3011. /// This function does not initialize or re-initialize options.
  3012. /// If you want to set options from scratch, initialize them to 0 before calling this function.
  3013. ///
  3014. /// If invalid modifier is detected, then the error number for the RegexReplace
  3015. /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
  3016. /// You can get the message with RegexReplace::getErrorMessage() function.
  3017. /// @param mod Modifier string.
  3018. /// @param x Whether to add or remove option
  3019. /// @return Reference to the RegexReplace object
  3020. /// @see Regex::changeModifier()
  3021. /// @see RegexMatch::changeModifier()
  3022. RegexReplace& changeModifier(Modifier const& mod, bool x){
  3023. modtab ? modtab->toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset)
  3024. : MOD::toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset);
  3025. return *this;
  3026. }
  3027. /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
  3028. /// Add or remove a JPCRE2 option
  3029. /// @param opt JPCRE2 option value
  3030. /// @param x Add the option if it's true, remove otherwise.
  3031. /// @return Reference to the calling RegexReplace object
  3032. /// @see RegexMatch::changeJpcre2Option()
  3033. /// @see Regex::changeJpcre2Option()
  3034. RegexReplace& changeJpcre2Option(Uint opt, bool x) {
  3035. jpcre2_replace_opts = x ? jpcre2_replace_opts | opt : jpcre2_replace_opts & ~opt;
  3036. return *this;
  3037. }
  3038. /// Add or remove a PCRE2 option
  3039. /// @param opt PCRE2 option value
  3040. /// @param x Add the option if it's true, remove otherwise.
  3041. /// @return Reference to the calling RegexReplace object
  3042. /// @see RegexMatch::changePcre2Option()
  3043. /// @see Regex::changePcre2Option()
  3044. RegexReplace& changePcre2Option(Uint opt, bool x) {
  3045. replace_opts = x ? replace_opts | opt : replace_opts & ~opt;
  3046. //replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* It's important, but let user override it. */
  3047. return *this;
  3048. }
  3049. /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
  3050. /// This is just a wrapper of the original function RegexReplace::changeModifier()
  3051. /// provided for convenience.
  3052. /// @param mod Modifier string.
  3053. /// @return Reference to the calling RegexReplace object
  3054. /// @see RegexMatch::addModifier()
  3055. /// @see Regex::addModifier()
  3056. RegexReplace& addModifier(Modifier const& mod){
  3057. return changeModifier(mod, true);
  3058. }
  3059. /// Add specified JPCRE2 option to existing options for replace.
  3060. ///@param x Option value
  3061. ///@return Reference to the calling RegexReplace object
  3062. ///@see RegexMatch::addJpcre2Option()
  3063. ///@see Regex::addJpcre2Option()
  3064. RegexReplace& addJpcre2Option(Uint x) {
  3065. jpcre2_replace_opts |= x;
  3066. return *this;
  3067. }
  3068. /// Add specified PCRE2 option to existing options for replace
  3069. ///@param x Option value
  3070. ///@return Reference to the calling RegexReplace object
  3071. ///@see RegexMatch::addPcre2Option()
  3072. ///@see Regex::addPcre2Option()
  3073. RegexReplace& addPcre2Option(Uint x) {
  3074. replace_opts |= x;
  3075. return *this;
  3076. }
  3077. /// Perform regex replace by retrieving subject string, replacement string, modifier and other options from class variables.
  3078. /// In the replacement string (see RegexReplace::setReplaceWith()) `$` is a special character which implies captured group.
  3079. /// 1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
  3080. /// 2. A named substring can be referenced with `${name}`, where 'name' is the group name.
  3081. /// 3. A literal `$` can be given as `$$`.
  3082. /// 4. Bash like features: ${<n>:-<string>} and ${<n>:+<string1>:<string2>}, where <n> is a group number or name.
  3083. ///
  3084. ///All options supported by pcre2_substitute is available.
  3085. ///
  3086. /// Note: This function calls pcre2_substitute() to do the replacement.
  3087. ///@return Replaced string
  3088. String replace(void);
  3089. /// Perl compatible replace method.
  3090. /// Modifies subject string in-place and returns replace count.
  3091. ///
  3092. /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`.
  3093. /// @return replace count
  3094. SIZE_T preplace(void){
  3095. *r_subject_ptr = replace();
  3096. return *last_replace_counter;
  3097. }
  3098. /// Perl compatible replace method with match evaluator.
  3099. /// Modifies subject string in-place and returns replace count.
  3100. /// MatchEvaluator class does not have a implementation of this replace method, thus it is not possible
  3101. /// to re-use match data with preplace() method.
  3102. /// Re-using match data with preplace doesn't actually make any sense, because new subject will
  3103. /// always require new match data.
  3104. ///
  3105. /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`.
  3106. /// @param me MatchEvaluator object.
  3107. /// @return replace count
  3108. SIZE_T preplace(MatchEvaluator me){
  3109. *r_subject_ptr = me.setRegexObject(getRegexObject())
  3110. .setSubject(r_subject_ptr) //do not use method
  3111. .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0)
  3112. .setMatchContext(getMatchContext())
  3113. .setMatchDataBlock(getMatchDataBlock())
  3114. .setBufferSize(getBufferSize())
  3115. .setStartOffset(getStartOffset())
  3116. .replace(true, getPcre2Option(), last_replace_counter);
  3117. return *last_replace_counter;
  3118. }
  3119. ///JPCRE2 native replace function.
  3120. ///A different name is adopted to
  3121. ///distinguish itself from the regular replace() function which
  3122. ///uses pcre2_substitute() to do the replacement; contrary to that,
  3123. ///it will provide a JPCRE2 native way of replacement operation.
  3124. ///It takes a MatchEvaluator object which provides a callback function that is used
  3125. ///to generate replacement string on the fly. Any replacement string set with
  3126. ///`RegexReplace::setReplaceWith()` function will have no effect.
  3127. ///The string returned by the callback function will be treated as literal and will
  3128. ///not go through any further processing.
  3129. ///
  3130. ///This function works on a copy of the MatchEvaluator, and thus makes no changes
  3131. ///to the original. The copy is modified as below:
  3132. ///
  3133. ///1. Global replacement will set FIND_ALL for match, unset otherwise.
  3134. ///2. Bad matching options such as `PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT` will be removed.
  3135. ///3. subject, start_offset and Regex object will change according to the RegexReplace object.
  3136. ///4. match context, and match data block will be changed according to the RegexReplace object.
  3137. ///
  3138. ///It calls MatchEvaluator::nreplace() on the MatchEvaluator object to perform the replacement.
  3139. ///
  3140. ///It always performs a new match.
  3141. ///@param me A MatchEvaluator object.
  3142. ///@return The resultant string after replacement.
  3143. ///@see MatchEvaluator::nreplace()
  3144. ///@see MatchEvaluator
  3145. ///@see MatchEvaluatorCallback
  3146. String nreplace(MatchEvaluator me){
  3147. return me.setRegexObject(getRegexObject())
  3148. .setSubject(getSubjectPointer())
  3149. .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0)
  3150. .setMatchContext(getMatchContext())
  3151. .setMatchDataBlock(getMatchDataBlock())
  3152. .setStartOffset(getStartOffset())
  3153. .nreplace(true, getJpcre2Option(), last_replace_counter);
  3154. }
  3155. ///PCRE2 compatible replace function that takes a MatchEvaluator.
  3156. ///String returned by callback function is processed by pcre2_substitute,
  3157. ///thus all PCRE2 substitute options are supported by this replace function.
  3158. ///
  3159. ///It always performs a new match.
  3160. ///@param me MatchEvaluator instance, (copied and modified according to this object).
  3161. ///@return resultant string.
  3162. ///@see replace()
  3163. String replace(MatchEvaluator me){
  3164. return me.setRegexObject(getRegexObject())
  3165. .setSubject(getSubjectPointer())
  3166. .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0)
  3167. .setMatchContext(getMatchContext())
  3168. .setMatchDataBlock(getMatchDataBlock())
  3169. .setBufferSize(getBufferSize())
  3170. .setStartOffset(getStartOffset())
  3171. .replace(true, getPcre2Option(), last_replace_counter);
  3172. }
  3173. };
  3174. /** Provides public constructors to create Regex object.
  3175. * Each regex pattern needs an object of this class and each pattern needs to be compiled.
  3176. * Pattern compilation can be done using one of its overloaded constructors or the `Regex::compile()`
  3177. * member function.
  3178. *
  3179. * Examples:
  3180. *
  3181. * ```cpp
  3182. * jp::Regex re; //does not perform a compile
  3183. * re.compile("pattern", "modifier");
  3184. * jp::Regex re2("pattern", "modifier"); //performs a compile
  3185. * ```
  3186. *
  3187. */
  3188. class Regex {
  3189. private:
  3190. friend class RegexMatch;
  3191. friend class RegexReplace;
  3192. friend class MatchEvaluator;
  3193. String pat_str;
  3194. String const *pat_str_ptr;
  3195. Pcre2Code *code;
  3196. Uint compile_opts;
  3197. Uint jpcre2_compile_opts;
  3198. ModifierTable const * modtab;
  3199. CompileContext *ccontext;
  3200. std::vector<unsigned char> tabv;
  3201. void init_vars() {
  3202. jpcre2_compile_opts = 0;
  3203. compile_opts = 0;
  3204. error_number = 0;
  3205. error_offset = 0;
  3206. code = 0;
  3207. pat_str_ptr = &pat_str;
  3208. ccontext = 0;
  3209. modtab = 0;
  3210. }
  3211. void freeRegexMemory(void) {
  3212. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::code_free(code);
  3213. code = 0; //we may use it again
  3214. }
  3215. void freeCompileContext(){
  3216. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_free(ccontext);
  3217. ccontext = 0;
  3218. }
  3219. void onlyCopy(Regex const &r){
  3220. //r.pat_str_ptr may point to other user data
  3221. pat_str_ptr = (r.pat_str_ptr == &r.pat_str) ? &pat_str //not r.pat_str
  3222. : r.pat_str_ptr; //other user data
  3223. compile_opts = r.compile_opts;
  3224. jpcre2_compile_opts = r.jpcre2_compile_opts;
  3225. error_number = r.error_number;
  3226. error_offset = r.error_offset;
  3227. modtab = r.modtab;
  3228. }
  3229. void deepCopy(Regex const &r) {
  3230. pat_str = r.pat_str; //must not use setPattern() here
  3231. onlyCopy(r);
  3232. //copy tables
  3233. tabv = r.tabv;
  3234. //copy ccontext if it's not null
  3235. freeCompileContext();
  3236. ccontext = (r.ccontext) ? Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_copy(r.ccontext) : 0;
  3237. //if tabv is not empty and ccontext is ok (not null) set the table pointer to ccontext
  3238. if(ccontext && !tabv.empty()) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
  3239. //table pointer must be updated in the compiled code itself, jit memory copy is not available.
  3240. //copy is not going to work, we need a recompile.
  3241. //as all vars are already copied, we can just call compile()
  3242. r.code ? compile() //compile frees previous memory.
  3243. : freeRegexMemory();
  3244. }
  3245. #if __cplusplus >= 201103L
  3246. void deepMove(Regex& r) {
  3247. pat_str = std::move_if_noexcept(r.pat_str);
  3248. onlyCopy(r);
  3249. //steal tables
  3250. tabv = std::move_if_noexcept(r.tabv);
  3251. //steal ccontext
  3252. freeCompileContext();
  3253. ccontext = r.ccontext; r.ccontext = 0; //must set this to 0
  3254. if(ccontext && !tabv.empty()) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
  3255. //steal the code
  3256. freeRegexMemory();
  3257. code = r.code; r.code = 0; //must set this to 0
  3258. }
  3259. #endif
  3260. protected:
  3261. int error_number;
  3262. PCRE2_SIZE error_offset;
  3263. public:
  3264. /// Default Constructor.
  3265. /// Initializes all class variables to defaults.
  3266. /// Does not perform any pattern compilation.
  3267. Regex() {
  3268. init_vars();
  3269. }
  3270. ///Compile pattern with initialization.
  3271. /// @param re Pattern string
  3272. Regex(String const &re) {
  3273. init_vars();
  3274. compile(re);
  3275. }
  3276. /// @overload
  3277. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3278. Regex(String const *re) {
  3279. init_vars();
  3280. compile(re);
  3281. }
  3282. ///@overload
  3283. /// @param re Pattern string .
  3284. /// @param mod Modifier string.
  3285. Regex(String const &re, Modifier const& mod) {
  3286. init_vars();
  3287. compile(re, mod);
  3288. }
  3289. ///@overload
  3290. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3291. /// @param mod Modifier string.
  3292. Regex(String const *re, Modifier const& mod) {
  3293. init_vars();
  3294. compile(re, mod);
  3295. }
  3296. ///@overload
  3297. /// @param re Pattern string .
  3298. /// @param po PCRE2 option value
  3299. Regex(String const &re, Uint po) {
  3300. init_vars();
  3301. compile(re, po);
  3302. }
  3303. ///@overload
  3304. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3305. /// @param po PCRE2 option value
  3306. Regex(String const *re, Uint po) {
  3307. init_vars();
  3308. compile(re, po);
  3309. }
  3310. ///@overload
  3311. /// @param re Pattern string .
  3312. /// @param po PCRE2 option value
  3313. /// @param jo JPCRE2 option value
  3314. Regex(String const &re, Uint po, Uint jo) {
  3315. init_vars();
  3316. compile(re, po, jo);
  3317. }
  3318. ///@overload
  3319. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3320. /// @param po PCRE2 option value
  3321. /// @param jo JPCRE2 option value
  3322. Regex(String const *re, Uint po, Uint jo) {
  3323. init_vars();
  3324. compile(re, po, jo);
  3325. }
  3326. /// @overload
  3327. ///...
  3328. /// Copy constructor.
  3329. /// A separate and new compile is performed from the copied options.
  3330. ///
  3331. /// @param r Constant Regex object reference.
  3332. Regex(Regex const &r) {
  3333. init_vars();
  3334. deepCopy(r);
  3335. }
  3336. /// Overloaded assignment operator.
  3337. /// @param r Regex const &
  3338. /// @return *this
  3339. Regex& operator=(Regex const &r) {
  3340. if (this == &r) return *this;
  3341. deepCopy(r);
  3342. return *this;
  3343. }
  3344. #if __cplusplus >= 201103L
  3345. /// @overload
  3346. ///...
  3347. /// Move constructor.
  3348. ///This constructor steals resources from the argument.
  3349. ///It leaves the argument in a valid but indeterminate sate.
  3350. ///The indeterminate state can be returned to normal by calling reset() on that object.
  3351. /// @param r rvalue reference to a Regex object.
  3352. Regex(Regex&& r) {
  3353. init_vars();
  3354. deepMove(r);
  3355. }
  3356. ///@overload
  3357. ///...
  3358. /// Overloaded move-assignment operator.
  3359. ///This constructor steals resources from the argument.
  3360. ///It leaves the argument in a valid but indeterminate sate.
  3361. ///The indeterminate state can be returned to normal by calling reset() on that object.
  3362. /// @param r Regex&&
  3363. /// @return *this
  3364. Regex& operator=(Regex&& r) {
  3365. if (this == &r) return *this;
  3366. deepMove(r);
  3367. return *this;
  3368. }
  3369. /// Provides boolean check for the status of the object.
  3370. /// This overloaded boolean operator needs to be declared
  3371. /// explicit to prevent implicit conversion and overloading issues.
  3372. ///
  3373. /// We will only enable it if >=C++11 is being used, as the explicit keyword
  3374. /// for a function other than constructor is not supported in older compilers.
  3375. ///
  3376. /// If you are dealing with legacy code/compilers use the Double bang trick mentioned
  3377. /// in Regex::operator!().
  3378. ///
  3379. /// This helps us to check the status of the compiled regex like this:
  3380. ///
  3381. /// ```
  3382. /// jpcre2::select<char>::Regex re("pat", "mod");
  3383. /// if(re) {
  3384. /// std::cout<<"Compile success";
  3385. /// } else {
  3386. /// std::cout<<"Compile failed";
  3387. /// }
  3388. /// ```
  3389. ///@return true if regex compiled successfully, false otherwise.
  3390. ///
  3391. explicit operator bool() const {
  3392. return (code != 0);
  3393. }
  3394. #endif
  3395. /// Provides boolean check for the status of the object.
  3396. /// This is a safe boolean approach (no implicit conversion or overloading).
  3397. /// We don't need the explicit keyword here and thus it's the preferable method
  3398. /// to check for object status that will work well with older compilers.
  3399. /// e.g:
  3400. ///
  3401. /// ```
  3402. /// jpcre2::select<char>::Regex re("pat","mod");
  3403. /// if(!re) {
  3404. /// std::cout<<"Compile failed";
  3405. /// } else {
  3406. /// std::cout<<"Compiled successfully";
  3407. /// }
  3408. /// ```
  3409. /// Double bang trick:
  3410. ///
  3411. /// ```
  3412. /// jpcre2::select<char>::Regex re("pat","mod");
  3413. /// if(!!re) {
  3414. /// std::cout<<"Compiled successfully";
  3415. /// } else {
  3416. /// std::cout<<"Compile failed";
  3417. /// }
  3418. /// ```
  3419. /// @return true if regex compile failed, false otherwise.
  3420. bool operator!() const {
  3421. return (code == 0);
  3422. }
  3423. virtual ~Regex() {
  3424. freeRegexMemory();
  3425. freeCompileContext();
  3426. }
  3427. ///Reset all class variables to its default (initial) state including memory.
  3428. ///@return Reference to the calling Regex object.
  3429. Regex& reset() {
  3430. freeRegexMemory();
  3431. freeCompileContext();
  3432. String().swap(pat_str);
  3433. init_vars();
  3434. return *this;
  3435. }
  3436. ///Clear all class variables to its default (initial) state (some memory may retain for further use).
  3437. ///@return Reference to the calling Regex object.
  3438. Regex& clear() {
  3439. freeRegexMemory();
  3440. freeCompileContext();
  3441. pat_str.clear();
  3442. init_vars();
  3443. return *this;
  3444. }
  3445. ///Reset regex compile related errors to zero.
  3446. ///@return A reference to the Regex object
  3447. ///@see RegexReplace::resetErrors()
  3448. ///@see RegexMatch::resetErrors()
  3449. Regex& resetErrors() {
  3450. error_number = 0;
  3451. error_offset = 0;
  3452. return *this;
  3453. }
  3454. /// Recreate character tables used by PCRE2.
  3455. /// You should call this function after changing the locale to remake the
  3456. /// character tables according to the new locale.
  3457. /// These character tables are used to compile the regex and used by match
  3458. /// and replace operation. A separate call to compile() will be required
  3459. /// to apply the new character tables.
  3460. /// @return Reference to the calling Regex object.
  3461. Regex& resetCharacterTables() {
  3462. const unsigned char* tables = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::maketables(0); //must pass 0, we are using free() to free the tables.
  3463. tabv = std::vector<unsigned char>(tables, tables+1088);
  3464. ::free((void*)tables); //must free memory
  3465. if(!ccontext)
  3466. ccontext = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_create(0);
  3467. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
  3468. return *this;
  3469. }
  3470. ///Get Pcre2 raw compiled code pointer.
  3471. ///@return pointer to constant pcre2_code or null.
  3472. Pcre2Code const* getPcre2Code() const{
  3473. return code;
  3474. }
  3475. /// Get pattern string
  3476. ///@return pattern string of type jpcre2::select::String
  3477. String getPattern() const {
  3478. return *pat_str_ptr;
  3479. }
  3480. /// Get pointer to pattern string
  3481. ///@return Pointer to constant pattern string
  3482. String const * getPatternPointer() const {
  3483. return pat_str_ptr;
  3484. }
  3485. /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
  3486. ///
  3487. /// **Mixed or combined modifier**.
  3488. ///
  3489. /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
  3490. /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
  3491. /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
  3492. /// get set, and when you remove the 'n' modifier (with `Regex::changeModifier()`), both will get removed.
  3493. ///@tparam Char_T Character type
  3494. ///@return Calculated modifier string (std::string)
  3495. ///@see RegexMatch::getModifier()
  3496. ///@see RegexReplace::getModifier()
  3497. std::string getModifier() const {
  3498. return modtab ? modtab->fromCompileOption(compile_opts, jpcre2_compile_opts)
  3499. : MOD::fromCompileOption(compile_opts, jpcre2_compile_opts);
  3500. }
  3501. /// Get PCRE2 option
  3502. /// @return Compile time PCRE2 option value
  3503. ///@see RegexReplace::getPcre2Option()
  3504. ///@see RegexMatch::getPcre2Option()
  3505. Uint getPcre2Option() const {
  3506. return compile_opts;
  3507. }
  3508. /// Get JPCRE2 option
  3509. /// @return Compile time JPCRE2 option value
  3510. ///@see RegexReplace::getJpcre2Option()
  3511. ///@see RegexMatch::getJpcre2Option()
  3512. Uint getJpcre2Option() const {
  3513. return jpcre2_compile_opts;
  3514. }
  3515. /// Returns the last error number
  3516. ///@return Last error number
  3517. int getErrorNumber() const {
  3518. return error_number;
  3519. }
  3520. /// Returns the last error offset
  3521. ///@return Last error offset
  3522. int getErrorOffset() const {
  3523. return (int)error_offset;
  3524. }
  3525. /// Returns the last error message
  3526. ///@return Last error message
  3527. String getErrorMessage() const {
  3528. #if __cplusplus >= 201103L
  3529. return select<Char, Map>::getErrorMessage(error_number, error_offset);
  3530. #else
  3531. return select<Char>::getErrorMessage(error_number, error_offset);
  3532. #endif
  3533. }
  3534. ///Get new line convention from compiled code.
  3535. ///@return New line option value or 0.
  3536. ///```
  3537. ///PCRE2_NEWLINE_CR Carriage return only
  3538. ///PCRE2_NEWLINE_LF Linefeed only
  3539. ///PCRE2_NEWLINE_CRLF CR followed by LF only
  3540. ///PCRE2_NEWLINE_ANYCRLF Any of the above
  3541. ///PCRE2_NEWLINE_ANY Any Unicode newline sequence
  3542. ///```
  3543. Uint getNewLine() {
  3544. if(!code) return 0;
  3545. Uint newline = 0;
  3546. int ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(code, PCRE2_INFO_NEWLINE, &newline);
  3547. if(ret < 0) error_number = ret;
  3548. return newline;
  3549. }
  3550. ///Get the modifier table that is set,
  3551. ///@return constant ModifierTable pointer.
  3552. ModifierTable const* getModifierTable(){
  3553. return modtab;
  3554. }
  3555. ///Set new line convention.
  3556. ///@param value New line option value.
  3557. ///```
  3558. ///PCRE2_NEWLINE_CR Carriage return only
  3559. ///PCRE2_NEWLINE_LF Linefeed only
  3560. ///PCRE2_NEWLINE_CRLF CR followed by LF only
  3561. ///PCRE2_NEWLINE_ANYCRLF Any of the above
  3562. ///PCRE2_NEWLINE_ANY Any Unicode newline sequence
  3563. ///```
  3564. ///@return Reference to the calling Regex object
  3565. Regex& setNewLine(Uint value){
  3566. if(!ccontext)
  3567. ccontext = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_create(0);
  3568. int ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_newline(ccontext, value);
  3569. if(ret < 0) error_number = ret;
  3570. return *this;
  3571. }
  3572. /// Set the pattern string to compile
  3573. /// @param re Pattern string
  3574. /// @return Reference to the calling Regex object.
  3575. Regex& setPattern(String const &re) {
  3576. pat_str = re;
  3577. pat_str_ptr = &pat_str; //must overwrite
  3578. return *this;
  3579. }
  3580. /// @overload
  3581. /// @param re Pattern string pointer, null pointer will unset it.
  3582. /// @return Reference to the calling Regex object.
  3583. Regex& setPattern(String const *re) {
  3584. if(re) pat_str_ptr = re;
  3585. else {
  3586. pat_str.clear();
  3587. pat_str_ptr = &pat_str;
  3588. }
  3589. return *this;
  3590. }
  3591. /// set the modifier (resets all JPCRE2 and PCRE2 options) by calling Regex::changeModifier().
  3592. /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier and sets
  3593. /// equivalent PCRE2 and JPCRE2 options.
  3594. /// @param x Modifier string.
  3595. /// @return Reference to the calling Regex object.
  3596. /// @see RegexMatch::setModifier()
  3597. /// @see RegexReplace::setModifier()
  3598. Regex& setModifier(Modifier const& x) {
  3599. compile_opts = 0;
  3600. jpcre2_compile_opts = 0;
  3601. return changeModifier(x, true);
  3602. }
  3603. ///Set a custom modifier table to be used.
  3604. ///@param mdt pointer to ModifierTable object.
  3605. /// @return Reference to the calling Regex object.
  3606. Regex& setModifierTable(ModifierTable const * mdt){
  3607. modtab = mdt;
  3608. return *this;
  3609. }
  3610. /// Set JPCRE2 option for compile (overwrites existing option)
  3611. /// @param x Option value
  3612. /// @return Reference to the calling Regex object.
  3613. /// @see RegexMatch::setJpcre2Option()
  3614. /// @see RegexReplace::setJpcre2Option()
  3615. Regex& setJpcre2Option(Uint x) {
  3616. jpcre2_compile_opts = x;
  3617. return *this;
  3618. }
  3619. /// Set PCRE2 option for compile (overwrites existing option)
  3620. /// @param x Option value
  3621. /// @return Reference to the calling Regex object.
  3622. /// @see RegexMatch::setPcre2Option()
  3623. /// @see RegexReplace::setPcre2Option()
  3624. Regex& setPcre2Option(Uint x) {
  3625. compile_opts = x;
  3626. return *this;
  3627. }
  3628. /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
  3629. /// This function does not initialize or re-initialize options.
  3630. /// If you want to set options from scratch, initialize them to 0 before calling this function.
  3631. ///
  3632. /// If invalid modifier is detected, then the error number for the Regex
  3633. /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
  3634. /// You can get the message with Regex::getErrorMessage() function.
  3635. /// @param mod Modifier string.
  3636. /// @param x Whether to add or remove option
  3637. /// @return Reference to the calling Regex object
  3638. /// @see RegexMatch::changeModifier()
  3639. /// @see RegexReplace::changeModifier()
  3640. Regex& changeModifier(Modifier const& mod, bool x){
  3641. modtab ? modtab->toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset)
  3642. : MOD::toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset);
  3643. return *this;
  3644. }
  3645. /// Add or remove a JPCRE2 option
  3646. /// @param opt JPCRE2 option value
  3647. /// @param x Add the option if it's true, remove otherwise.
  3648. /// @return Reference to the calling Regex object
  3649. /// @see RegexMatch::changeJpcre2Option()
  3650. /// @see RegexReplace::changeJpcre2Option()
  3651. Regex& changeJpcre2Option(Uint opt, bool x) {
  3652. jpcre2_compile_opts = x ? jpcre2_compile_opts | opt : jpcre2_compile_opts & ~opt;
  3653. return *this;
  3654. }
  3655. /// Add or remove a PCRE2 option
  3656. /// @param opt PCRE2 option value
  3657. /// @param x Add the option if it's true, remove otherwise.
  3658. /// @return Reference to the calling Regex object
  3659. /// @see RegexMatch::changePcre2Option()
  3660. /// @see RegexReplace::changePcre2Option()
  3661. Regex& changePcre2Option(Uint opt, bool x) {
  3662. compile_opts = x ? compile_opts | opt : compile_opts & ~opt;
  3663. return *this;
  3664. }
  3665. /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
  3666. /// This is just a wrapper of the original function Regex::changeModifier()
  3667. /// provided for convenience.
  3668. /// @param mod Modifier string.
  3669. /// @return Reference to the calling Regex object
  3670. /// @see RegexMatch::addModifier()
  3671. /// @see RegexReplace::addModifier()
  3672. Regex& addModifier(Modifier const& mod){
  3673. return changeModifier(mod, true);
  3674. }
  3675. /// Add option to existing JPCRE2 options for compile
  3676. /// @param x Option value
  3677. /// @return Reference to the calling Regex object
  3678. /// @see RegexMatch::addJpcre2Option()
  3679. /// @see RegexReplace::addJpcre2Option()
  3680. Regex& addJpcre2Option(Uint x) {
  3681. jpcre2_compile_opts |= x;
  3682. return *this;
  3683. }
  3684. /// Add option to existing PCRE2 options for compile
  3685. /// @param x Option value
  3686. /// @return Reference to the calling Regex object
  3687. /// @see RegexMatch::addPcre2Option()
  3688. /// @see RegexReplace::addPcre2Option()
  3689. Regex& addPcre2Option(Uint x) {
  3690. compile_opts |= x;
  3691. return *this;
  3692. }
  /// Compile the pattern using the pattern string and options
  /// currently stored in this object.
  /// @see Regex::compile(String const &re, Uint po, Uint jo)
  /// @see Regex::compile(String const &re, Uint po)
  /// @see Regex::compile(String const &re, Modifier mod)
  /// @see Regex::compile(String const &re)
  void compile();
  3699. ///@overload
  3700. ///...
  3701. /// Set the specified parameters, then compile the pattern using information from class variables.
  3702. /// @param re Pattern string
  3703. /// @param po PCRE2 option
  3704. /// @param jo JPCRE2 option
  3705. void compile(String const &re, Uint po, Uint jo) {
  3706. setPattern(re).setPcre2Option(po).setJpcre2Option(jo);
  3707. compile();
  3708. }
  3709. ///@overload
  3710. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3711. /// @param po PCRE2 option
  3712. /// @param jo JPCRE2 option
  3713. void compile(String const *re, Uint po, Uint jo) {
  3714. setPattern(re).setPcre2Option(po).setJpcre2Option(jo);
  3715. compile();
  3716. }
  3717. ///@overload
  3718. /// @param re Pattern string
  3719. /// @param po PCRE2 option
  3720. void compile(String const &re, Uint po) {
  3721. setPattern(re).setPcre2Option(po);
  3722. compile();
  3723. }
  3724. ///@overload
  3725. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3726. /// @param po PCRE2 option
  3727. void compile(String const *re, Uint po) {
  3728. setPattern(re).setPcre2Option(po);
  3729. compile();
  3730. }
  3731. /// @overload
  3732. /// @param re Pattern string
  3733. /// @param mod Modifier string.
  3734. void compile(String const &re, Modifier const& mod) {
  3735. setPattern(re).setModifier(mod);
  3736. compile();
  3737. }
  3738. ///@overload
  3739. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3740. /// @param mod Modifier string.
  3741. void compile(String const *re, Modifier const& mod) {
  3742. setPattern(re).setModifier(mod);
  3743. compile();
  3744. }
  3745. ///@overload
  3746. /// @param re Pattern string .
  3747. void compile(String const &re) {
  3748. setPattern(re);
  3749. compile();
  3750. }
  3751. ///@overload
  3752. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3753. void compile(String const *re) {
  3754. setPattern(re);
  3755. compile();
  3756. }
  3757. ///Returns a default constructed RegexMatch object by value.
  3758. ///This object is initialized with the same modifier table
  3759. ///as this Regex object.
  3760. ///@return RegexMatch object.
  3761. RegexMatch initMatch(){
  3762. RegexMatch rm(this);
  3763. rm.setModifierTable(modtab);
  3764. return rm;
  3765. }
  3766. ///Synonym for initMatch()
  3767. ///@return RegexMatch object by value.
  3768. RegexMatch getMatchObject(){
  3769. return initMatch();
  3770. }
  3771. /// Perform regex match and return match count using a temporary match object.
  3772. /// This temporary match object will get available options from this Regex object,
  3773. /// that includes modifier table.
  3774. /// @param s Subject string .
  3775. /// @param mod Modifier string.
  3776. /// @param start_offset Offset from where matching will start in the subject string.
  3777. /// @return Match count
  3778. /// @see RegexMatch::match()
  3779. SIZE_T match(String const &s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
  3780. return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match();
  3781. }
  3782. ///@overload
  3783. ///...
  3784. ///@param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
  3785. ///@param mod Modifier string.
  3786. ///@param start_offset Offset from where matching will start in the subject string.
  3787. ///@return Match count
  3788. SIZE_T match(String const *s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
  3789. return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match();
  3790. }
  3791. ///@overload
  3792. ///...
  3793. /// @param s Subject string .
  3794. /// @param start_offset Offset from where matching will start in the subject string.
  3795. /// @return Match count
  3796. /// @see RegexMatch::match()
  3797. SIZE_T match(String const &s, PCRE2_SIZE start_offset=0) {
  3798. return initMatch().setStartOffset(start_offset).setSubject(s).match();
  3799. }
  3800. ///@overload
  3801. ///...
  3802. /// @param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
  3803. /// @param start_offset Offset from where matching will start in the subject string.
  3804. /// @return Match count
  3805. /// @see RegexMatch::match()
  3806. SIZE_T match(String const *s, PCRE2_SIZE start_offset=0) {
  3807. return initMatch().setStartOffset(start_offset).setSubject(s).match();
  3808. }
  3809. ///Returns a default constructed RegexReplace object by value.
  3810. ///This object is initialized with the same modifier table as this Regex object.
  3811. ///@return RegexReplace object.
  3812. RegexReplace initReplace(){
  3813. RegexReplace rr(this);
  3814. rr.setModifierTable(modtab);
  3815. return rr;
  3816. }
  3817. ///Synonym for initReplace()
  3818. ///@return RegexReplace object.
  3819. RegexReplace getReplaceObject(){
  3820. return initReplace();
  3821. }
  3822. /// Perform regex replace and return the replaced string using a temporary replace object.
  3823. /// This temporary replace object will get available options from this Regex object,
  3824. /// that includes modifier table.
  3825. /// @param mains Subject string.
  3826. /// @param repl String to replace with
  3827. /// @param mod Modifier string.
  3828. ///@param counter Pointer to a counter to store the number of replacement done.
  3829. /// @return Resultant string after regex replace
  3830. /// @see RegexReplace::replace()
  3831. String replace(String const &mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
  3832. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3833. }
  3834. ///@overload
  3835. /// @param mains Pointer to subject string
  3836. /// @param repl String to replace with
  3837. /// @param mod Modifier string.
  3838. ///@param counter Pointer to a counter to store the number of replacement done.
  3839. /// @return Resultant string after regex replace
  3840. /// @see RegexReplace::replace()
  3841. String replace(String *mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
  3842. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3843. }
  3844. ///@overload
  3845. ///...
  3846. /// @param mains Subject string
  3847. /// @param repl Pointer to string to replace with
  3848. /// @param mod Modifier string.
  3849. ///@param counter Pointer to a counter to store the number of replacement done.
  3850. /// @return Resultant string after regex replace
  3851. /// @see RegexReplace::replace()
  3852. String replace(String const &mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
  3853. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3854. }
  3855. ///@overload
  3856. ///...
  3857. /// @param mains Pointer to subject string
  3858. /// @param repl Pointer to string to replace with
  3859. /// @param mod Modifier string.
  3860. ///@param counter Pointer to a counter to store the number of replacement done.
  3861. /// @return Resultant string after regex replace
  3862. /// @see RegexReplace::replace()
  3863. String replace(String *mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
  3864. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3865. }
  3866. /// Perl compatible replace method.
  3867. /// Modifies subject string in-place and returns replace count.
  3868. ///
  3869. /// It's a shorthand method to `RegexReplace::preplace()`.
  3870. /// @param mains Pointer to subject string.
  3871. /// @param repl Replacement string (string to replace with).
  3872. /// @param mod Modifier string.
  3873. /// @return replace count.
  3874. SIZE_T preplace(String * mains, String const& repl, Modifier const& mod=""){
  3875. SIZE_T counter = 0;
  3876. if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3877. return counter;
  3878. }
  3879. /// @overload
  3880. ///
  3881. /// Perl compatible replace method.
  3882. /// Modifies subject string in-place and returns replace count.
  3883. ///
  3884. /// It's a shorthand method to `RegexReplace::preplace()`.
  3885. /// @param mains Pointer to subject string.
  3886. /// @param repl Pointer to replacement string (string to replace with).
  3887. /// @param mod Modifier string.
  3888. /// @return replace count.
  3889. SIZE_T preplace(String * mains, String const* repl, Modifier const& mod=""){
  3890. SIZE_T counter = 0;
  3891. if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3892. return counter;
  3893. }
  3894. /// @overload
  3895. ///
  3896. /// Perl compatible replace method.
  3897. /// Returns replace count and discards subject string.
  3898. ///
  3899. /// It's a shorthand method to `RegexReplace::preplace()`.
  3900. /// @param mains Subject string.
  3901. /// @param repl Replacement string (string to replace with).
  3902. /// @param mod Modifier string.
  3903. /// @return replace count.
  3904. SIZE_T preplace(String const& mains, String const& repl, Modifier const& mod=""){
  3905. SIZE_T counter = 0;
  3906. initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3907. return counter;
  3908. }
  3909. /// @overload
  3910. ///
  3911. /// Perl compatible replace method.
  3912. /// Returns replace count and discards subject string.
  3913. ///
  3914. /// It's a shorthand method to `RegexReplace::preplace()`.
  3915. /// @param mains Subject string.
  3916. /// @param repl Pointer to replacement string (string to replace with).
  3917. /// @param mod Modifier string.
  3918. /// @return replace count.
  3919. SIZE_T preplace(String const& mains, String const* repl, Modifier const& mod=""){
  3920. SIZE_T counter = 0;
  3921. initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3922. return counter;
  3923. }
  3924. };
  3925. private:
  3926. //prevent object instantiation of select class
  3927. select();
  3928. select(select const &);
  3929. #if __cplusplus >= 201103L
  3930. select(select&&);
  3931. #endif
  3932. ~select();
  3933. };//struct select
  3934. }//jpcre2 namespace
  3935. inline void jpcre2::ModifierTable::parseModifierTable(std::string& tabjs, VecOpt& tabjv,
  3936. std::string& tab_s, VecOpt& tab_v,
  3937. std::string const& tabs, VecOpt const& tabv){
  3938. SIZE_T n = tabs.length();
  3939. JPCRE2_ASSERT(n == tabv.size(), ("ValueError: Could not set Modifier table.\
  3940. Modifier character and value tables are not of the same size (" + _tostdstring(n) + " == " + _tostdstring(tabv.size()) + ").").c_str());
  3941. tabjs.clear();
  3942. tab_s.clear(); tab_s.reserve(n);
  3943. tabjv.clear();
  3944. tab_v.clear(); tab_v.reserve(n);
  3945. for(SIZE_T i=0;i<n;++i){
  3946. switch(tabv[i]){
  3947. case JIT_COMPILE:
  3948. case FIND_ALL: //JPCRE2 options are unique, so it's not necessary to check if it's compile or replace or match.
  3949. tabjs.push_back(tabs[i]); tabjv.push_back(tabv[i]);break;
  3950. default: tab_s.push_back(tabs[i]); tab_v.push_back(tabv[i]); break;
  3951. }
  3952. }
  3953. }
  3954. #if __cplusplus >= 201103L
  3955. template<typename Char_T, template<typename...> class Map>
  3956. void jpcre2::select<Char_T, Map>::Regex::compile() {
  3957. #else
  3958. template<typename Char_T>
  3959. void jpcre2::select<Char_T>::Regex::compile() {
  3960. #endif
  3961. //Get c_str of pattern
  3962. Pcre2Sptr c_pattern = (Pcre2Sptr) pat_str_ptr->c_str();
  3963. int err_number = 0;
  3964. PCRE2_SIZE err_offset = 0;
  3965. /**************************************************************************
  3966. * Compile the regular expression pattern, and handle
  3967. * any errors that are detected.
  3968. *************************************************************************/
  3969. //first release any previous memory
  3970. freeRegexMemory();
  3971. code = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile( c_pattern, /* the pattern */
  3972. PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
  3973. compile_opts, /* default options */
  3974. &err_number, /* for error number */
  3975. &err_offset, /* for error offset */
  3976. ccontext); /* use compile context */
  3977. if (code == 0) {
  3978. /* Compilation failed */
  3979. //must not free regex memory, the only function has that right is the destructor
  3980. error_number = err_number;
  3981. error_offset = err_offset;
  3982. return;
  3983. } else if ((jpcre2_compile_opts & JIT_COMPILE) != 0) {
  3984. ///perform JIT compilation it it's enabled
  3985. int jit_ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::jit_compile(code, PCRE2_JIT_COMPLETE);
  3986. if(jit_ret < 0) error_number = jit_ret;
  3987. }
  3988. //everything's OK
  3989. }
  3990. #if __cplusplus >= 201103L
  3991. template<typename Char_T, template<typename...> class Map>
  3992. typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) {
  3993. #else
  3994. template<typename Char_T>
  3995. typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) {
  3996. #endif
  3997. if(counter) *counter = 0;
  3998. replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
  3999. replace_opts &= ~PCRE2_SUBSTITUTE_GLOBAL;
  4000. Regex const * re = RegexMatch::getRegexObject();
  4001. // If re or re->code is null, return the subject string unmodified.
  4002. if (!re || re->code == 0)
  4003. return RegexMatch::getSubject();
  4004. Pcre2Sptr r_subject_ptr = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str();
  4005. //~ SIZE_T totlen = RegexMatch::getSubjectPointer()->length();
  4006. if(do_match) match();
  4007. SIZE_T mcount = vec_soff.size();
  4008. // if mcount is 0, return the subject string. (there's no need to worry about re)
  4009. if(!mcount) return RegexMatch::getSubject();
  4010. SIZE_T current_offset = 0; //needs to be zero, not start_offset, because it's from where unmatched parts will be copied.
  4011. String res, tmp;
  4012. //A check, this check is not fullproof.
  4013. SIZE_T last = vec_eoff.size();
  4014. last = (last>0)?last-1:0;
  4015. JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\
  4016. If you are using esisting match data, try a new match.");
  4017. //loop through the matches
  4018. for(SIZE_T i=0;i<mcount;++i){
  4019. //first copy the unmatched part.
  4020. //Matches that use \K to end before they start are not supported.
  4021. if(vec_soff[i] < current_offset || vec_eoff[i] < vec_soff[i]){
  4022. RegexMatch::error_number = PCRE2_ERROR_BADSUBSPATTERN;
  4023. return RegexMatch::getSubject();
  4024. } else {
  4025. //~ res += RegexMatch::getSubject().substr(current_offset, vec_soff[i]-current_offset);
  4026. res += String(r_subject_ptr+current_offset, r_subject_ptr+vec_soff[i]);
  4027. }
  4028. //now process the matched part
  4029. switch(callbackn){
  4030. case 0: tmp = callback0((void*)0, (void*)0, (void*)0); break;
  4031. case 1: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount, "VecNum");
  4032. tmp = callback1(vec_num[i], (void*)0, (void*)0); break;
  4033. case 2: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount, "VecNas");
  4034. tmp = callback2((void*)0, vec_nas[i], (void*)0); break;
  4035. case 3: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount, "VecNum or VecNas");
  4036. tmp = callback3(vec_num[i], vec_nas[i], (void*)0); break;
  4037. case 4: JPCRE2_VECTOR_DATA_ASSERT(vec_ntn.size() == mcount, "VecNtn");
  4038. tmp = callback4((void*)0, (void*)0, vec_ntn[i]); break;
  4039. case 5: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_ntn.size() == mcount, "VecNum or VecNtn");
  4040. tmp = callback5(vec_num[i], (void*)0, vec_ntn[i]); break;
  4041. case 6: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNas or VecNtn");
  4042. tmp = callback6((void*)0, vec_nas[i], vec_ntn[i]); break;
  4043. case 7: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNum\n or VecNas or VecNtn");
  4044. tmp = callback7(vec_num[i], vec_nas[i], vec_ntn[i]); break;
  4045. default: JPCRE2_ASSERT(2 == 1, "Invalid callbackn. Please file a bug report (must include the line number from below)."); break;
  4046. }
  4047. //reset the current offset
  4048. current_offset = vec_eoff[i];
  4049. //second part
  4050. ///the matched part is the subject
  4051. //~ Pcre2Sptr subject = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str();
  4052. //substr(vec_soff[i], vec_eoff[i] - vec_soff[i]).c_str();//->substr(vec_soff[i], vec_eoff[i]-vec_soff[i]);
  4053. Pcre2Sptr subject = r_subject_ptr + vec_soff[i];
  4054. PCRE2_SIZE subject_length = vec_eoff[i] - vec_soff[i];
  4055. ///the string returned from the callback is the replacement string.
  4056. Pcre2Sptr replace = (Pcre2Sptr) tmp.c_str();
  4057. PCRE2_SIZE replace_length = tmp.length();
  4058. bool retry = true;
  4059. int ret = 0;
  4060. PCRE2_SIZE outlengthptr = 0;
  4061. Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4062. while (true) {
  4063. ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::substitute(
  4064. re->code, /*Points to the compiled pattern*/
  4065. subject, /*Points to the subject string*/
  4066. subject_length, /*Length of the subject string*/
  4067. 0, /*Offset in the subject at which to start matching*/ //must be zero
  4068. replace_opts, /*Option bits*/
  4069. RegexMatch::mdata, /*Points to a match data block, or is NULL*/
  4070. RegexMatch::mcontext, /*Points to a match context, or is NULL*/
  4071. replace, /*Points to the replacement string*/
  4072. replace_length, /*Length of the replacement string*/
  4073. output_buffer, /*Points to the output buffer*/
  4074. &outlengthptr /*Points to the length of the output buffer*/
  4075. );
  4076. if (ret < 0) {
  4077. //Handle errors
  4078. if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0
  4079. && ret == (int) PCRE2_ERROR_NOMEMORY && retry) {
  4080. retry = false;
  4081. /// If initial #buffer_size wasn't big enough for resultant string,
  4082. /// we will try once more with a new buffer size adjusted to the length of the resultant string.
  4083. delete[] output_buffer;
  4084. output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4085. // Go and try to perform the substitute again
  4086. continue;
  4087. } else {
  4088. RegexMatch::error_number = ret;
  4089. delete[] output_buffer;
  4090. return RegexMatch::getSubject();
  4091. }
  4092. }
  4093. //If everything's ok exit the loop
  4094. break;
  4095. }
  4096. res += String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) );
  4097. delete[] output_buffer;
  4098. if(counter) *counter += ret;
  4099. //if FIND_ALL is not set, single match will be performed
  4100. if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break;
  4101. }
  4102. //All matched parts have been dealt with.
  4103. //now copy rest of the string from current_offset
  4104. res += RegexMatch::getSubject().substr(current_offset, String::npos);
  4105. return res;
  4106. }
  4107. #if __cplusplus >= 201103L
  4108. template<typename Char_T, template<typename...> class Map>
  4109. typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){
  4110. #else
  4111. template<typename Char_T>
  4112. typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){
  4113. #endif
  4114. if(counter) *counter = 0;
  4115. if(do_match) match();
  4116. SIZE_T mcount = vec_soff.size();
  4117. // if mcount is 0, return the subject string. (there's no need to worry about re)
  4118. if(!mcount) return RegexMatch::getSubject();
  4119. SIZE_T current_offset = 0; //no need for worrying about start offset, it's handled by match and we get valid offsets out of it.
  4120. String res;
  4121. //A check, this check is not fullproof
  4122. SIZE_T last = vec_eoff.size();
  4123. last = (last>0)?last-1:0;
  4124. JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\
  4125. If you are using esisting match data, try a new match.");
  4126. //loop through the matches
  4127. for(SIZE_T i=0;i<mcount;++i){
  4128. //first copy the unmatched part.
  4129. //Matches that use \K to end before they start are not supported.
  4130. if(vec_soff[i] < current_offset){
  4131. RegexMatch::error_number = PCRE2_ERROR_BADSUBSPATTERN;
  4132. return RegexMatch::getSubject();
  4133. } else {
  4134. res += RegexMatch::getSubject().substr(current_offset, vec_soff[i]-current_offset);
  4135. }
  4136. //now process the matched part
  4137. switch(callbackn){
  4138. case 0: res += callback0((void*)0, (void*)0, (void*)0); break;
  4139. case 1: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount, "VecNum");
  4140. res += callback1(vec_num[i], (void*)0, (void*)0); break;
  4141. case 2: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount, "VecNas");
  4142. res += callback2((void*)0, vec_nas[i], (void*)0); break;
  4143. case 3: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount, "VecNum or VecNas");
  4144. res += callback3(vec_num[i], vec_nas[i], (void*)0); break;
  4145. case 4: JPCRE2_VECTOR_DATA_ASSERT(vec_ntn.size() == mcount, "VecNtn");
  4146. res += callback4((void*)0, (void*)0, vec_ntn[i]); break;
  4147. case 5: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_ntn.size() == mcount, "VecNum or VecNtn");
  4148. res += callback5(vec_num[i], (void*)0, vec_ntn[i]); break;
  4149. case 6: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNas or VecNtn");
  4150. res += callback6((void*)0, vec_nas[i], vec_ntn[i]); break;
  4151. case 7: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNum\n or VecNas or VecNtn");
  4152. res += callback7(vec_num[i], vec_nas[i], vec_ntn[i]); break;
  4153. default: JPCRE2_ASSERT(2 == 1, "Invalid callbackn. Please file a bug report (must include the line number from below)."); break;
  4154. }
  4155. //reset the current offset
  4156. current_offset = vec_eoff[i];
  4157. if(counter) *counter += 1;
  4158. //if FIND_ALL is not set, single match will be performd
  4159. if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break;
  4160. }
  4161. //All matched parts have been dealt with.
  4162. //now copy rest of the string from current_offset
  4163. res += RegexMatch::getSubject().substr(current_offset, String::npos);
  4164. return res;
  4165. }
  4166. #if __cplusplus >= 201103L
  4167. template<typename Char_T, template<typename...> class Map>
  4168. typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::RegexReplace::replace() {
  4169. #else
  4170. template<typename Char_T>
  4171. typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::RegexReplace::replace() {
  4172. #endif
  4173. *last_replace_counter = 0;
  4174. // If re or re->code is null, return the subject string unmodified.
  4175. if (!re || re->code == 0)
  4176. return *r_subject_ptr;
  4177. Pcre2Sptr subject = (Pcre2Sptr) r_subject_ptr->c_str();
  4178. PCRE2_SIZE subject_length = r_subject_ptr->length();
  4179. Pcre2Sptr replace = (Pcre2Sptr) r_replw_ptr->c_str();
  4180. PCRE2_SIZE replace_length = r_replw_ptr->length();
  4181. PCRE2_SIZE outlengthptr = (PCRE2_SIZE) buffer_size;
  4182. bool retry = true;
  4183. int ret = 0;
  4184. Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4185. while (true) {
  4186. ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::substitute(
  4187. re->code, /*Points to the compiled pattern*/
  4188. subject, /*Points to the subject string*/
  4189. subject_length, /*Length of the subject string*/
  4190. _start_offset, /*Offset in the subject at which to start matching*/
  4191. replace_opts, /*Option bits*/
  4192. mdata, /*Points to a match data block, or is NULL*/
  4193. mcontext, /*Points to a match context, or is NULL*/
  4194. replace, /*Points to the replacement string*/
  4195. replace_length, /*Length of the replacement string*/
  4196. output_buffer, /*Points to the output buffer*/
  4197. &outlengthptr /*Points to the length of the output buffer*/
  4198. );
  4199. if (ret < 0) {
  4200. //Handle errors
  4201. if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0
  4202. && ret == (int) PCRE2_ERROR_NOMEMORY && retry) {
  4203. retry = false;
  4204. /// If initial #buffer_size wasn't big enough for resultant string,
  4205. /// we will try once more with a new buffer size adjusted to the length of the resultant string.
  4206. delete[] output_buffer;
  4207. output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4208. // Go and try to perform the substitute again
  4209. continue;
  4210. } else {
  4211. error_number = ret;
  4212. delete[] output_buffer;
  4213. return *r_subject_ptr;
  4214. }
  4215. }
  4216. //If everything's ok exit the loop
  4217. break;
  4218. }
  4219. *last_replace_counter += ret;
  4220. String result = String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) );
  4221. delete[] output_buffer;
  4222. return result;
  4223. }
  4224. #if __cplusplus >= 201103L
  4225. template<typename Char_T, template<typename...> class Map>
  4226. bool jpcre2::select<Char_T, Map>::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector) {
  4227. #else
  4228. template<typename Char_T>
  4229. bool jpcre2::select<Char_T>::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector) {
  4230. #endif
  4231. NumSub num_sub;
  4232. num_sub.reserve(rc); //we know exactly how many elements it will have.
  4233. for (int i = 0; i < rc; i++)
  4234. num_sub.push_back(String((Char*)(subject + ovector[2*i]), ovector[2*i+1] - ovector[2*i]));
  4235. vec_num->push_back(num_sub); //this function shouldn't be called if this vector is null
  4236. return true;
  4237. }
  4238. #if __cplusplus >= 201103L
  4239. template<typename Char_T, template<typename...> class Map>
  4240. bool jpcre2::select<Char_T, Map>::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size,
  4241. Pcre2Sptr name_table,
  4242. Pcre2Sptr subject, PCRE2_SIZE* ovector ) {
  4243. #else
  4244. template<typename Char_T>
  4245. bool jpcre2::select<Char_T>::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size,
  4246. Pcre2Sptr name_table,
  4247. Pcre2Sptr subject, PCRE2_SIZE* ovector ) {
  4248. #endif
  4249. Pcre2Sptr tabptr = name_table;
  4250. String key;
  4251. MapNas map_nas;
  4252. MapNtN map_ntn;
  4253. for (int i = 0; i < namecount; i++) {
  4254. int n;
  4255. if(sizeof( Char_T ) * CHAR_BIT == 8){
  4256. n = (int)((tabptr[0] << 8) | tabptr[1]);
  4257. key = toString((Char*) (tabptr + 2));
  4258. }
  4259. else{
  4260. n = (int)tabptr[0];
  4261. key = toString((Char*) (tabptr + 1));
  4262. }
  4263. //Use of tabptr is finished for this iteration, let's increment it now.
  4264. tabptr += name_entry_size;
  4265. String value((Char*)(subject + ovector[2*n]), ovector[2*n+1] - ovector[2*n]); //n, not i.
  4266. if(vec_nas) map_nas[key] = value;
  4267. if(vec_ntn) map_ntn[key] = n;
  4268. }
  4269. //push the maps into vectors:
  4270. if(vec_nas) vec_nas->push_back(map_nas);
  4271. if(vec_ntn) vec_ntn->push_back(map_ntn);
  4272. return true;
  4273. }
  4274. #if __cplusplus >= 201103L
  4275. template<typename Char_T, template<typename...> class Map>
  4276. jpcre2::SIZE_T jpcre2::select<Char_T, Map>::RegexMatch::match() {
  4277. #else
  4278. template<typename Char_T>
  4279. jpcre2::SIZE_T jpcre2::select<Char_T>::RegexMatch::match() {
  4280. #endif
  4281. // If re or re->code is null, return 0 as the match count
  4282. if (!re || re->code == 0)
  4283. return 0;
  4284. Pcre2Sptr subject = (Pcre2Sptr) m_subject_ptr->c_str();
  4285. Pcre2Sptr name_table = 0;
  4286. int crlf_is_newline = 0;
  4287. int namecount = 0;
  4288. int name_entry_size = 0;
  4289. int rc = 0;
  4290. int utf = 0;
  4291. SIZE_T count = 0;
  4292. Uint option_bits;
  4293. Uint newline = 0;
  4294. PCRE2_SIZE *ovector = 0;
  4295. SIZE_T subject_length = 0;
  4296. MatchData *match_data = 0;
  4297. subject_length = m_subject_ptr->length();
  4298. bool mdc = false; //mdata created.
  4299. if (vec_num) vec_num->clear();
  4300. if (vec_nas) vec_nas->clear();
  4301. if (vec_ntn) vec_ntn->clear();
  4302. if(vec_soff) vec_soff->clear();
  4303. if(vec_eoff) vec_eoff->clear();
  4304. /* Using this function ensures that the block is exactly the right size for
  4305. the number of capturing parentheses in the pattern. */
  4306. if(mdata) match_data = mdata;
  4307. else {
  4308. match_data = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_create_from_pattern(re->code, 0);
  4309. mdc = true;
  4310. }
  4311. rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match( re->code, /* the compiled pattern */
  4312. subject, /* the subject string */
  4313. subject_length, /* the length of the subject */
  4314. _start_offset, /* start at offset 'start_offset' in the subject */
  4315. match_opts, /* default options */
  4316. match_data, /* block for storing the result */
  4317. mcontext); /* use default match context */
  4318. /* Matching failed: handle error cases */
  4319. if (rc < 0) {
  4320. if(mdc)
  4321. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data); /* Release memory used for the match */
  4322. //must not free code. This function has no right to modify regex
  4323. switch (rc) {
  4324. case PCRE2_ERROR_NOMATCH:
  4325. return count;
  4326. /*
  4327. Handle other special cases if you like
  4328. */
  4329. default:;
  4330. }
  4331. error_number = rc;
  4332. return count;
  4333. }
  4334. ++count; //Increment the counter
  4335. /* Match succeded. Get a pointer to the output vector, where string offsets are
  4336. stored. */
  4337. ovector = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_pointer(match_data);
  4338. /************************************************************************//*
  4339. * We have found the first match within the subject string. If the output *
  4340. * vector wasn't big enough, say so. Then output any substrings that were *
  4341. * captured. *
  4342. *************************************************************************/
  4343. /* The output vector wasn't big enough. This should not happen, because we used
  4344. pcre2_match_data_create_from_pattern() above. */
  4345. if (rc == 0) {
  4346. //ovector was not big enough for all the captured substrings;
  4347. error_number = (int)ERROR::INSUFFICIENT_OVECTOR;
  4348. rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_count(match_data);
  4349. }
  4350. //match succeeded at offset ovector[0]
  4351. if(vec_soff) vec_soff->push_back(ovector[0]);
  4352. if(vec_eoff) vec_eoff->push_back(ovector[1]);
  4353. // Get numbered substrings if vec_num isn't null
  4354. if (vec_num) { //must do null check
  4355. if(!getNumberedSubstrings(rc, subject, ovector))
  4356. return count;
  4357. }
  4358. //get named substrings if either vec_nas or vec_ntn is given.
  4359. if (vec_nas || vec_ntn) {
  4360. /* See if there are any named substrings, and if so, show them by name. First
  4361. we have to extract the count of named parentheses from the pattern. */
  4362. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code, /* the compiled pattern */
  4363. PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
  4364. &namecount); /* where to put the answer */
  4365. if (namecount <= 0); /*No named substrings*/
  4366. else {
  4367. /* Before we can access the substrings, we must extract the table for
  4368. translating names to numbers, and the size of each entry in the table. */
  4369. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code, /* the compiled pattern */
  4370. PCRE2_INFO_NAMETABLE, /* address of the table */
  4371. &name_table); /* where to put the answer */
  4372. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code, /* the compiled pattern */
  4373. PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
  4374. &name_entry_size); /* where to put the answer */
  4375. /* Now we can scan the table and, for each entry, print the number, the name,
  4376. and the substring itself. In the 8-bit library the number is held in two
  4377. bytes, most significant first. */
  4378. // Get named substrings if vec_nas isn't null.
  4379. // Get name to number map if vec_ntn isn't null.
  4380. }
  4381. //the following must be outside the above if-else
  4382. if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector))
  4383. return count;
  4384. }
  4385. /***********************************************************************//*
  4386. * If the "g" modifier was given, we want to continue *
  4387. * to search for additional matches in the subject string, in a similar *
  4388. * way to the /g option in Perl. This turns out to be trickier than you *
  4389. * might think because of the possibility of matching an empty string. *
  4390. * What happens is as follows: *
  4391. * *
  4392. * If the previous match was NOT for an empty string, we can just start *
  4393. * the next match at the end of the previous one. *
  4394. * *
  4395. * If the previous match WAS for an empty string, we can't do that, as it *
  4396. * would lead to an infinite loop. Instead, a call of pcre2_match() is *
  4397. * made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
  4398. * first of these tells PCRE2 that an empty string at the start of the *
  4399. * subject is not a valid match; other possibilities must be tried. The *
  4400. * second flag restricts PCRE2 to one match attempt at the initial string *
  4401. * position. If this match succeeds, an alternative to the empty string *
  4402. * match has been found, and we can print it and proceed round the loop, *
  4403. * advancing by the length of whatever was found. If this match does not *
  4404. * succeed, we still stay in the loop, advancing by just one character. *
  4405. * In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
  4406. * more than one byte. *
  4407. * *
  4408. * However, there is a complication concerned with newlines. When the *
  4409. * newline convention is such that CRLF is a valid newline, we must *
  4410. * advance by two characters rather than one. The newline convention can *
  4411. * be set in the regex by (*CR), etc.; if not, we must find the default. *
  4412. *************************************************************************/
  4413. if ((jpcre2_match_opts & FIND_ALL) == 0) {
  4414. if(mdc)
  4415. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data); /* Release the memory that was used */
  4416. // Must not free code. This function has no right to modify regex.
  4417. return count; /* Exit the program. */
  4418. }
  4419. /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
  4420. sequence. First, find the options with which the regex was compiled and extract
  4421. the UTF state. */
  4422. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(re->code, PCRE2_INFO_ALLOPTIONS, &option_bits);
  4423. utf = ((option_bits & PCRE2_UTF) != 0);
  4424. /* Now find the newline convention and see whether CRLF is a valid newline
  4425. sequence. */
  4426. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(re->code, PCRE2_INFO_NEWLINE, &newline);
  4427. crlf_is_newline = newline == PCRE2_NEWLINE_ANY
  4428. || newline == PCRE2_NEWLINE_CRLF
  4429. || newline == PCRE2_NEWLINE_ANYCRLF;
  4430. /** We got the first match. Now loop for second and subsequent matches. */
  4431. for (;;) {
  4432. Uint options = match_opts; /* Normally no options */
  4433. PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
  4434. /* If the previous match was for an empty string, we are finished if we are
  4435. at the end of the subject. Otherwise, arrange to run another match at the
  4436. same point to see if a non-empty match can be found. */
  4437. if (ovector[0] == ovector[1]) {
  4438. if (ovector[0] == subject_length)
  4439. break;
  4440. options |= PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
  4441. }
  4442. /// Run the next matching operation */
  4443. rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match( re->code, /* the compiled pattern */
  4444. subject, /* the subject string */
  4445. subject_length, /* the length of the subject */
  4446. start_offset, /* starting offset in the subject */
  4447. options, /* options */
  4448. match_data, /* block for storing the result */
  4449. mcontext); /* use match context */
  4450. /* This time, a result of NOMATCH isn't an error. If the value in "options"
  4451. is zero, it just means we have found all possible matches, so the loop ends.
  4452. Otherwise, it means we have failed to find a non-empty-string match at a
  4453. point where there was a previous empty-string match. In this case, we do what
  4454. Perl does: advance the matching position by one character, and continue. We
  4455. do this by setting the "end of previous match" offset, because that is picked
  4456. up at the top of the loop as the point at which to start again.
  4457. There are two complications: (a) When CRLF is a valid newline sequence, and
  4458. the current position is just before it, advance by an extra byte. (b)
  4459. Otherwise we must ensure that we skip an entire UTF character if we are in
  4460. UTF mode. */
  4461. if (rc == PCRE2_ERROR_NOMATCH) {
  4462. if (options == 0)
  4463. break; /* All matches found */
  4464. ovector[1] = start_offset + 1; /* Advance one code unit */
  4465. if (crlf_is_newline && /* If CRLF is newline & */
  4466. start_offset < subject_length - 1 && /* we are at CRLF, */
  4467. subject[start_offset] == '\r' && subject[start_offset + 1] == '\n')
  4468. ovector[1] += 1; /* Advance by one more. */
  4469. else if (utf) { /* advance a whole UTF (8 or 16), for UTF-32, it's not needed */
  4470. while (ovector[1] < subject_length) {
  4471. if(sizeof( Char_T ) * CHAR_BIT == 8 && (subject[ovector[1]] & 0xc0) != 0x80) break;
  4472. else if(sizeof( Char_T ) * CHAR_BIT == 16 && (subject[ovector[1]] & 0xfc00) != 0xdc00) break;
  4473. else if(sizeof( Char_T ) * CHAR_BIT == 32) break; //must be else if
  4474. ovector[1] += 1;
  4475. }
  4476. }
  4477. continue; /* Go round the loop again */
  4478. }
  4479. /* Other matching errors are not recoverable. */
  4480. if (rc < 0) {
  4481. if(mdc)
  4482. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data);
  4483. // Must not free code. This function has no right to modify regex.
  4484. error_number = rc;
  4485. return count;
  4486. }
  4487. /* match succeeded */
  4488. ++count; //Increment the counter
  4489. if (rc == 0) {
  4490. /* The match succeeded, but the output vector wasn't big enough. This
  4491. should not happen. */
  4492. error_number = (int)ERROR::INSUFFICIENT_OVECTOR;
  4493. rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_count(match_data);
  4494. }
  4495. //match succeded at ovector[0]
  4496. if(vec_soff) vec_soff->push_back(ovector[0]);
  4497. if(vec_eoff) vec_eoff->push_back(ovector[1]);
  4498. /* As before, get substrings stored in the output vector by number, and then
  4499. also any named substrings. */
  4500. // Get numbered substrings if vec_num isn't null
  4501. if (vec_num) { //must do null check
  4502. if(!getNumberedSubstrings(rc, subject, ovector))
  4503. return count;
  4504. }
  4505. if (vec_nas || vec_ntn) {
  4506. //must call this whether we have named substrings or not:
  4507. if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector))
  4508. return count;
  4509. }
  4510. } /* End of loop to find second and subsequent matches */
  4511. if(mdc)
  4512. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data);
  4513. // Must not free code. This function has no right to modify regex.
  4514. return count;
  4515. }
  4516. #undef JPCRE2_VECTOR_DATA_ASSERT
  4517. #undef JPCRE2_UNUSED
  4518. //some macro documentation for doxygen
  4519. #ifdef __DOXYGEN__
  4520. #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  4521. #define JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  4522. #endif
  4523. #ifndef JPCRE2_NDEBUG
  4524. #define JPCRE2_NDEBUG
  4525. #endif
  4526. ///@def JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  4527. ///Use a function pointer in all cases for the MatchEvaluatorCallback function.
  4528. ///By default, a function pointer is used for the callback in MatchEvaluator with a pre-C++11 compiler, while
  4529. ///with a `>=C++11` compiler `std::function` is used instead of a function pointer.
  4530. ///If this macro is defined before including jpcre2.hpp, a function pointer will be used in all cases.
  4531. ///If you are using lambda functions with captures, stick with `std::function`; on the other hand, if
  4532. ///you are using older compilers, you might want to use a function pointer instead.
  4533. ///
  4534. ///For example, with gcc-4.7, `std::function` will give a compile error in C++11 mode. In such cases, where full C++11
  4535. ///support is not available, use a function pointer.
  4536. ///@def JPCRE2_ASSERT(cond, msg)
  4537. ///Macro to call `jpcre2::jassert()` with file path and line number.
  4538. ///When `NDEBUG` or `JPCRE2_NDEBUG` is defined before including this header, this macro will
  4539. ///be defined as `((void)0)` thus eliminating this assertion.
  4540. ///@param cond condition (boolean)
  4541. ///@param msg message
  4542. ///@def JPCRE2_NDEBUG
  4543. ///Macro to remove debug codes.
  4544. ///Using this macro is discouraged even in production mode but provided for completeness.
  4545. ///You should not use this macro to bypass any error in your program.
  4546. ///Define this macro before including this header if you want to remove debug codes included in this library.
  4547. ///
  4548. ///Using the standard `NDEBUG` macro will have the same effect,
  4549. ///but it is recommended that you use `JPCRE2_NDEBUG` to strip out debug codes specifically for this library.
  4550. #endif
  4551. #endif