jpcre2.hpp 215 KB


  1. /* *****************************************************************************
  2. * ******************* C++ wrapper for PCRE2 Library ****************************
  3. * *****************************************************************************
  4. * Copyright (c) Md. Jahidul Hamid
  5. *
  6. * -----------------------------------------------------------------------------
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright notice,
  11. * this list of conditions and the following disclaimer.
  12. *
  13. * * Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * * The names of its contributors may not be used to endorse or promote
  18. * products derived from this software without specific prior written
  19. * permission.
  20. *
  21. * Disclaimer:
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  24. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  27. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  31. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  32. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. * */
  35. /** @file jpcre2.hpp
  36. * @brief Main header file for the JPCRE2 library, to be included by programs that use its functionality.
  37. * It includes the `pcre2.h` header, therefore you shouldn't include `pcre2.h`, nor should you define `PCRE2_CODE_UNIT_WIDTH` before including
  38. * `jpcre2.hpp`.
  39. * If your `pcre2.h` header is not in standard include paths, you may include `pcre2.h` with correct path before including `jpcre2.hpp`
  40. * manually. In this case you will have to define `PCRE2_CODE_UNIT_WIDTH` before including `pcre2.h`.
  41. * Make sure to link required PCRE2 libraries when compiling.
  42. *
  43. * @author [Md Jahidul Hamid](https://github.com/neurobin)
  44. */
  45. #ifndef JPCRE2_HPP
  46. #define JPCRE2_HPP
  47. #ifndef PCRE2_CODE_UNIT_WIDTH
  48. ///@def PCRE2_CODE_UNIT_WIDTH
  49. ///This macro does not have any significance in JPCRE2 context.
  50. ///It is defined as 0 by default. Defining it before including jpcre2.hpp
  51. ///will override the default (discouraged as it will make it harder for you to detect problems),
  52. ///but still it will have no effect in a JPCRE2 perspective.
  53. ///Defining it with an invalid value will result in a compile error.
  54. #define PCRE2_CODE_UNIT_WIDTH 0
  55. #endif
  56. //previous inclusion of pcre2.h will be respected and we won't try to include it twice.
  57. //Thus one can pre-include pcre2.h from an arbitrary/non-standard path.
  58. #ifndef PCRE2_MAJOR
  59. #include <pcre2.h> // pcre2 header
  60. #endif
  61. #include <string> // std::string, std::wstring
  62. #include <vector> // std::vector
  63. #include <map> // std::map
  64. #include <cstdio> // std::fprintf
  65. #include <climits> // CHAR_BIT
  66. #include <cstdlib> // std::abort()
  67. #if __cplusplus >= 201103L || _MSVC_LANG >= 201103L
  68. #define JPCRE2_USE_MINIMUM_CXX_11 1
  69. #include <utility>
  70. #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  71. #include <functional> // std::function
  72. #endif
  73. #endif
  74. #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L
  75. #define JPCRE2_USE_MINIMUM_CXX_17 1
  76. #include <optional>
  77. #else
  78. #ifdef JPCRE2_UNSET_CAPTURES_NULL
  79. #error JPCRE2_UNSET_CAPTURES_NULL requires C++17
  80. #endif
  81. #endif
  // Marks an expression (typically a variable or parameter) as deliberately unused.
  #define JPCRE2_UNUSED(x) ((void)(x))

  // Assertion macros.
  // When neither NDEBUG nor JPCRE2_NDEBUG is defined they forward to
  // jpcre2::jassert / jpcre2::_jvassert together with the call site
  // (__FILE__, __LINE__); otherwise they expand to a no-op expression.
  #if !defined(NDEBUG) && !defined(JPCRE2_NDEBUG)
  #define JPCRE2_ASSERT(cond, msg) jpcre2::jassert(cond, msg, __FILE__, __LINE__)
  #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) jpcre2::_jvassert(cond, name, __FILE__, __LINE__)
  #else
  #define JPCRE2_ASSERT(cond, msg) ((void)0)
  #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) ((void)0)
  #endif
  90. // In Windows, Windows.h defines ERROR macro
  91. // It conflicts with our jpcre2::ERROR namespace
  92. #ifdef ERROR
  93. #undef ERROR
  94. #endif
  95. /** @namespace jpcre2
  96. * Top level namespace of JPCRE2.
  97. *
  98. * All functions, classes/structs, constants, enums that are provided by JPCRE2 belong to this namespace while
  99. * **PCRE2** structs, functions, constants remain outside of its scope.
  100. *
  101. * If you want to use any PCRE2 functions or constants,
  102. * remember that they are in the global scope and should be used as such.
  103. */
  104. namespace jpcre2 {
  ///JPCRE2 version as a single number.
  ///Can be used to adapt code to changes between versions of the library.
  #define JPCRE2_VERSION 103201L

  /** @namespace jpcre2::INFO
   * Information about the JPCRE2 library itself,
   * exposed as constant C strings (project name and version parts).
   */
  namespace INFO {
    ///Name of the project
    static const char NAME[] = "JPCRE2";
    ///Full version string
    static const char FULL_VERSION[] = "10.32.01";
    ///Generation, depends on original PCRE2 version
    static const char VERSION_GENRE[] = "10";
    ///Major version, updated when API change is made
    static const char VERSION_MAJOR[] = "32";
    ///Minor version, includes bug fix or minor feature upgrade
    static const char VERSION_MINOR[] = "01";
    ///Alpha or beta (testing) release version
    static const char VERSION_PRE_RELEASE[] = "";
  }
  120. typedef PCRE2_SIZE SIZE_T; ///< Used for match count and vector size
  121. typedef uint32_t Uint; ///< Used for options (bitwise operation)
  122. typedef uint8_t Ush; ///< 8 bit unsigned integer.
  123. typedef std::vector<SIZE_T> VecOff; ///< vector of size_t.
  124. typedef std::vector<Uint> VecOpt; ///< vector for Uint option values.
  /// @namespace jpcre2::ERROR
  /// Namespace for error codes.
  namespace ERROR {
    /** Error numbers for JPCRE2.
     * JPCRE2 error numbers are positive integers while
     * PCRE2 error numbers are negative integers.
     */
    enum {
      ///Invalid modifier was detected
      INVALID_MODIFIER = 2,
      ///Ovector was not big enough during a match
      INSUFFICIENT_OVECTOR = 3
    };
  }
  /** JPCRE2 option flags.
   * Each non-zero option occupies its own bit so that options can be combined with bitwise OR.
   */
  enum {
    ///Option 0 (zero)
    NONE = 0u,
    ///Find all during match (global match)
    FIND_ALL = 1u << 1,
    ///Perform JIT compilation for optimization
    JIT_COMPILE = 1u << 2
  };
  // Minimal in-house equivalents of enable_if and is_same.

  // EnableIf<Cond, T>: the nested typedef `Type` (= T, void by default)
  // exists only in the specialization for Cond == true.
  template<bool Cond, typename Result = void>
  struct EnableIf {
  };

  template<typename Result>
  struct EnableIf<true, Result> {
    typedef Result Type;
  };

  // IsSame<A, B>::value is true only when A and B are the same type.
  template<typename A, typename B>
  struct IsSame {
    static const bool value = false;
  };

  template<typename A>
  struct IsSame<A, A> {
    static const bool value = true;
  };
  ///JPCRE2 assert function.
  ///Does nothing when the condition holds; otherwise prints an error report
  ///to stderr and aborts the program.
  ///@param cond boolean condition that is expected to be true
  ///@param msg message to print on failure (C string)
  ///@param f name of the file where jassert was called.
  ///@param line line number where jassert was called.
  static inline void jassert(bool cond, const char* msg, const char* f, size_t line){
    if(cond) return;
    std::fprintf(stderr,
                 "\n\tE: AssertionFailure\n%s\nAssertion failed in file: %s\t at line: %u\n",
                 msg, f, (unsigned)line);
    std::abort();
  }
  165. static inline void _jvassert(bool cond, char const * name, const char* f, size_t line){
  166. jassert(cond, (std::string("ValueError: \n\
  167. Required data vector of type ")+std::string(name)+" is empty.\n\
  168. Your MatchEvaluator callback function is not\n\
  169. compatible with existing data!!\n\
  170. You are trying to use a vector that does not\n\
  171. have any match data. Either call nreplace() or replace()\n\
  172. with true or perform a match with appropriate\n\
  173. callback function. For more details, refer to\n\
  174. the doc in MatchEvaluator section.").c_str(), f, line);
  175. }
  ///Converts an unsigned integer to its decimal representation.
  ///@param x value to convert
  ///@return the decimal digits of x, or an empty string if formatting produced no output.
  static inline std::string _tostdstring(unsigned x){
    char digits[128];
    const int count = std::snprintf(digits, 128, "%u", x);
    if(count <= 0) return std::string();
    return std::string(digits, digits + count);
  }
  181. ////////////////////////// The following are type and function mappings from PCRE2 interface to JPCRE2 interface /////////////////////////
  182. //forward declaration
  183. template<Ush BS> struct Pcre2Type;
  184. template<Ush BS> struct Pcre2Func;
  185. //PCRE2 types
  186. //These templated types will be used in place of actual types
  187. template<Ush BS> struct Pcre2Type {};
  188. template<> struct Pcre2Type<8>{
  189. //typedefs used
  190. typedef PCRE2_UCHAR8 Pcre2Uchar;
  191. typedef PCRE2_SPTR8 Pcre2Sptr;
  192. typedef pcre2_code_8 Pcre2Code;
  193. typedef pcre2_compile_context_8 CompileContext;
  194. typedef pcre2_match_data_8 MatchData;
  195. typedef pcre2_general_context_8 GeneralContext;
  196. typedef pcre2_match_context_8 MatchContext;
  197. typedef pcre2_jit_callback_8 JitCallback;
  198. typedef pcre2_jit_stack_8 JitStack;
  199. };
  200. template<> struct Pcre2Type<16>{
  201. //typedefs used
  202. typedef PCRE2_UCHAR16 Pcre2Uchar;
  203. typedef PCRE2_SPTR16 Pcre2Sptr;
  204. typedef pcre2_code_16 Pcre2Code;
  205. typedef pcre2_compile_context_16 CompileContext;
  206. typedef pcre2_match_data_16 MatchData;
  207. typedef pcre2_general_context_16 GeneralContext;
  208. typedef pcre2_match_context_16 MatchContext;
  209. typedef pcre2_jit_callback_16 JitCallback;
  210. typedef pcre2_jit_stack_16 JitStack;
  211. };
  212. template<> struct Pcre2Type<32>{
  213. //typedefs used
  214. typedef PCRE2_UCHAR32 Pcre2Uchar;
  215. typedef PCRE2_SPTR32 Pcre2Sptr;
  216. typedef pcre2_code_32 Pcre2Code;
  217. typedef pcre2_compile_context_32 CompileContext;
  218. typedef pcre2_match_data_32 MatchData;
  219. typedef pcre2_general_context_32 GeneralContext;
  220. typedef pcre2_match_context_32 MatchContext;
  221. typedef pcre2_jit_callback_32 JitCallback;
  222. typedef pcre2_jit_stack_32 JitStack;
  223. };
  224. //wrappers for PCRE2 functions
  225. template<Ush BS> struct Pcre2Func{};
  226. //8-bit version
  227. template<> struct Pcre2Func<8> {
  228. static Pcre2Type<8>::CompileContext* compile_context_create(Pcre2Type<8>::GeneralContext *gcontext){
  229. return pcre2_compile_context_create_8(gcontext);
  230. }
  231. static void compile_context_free(Pcre2Type<8>::CompileContext *ccontext){
  232. pcre2_compile_context_free_8(ccontext);
  233. }
  234. static Pcre2Type<8>::CompileContext* compile_context_copy(Pcre2Type<8>::CompileContext* ccontext){
  235. return pcre2_compile_context_copy_8(ccontext);
  236. }
  237. static const unsigned char * maketables(Pcre2Type<8>::GeneralContext* gcontext){
  238. return pcre2_maketables_8(gcontext);
  239. }
  240. static int set_character_tables(Pcre2Type<8>::CompileContext * ccontext, const unsigned char * table){
  241. return pcre2_set_character_tables_8(ccontext, table);
  242. }
  243. static Pcre2Type<8>::Pcre2Code * compile(Pcre2Type<8>::Pcre2Sptr pattern,
  244. PCRE2_SIZE length,
  245. uint32_t options,
  246. int *errorcode,
  247. PCRE2_SIZE *erroroffset,
  248. Pcre2Type<8>::CompileContext *ccontext){
  249. return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext);
  250. }
  251. static int jit_compile(Pcre2Type<8>::Pcre2Code *code, uint32_t options){
  252. return pcre2_jit_compile_8(code, options);
  253. }
  254. static int substitute( const Pcre2Type<8>::Pcre2Code *code,
  255. Pcre2Type<8>::Pcre2Sptr subject,
  256. PCRE2_SIZE length,
  257. PCRE2_SIZE startoffset,
  258. uint32_t options,
  259. Pcre2Type<8>::MatchData *match_data,
  260. Pcre2Type<8>::MatchContext *mcontext,
  261. Pcre2Type<8>::Pcre2Sptr replacement,
  262. PCRE2_SIZE rlength,
  263. Pcre2Type<8>::Pcre2Uchar *outputbuffer,
  264. PCRE2_SIZE *outlengthptr){
  265. return pcre2_substitute_8( code, subject, length, startoffset, options, match_data,
  266. mcontext, replacement, rlength, outputbuffer, outlengthptr);
  267. }
  268. //~ static int substring_get_bynumber(Pcre2Type<8>::MatchData *match_data,
  269. //~ uint32_t number,
  270. //~ Pcre2Type<8>::Pcre2Uchar **bufferptr,
  271. //~ PCRE2_SIZE *bufflen){
  272. //~ return pcre2_substring_get_bynumber_8(match_data, number, bufferptr, bufflen);
  273. //~ }
  274. //~ static int substring_get_byname(Pcre2Type<8>::MatchData *match_data,
  275. //~ Pcre2Type<8>::Pcre2Sptr name,
  276. //~ Pcre2Type<8>::Pcre2Uchar **bufferptr,
  277. //~ PCRE2_SIZE *bufflen){
  278. //~ return pcre2_substring_get_byname_8(match_data, name, bufferptr, bufflen);
  279. //~ }
  280. //~ static void substring_free(Pcre2Type<8>::Pcre2Uchar *buffer){
  281. //~ pcre2_substring_free_8(buffer);
  282. //~ }
  283. //~ static Pcre2Type<8>::Pcre2Code * code_copy(const Pcre2Type<8>::Pcre2Code *code){
  284. //~ return pcre2_code_copy_8(code);
  285. //~ }
  286. static void code_free(Pcre2Type<8>::Pcre2Code *code){
  287. pcre2_code_free_8(code);
  288. }
  289. static int get_error_message( int errorcode,
  290. Pcre2Type<8>::Pcre2Uchar *buffer,
  291. PCRE2_SIZE bufflen){
  292. return pcre2_get_error_message_8(errorcode, buffer, bufflen);
  293. }
  294. static Pcre2Type<8>::MatchData * match_data_create_from_pattern(
  295. const Pcre2Type<8>::Pcre2Code *code,
  296. Pcre2Type<8>::GeneralContext *gcontext){
  297. return pcre2_match_data_create_from_pattern_8(code, gcontext);
  298. }
  299. static int match( const Pcre2Type<8>::Pcre2Code *code,
  300. Pcre2Type<8>::Pcre2Sptr subject,
  301. PCRE2_SIZE length,
  302. PCRE2_SIZE startoffset,
  303. uint32_t options,
  304. Pcre2Type<8>::MatchData *match_data,
  305. Pcre2Type<8>::MatchContext *mcontext){
  306. return pcre2_match_8(code, subject, length, startoffset, options, match_data, mcontext);
  307. }
  308. static void match_data_free(Pcre2Type<8>::MatchData *match_data){
  309. pcre2_match_data_free_8(match_data);
  310. }
  311. static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<8>::MatchData *match_data){
  312. return pcre2_get_ovector_pointer_8(match_data);
  313. }
  314. static int pattern_info(const Pcre2Type<8>::Pcre2Code *code, uint32_t what, void *where){
  315. return pcre2_pattern_info_8(code, what, where);
  316. }
  317. static int set_newline(Pcre2Type<8>::CompileContext *ccontext, uint32_t value){
  318. return pcre2_set_newline_8(ccontext, value);
  319. }
  320. //~ static void jit_stack_assign(Pcre2Type<8>::MatchContext *mcontext,
  321. //~ Pcre2Type<8>::JitCallback callback_function,
  322. //~ void *callback_data){
  323. //~ pcre2_jit_stack_assign_8(mcontext, callback_function, callback_data);
  324. //~ }
  325. //~ static Pcre2Type<8>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
  326. //~ Pcre2Type<8>::GeneralContext *gcontext){
  327. //~ return pcre2_jit_stack_create_8(startsize, maxsize, gcontext);
  328. //~ }
  329. //~ static void jit_stack_free(Pcre2Type<8>::JitStack *jit_stack){
  330. //~ pcre2_jit_stack_free_8(jit_stack);
  331. //~ }
  332. //~ static void jit_free_unused_memory(Pcre2Type<8>::GeneralContext *gcontext){
  333. //~ pcre2_jit_free_unused_memory_8(gcontext);
  334. //~ }
  335. //~ static Pcre2Type<8>::MatchContext *match_context_create(Pcre2Type<8>::GeneralContext *gcontext){
  336. //~ return pcre2_match_context_create_8(gcontext);
  337. //~ }
  338. //~ static Pcre2Type<8>::MatchContext *match_context_copy(Pcre2Type<8>::MatchContext *mcontext){
  339. //~ return pcre2_match_context_copy_8(mcontext);
  340. //~ }
  341. //~ static void match_context_free(Pcre2Type<8>::MatchContext *mcontext){
  342. //~ pcre2_match_context_free_8(mcontext);
  343. //~ }
  344. static uint32_t get_ovector_count(Pcre2Type<8>::MatchData *match_data){
  345. return pcre2_get_ovector_count_8(match_data);
  346. }
  347. };
  348. //16-bit version
  349. template<> struct Pcre2Func<16> {
  350. static Pcre2Type<16>::CompileContext* compile_context_create(Pcre2Type<16>::GeneralContext *gcontext){
  351. return pcre2_compile_context_create_16(gcontext);
  352. }
  353. static void compile_context_free(Pcre2Type<16>::CompileContext *ccontext){
  354. pcre2_compile_context_free_16(ccontext);
  355. }
  356. static Pcre2Type<16>::CompileContext* compile_context_copy(Pcre2Type<16>::CompileContext* ccontext){
  357. return pcre2_compile_context_copy_16(ccontext);
  358. }
  359. static const unsigned char * maketables(Pcre2Type<16>::GeneralContext* gcontext){
  360. return pcre2_maketables_16(gcontext);
  361. }
  362. static int set_character_tables(Pcre2Type<16>::CompileContext * ccontext, const unsigned char * table){
  363. return pcre2_set_character_tables_16(ccontext, table);
  364. }
  365. static Pcre2Type<16>::Pcre2Code * compile(Pcre2Type<16>::Pcre2Sptr pattern,
  366. PCRE2_SIZE length,
  367. uint32_t options,
  368. int *errorcode,
  369. PCRE2_SIZE *erroroffset,
  370. Pcre2Type<16>::CompileContext *ccontext){
  371. return pcre2_compile_16(pattern, length, options, errorcode, erroroffset, ccontext);
  372. }
  373. static int jit_compile(Pcre2Type<16>::Pcre2Code *code, uint32_t options){
  374. return pcre2_jit_compile_16(code, options);
  375. }
  376. static int substitute( const Pcre2Type<16>::Pcre2Code *code,
  377. Pcre2Type<16>::Pcre2Sptr subject,
  378. PCRE2_SIZE length,
  379. PCRE2_SIZE startoffset,
  380. uint32_t options,
  381. Pcre2Type<16>::MatchData *match_data,
  382. Pcre2Type<16>::MatchContext *mcontext,
  383. Pcre2Type<16>::Pcre2Sptr replacement,
  384. PCRE2_SIZE rlength,
  385. Pcre2Type<16>::Pcre2Uchar *outputbuffer,
  386. PCRE2_SIZE *outlengthptr){
  387. return pcre2_substitute_16( code, subject, length, startoffset, options, match_data,
  388. mcontext, replacement, rlength, outputbuffer, outlengthptr);
  389. }
  390. //~ static int substring_get_bynumber(Pcre2Type<16>::MatchData *match_data,
  391. //~ uint32_t number,
  392. //~ Pcre2Type<16>::Pcre2Uchar **bufferptr,
  393. //~ PCRE2_SIZE *bufflen){
  394. //~ return pcre2_substring_get_bynumber_16(match_data, number, bufferptr, bufflen);
  395. //~ }
  396. //~ static int substring_get_byname(Pcre2Type<16>::MatchData *match_data,
  397. //~ Pcre2Type<16>::Pcre2Sptr name,
  398. //~ Pcre2Type<16>::Pcre2Uchar **bufferptr,
  399. //~ PCRE2_SIZE *bufflen){
  400. //~ return pcre2_substring_get_byname_16(match_data, name, bufferptr, bufflen);
  401. //~ }
  402. //~ static void substring_free(Pcre2Type<16>::Pcre2Uchar *buffer){
  403. //~ pcre2_substring_free_16(buffer);
  404. //~ }
  405. //~ static Pcre2Type<16>::Pcre2Code * code_copy(const Pcre2Type<16>::Pcre2Code *code){
  406. //~ return pcre2_code_copy_16(code);
  407. //~ }
  408. static void code_free(Pcre2Type<16>::Pcre2Code *code){
  409. pcre2_code_free_16(code);
  410. }
  411. static int get_error_message( int errorcode,
  412. Pcre2Type<16>::Pcre2Uchar *buffer,
  413. PCRE2_SIZE bufflen){
  414. return pcre2_get_error_message_16(errorcode, buffer, bufflen);
  415. }
  416. static Pcre2Type<16>::MatchData * match_data_create_from_pattern(
  417. const Pcre2Type<16>::Pcre2Code *code,
  418. Pcre2Type<16>::GeneralContext *gcontext){
  419. return pcre2_match_data_create_from_pattern_16(code, gcontext);
  420. }
  421. static int match( const Pcre2Type<16>::Pcre2Code *code,
  422. Pcre2Type<16>::Pcre2Sptr subject,
  423. PCRE2_SIZE length,
  424. PCRE2_SIZE startoffset,
  425. uint32_t options,
  426. Pcre2Type<16>::MatchData *match_data,
  427. Pcre2Type<16>::MatchContext *mcontext){
  428. return pcre2_match_16(code, subject, length, startoffset, options, match_data, mcontext);
  429. }
  430. static void match_data_free(Pcre2Type<16>::MatchData *match_data){
  431. pcre2_match_data_free_16(match_data);
  432. }
  433. static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<16>::MatchData *match_data){
  434. return pcre2_get_ovector_pointer_16(match_data);
  435. }
  436. static int pattern_info(const Pcre2Type<16>::Pcre2Code *code, uint32_t what, void *where){
  437. return pcre2_pattern_info_16(code, what, where);
  438. }
  439. static int set_newline(Pcre2Type<16>::CompileContext *ccontext, uint32_t value){
  440. return pcre2_set_newline_16(ccontext, value);
  441. }
  442. //~ static void jit_stack_assign(Pcre2Type<16>::MatchContext *mcontext,
  443. //~ Pcre2Type<16>::JitCallback callback_function,
  444. //~ void *callback_data){
  445. //~ pcre2_jit_stack_assign_16(mcontext, callback_function, callback_data);
  446. //~ }
  447. //~ static Pcre2Type<16>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
  448. //~ Pcre2Type<16>::GeneralContext *gcontext){
  449. //~ return pcre2_jit_stack_create_16(startsize, maxsize, gcontext);
  450. //~ }
  451. //~ static void jit_stack_free(Pcre2Type<16>::JitStack *jit_stack){
  452. //~ pcre2_jit_stack_free_16(jit_stack);
  453. //~ }
  454. //~ static void jit_free_unused_memory(Pcre2Type<16>::GeneralContext *gcontext){
  455. //~ pcre2_jit_free_unused_memory_16(gcontext);
  456. //~ }
  457. //~ static Pcre2Type<16>::MatchContext *match_context_create(Pcre2Type<16>::GeneralContext *gcontext){
  458. //~ return pcre2_match_context_create_16(gcontext);
  459. //~ }
  460. //~ static Pcre2Type<16>::MatchContext *match_context_copy(Pcre2Type<16>::MatchContext *mcontext){
  461. //~ return pcre2_match_context_copy_16(mcontext);
  462. //~ }
  463. //~ static void match_context_free(Pcre2Type<16>::MatchContext *mcontext){
  464. //~ pcre2_match_context_free_16(mcontext);
  465. //~ }
  466. static uint32_t get_ovector_count(Pcre2Type<16>::MatchData *match_data){
  467. return pcre2_get_ovector_count_16(match_data);
  468. }
  469. };
  470. //32-bit version
  471. template<> struct Pcre2Func<32> {
  472. static Pcre2Type<32>::CompileContext* compile_context_create(Pcre2Type<32>::GeneralContext *gcontext){
  473. return pcre2_compile_context_create_32(gcontext);
  474. }
  475. static void compile_context_free(Pcre2Type<32>::CompileContext *ccontext){
  476. pcre2_compile_context_free_32(ccontext);
  477. }
  478. static Pcre2Type<32>::CompileContext* compile_context_copy(Pcre2Type<32>::CompileContext* ccontext){
  479. return pcre2_compile_context_copy_32(ccontext);
  480. }
  481. static const unsigned char * maketables(Pcre2Type<32>::GeneralContext* gcontext){
  482. return pcre2_maketables_32(gcontext);
  483. }
  484. static int set_character_tables(Pcre2Type<32>::CompileContext * ccontext, const unsigned char * table){
  485. return pcre2_set_character_tables_32(ccontext, table);
  486. }
  487. static Pcre2Type<32>::Pcre2Code * compile(Pcre2Type<32>::Pcre2Sptr pattern,
  488. PCRE2_SIZE length,
  489. uint32_t options,
  490. int *errorcode,
  491. PCRE2_SIZE *erroroffset,
  492. Pcre2Type<32>::CompileContext *ccontext){
  493. return pcre2_compile_32(pattern, length, options, errorcode, erroroffset, ccontext);
  494. }
  495. static int jit_compile(Pcre2Type<32>::Pcre2Code *code, uint32_t options){
  496. return pcre2_jit_compile_32(code, options);
  497. }
  498. static int substitute( const Pcre2Type<32>::Pcre2Code *code,
  499. Pcre2Type<32>::Pcre2Sptr subject,
  500. PCRE2_SIZE length,
  501. PCRE2_SIZE startoffset,
  502. uint32_t options,
  503. Pcre2Type<32>::MatchData *match_data,
  504. Pcre2Type<32>::MatchContext *mcontext,
  505. Pcre2Type<32>::Pcre2Sptr replacement,
  506. PCRE2_SIZE rlength,
  507. Pcre2Type<32>::Pcre2Uchar *outputbuffer,
  508. PCRE2_SIZE *outlengthptr){
  509. return pcre2_substitute_32( code, subject, length, startoffset, options, match_data,
  510. mcontext, replacement, rlength, outputbuffer, outlengthptr);
  511. }
  512. //~ static int substring_get_bynumber(Pcre2Type<32>::MatchData *match_data,
  513. //~ uint32_t number,
  514. //~ Pcre2Type<32>::Pcre2Uchar **bufferptr,
  515. //~ PCRE2_SIZE *bufflen){
  516. //~ return pcre2_substring_get_bynumber_32(match_data, number, bufferptr, bufflen);
  517. //~ }
  518. //~ static int substring_get_byname(Pcre2Type<32>::MatchData *match_data,
  519. //~ Pcre2Type<32>::Pcre2Sptr name,
  520. //~ Pcre2Type<32>::Pcre2Uchar **bufferptr,
  521. //~ PCRE2_SIZE *bufflen){
  522. //~ return pcre2_substring_get_byname_32(match_data, name, bufferptr, bufflen);
  523. //~ }
  524. //~ static void substring_free(Pcre2Type<32>::Pcre2Uchar *buffer){
  525. //~ pcre2_substring_free_32(buffer);
  526. //~ }
  527. //~ static Pcre2Type<32>::Pcre2Code * code_copy(const Pcre2Type<32>::Pcre2Code *code){
  528. //~ return pcre2_code_copy_32(code);
  529. //~ }
  530. static void code_free(Pcre2Type<32>::Pcre2Code *code){
  531. pcre2_code_free_32(code);
  532. }
  533. static int get_error_message( int errorcode,
  534. Pcre2Type<32>::Pcre2Uchar *buffer,
  535. PCRE2_SIZE bufflen){
  536. return pcre2_get_error_message_32(errorcode, buffer, bufflen);
  537. }
  538. static Pcre2Type<32>::MatchData * match_data_create_from_pattern(
  539. const Pcre2Type<32>::Pcre2Code *code,
  540. Pcre2Type<32>::GeneralContext *gcontext){
  541. return pcre2_match_data_create_from_pattern_32(code, gcontext);
  542. }
  543. static int match( const Pcre2Type<32>::Pcre2Code *code,
  544. Pcre2Type<32>::Pcre2Sptr subject,
  545. PCRE2_SIZE length,
  546. PCRE2_SIZE startoffset,
  547. uint32_t options,
  548. Pcre2Type<32>::MatchData *match_data,
  549. Pcre2Type<32>::MatchContext *mcontext){
  550. return pcre2_match_32(code, subject, length, startoffset, options, match_data, mcontext);
  551. }
  552. static void match_data_free(Pcre2Type<32>::MatchData *match_data){
  553. pcre2_match_data_free_32(match_data);
  554. }
  555. static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<32>::MatchData *match_data){
  556. return pcre2_get_ovector_pointer_32(match_data);
  557. }
  558. static int pattern_info(const Pcre2Type<32>::Pcre2Code *code, uint32_t what, void *where){
  559. return pcre2_pattern_info_32(code, what, where);
  560. }
  561. static int set_newline(Pcre2Type<32>::CompileContext *ccontext, uint32_t value){
  562. return pcre2_set_newline_32(ccontext, value);
  563. }
  564. //~ static void jit_stack_assign(Pcre2Type<32>::MatchContext *mcontext,
  565. //~ Pcre2Type<32>::JitCallback callback_function,
  566. //~ void *callback_data){
  567. //~ pcre2_jit_stack_assign_32(mcontext, callback_function, callback_data);
  568. //~ }
  569. //~ static Pcre2Type<32>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize,
  570. //~ Pcre2Type<32>::GeneralContext *gcontext){
  571. //~ return pcre2_jit_stack_create_32(startsize, maxsize, gcontext);
  572. //~ }
  573. //~ static void jit_stack_free(Pcre2Type<32>::JitStack *jit_stack){
  574. //~ pcre2_jit_stack_free_32(jit_stack);
  575. //~ }
  576. //~ static void jit_free_unused_memory(Pcre2Type<32>::GeneralContext *gcontext){
  577. //~ pcre2_jit_free_unused_memory_32(gcontext);
  578. //~ }
  579. //~ static Pcre2Type<32>::MatchContext *match_context_create(Pcre2Type<32>::GeneralContext *gcontext){
  580. //~ return pcre2_match_context_create_32(gcontext);
  581. //~ }
  582. //~ static Pcre2Type<32>::MatchContext *match_context_copy(Pcre2Type<32>::MatchContext *mcontext){
  583. //~ return pcre2_match_context_copy_32(mcontext);
  584. //~ }
  585. //~ static void match_context_free(Pcre2Type<32>::MatchContext *mcontext){
  586. //~ pcre2_match_context_free_32(mcontext);
  587. //~ }
  588. static uint32_t get_ovector_count(Pcre2Type<32>::MatchData *match_data){
  589. return pcre2_get_ovector_count_32(match_data);
  590. }
  591. };
  592. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  593. ///Class to take a std::string modifier value with null safety.
  594. ///You don't need to make an instance of this class to pass modifier,
  595. ///just pass std::string or char const*, whatever seems feasible,
  596. ///implicit conversion will kick in and take care of things for you.
  597. class Modifier{
  598. std::string mod;
  599. public:
  600. ///Default constructor.
  601. Modifier(){}
  602. ///Constructor that takes a std::string.
  603. ///@param x std::string as a reference.
  604. Modifier(std::string const& x):mod(x){}
  605. ///Constructor that takes char const * (null safety is provided by this one)
  606. ///@param x char const *
  607. Modifier(char const *x):mod(x?x:""){}
  608. ///Returns the modifier string
  609. ///@return modifier string (std::string)
  610. std::string str() const { return mod; }
  611. ///Returns the c_str() of modifier string
  612. ///@return char const *
  613. char const * c_str() const { return mod.c_str(); }
  614. ///Returns the length of the modifier string
  615. ///@return length
  616. SIZE_T length() const{ return mod.length(); }
  617. ///operator[] overload to access character by index.
  618. ///@param i index
  619. ///@return character at index i.
  620. char operator[](SIZE_T i) const { return mod[i]; }
  621. };
  622. // Namespace for modifier constants.
  623. // For each modifier constant there is a jpcre2::Uint option value.
  624. // Some modifiers may have multiple values set together (ORed in bitwise operation) and
  625. // thus they may include other modifiers. Such an example is the 'n' modifier. It is combined together with 'u'.
  626. namespace MOD {
  627. // Define modifiers for compile
  628. // String of compile modifier characters for PCRE2 options
  629. static const char C_N[] = "eijmnsuxADJU";
  630. // Array of compile modifier values for PCRE2 options
  631. // Uint is being used in getModifier() in for loop to get the number of element in this array,
  632. // be sure to chnage there if you change here.
  633. static const jpcre2::Uint C_V[12] = { PCRE2_MATCH_UNSET_BACKREF, // Modifier e
  634. PCRE2_CASELESS, // Modifier i
  635. PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF, // Modifier j
  636. PCRE2_MULTILINE, // Modifier m
  637. PCRE2_UTF | PCRE2_UCP, // Modifier n (includes u)
  638. PCRE2_DOTALL, // Modifier s
  639. PCRE2_UTF, // Modifier u
  640. PCRE2_EXTENDED, // Modifier x
  641. PCRE2_ANCHORED, // Modifier A
  642. PCRE2_DOLLAR_ENDONLY, // Modifier D
  643. PCRE2_DUPNAMES, // Modifier J
  644. PCRE2_UNGREEDY // Modifier U
  645. };
  646. // String of compile modifier characters for JPCRE2 options
  647. static const char CJ_N[] = "S";
  648. // Array of compile modifier values for JPCRE2 options
  649. static const jpcre2::Uint CJ_V[1] = { JIT_COMPILE, // Modifier S
  650. };
  651. // Define modifiers for replace
  652. // String of action (replace) modifier characters for PCRE2 options
  653. static const char R_N[] = "eEgx";
  654. // Array of action (replace) modifier values for PCRE2 options
  655. static const jpcre2::Uint R_V[4] = { PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier e
  656. PCRE2_SUBSTITUTE_UNKNOWN_UNSET | PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier E (includes e)
  657. PCRE2_SUBSTITUTE_GLOBAL, // Modifier g
  658. PCRE2_SUBSTITUTE_EXTENDED // Modifier x
  659. };
  660. // String of action (replace) modifier characters for JPCRE2 options
  661. static const char RJ_N[] = "";
  662. // Array of action (replace) modifier values for JPCRE2 options
  663. static const jpcre2::Uint RJ_V[1] = { NONE //placeholder
  664. };
  665. // Define modifiers for match
  666. // String of action (match) modifier characters for PCRE2 options
  667. static const char M_N[] = "A";
  668. // Array of action (match) modifier values for PCRE2 options
  669. static const jpcre2::Uint M_V[1] = { PCRE2_ANCHORED // Modifier A
  670. };
  671. // String of action (match) modifier characters for JPCRE2 options
  672. static const char MJ_N[] = "g";
  673. // Array of action (match) modifier values for JPCRE2 options
  674. static const jpcre2::Uint MJ_V[1] = { FIND_ALL, // Modifier g
  675. };
  676. static inline void toOption(Modifier const& mod, bool x,
  677. Uint const * J_V, char const * J_N, SIZE_T SJ,
  678. Uint const * V, char const * N, SIZE_T S,
  679. Uint* po, Uint* jo,
  680. int* en, SIZE_T* eo
  681. ){
  682. //loop through mod
  683. SIZE_T n = mod.length();
  684. for (SIZE_T i = 0; i < n; ++i) {
  685. //First check for JPCRE2 mods
  686. for(SIZE_T j = 0; j < SJ; ++j){
  687. if(J_N[j] == mod[i]) {
  688. if(x) *jo |= J_V[j];
  689. else *jo &= ~J_V[j];
  690. goto endfor;
  691. }
  692. }
  693. //Now check for PCRE2 mods
  694. for(SIZE_T j = 0; j< S; ++j){
  695. if(N[j] == mod[i]){
  696. if(x) *po |= V[j];
  697. else *po &= ~V[j];
  698. goto endfor;
  699. }
  700. }
  701. //Modifier didn't match, invalid modifier
  702. *en = (int)ERROR::INVALID_MODIFIER;
  703. *eo = (int)mod[i];
  704. endfor:;
  705. }
  706. }
  707. static inline void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
  708. toOption(mod, x,
  709. MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint),
  710. M_V, M_N, sizeof(M_V)/sizeof(Uint),
  711. po, jo, en, eo);
  712. }
  713. static inline void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
  714. toOption(mod, x,
  715. RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint),
  716. R_V, R_N, sizeof(R_V)/sizeof(Uint),
  717. po, jo, en, eo);
  718. }
  719. static inline void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){
  720. toOption(mod, x,
  721. CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint),
  722. C_V, C_N, sizeof(C_V)/sizeof(Uint),
  723. po, jo, en, eo);
  724. }
  725. static inline std::string fromOption(Uint const * J_V, char const * J_N, SIZE_T SJ,
  726. Uint const * V, char const * N, SIZE_T S,
  727. Uint po, Uint jo
  728. ){
  729. std::string mod;
  730. //Calculate PCRE2 mod
  731. for(SIZE_T i = 0; i < S; ++i){
  732. if( (V[i] & po) != 0 &&
  733. (V[i] & po) == V[i]) //One option can include other
  734. mod += N[i];
  735. }
  736. //Calculate JPCRE2 mod
  737. for(SIZE_T i = 0; i < SJ; ++i){
  738. if( (J_V[i] & jo) != 0 &&
  739. (J_V[i] & jo) == J_V[i]) //One option can include other
  740. mod += J_N[i];
  741. }
  742. return mod;
  743. }
  744. static inline std::string fromMatchOption(Uint po, Uint jo){
  745. return fromOption(MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint),
  746. M_V, M_N, sizeof(M_V)/sizeof(Uint),
  747. po, jo);
  748. }
  749. static inline std::string fromReplaceOption(Uint po, Uint jo){
  750. return fromOption(RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint),
  751. R_V, R_N, sizeof(R_V)/sizeof(Uint),
  752. po, jo);
  753. }
  754. static inline std::string fromCompileOption(Uint po, Uint jo){
  755. return fromOption(CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint),
  756. C_V, C_N, sizeof(C_V)/sizeof(Uint),
  757. po, jo);
  758. }
  759. } //MOD namespace ends
  760. ///Lets you create custom modifier tables.
  761. ///An instance of this class can be passed to
  762. ///match, replace or compile related class objects.
  763. class ModifierTable{
  764. std::string tabjms;
  765. std::string tabms;
  766. std::string tabjrs;
  767. std::string tabrs;
  768. std::string tabjcs;
  769. std::string tabcs;
  770. VecOpt tabjmv;
  771. VecOpt tabmv;
  772. VecOpt tabjrv;
  773. VecOpt tabrv;
  774. VecOpt tabjcv;
  775. VecOpt tabcv;
  776. void toOption(Modifier const& mod, bool x,
  777. VecOpt const& J_V, std::string const& J_N,
  778. VecOpt const& V, std::string const& N,
  779. Uint* po, Uint* jo, int* en, SIZE_T* eo
  780. ) const{
  781. SIZE_T SJ = J_V.size();
  782. SIZE_T S = V.size();
  783. JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str());
  784. JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str());
  785. MOD::toOption(mod, x,
  786. J_V.empty()?0:&J_V[0], J_N.c_str(), SJ,
  787. V.empty()?0:&V[0], N.c_str(), S,
  788. po, jo, en, eo
  789. );
  790. }
  791. std::string fromOption(VecOpt const& J_V, std::string const& J_N,
  792. VecOpt const& V, std::string const& N,
  793. Uint po, Uint jo) const{
  794. SIZE_T SJ = J_V.size();
  795. SIZE_T S = V.size();
  796. JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str());
  797. JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str());
  798. return MOD::fromOption(J_V.empty()?0:&J_V[0], J_N.c_str(), SJ,
  799. V.empty()?0:&V[0], N.c_str(), S,
  800. po, jo);
  801. }
  802. void parseModifierTable(std::string& tabjs, VecOpt& tabjv,
  803. std::string& tab_s, VecOpt& tab_v,
  804. std::string const& tabs, VecOpt const& tabv);
  805. public:
  806. ///Default constructor that creates an empty modifier table.
  807. ModifierTable(){}
  808. ///@overload
  809. ///@param deflt Initialize with default table if true, otherwise keep empty.
  810. ModifierTable(bool deflt){
  811. if(deflt) setAllToDefault();
  812. }
  813. ///Reset the match modifier table to its initial (empty) state including memory.
  814. ///@return A reference to the calling ModifierTable object.
  815. ModifierTable& resetMatchModifierTable(){
  816. std::string().swap(tabjms);
  817. std::string().swap(tabms);
  818. VecOpt().swap(tabjmv);
  819. VecOpt().swap(tabmv);
  820. return *this;
  821. }
  822. ///Reset the replace modifier table to its initial (empty) state including memory.
  823. ///@return A reference to the calling ModifierTable object.
  824. ModifierTable& resetReplaceModifierTable(){
  825. std::string().swap(tabjrs);
  826. std::string().swap(tabrs);
  827. VecOpt().swap(tabjrv);
  828. VecOpt().swap(tabrv);
  829. return *this;
  830. }
  831. ///Reset the compile modifier table to its initial (empty) state including memory.
  832. ///@return A reference to the calling ModifierTable object.
  833. ModifierTable& resetCompileModifierTable(){
  834. std::string().swap(tabjcs);
  835. std::string().swap(tabcs);
  836. VecOpt().swap(tabjcv);
  837. VecOpt().swap(tabcv);
  838. return *this;
  839. }
  840. ///Reset the modifier tables to their initial (empty) state including memory.
  841. ///@return A reference to the calling ModifierTable object.
  842. ModifierTable& reset(){
  843. resetMatchModifierTable();
  844. resetReplaceModifierTable();
  845. resetCompileModifierTable();
  846. return *this;
  847. }
  848. ///Clear the match modifier table to its initial (empty) state.
  849. ///Memory may retain for further use.
  850. ///@return A reference to the calling ModifierTable object.
  851. ModifierTable& clearMatchModifierTable(){
  852. tabjms.clear();
  853. tabms.clear();
  854. tabjmv.clear();
  855. tabmv.clear();
  856. return *this;
  857. }
  858. ///Clear the replace modifier table to its initial (empty) state.
  859. ///Memory may retain for further use.
  860. ///@return A reference to the calling ModifierTable object.
  861. ModifierTable& clearReplaceModifierTable(){
  862. tabjrs.clear();
  863. tabrs.clear();
  864. tabjrv.clear();
  865. tabrv.clear();
  866. return *this;
  867. }
  868. ///Clear the compile modifier table to its initial (empty) state.
  869. ///Memory may retain for further use.
  870. ///@return A reference to the calling ModifierTable object.
  871. ModifierTable& clearCompileModifierTable(){
  872. tabjcs.clear();
  873. tabcs.clear();
  874. tabjcv.clear();
  875. tabcv.clear();
  876. return *this;
  877. }
  878. ///Clear the modifier tables to their initial (empty) state.
  879. ///Memory may retain for further use.
  880. ///@return A reference to the calling ModifierTable object.
  881. ModifierTable& clear(){
  882. clearMatchModifierTable();
  883. clearReplaceModifierTable();
  884. clearCompileModifierTable();
  885. return *this;
  886. }
  887. ///Modifier parser for match related options.
  888. ///@param mod modifier string
  889. ///@param x whether to add or remove the modifers.
  890. ///@param po pointer to PCRE2 match option that will be modified.
  891. ///@param jo pointer to JPCRE2 match option that will be modified.
  892. ///@param en where to put the error number.
  893. ///@param eo where to put the error offset.
  894. void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
  895. toOption(mod, x,tabjmv,tabjms,tabmv, tabms,po,jo,en,eo);
  896. }
  897. ///Modifier parser for replace related options.
  898. ///@param mod modifier string
  899. ///@param x whether to add or remove the modifers.
  900. ///@param po pointer to PCRE2 replace option that will be modified.
  901. ///@param jo pointer to JPCRE2 replace option that will be modified.
  902. ///@param en where to put the error number.
  903. ///@param eo where to put the error offset.
  904. void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
  905. return toOption(mod, x,tabjrv,tabjrs,tabrv,tabrs,po,jo,en,eo);
  906. }
  907. ///Modifier parser for compile related options.
  908. ///@param mod modifier string
  909. ///@param x whether to add or remove the modifers.
  910. ///@param po pointer to PCRE2 compile option that will be modified.
  911. ///@param jo pointer to JPCRE2 compile option that will be modified.
  912. ///@param en where to put the error number.
  913. ///@param eo where to put the error offset.
  914. void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const {
  915. return toOption(mod, x,tabjcv,tabjcs,tabcv,tabcs,po,jo,en,eo);
  916. }
  917. ///Take match related option value and convert to modifier string.
  918. ///@param po PCRE2 option.
  919. ///@param jo JPCRE2 option.
  920. ///@return modifier string (std::string)
  921. std::string fromMatchOption(Uint po, Uint jo) const {
  922. return fromOption(tabjmv,tabjms,tabmv,tabms,po,jo);
  923. }
  924. ///Take replace related option value and convert to modifier string.
  925. ///@param po PCRE2 option.
  926. ///@param jo JPCRE2 option.
  927. ///@return modifier string (std::string)
  928. std::string fromReplaceOption(Uint po, Uint jo) const {
  929. return fromOption(tabjrv,tabjrs,tabrv,tabrs,po,jo);
  930. }
  931. ///Take compile related option value and convert to modifier string.
  932. ///@param po PCRE2 option.
  933. ///@param jo JPCRE2 option.
  934. ///@return modifier string (std::string)
  935. std::string fromCompileOption(Uint po, Uint jo) const {
  936. return fromOption(tabjcv,tabjcs,tabcv,tabcs,po,jo);
  937. }
  938. ///Set modifier table for match.
  939. ///Takes a string and a vector of sequential options.
  940. ///@param tabs modifier string (list of modifiers)
  941. ///@param tabv vector of Uint (options).
  942. ///@return A reference to the calling ModifierTable object.
  943. ModifierTable& setMatchModifierTable(std::string const& tabs, VecOpt const& tabv){
  944. parseModifierTable(tabjms, tabjmv, tabms, tabmv, tabs, tabv);
  945. return *this;
  946. }
  947. ///Set modifier table for match.
  948. ///Takes a string and an array of sequential options.
  949. ///@param tabs modifier string (list of modifiers)
  950. ///@param tabvp array of Uint (options). If null, table is set to empty.
  951. ///@return A reference to the calling ModifierTable object.
  952. ModifierTable& setMatchModifierTable(std::string const& tabs, const Uint* tabvp){
  953. if(tabvp) {
  954. VecOpt tabv(tabvp, tabvp + tabs.length());
  955. setMatchModifierTable(tabs, tabv);
  956. } else clearMatchModifierTable();
  957. return *this;
  958. }
  959. ///@overload
  960. ///...
  961. ///This one takes modifier and value by array.
  962. ///If the arrays are not of the same length, the behavior is undefined.
  963. ///If any of the argument is null, the table is set empty.
  964. ///@param tabsp modifier string (list of modifiers).
  965. ///@param tabvp array of Uint (options).
  966. ///@return A reference to the calling ModifierTable object.
  967. ModifierTable& setMatchModifierTable(const char* tabsp, const Uint* tabvp){
  968. if(tabsp && tabvp) {
  969. std::string tabs(tabsp);
  970. VecOpt tabv(tabvp, tabvp + tabs.length());
  971. setMatchModifierTable(tabs, tabv);
  972. } else clearMatchModifierTable();
  973. return *this;
  974. }
  975. ///Set modifier table for replace.
  976. ///Takes a string and a vector of sequential options.
  977. ///@param tabs modifier string (list of modifiers)
  978. ///@param tabv vector of Uint (options).
  979. ///@return A reference to the calling ModifierTable object.
  980. ModifierTable& setReplaceModifierTable(std::string const& tabs, VecOpt const& tabv){
  981. parseModifierTable(tabjrs, tabjrv, tabrs, tabrv, tabs, tabv);
  982. return *this;
  983. }
  984. ///Set modifier table for replace.
  985. ///Takes a string and an array of sequential options.
  986. ///@param tabs modifier string (list of modifiers)
  987. ///@param tabvp array of Uint (options). If null, table is set to empty.
  988. ///@return A reference to the calling ModifierTable object.
  989. ModifierTable& setReplaceModifierTable(std::string const& tabs, const Uint* tabvp){
  990. if(tabvp) {
  991. VecOpt tabv(tabvp, tabvp + tabs.length());
  992. setReplaceModifierTable(tabs, tabv);
  993. } else clearReplaceModifierTable();
  994. return *this;
  995. }
  996. ///@overload
  997. ///...
  998. ///This one takes modifier and value by array.
  999. ///If the arrays are not of the same length, the behavior is undefined.
  1000. ///If any of the argument is null, the table is set empty.
  1001. ///@param tabsp modifier string (list of modifiers).
  1002. ///@param tabvp array of Uint (options).
  1003. ///@return A reference to the calling ModifierTable object.
  1004. ModifierTable& setReplaceModifierTable(const char* tabsp, const Uint* tabvp){
  1005. if(tabsp && tabvp) {
  1006. std::string tabs(tabsp);
  1007. VecOpt tabv(tabvp, tabvp + tabs.length());
  1008. setReplaceModifierTable(tabs, tabv);
  1009. } else clearReplaceModifierTable();
  1010. return *this;
  1011. }
  1012. ///Set modifier table for compile.
  1013. ///Takes a string and a vector of sequential options.
  1014. ///@param tabs modifier string (list of modifiers)
  1015. ///@param tabv vector of Uint (options).
  1016. ///@return A reference to the calling ModifierTable object.
  1017. ModifierTable& setCompileModifierTable(std::string const& tabs, VecOpt const& tabv){
  1018. parseModifierTable(tabjcs, tabjcv, tabcs, tabcv, tabs, tabv);
  1019. return *this;
  1020. }
  1021. ///Set modifier table for compile.
  1022. ///Takes a string and an array of sequential options.
  1023. ///@param tabs modifier string (list of modifiers)
  1024. ///@param tabvp array of Uint (options). If null, table is set to empty.
  1025. ///@return A reference to the calling ModifierTable object.
  1026. ModifierTable& setCompileModifierTable(std::string const& tabs, const Uint* tabvp){
  1027. if(tabvp) {
  1028. VecOpt tabv(tabvp, tabvp + tabs.length());
  1029. setCompileModifierTable(tabs, tabv);
  1030. } else clearCompileModifierTable();
  1031. return *this;
  1032. }
  1033. ///@overload
  1034. ///...
  1035. ///This one takes modifier and value by array.
  1036. ///If the arrays are not of the same length, the behavior is undefined.
  1037. ///If any of the argument is null, the table is set empty.
  1038. ///@param tabsp modifier string (list of modifiers).
  1039. ///@param tabvp array of Uint (options).
  1040. ///@return A reference to the calling ModifierTable object.
  1041. ModifierTable& setCompileModifierTable(const char* tabsp, const Uint* tabvp){
  1042. if(tabsp && tabvp) {
  1043. std::string tabs(tabsp);
  1044. VecOpt tabv(tabvp, tabvp + tabs.length());
  1045. setCompileModifierTable(tabs, tabv);
  1046. } else clearCompileModifierTable();
  1047. return *this;
  1048. }
  1049. ///Set match modifie table to default
  1050. ///@return A reference to the calling ModifierTable object.
  1051. ModifierTable& setMatchModifierTableToDefault(){
  1052. tabjms = std::string(MOD::MJ_N, MOD::MJ_N + sizeof(MOD::MJ_V)/sizeof(Uint));
  1053. tabms = std::string(MOD::M_N, MOD::M_N + sizeof(MOD::M_V)/sizeof(Uint));
  1054. tabjmv = VecOpt(MOD::MJ_V, MOD::MJ_V + sizeof(MOD::MJ_V)/sizeof(Uint));
  1055. tabmv = VecOpt(MOD::M_V, MOD::M_V + sizeof(MOD::M_V)/sizeof(Uint));
  1056. return *this;
  1057. }
  1058. ///Set replace modifier table to default.
  1059. ///@return A reference to the calling ModifierTable object.
  1060. ModifierTable& setReplaceModifierTableToDefault(){
  1061. tabjrs = std::string(MOD::RJ_N, MOD::RJ_N + sizeof(MOD::RJ_V)/sizeof(Uint));
  1062. tabrs = std::string(MOD::R_N, MOD::R_N + sizeof(MOD::R_V)/sizeof(Uint));
  1063. tabjrv = VecOpt(MOD::RJ_V, MOD::RJ_V + sizeof(MOD::RJ_V)/sizeof(Uint));
  1064. tabrv = VecOpt(MOD::R_V, MOD::R_V + sizeof(MOD::R_V)/sizeof(Uint));
  1065. return *this;
  1066. }
  1067. ///Set compile modifier table to default.
  1068. ///@return A reference to the calling ModifierTable object.
  1069. ModifierTable& setCompileModifierTableToDefault(){
  1070. tabjcs = std::string(MOD::CJ_N, MOD::CJ_N + sizeof(MOD::CJ_V)/sizeof(Uint));
  1071. tabcs = std::string(MOD::C_N, MOD::C_N + sizeof(MOD::C_V)/sizeof(Uint));
  1072. tabjcv = VecOpt(MOD::CJ_V, MOD::CJ_V + sizeof(MOD::CJ_V)/sizeof(Uint));
  1073. tabcv = VecOpt(MOD::C_V, MOD::C_V + sizeof(MOD::C_V)/sizeof(Uint));
  1074. return *this;
  1075. }
  1076. ///Set all tables to default.
  1077. ///@return A reference to the calling ModifierTable object.
  1078. ModifierTable& setAllToDefault(){
  1079. setMatchModifierTableToDefault();
  1080. setReplaceModifierTableToDefault();
  1081. setCompileModifierTableToDefault();
  1082. return *this;
  1083. }
  1084. };
  //Text fragments used when error/warning messages are put together.
  //The primary template only declares the accessors; each supported character
  //type supplies them through inline explicit specializations, which keeps the
  //header free of multiple definitions.
  template<typename Char_T> struct MSG{
      static std::basic_string<Char_T> INVALID_MODIFIER(void);
      static std::basic_string<Char_T> INSUFFICIENT_OVECTOR(void);
  };

  //char
  template<> inline std::basic_string<char> MSG<char>::INVALID_MODIFIER(){
      return std::basic_string<char>("Invalid modifier: ");
  }
  template<> inline std::basic_string<char> MSG<char>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<char>("ovector wasn't big enough");
  }

  //wchar_t
  template<> inline std::basic_string<wchar_t> MSG<wchar_t>::INVALID_MODIFIER(){
      return std::basic_string<wchar_t>(L"Invalid modifier: ");
  }
  template<> inline std::basic_string<wchar_t> MSG<wchar_t>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<wchar_t>(L"ovector wasn't big enough");
  }

  #ifdef JPCRE2_USE_MINIMUM_CXX_11
  //char16_t
  template<> inline std::basic_string<char16_t> MSG<char16_t>::INVALID_MODIFIER(){
      return std::basic_string<char16_t>(u"Invalid modifier: ");
  }
  template<> inline std::basic_string<char16_t> MSG<char16_t>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<char16_t>(u"ovector wasn't big enough");
  }

  //char32_t
  template<> inline std::basic_string<char32_t> MSG<char32_t>::INVALID_MODIFIER(){
      return std::basic_string<char32_t>(U"Invalid modifier: ");
  }
  template<> inline std::basic_string<char32_t> MSG<char32_t>::INSUFFICIENT_OVECTOR(){
      return std::basic_string<char32_t>(U"ovector wasn't big enough");
  }
  #endif
  1102. ///struct to select the types.
  1103. ///
  1104. ///@tparam Char_T Character type (`char`, `wchar_t`, `char16_t`, `char32_t`)
  1105. ///@tparam Map Optional parameter (Only `>= C++11`) to specify a map container (`std::map`, `std::unordered_map` etc..). Default is `std::map`.
  1106. ///
  1107. ///The character type (`Char_T`) must be in accordance with the PCRE2 library you are linking against.
  1108. ///If not sure which library you need, link against all 3 PCRE2 libraries and they will be used as needed.
  1109. ///
  1110. ///If you want to be specific, then here's the rule:
  1111. ///
  1112. ///1. If `Char_T` is 8 bit, you need 8 bit PCRE2 library
  1113. ///2. If `Char_T` is 16 bit, you need 16 bit PCRE2 library
  1114. ///3. If `Char_T` is 32 bit, you need 32 bit PCRE2 library
  1115. ///4. if `Char_T` is not 8 or 16 or 32 bit, you will get compile error.
  1116. ///
  1117. ///In `>= C++11` you get an additional optional template parameter to specify a map container.
  1118. ///For example, you can use `std::unordered_map` instead of the default `std::map`:
  1119. /// ```cpp
  1120. /// #include <unordered_map>
  1121. /// typedef jpcre2::select<char, std::unordered_map> jp;
  1122. /// ```
  1123. ///
  1124. ///We will use the following typedef throughout this doc:
  1125. ///```cpp
  1126. ///typedef jpcre2::select<Char_T> jp;
  1127. ///```
  1128. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  1129. template<typename Char_T, template<typename...> class Map=std::map>
  1130. #else
  1131. template<typename Char_T>
  1132. #endif
  1133. struct select{
  1134. ///Typedef for character (`char`, `wchar_t`, `char16_t`, `char32_t`)
  1135. typedef Char_T Char;
  1136. //typedef Char_T Char;
  1137. ///Typedef for string (`std::string`, `std::wstring`, `std::u16string`, `std::u32string`).
  1138. ///Defined as `std::basic_string<Char_T>`.
  1139. ///May be this list will make more sense:
  1140. ///Character | String
  1141. ///--------- | -------
  1142. ///char | std::string
  1143. ///wchar_t | std::wstring
  1144. ///char16_t | std::u16string (>=C++11)
  1145. ///char32_t | std::u32string (>=C++11)
  1146. typedef typename std::basic_string<Char_T> String;
  1147. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  1148. ///Map for Named substrings.
  1149. typedef class Map<String, String> MapNas;
  1150. ///Substring name to Substring number map.
  1151. typedef class Map<String, SIZE_T> MapNtN;
  1152. #else
  1153. ///Map for Named substrings.
  1154. typedef typename std::map<String, String> MapNas;
  1155. ///Substring name to Substring number map.
  1156. typedef typename std::map<String, SIZE_T> MapNtN;
  1157. #endif
  1158. ///Allow spelling mistake of MapNtN as MapNtn.
  1159. typedef MapNtN MapNtn;
  1160. ///Vector for Numbered substrings (Sub container).
  1161. #ifdef JPCRE2_UNSET_CAPTURES_NULL
  1162. typedef typename std::vector<std::optional<String>> NumSub;
  1163. #else
  1164. typedef typename std::vector<String> NumSub;
  1165. #endif
  1166. ///Vector of matches with named substrings.
  1167. typedef typename std::vector<MapNas> VecNas;
  1168. ///Vector of substring name to substring number map.
  1169. typedef typename std::vector<MapNtN> VecNtN;
  1170. ///Allow spelling mistake of VecNtN as VecNtn.
  1171. typedef VecNtN VecNtn;
  1172. ///Vector of matches with numbered substrings.
  1173. typedef typename std::vector<NumSub> VecNum;
  1174. //These are to shorten the code
  1175. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Uchar Pcre2Uchar;
  1176. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Sptr Pcre2Sptr;
  1177. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::Pcre2Code Pcre2Code;
  1178. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::CompileContext CompileContext;
  1179. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::MatchData MatchData;
  1180. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::GeneralContext GeneralContext;
  1181. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::MatchContext MatchContext;
  1182. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::JitCallback JitCallback;
  1183. typedef typename Pcre2Type<sizeof( Char_T ) * CHAR_BIT>::JitStack JitStack;
  1184. template<typename T>
  1185. static String toString(T); //prevent implicit type conversion of T
  1186. ///Converts a Char_T to jpcre2::select::String
  1187. ///@param a Char_T
  1188. ///@return jpcre2::select::String
  1189. static String toString(Char a){
  1190. return a?String(1, a):String();
  1191. }
  1192. ///@overload
  1193. ///...
  1194. ///Converts a Char_T const * to jpcre2::select::String
  1195. ///@param a Char_T const *
  1196. ///@return jpcre2::select::String
  1197. static String toString(Char const *a){
  1198. return a?String(a):String();
  1199. }
  1200. ///@overload
  1201. ///...
  1202. ///Converts a Char_T* to jpcre2::select::String
  1203. ///@param a Char_T const *
  1204. ///@return jpcre2::select::String
  1205. static String toString(Char* a){
  1206. return a?String(a):String();
  1207. }
  1208. ///@overload
  1209. ///...
  1210. ///Converts a PCRE2_UCHAR to String
  1211. ///@param a PCRE2_UCHAR
  1212. ///@return jpcre2::select::String
  1213. static String toString(Pcre2Uchar* a) {
  1214. return a?String((Char*) a):String();
  1215. }
  1216. ///Retruns error message from PCRE2 error number
  1217. ///@param err_num error number (negative)
  1218. ///@return message as jpcre2::select::String.
  1219. static String getPcre2ErrorMessage(int err_num) {
  1220. Pcre2Uchar buffer[sizeof(Char)*CHAR_BIT*1024];
  1221. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_error_message(err_num, buffer, sizeof(buffer));
  1222. return toString((Pcre2Uchar*) buffer);
  1223. }
  1224. ///Returns error message (either JPCRE2 or PCRE2) from error number and error offset
  1225. ///@param err_num error number (negative for PCRE2, positive for JPCRE2)
  1226. ///@param err_off error offset
  1227. ///@return message as jpcre2::select::String.
  1228. static String getErrorMessage(int err_num, int err_off) {
  1229. if(err_num == (int)ERROR::INVALID_MODIFIER){
  1230. return MSG<Char>::INVALID_MODIFIER() + toString((Char)err_off);
  1231. } else if(err_num == (int)ERROR::INSUFFICIENT_OVECTOR){
  1232. return MSG<Char>::INSUFFICIENT_OVECTOR();
  1233. } else if(err_num != 0) {
  1234. return getPcre2ErrorMessage((int) err_num);
  1235. } else return String();
  1236. }
  1237. //forward declaration
  1238. class Regex;
  1239. class RegexMatch;
  1240. class RegexReplace;
  1241. class MatchEvaluator;
  1242. /** Provides public constructors to create RegexMatch objects.
  1243. * Every RegexMatch object should be associated with a Regex object.
  1244. * This class stores a pointer to its associated Regex object, thus when
  1245. * the content of the associated Regex object is changed, there will be no need to
  1246. * set the pointer again.
  1247. *
  1248. * Examples:
  1249. *
  1250. * ```cpp
  1251. * jp::Regex re;
  1252. * jp::RegexMatch rm;
  1253. * rm.setRegexObject(&re);
  1254. * rm.match("subject", "g"); // 0 match
  1255. * re.compile("\\w");
  1256. * rm.match(); // 7 matches
  1257. * ```
  1258. */
  1259. class RegexMatch {
  1260. private:
  1261. friend class MatchEvaluator;
  1262. Regex const *re;
  1263. String m_subject;
  1264. String const *m_subject_ptr;
  1265. Uint match_opts;
  1266. Uint jpcre2_match_opts;
  1267. MatchContext *mcontext;
  1268. ModifierTable const * modtab;
  1269. MatchData * mdata;
  1270. PCRE2_SIZE _start_offset; //name collision, use _ at start
  1271. VecNum* vec_num;
  1272. VecNas* vec_nas;
  1273. VecNtN* vec_ntn;
  1274. VecOff* vec_soff;
  1275. VecOff* vec_eoff;
  1276. bool getNumberedSubstrings(int, Pcre2Sptr, PCRE2_SIZE*, uint32_t);
  1277. bool getNamedSubstrings(int, int, Pcre2Sptr, Pcre2Sptr, PCRE2_SIZE*);
  1278. void init_vars() {
  1279. re = 0;
  1280. vec_num = 0;
  1281. vec_nas = 0;
  1282. vec_ntn = 0;
  1283. vec_soff = 0;
  1284. vec_eoff = 0;
  1285. match_opts = 0;
  1286. jpcre2_match_opts = 0;
  1287. error_number = 0;
  1288. error_offset = 0;
  1289. _start_offset = 0;
  1290. m_subject_ptr = &m_subject;
  1291. mcontext = 0;
  1292. modtab = 0;
  1293. mdata = 0;
  1294. }
  1295. void onlyCopy(RegexMatch const &rm){
  1296. re = rm.re; //only pointer should be copied
  1297. //pointer to subject may point to m_subject or other user data
  1298. m_subject_ptr = (rm.m_subject_ptr == &rm.m_subject) ? &m_subject //not &rm.m_subject
  1299. : rm.m_subject_ptr;
  1300. //underlying data of vectors are not handled by RegexMatch
  1301. //thus it's safe to just copy the pointers.
  1302. vec_num = rm.vec_num;
  1303. vec_nas = rm.vec_nas;
  1304. vec_ntn = rm.vec_ntn;
  1305. vec_soff = rm.vec_soff;
  1306. vec_eoff = rm.vec_eoff;
  1307. match_opts = rm.match_opts;
  1308. jpcre2_match_opts = rm.jpcre2_match_opts;
  1309. error_number = rm.error_number;
  1310. error_offset = rm.error_offset;
  1311. _start_offset = rm._start_offset;
  1312. mcontext = rm.mcontext;
  1313. modtab = rm.modtab;
  1314. mdata = rm.mdata;
  1315. }
  1316. void deepCopy(RegexMatch const &rm){
  1317. m_subject = rm.m_subject;
  1318. onlyCopy(rm);
  1319. }
  1320. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  1321. void deepMove(RegexMatch& rm){
  1322. m_subject = std::move_if_noexcept(rm.m_subject);
  1323. onlyCopy(rm);
  1324. }
  1325. #endif
  1326. friend class Regex;
  1327. protected:
  1328. int error_number;
  1329. PCRE2_SIZE error_offset;
  1330. public:
  1331. ///Default constructor.
  1332. RegexMatch(){
  1333. init_vars();
  1334. }
  1335. ///@overload
  1336. ///...
  1337. ///Creates a RegexMatch object associating a Regex object.
  1338. ///Underlying data is not modified.
  1339. ///@param r pointer to a Regex object
  1340. RegexMatch(Regex const *r) {
  1341. init_vars();
  1342. re = r;
  1343. }
  1344. ///@overload
  1345. ///...
  1346. ///Copy constructor.
  1347. ///@param rm Reference to RegexMatch object
  1348. RegexMatch(RegexMatch const &rm){
  1349. init_vars();
  1350. deepCopy(rm);
  1351. }
  1352. ///Overloaded copy-assignment operator.
  1353. ///@param rm RegexMatch object
  1354. ///@return A reference to the calling RegexMatch object.
  1355. virtual RegexMatch& operator=(RegexMatch const &rm){
  1356. if(this == &rm) return *this;
  1357. deepCopy(rm);
  1358. return *this;
  1359. }
  1360. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  1361. ///@overload
  1362. ///...
  1363. ///Move constructor.
  1364. ///This constructor steals resources from the argument.
  1365. ///It leaves the argument in a valid but indeterminate sate.
  1366. ///The indeterminate state can be returned to normal by calling reset() on that object.
  1367. ///@param rm rvalue reference to a RegexMatch object
  1368. RegexMatch(RegexMatch&& rm){
  1369. init_vars();
  1370. deepMove(rm);
  1371. }
  1372. ///@overload
  1373. ///...
  1374. ///Overloaded move-assignment operator.
  1375. ///This constructor steals resources from the argument.
  1376. ///It leaves the argument in a valid but indeterminate sate.
  1377. ///The indeterminate state can be returned to normal by calling reset() on that object.
  1378. ///@param rm rvalue reference to a RegexMatch object
  1379. ///@return A reference to the calling RegexMatch object.
  1380. virtual RegexMatch& operator=(RegexMatch&& rm){
  1381. if(this == &rm) return *this;
  1382. deepMove(rm);
  1383. return *this;
  1384. }
  1385. #endif
  1386. ///Destructor.
  1387. ///The body is empty; nothing is released explicitly here.
  1388. virtual ~RegexMatch() {}
  1389. ///Reset all class variables to its default (initial) state including memory.
  1390. ///Data in the vectors will retain (as it's external)
  1391. ///You will need to pass vector pointers again after calling this function to get match results.
  1392. ///@return Reference to the calling RegexMatch object.
  1393. virtual RegexMatch& reset() {
  1394. String().swap(m_subject); //not ptr , external string won't be modified.
  1395. init_vars();
  1396. return *this;
  1397. }
  1398. ///Clear all class variables (may retain some memory for further use).
  1399. ///Data in the vectors will retain (as it's external)
  1400. ///You will need to pass vector pointers again after calling this function to get match results.
  1401. ///@return Reference to the calling RegexMatch object.
  1402. virtual RegexMatch& clear(){
  1403. m_subject.clear(); //not ptr , external string won't be modified.
  1404. init_vars();
  1405. return *this;
  1406. }
  1407. ///reset match related errors to zero.
  1408. ///If you want to examine the error status of a function call in the method chain,
  1409. ///add this function just before your target function so that the error is set to zero
  1410. ///before that target function is called, and leave everything out after the target
  1411. ///function so that there will be no additional errors from other function calls.
  1412. ///@return A reference to the RegexMatch object
  1413. ///@see Regex::resetErrors()
  1414. ///@see RegexReplace::resetErrors()
  1415. virtual RegexMatch& resetErrors(){
  1416. error_number = 0;
  1417. error_offset = 0;
  1418. return *this;
  1419. }
  1420. /// Returns the last error number
  1421. ///@return Last error number
  1422. virtual int getErrorNumber() const {
  1423. return error_number;
  1424. }
  1425. /// Returns the last error offset
  1426. ///@return Last error offset
  1427. virtual int getErrorOffset() const {
  1428. return (int)error_offset;
  1429. }
  1430. /// Returns the last error message
  1431. ///@return Last error message
  1432. virtual String getErrorMessage() const {
  1433. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  1434. return select<Char, Map>::getErrorMessage(error_number, error_offset);
  1435. #else
  1436. return select<Char>::getErrorMessage(error_number, error_offset);
  1437. #endif
  1438. }
  1439. ///Get subject string (by value).
  1440. ///@return subject string
  1441. ///@see RegexReplace::getSubject()
  1442. virtual String getSubject() const {
  1443. return *m_subject_ptr;
  1444. }
  1445. ///Get pointer to subject string.
  1446. ///Data can not be changed with this pointer.
  1447. ///@return constant subject string pointer
  1448. ///@see RegexReplace::getSubjectPointer()
  1449. virtual String const * getSubjectPointer() const {
  1450. return m_subject_ptr;
  1451. }
  1452. /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
  1453. ///
  1454. /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized
  1455. /// as long as you don't do that explicitly. Calling RegexMatch::setModifier() will re-set them.
  1456. ///
  1457. /// **Mixed or combined modifier**.
  1458. ///
  1459. /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
  1460. /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
  1461. /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
  1462. /// get set, and when you remove the 'n' modifier (with `RegexMatch::changeModifier()`), both will get removed.
  1463. ///@return Calculated modifier string (std::string)
  1464. ///@see Regex::getModifier()
  1465. ///@see RegexReplace::getModifier()
  1466. virtual std::string getModifier() const {
  1467. return modtab ? modtab->fromMatchOption(match_opts, jpcre2_match_opts)
  1468. : MOD::fromMatchOption(match_opts, jpcre2_match_opts);
  1469. }
  1470. ///Get the modifier table that is set,
  1471. ///@return pointer to constant ModifierTable.
  1472. virtual ModifierTable const* getModifierTable(){
  1473. return modtab;
  1474. }
  1475. ///Get PCRE2 option
  1476. ///@return PCRE2 option for match operation
  1477. ///@see Regex::getPcre2Option()
  1478. ///@see RegexReplace::getPcre2Option()
  1479. virtual Uint getPcre2Option() const {
  1480. return match_opts;
  1481. }
  1482. /// Get JPCRE2 option
  ///@return JPCRE2 options for match operation
  1484. ///@see Regex::getJpcre2Option()
  1485. ///@see RegexReplace::getJpcre2Option()
  1486. virtual Uint getJpcre2Option() const {
  1487. return jpcre2_match_opts;
  1488. }
  1489. /// Get offset from where match will start in the subject.
  1490. /// @return Start offset
  1491. virtual PCRE2_SIZE getStartOffset() const {
  1492. return _start_offset;
  1493. }
  1494. ///Get pre-set match start offset vector pointer.
  1495. ///The pointer must be set with RegexMatch::setMatchStartOffsetVector() beforehand
  1496. ///for this to work i.e it is just a convenience method to get the pre-set vector pointer.
  1497. ///@return pointer to the const match start offset vector
  1498. virtual VecOff const* getMatchStartOffsetVector() const {
  1499. return vec_soff;
  1500. }
  1501. ///Get pre-set match end offset vector pointer.
  1502. ///The pointer must be set with RegexMatch::setMatchEndOffsetVector() beforehand
  1503. ///for this to work i.e it is just a convenience method to get the pre-set vector pointer.
  1504. ///@return pointer to the const end offset vector
  1505. virtual VecOff const* getMatchEndOffsetVector() const {
  1506. return vec_eoff;
  1507. }
  1508. ///Get a pointer to the associated Regex object.
  1509. ///If no actual Regex object is associated, null is returned.
  1510. ///@return A pointer to the associated constant Regex object or null.
  1511. virtual Regex const * getRegexObject() const {
  1512. return re;
  1513. }
  1514. ///Get pointer to numbered substring vector.
  1515. ///@return Pointer to const numbered substring vector.
  1516. virtual VecNum const* getNumberedSubstringVector() const {
  1517. return vec_num;
  1518. }
  1519. ///Get pointer to named substring vector.
  1520. ///@return Pointer to const named substring vector.
  1521. virtual VecNas const* getNamedSubstringVector() const {
  1522. return vec_nas;
  1523. }
  1524. ///Get pointer to name to number map vector.
  1525. ///@return Pointer to const name to number map vector.
  1526. virtual VecNtN const* getNameToNumberMapVector() const {
  1527. return vec_ntn;
  1528. }
  1529. ///Set the associated regex object.
  1530. ///Null pointer unsets it.
  1531. ///Underlying data is not modified.
  1532. ///@param r Pointer to a Regex object.
  1533. ///@return Reference to the calling RegexMatch object.
  1534. virtual RegexMatch& setRegexObject(Regex const *r){
  1535. re = r;
  1536. return *this;
  1537. }
  1538. /// Set a pointer to the numbered substring vector.
  1539. /// Null pointer unsets it.
  1540. ///
  1541. /// This vector will be filled with numbered (indexed) captured groups.
  1542. /// @param v pointer to the numbered substring vector
  1543. /// @return Reference to the calling RegexMatch object
  1544. virtual RegexMatch& setNumberedSubstringVector(VecNum* v) {
  1545. vec_num = v;
  1546. return *this;
  1547. }
  1548. /// Set a pointer to the named substring vector.
  1549. /// Null pointer unsets it.
  1550. ///
  1551. /// This vector will be populated with named captured groups.
  1552. /// @param v pointer to the named substring vector
  1553. /// @return Reference to the calling RegexMatch object
  1554. virtual RegexMatch& setNamedSubstringVector(VecNas* v) {
  1555. vec_nas = v;
  1556. return *this;
  1557. }
  1558. /// Set a pointer to the name to number map vector.
  1559. /// Null pointer unsets it.
  1560. ///
  1561. /// This vector will be populated with name to number map for captured groups.
  1562. /// @param v pointer to the name to number map vector
  1563. /// @return Reference to the calling RegexMatch object
  1564. virtual RegexMatch& setNameToNumberMapVector(VecNtN* v) {
  1565. vec_ntn = v;
  1566. return *this;
  1567. }
  1568. /// Set the pointer to a vector to store the offsets where matches
  1569. /// start in the subject.
  1570. /// Null pointer unsets it.
  1571. /// @param v Pointer to a jpcre2::VecOff vector (std::vector<size_t>)
  1572. /// @return Reference to the calling RegexMatch object
  1573. virtual RegexMatch& setMatchStartOffsetVector(VecOff* v){
  1574. vec_soff = v;
  1575. return *this;
  1576. }
  1577. /// Set the pointer to a vector to store the offsets where matches
  1578. /// end in the subject.
  1579. /// Null pointer unsets it.
  1580. /// @param v Pointer to a VecOff vector (std::vector<size_t>)
  1581. /// @return Reference to the calling RegexMatch object
  1582. virtual RegexMatch& setMatchEndOffsetVector(VecOff* v){
  1583. vec_eoff = v;
  1584. return *this;
  1585. }
  1586. ///Set the subject string for match.
  1587. ///This makes a copy of the subject string.
  1588. /// @param s Subject string
  1589. /// @return Reference to the calling RegexMatch object
  1590. /// @see RegexReplace::setSubject()
  1591. virtual RegexMatch& setSubject(String const &s) {
  1592. m_subject = s;
  1593. m_subject_ptr = &m_subject; //must overwrite
  1594. return *this;
  1595. }
  1596. ///@overload
  1597. ///...
  1598. /// Works with the original without modifying it. Null pointer unsets the subject.
  1599. /// @param s Pointer to subject string
  1600. /// @return Reference to the calling RegexMatch object
  1601. /// @see RegexReplace::setSubject()
  1602. virtual RegexMatch& setSubject(String const *s) {
  1603. if(s) m_subject_ptr = s;
  1604. else {
  1605. m_subject.clear();
  1606. m_subject_ptr = &m_subject;
  1607. }
  1608. return *this;
  1609. }
  1610. /// Set the modifier (resets all JPCRE2 and PCRE2 options) by calling RegexMatch::changeModifier().
  1611. /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier to set their equivalent options.
  1612. /// @param s Modifier string.
  1613. /// @return Reference to the calling RegexMatch object
  1614. /// @see RegexReplace::setModifier()
  1615. /// @see Regex::setModifier()
  1616. virtual RegexMatch& setModifier(Modifier const& s) {
  1617. match_opts = 0;
  1618. jpcre2_match_opts = 0;
  1619. changeModifier(s, true);
  1620. return *this;
  1621. }
  1622. ///Set a custom modifier table to be used.
  1623. ///@param mdt pointer to ModifierTable object.
  1624. ///@return Reference to the calling RegexMatch object.
  1625. virtual RegexMatch& setModifierTable(ModifierTable const * mdt){
  1626. modtab = mdt;
  1627. return *this;
  1628. }
  1629. /// Set JPCRE2 option for match (resets all)
  1630. /// @param x Option value
  1631. /// @return Reference to the calling RegexMatch object
  1632. /// @see RegexReplace::setJpcre2Option()
  1633. /// @see Regex::setJpcre2Option()
  1634. virtual RegexMatch& setJpcre2Option(Uint x) {
  1635. jpcre2_match_opts = x;
  1636. return *this;
  1637. }
  1638. ///Set PCRE2 option match (overwrite existing option)
  1639. /// @param x Option value
  1640. /// @return Reference to the calling RegexMatch object
  1641. /// @see RegexReplace::setPcre2Option()
  1642. /// @see Regex::setPcre2Option()
  1643. virtual RegexMatch& setPcre2Option(Uint x) {
  1644. match_opts = x;
  1645. return *this;
  1646. }
  1647. /// Set whether to perform global match
  1648. /// @param x True or False
  1649. /// @return Reference to the calling RegexMatch object
  1650. virtual RegexMatch& setFindAll(bool x) {
  1651. jpcre2_match_opts = x?jpcre2_match_opts | FIND_ALL:jpcre2_match_opts & ~FIND_ALL;
  1652. return *this;
  1653. }
  1654. ///@overload
  1655. ///...
  1656. ///This function just calls RegexMatch::setFindAll(bool x) with `true` as the parameter
  1657. ///@return Reference to the calling RegexMatch object
  1658. virtual RegexMatch& setFindAll() {
  1659. return setFindAll(true);
  1660. }
  1661. /// Set offset from where match starts.
  1662. /// When FIND_ALL is set, a global match would not be performed on all positions on the subject,
  1663. /// rather it will be performed from the start offset and onwards.
  1664. /// @param offset Start offset
  1665. /// @return Reference to the calling RegexMatch object
  1666. virtual RegexMatch& setStartOffset(PCRE2_SIZE offset) {
  1667. _start_offset = offset;
  1668. return *this;
  1669. }
  1670. ///Set the match context.
  1671. ///You can create match context using the native PCRE2 API.
  1672. ///The memory is not handled by RegexMatch object and not freed.
  1673. ///User will be responsible for freeing the memory of the match context.
  1674. ///@param match_context Pointer to the match context.
  1675. ///@return Reference to the calling RegexMatch object
  1676. virtual RegexMatch& setMatchContext(MatchContext *match_context){
  1677. mcontext = match_context;
  1678. return *this;
  1679. }
  1680. ///Return pointer to the match context that was previously set with setMatchContext().
  1681. ///Handling memory is the callers' responsibility.
  1682. ///@return pointer to the match context (default: null).
  1683. virtual MatchContext* getMatchContext(){
  1684. return mcontext;
  1685. }
  1686. ///Set the match data block to be used.
  1687. ///The memory is not handled by RegexMatch object and not freed.
  1688. ///User will be responsible for freeing the memory of the match data block.
  1689. ///@param madt Pointer to a match data block.
  1690. ///@return Reference to the calling RegexMatch object
  1691. virtual RegexMatch& setMatchDataBlock(MatchData* madt){
  1692. mdata = madt;
  1693. return *this;
  1694. }
  ///Get the pointer to the match data block that was set previously with setMatchDataBlock().
  1696. ///Handling memory is the callers' responsibility.
  1697. ///@return pointer to the match data (default: null).
  1698. virtual MatchData* getMatchDataBlock(){
  1699. return mdata;
  1700. }
  1701. /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
  1702. /// This function does not initialize or re-initialize options.
  1703. /// If you want to set options from scratch, initialize them to 0 before calling this function.
  1704. /// If invalid modifier is detected, then the error number for the RegexMatch
  1705. /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
  1706. /// You can get the message with RegexMatch::getErrorMessage() function.
  1707. ///
  1708. /// @param mod Modifier string.
  1709. /// @param x Whether to add or remove option
  1710. /// @return Reference to the RegexMatch object
  1711. /// @see Regex::changeModifier()
  1712. /// @see RegexReplace::changeModifier()
  1713. virtual RegexMatch& changeModifier(Modifier const& mod, bool x){
  1714. modtab ? modtab->toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset)
  1715. : MOD::toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset);
  1716. return *this;
  1717. }
  1718. /// Add or remove a JPCRE2 option
  1719. /// @param opt JPCRE2 option value
  1720. /// @param x Add the option if it's true, remove otherwise.
  1721. /// @return Reference to the calling RegexMatch object
  1722. /// @see RegexReplace::changeJpcre2Option()
  1723. /// @see Regex::changeJpcre2Option()
  1724. virtual RegexMatch& changeJpcre2Option(Uint opt, bool x) {
  1725. jpcre2_match_opts = x ? jpcre2_match_opts | opt : jpcre2_match_opts & ~opt;
  1726. return *this;
  1727. }
  1728. /// Add or remove a PCRE2 option
  1729. /// @param opt PCRE2 option value
  1730. /// @param x Add the option if it's true, remove otherwise.
  1731. /// @return Reference to the calling RegexMatch object
  1732. /// @see RegexReplace::changePcre2Option()
  1733. /// @see Regex::changePcre2Option()
  1734. virtual RegexMatch& changePcre2Option(Uint opt, bool x) {
  1735. match_opts = x ? match_opts | opt : match_opts & ~opt;
  1736. return *this;
  1737. }
  1738. /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
  1739. /// This is just a wrapper of the original function RegexMatch::changeModifier()
  1740. /// @param mod Modifier string.
  1741. /// @return Reference to the calling RegexMatch object
  1742. /// @see RegexReplace::addModifier()
  1743. /// @see Regex::addModifier()
  1744. virtual RegexMatch& addModifier(Modifier const& mod){
  1745. return changeModifier(mod, true);
  1746. }
  1747. /// Add option to existing JPCRE2 options for match
  1748. /// @param x Option value
  1749. /// @return Reference to the calling RegexMatch object
  1750. /// @see RegexReplace::addJpcre2Option()
  1751. /// @see Regex::addJpcre2Option()
  1752. virtual RegexMatch& addJpcre2Option(Uint x) {
  1753. jpcre2_match_opts |= x;
  1754. return *this;
  1755. }
  1756. /// Add option to existing PCRE2 options for match
  1757. /// @param x Option value
  1758. /// @return Reference to the calling RegexMatch object
  1759. /// @see RegexReplace::addPcre2Option()
  1760. /// @see Regex::addPcre2Option()
  1761. virtual RegexMatch& addPcre2Option(Uint x) {
  1762. match_opts |= x;
  1763. return *this;
  1764. }
  1765. /// Perform match operation using info from class variables and return the match count and
  1766. /// store the results in specified vectors.
  1767. ///
  1768. /// Note: This function uses pcre2_match() function to do the match.
  1769. ///@return Match count
  1770. virtual SIZE_T match(void);
  1771. };
  1772. ///This class contains a typedef of a function pointer or a templated function wrapper (`std::function`)
  1773. ///to provide callback function to the `MatchEvaluator`.
  1774. ///`std::function` is used when `>=C++11` is being used , otherwise function pointer is used.
  1775. ///You can force using function pointer instead of `std::function` when `>=C++11` is used by defining the macro
  1776. ///`JPCRE2_USE_FUNCTION_POINTER_CALLBACK` before including jpcre2.hpp.
  1777. ///If you are using lambda function with capture, you must use the `std::function` approach.
  1778. ///
  1779. ///The callback function takes exactly three positional arguments:
  ///@tparam T1 The first argument must be `jp::NumSub const &` aka `std::vector<String> const &` (or `void*` if not needed).
  ///@tparam T2 The second argument must be `jp::MapNas const &` aka `std::map<String, String> const &` (or `void*` if not needed).
  ///@tparam T3 The third argument must be `jp::MapNtN const &` aka `std::map<String, size_t> const &` (or `void*` if not needed).
  1783. ///
  1784. /// **Examples:**
  1785. /// ```cpp
  1786. /// typedef jpcre2::select<char> jp;
  1787. /// jp::String myCallback1(jp::NumSub const &m1, void*, void*){
  1788. /// return "("+m1[0]+")";
  1789. /// }
  1790. ///
  1791. /// jp::String myCallback2(jp::NumSub const &m1, jp::MapNas const &m2, void*){
  1792. /// return "("+m1[0]+"/"+m2.at("total")+")";
  1793. /// }
  1794. /// //Now you can pass these functions in MatchEvaluator constructors to create a match evaluator
  1795. /// jp::MatchEvaluator me1(myCallback1);
  1796. ///
  1797. /// //Examples with lambda (>=C++11)
  1798. /// jp::MatchEvaluator me2([](jp::NumSub const &m1, void*, void*)
  1799. /// {
  1800. /// return "("+m1[0]+")";
  1801. /// });
  1802. /// ```
  1803. ///@see MatchEvaluator
  1804. template<typename T1, typename T2, typename T3>
  1805. struct MatchEvaluatorCallback{
  1806. #if !defined JPCRE2_USE_FUNCTION_POINTER_CALLBACK && JPCRE2_USE_MINIMUM_CXX_11
  1807. typedef std::function<String (T1,T2,T3)> Callback;
  1808. #else
  1809. typedef String (*Callback)(T1,T2,T3);
  1810. #endif
  1811. };
  1812. ///Provides some default static callback functions.
  1813. ///The primary goal of this class is to provide default
  1814. ///callback function to MatchEvaluator default constructor which is
  1815. ///essentially callback::erase.
  1816. ///This class does not allow object instantiation.
  1817. struct callback{
  1818. ///Callback function that removes the matched part/s in the subject string
  1819. /// and takes all match vectors as argument.
  1820. ///Even though this function itself does not use the vectors, it still takes them
  1821. ///so that the caller can perform a match and populate all the match data to perform
  1822. ///further evaluation of other callback functions without doing the match again.
  1823. ///@param num jp::NumSub vector.
  1824. ///@param nas jp::MapNas map.
  1825. ///@param ntn jp::MapNtN map.
  1826. ///@return empty string.
  1827. static String eraseFill(NumSub const &num, MapNas const &nas, MapNtN const &ntn){
  1828. return String();
  1829. }
  1830. ///Callback function that removes the matched part/s in the subject string
  1831. ///and does not take any match vector.
  1832. ///This is a minimum cost pattern deleting callback function.
  1833. ///
  1834. ///It's the default callback function when you Instantiate
  1835. ///a MatchEvaluator object with its default constructor:
  1836. ///```cpp
  1837. ///MatchEvaluator me;
  1838. ///```
  1839. ///@return empty string.
  1840. static String erase(void*, void*, void*){
  1841. return String();
  1842. }
  1843. ///Callback function for populating match vectors that does not modify the subject string.
  1844. ///It always returns the total matched part and thus the subject string remains the same.
  1845. ///@param num jp::NumSub vector.
  1846. ///@param nas jp::MapNas map.
  1847. ///@param ntn jp::MapNtN map.
  1848. ///@return total match (group 0) of current match.
  1849. static String fill(NumSub const &num, MapNas const &nas, MapNtn const &ntn){
  1850. #ifdef JPCRE2_UNSET_CAPTURES_NULL
  1851. return *num[0];
  1852. #else
  1853. return num[0];
  1854. #endif
  1855. }
  1856. private:
  1857. //prevent object instantiation.
  1858. callback();
  1859. callback(callback const &);
  1860. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  1861. callback(callback&&);
  1862. #endif
  1863. ~callback();
  1864. };
  1865. ///This class inherits RegexMatch and provides a similar functionality.
  1866. ///All public member functions from RegexMatch class are publicly available except the following:
  1867. ///* setNumberedSubstringVector
  1868. ///* setNamedSubstringVector
  1869. ///* setNameToNumberMapVector
  1870. ///* setMatchStartOffsetVector
  1871. ///* setMatchEndOffsetVector
  1872. ///
  1873. ///The use of above functions is not allowed as the vectors are created according to the callback function you pass.
  1874. ///
  1875. ///Each constructor of this class takes a callback function as argument (see `MatchEvaluatorCallback`).
  1876. ///
  1877. ///It provides a MatchEvaluator::nreplace() function to perform replace operation using native JPCRE2 approach
  1878. ///and `MatchEvaluator::replace()` function for PCRE2 compatible replace operation.
  1879. ///
  1880. ///An instance of this class can also be passed with `RegexReplace::nreplace()` or `RegexReplace::replace()` function to perform replacement
  1881. ///according to this match evaluator.
  1882. ///
  1883. ///Match data is stored in vectors, and the vectors are populated according to the callback functions.
  1884. ///Populated vector data is never deleted but they get overwritten. Vector data can be manually zeroed out
  ///by calling `MatchEvaluator::clearMatchData()`. If the capacities of those match vectors should be
  ///shrunk too instead of just clearing them, use `MatchEvaluator::resetMatchData()` instead.
  1887. ///
  1888. /// # Re-usability of Match Data
  1889. /// A match data populated with a callback function that takes only a jp::NumSub vector is not compatible
  1890. /// with the data created according to callback function with a jp::MapNas vector.
  1891. /// Because, for this later callback, jp::MapNas data is required but is not available (only jp::NumSub is available).
  /// In such cases, previous Match data can not be used to perform a new replacement operation with this second callback function.
  1893. ///
  1894. /// To populate the match vectors, one must call the `MatchEvaluator::match()` or `MatchEvaluator::nreplace()` function, they will populate
  1895. /// vectors with match data according to call back function.
  1896. ///
  1897. /// ## Example:
  1898. ///
  1899. /// ```cpp
  1900. /// jp::String callback5(NumSub const &m, void*, MapNtn const &n){
  1901. /// return m[0];
  1902. /// }
  1903. /// jp::String callback4(void*, void*, MapNtn const &n){
  1904. /// return std::to_string(n.at("name")); //position of group 'name'.
  1905. /// }
  1906. /// jp::String callback2(void*, MapNas const &m, void*){
  /// return m.at("name"); //substring by name
  1908. /// }
  1909. ///
  1910. /// jp::MatchEvaluator me;
  1911. /// me.setRegexObject(&re).setSubject("string").setCallback(callback5).nreplace();
  1912. /// //In above, nreplace() populates jp::NumSub and jp::MapNtn with match data.
  1913. ///
  1914. /// me.setCallback(callback4).nreplace(false);
  1915. /// //the above uses previous match result (note the 'false') which is OK,
  1916. /// //because, callback4 requires jp::MapNtn which was made available in the previous operation.
  1917. ///
  1918. /// //but the following is not OK: (assertion failure)
  1919. /// me.setCallback(callback2).nreplace(false);
  1920. /// //because, callback2 requires jp::MapNas data which is not available.
  1921. /// //now, this is OK:
  1922. /// me.setCallback(callback2).nreplace();
  1923. /// //because, it will recreate those match data including this one (jp::MapNas).
  1924. /// ```
  1925. ///
  1926. /// # Replace options
  1927. /// MatchEvaluator can not take replace options.
  1928. /// Replace options are taken directly by the replace functions: `nreplace()` and `replace()`.
  1929. ///
  1930. /// # Using as a match object
  1931. /// As it's just a subclass of RegexMatch, it can do all the things that RegexMatch can do, with some restrictions:
  1932. /// * matching options are modified to strip off bad options according to replacement (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT).
  1933. /// * match depends on the callback function. Only those vectors will be populated that are implemented by the callback functions so far
  1934. /// (multiple callback function will set multiple match data vectors.)
  1935. /// * match vectors are internal to this class, you can not set them manually (without callback function). (you can get pointers to these vectors
  1936. /// with `getNumberedSubstringVector()` and related functions).
  1937. ///
  1938. ///@see MatchEvaluatorCallback
  1939. ///@see RegexReplace::nreplace()
  1940. class MatchEvaluator: virtual public RegexMatch{
  1941. private:
  1942. friend class RegexReplace;
  1943. VecNum vec_num;
  1944. VecNas vec_nas;
  1945. VecNtN vec_ntn;
  1946. VecOff vec_soff;
  1947. VecOff vec_eoff;
  1948. int callbackn;
  1949. typename MatchEvaluatorCallback<void*, void*, void*>::Callback callback0;
  1950. typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback callback1;
  1951. typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback callback2;
  1952. typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback callback3;
  1953. typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback callback4;
  1954. typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback callback5;
  1955. typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback callback6;
  1956. typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback callback7;
  1957. //Q: Why the callback names seem random? is it random?
  1958. //A: No, it's not random, NumSub = 1, MapNas = 2, MapNtn = 4, thus:
  1959. // NumSub + MapNas = 3
  1960. // NumSub + MapNtn = 5
  1961. // MapNas + MapNtn = 6
  1962. // NumSub + MapNas + MapNtn = 7
  1963. //Q: Why is it like this?
  1964. //A: It's historical. Once, there was not this many callback declaration, there was only one (a templated one).
  1965. // The nreplace function itself used to calculate a mode value according to available vectors
  1966. // and determine what kind of callback function needed to be called.
  1967. //Q: Why the history changed?
  1968. //A: We had some compatibility issues with the single templated callback.
  1969. // Also, this approach proved to be more readable and robust.
  1970. PCRE2_SIZE buffer_size;
  1971. void init(){
  1972. callbackn = 0;
  1973. callback0 = callback::erase;
  1974. callback1 = 0;
  1975. callback2 = 0;
  1976. callback3 = 0;
  1977. callback4 = 0;
  1978. callback5 = 0;
  1979. callback6 = 0;
  1980. callback7 = 0;
  1981. setMatchStartOffsetVector(&vec_soff);
  1982. setMatchEndOffsetVector(&vec_eoff);
  1983. buffer_size = 0;
  1984. }
  1985. void setVectorPointersAccordingToCallback(){
  1986. switch(callbackn){
  1987. case 0: break;
  1988. case 1: setNumberedSubstringVector(&vec_num);break;
  1989. case 2: setNamedSubstringVector(&vec_nas);break;
  1990. case 3: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas);break;
  1991. case 4: setNameToNumberMapVector(&vec_ntn);break;
  1992. case 5: setNumberedSubstringVector(&vec_num).setNameToNumberMapVector(&vec_ntn);break;
  1993. case 6: setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break;
  1994. case 7: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break;
  1995. }
  1996. }
  1997. void onlyCopy(MatchEvaluator const &me){
  1998. callbackn = me.callbackn;
  1999. callback0 = me.callback0;
  2000. callback1 = me.callback1;
  2001. callback2 = me.callback2;
  2002. callback3 = me.callback3;
  2003. callback4 = me.callback4;
  2004. callback5 = me.callback5;
  2005. callback6 = me.callback6;
  2006. callback7 = me.callback7;
  2007. //must update the pointers to point to this class vectors.
  2008. setVectorPointersAccordingToCallback();
  2009. buffer_size = me.buffer_size;
  2010. }
  2011. void deepCopy(MatchEvaluator const &me) {
  2012. vec_num = me.vec_num;
  2013. vec_nas = me.vec_nas;
  2014. vec_ntn = me.vec_ntn;
  2015. vec_soff = me.vec_soff;
  2016. vec_eoff = me.vec_eoff;
  2017. onlyCopy(me);
  2018. }
  2019. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  2020. void deepMove(MatchEvaluator& me){
  2021. vec_num = std::move_if_noexcept(me.vec_num);
  2022. vec_nas = std::move_if_noexcept(me.vec_nas);
  2023. vec_ntn = std::move_if_noexcept(me.vec_ntn);
  2024. vec_soff = std::move_if_noexcept(me.vec_soff);
  2025. vec_eoff = std::move_if_noexcept(me.vec_eoff);
  2026. onlyCopy(me);
  2027. }
  2028. #endif
  //prevent public access to some functions
  2030. MatchEvaluator& setNumberedSubstringVector(VecNum* v){
  2031. RegexMatch::setNumberedSubstringVector(v);
  2032. return *this;
  2033. }
  2034. MatchEvaluator& setNamedSubstringVector(VecNas* v){
  2035. RegexMatch::setNamedSubstringVector(v);
  2036. return *this;
  2037. }
  2038. MatchEvaluator& setNameToNumberMapVector(VecNtN* v){
  2039. RegexMatch::setNameToNumberMapVector(v);
  2040. return *this;
  2041. }
  2042. MatchEvaluator& setMatchStartOffsetVector(VecOff* v){
  2043. RegexMatch::setMatchStartOffsetVector(v);
  2044. return *this;
  2045. }
  2046. MatchEvaluator& setMatchEndOffsetVector(VecOff* v){
  2047. RegexMatch::setMatchEndOffsetVector(v);
  2048. return *this;
  2049. }
  2050. public:
  2051. ///Default constructor.
  2052. ///Sets callback::erase as the callback function.
  2053. ///Removes matched part/s from the subject string if the callback is not
  2054. ///changed.
  2055. /// ```cpp
  /// jp::Regex re("\\s*string");
  /// jp::MatchEvaluator me;
  /// std::cout<<
  /// me.setRegexObject(&re)
  /// .setSubject("I am a string")
  /// .nreplace();
  2062. /// //The above will delete ' string' from the subject
  2063. /// //thus the result will be 'I am a'
  2064. /// ```
  2065. explicit
  2066. MatchEvaluator():RegexMatch(){
  2067. init();
  2068. }
  2069. ///@overload
  2070. ///...
  2071. ///Constructor taking a Regex object pointer.
  2072. ///It sets the associated Regex object and
  2073. ///initializes the MatchEvaluator object with
  2074. ///callback::erase callback function.
  2075. ///Underlying data is not modified.
  2076. ///@param r constant Regex pointer.
  2077. explicit
  2078. MatchEvaluator(Regex const *r):RegexMatch(r){
  2079. init();
  2080. }
  2081. ///@overload
  2082. ///...
  2083. ///Constructor taking a callback function.
  2084. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2085. ///@param mef Callback function.
  2086. explicit
  2087. MatchEvaluator(typename MatchEvaluatorCallback<void*, void*, void*>::Callback mef): RegexMatch(){
  2088. init();
  2089. setCallback(mef);
  2090. }
  2091. ///@overload
  2092. /// ...
  2093. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2094. ///@param mef Callback function.
  2095. explicit
  2096. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback mef): RegexMatch(){
  2097. init();
  2098. setCallback(mef);
  2099. }
  2100. ///@overload
  2101. /// ...
  2102. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2103. ///@param mef Callback function.
  2104. explicit
  2105. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback mef): RegexMatch(){
  2106. init();
  2107. setCallback(mef);
  2108. }
  2109. ///@overload
  2110. /// ...
  2111. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2112. ///@param mef Callback function.
  2113. explicit
  2114. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback mef): RegexMatch(){
  2115. init();
  2116. setCallback(mef);
  2117. }
  2118. ///@overload
  2119. /// ...
  2120. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2121. ///@param mef Callback function.
  2122. explicit
  2123. MatchEvaluator(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback mef): RegexMatch(){
  2124. init();
  2125. setCallback(mef);
  2126. }
  2127. ///@overload
  2128. /// ...
  2129. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2130. ///@param mef Callback function.
  2131. explicit
  2132. MatchEvaluator(typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback mef): RegexMatch(){
  2133. init();
  2134. setCallback(mef);
  2135. }
  2136. ///@overload
  2137. /// ...
  2138. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2139. ///@param mef Callback function.
  2140. explicit
  2141. MatchEvaluator(typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback mef): RegexMatch(){
  2142. init();
  2143. setCallback(mef);
  2144. }
  2145. ///@overload
  2146. /// ...
  2147. ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
  2148. ///@param mef Callback function.
  2149. explicit
  2150. MatchEvaluator(typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback mef): RegexMatch(){
  2151. init();
  2152. setCallback(mef);
  2153. }
  2154. ///@overload
  2155. /// ...
  2156. ///Copy constructor.
  2157. ///@param me Reference to MatchEvaluator object
  2158. MatchEvaluator(MatchEvaluator const &me): RegexMatch(me){
  2159. init();
  2160. deepCopy(me);
  2161. }
  2162. ///Overloaded copy-assignment operator
  2163. ///@param me MatchEvaluator object
  2164. ///@return A reference to the calling MatchEvaluator object.
  2165. MatchEvaluator& operator=(MatchEvaluator const &me){
  2166. if(this == &me) return *this;
  2167. RegexMatch::operator=(me);
  2168. deepCopy(me);
  2169. return *this;
  2170. }
  2171. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  2172. ///@overload
  2173. /// ...
  2174. ///Move constructor.
  2175. ///This constructor steals resources from the argument.
  2176. ///It leaves the argument in a valid but indeterminate sate.
  2177. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2178. ///@param me rvalue reference to a MatchEvaluator object
  2179. MatchEvaluator(MatchEvaluator&& me): RegexMatch(me){
  2180. init();
  2181. deepMove(me);
  2182. }
  2183. ///@overload
  2184. ///...
  2185. ///Overloaded move-assignment operator.
  2186. ///It steals resources from the argument.
  2187. ///It leaves the argument in a valid but indeterminate sate.
  2188. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2189. ///@param me rvalue reference to a MatchEvaluator object
  2190. ///@return A reference to the calling MatchEvaluator object.
  2191. ///@see MatchEvaluator(MatchEvaluator&& me)
  2192. MatchEvaluator& operator=(MatchEvaluator&& me){
  2193. if(this == &me) return *this;
  2194. RegexMatch::operator=(me);
  2195. deepMove(me);
  2196. return *this;
  2197. }
  2198. #endif
  2199. virtual ~MatchEvaluator(){}
  2200. ///Member function to set a callback function with no vector reference.
  2201. ///Callback function is always overwritten. The implemented vectors are set to be filled with match data.
  2202. ///Other vectors that were set previously, are not unset and thus they will be filled with match data too
  2203. ///when `match()` or `nreplace()` is called.
  2204. ///@param mef Callback function.
  2205. ///@return A reference to the calling MatchEvaluator object.
  2206. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, void*, void*>::Callback mef){
  2207. callback0 = mef;
  2208. callbackn = 0;
  2209. return *this;
  2210. }
  2211. ///@overload
  2212. /// ...
  2213. ///Sets a callback function with a jp::NumSub vector.
  2214. ///You will be working with a reference to the constant vector.
  2215. ///@param mef Callback function.
  2216. ///@return A reference to the calling MatchEvaluator object.
  2217. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, void*, void*>::Callback mef){
  2218. callback1 = mef;
  2219. callbackn = 1;
  2220. setNumberedSubstringVector(&vec_num);
  2221. return *this;
  2222. }
  2223. ///@overload
  2224. /// ...
  2225. ///Sets a callback function with a jp::NumSub and jp::MapNas.
  2226. ///You will be working with references of the constant vectors.
  2227. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2228. ///```cpp
  2229. ///map_nas["word"]; //wrong
  2230. ///map_nas.at("word"); //ok
  2231. ///```
  2232. ///If you want to use `[]` operator with maps, make a copy:
  2233. ///```cpp
  2234. ///jp::MapNas mn = map_nas;
  2235. ///mn["word"]; //ok
  2236. ///```
  2237. ///@param mef Callback function.
  2238. ///@return A reference to the calling MatchEvaluator object.
  2239. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, void*>::Callback mef){
  2240. callback3 = mef;
  2241. callbackn = 3;
  2242. setNumberedSubstringVector(&vec_num);
  2243. setNamedSubstringVector(&vec_nas);
  2244. return *this;
  2245. }
  2246. ///@overload
  2247. /// ...
  2248. ///Sets a callback function with a jp::NumSub and jp::MapNtN.
  2249. ///You will be working with references of the constant vectors.
  2250. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2251. ///```cpp
  2252. ///map_ntn["word"]; //wrong
  2253. ///map_ntn.at("word"); //ok
  2254. ///```
  2255. ///If you want to use `[]` operator with maps, make a copy:
  2256. ///```cpp
  2257. ///jp::MapNtN mn = map_ntn;
  2258. ///mn["word"]; //ok
  2259. ///```
  2260. ///@param mef Callback function.
  2261. ///@return A reference to the calling MatchEvaluator object.
  2262. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, void*, MapNtN const &>::Callback mef){
  2263. callback5 = mef;
  2264. callbackn = 5;
  2265. setNumberedSubstringVector(&vec_num);
  2266. setNameToNumberMapVector(&vec_ntn);
  2267. return *this;
  2268. }
  2269. ///@overload
  2270. /// ...
  2271. ///Sets a callback function with a jp::NumSub, jp::MapNas, jp::MapNtN.
  2272. ///You will be working with references of the constant vectors.
  2273. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2274. ///```cpp
  2275. ///map_nas["word"]; //wrong
  2276. ///map_nas.at("word"); //ok
  2277. ///```
  2278. ///If you want to use `[]` operator with maps, make a copy:
  2279. ///```cpp
  2280. ///jp::MapNas mn = map_nas;
  2281. ///mn["word"]; //ok
  2282. ///```
  2283. ///@param mef Callback function.
  2284. ///@return A reference to the calling MatchEvaluator object.
  2285. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<NumSub const &, MapNas const &, MapNtN const &>::Callback mef){
  2286. callback7 = mef;
  2287. callbackn = 7;
  2288. setNumberedSubstringVector(&vec_num);
  2289. setNamedSubstringVector(&vec_nas);
  2290. setNameToNumberMapVector(&vec_ntn);
  2291. return *this;
  2292. }
  2293. ///@overload
  2294. /// ...
  2295. ///Sets a callback function with a jp::MapNas.
  2296. ///You will be working with reference of the constant vector.
  2297. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2298. ///```cpp
  2299. ///map_nas["word"]; //wrong
  2300. ///map_nas.at("word"); //ok
  2301. ///```
  2302. ///If you want to use `[]` operator with maps, make a copy:
  2303. ///```cpp
  2304. ///jp::MapNas mn = map_nas;
  2305. ///mn["word"]; //ok
  2306. ///```
  2307. ///@param mef Callback function.
  2308. ///@return A reference to the calling MatchEvaluator object.
  2309. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, MapNas const &, void*>::Callback mef){
  2310. callback2 = mef;
  2311. callbackn = 2;
  2312. setNamedSubstringVector(&vec_nas);
  2313. return *this;
  2314. }
  2315. ///@overload
  2316. /// ...
  2317. ///Sets a callback function with a jp::MapNas, jp::MapNtN.
  2318. ///You will be working with reference of the constant vector.
  2319. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2320. ///```cpp
  2321. ///map_nas["word"]; //wrong
  2322. ///map_nas.at("word"); //ok
  2323. ///```
  2324. ///If you want to use `[]` operator with maps, make a copy:
  2325. ///```cpp
  2326. ///jp::MapNas mn = map_nas;
  2327. ///mn["word"]; //ok
  2328. ///```
  2329. ///@param mef Callback function.
  2330. ///@return A reference to the calling MatchEvaluator object.
  2331. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, MapNas const &, MapNtN const &>::Callback mef){
  2332. callback6 = mef;
  2333. callbackn = 6;
  2334. setNamedSubstringVector(&vec_nas);
  2335. setNameToNumberMapVector(&vec_ntn);
  2336. return *this;
  2337. }
  2338. ///@overload
  2339. /// ...
  2340. ///Sets a callback function with a jp::MapNtN.
  2341. ///You will be working with references of the constant vectors.
  2342. ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead:
  2343. ///```cpp
  2344. ///map_ntn["word"]; //wrong
  2345. ///map_ntn.at("word"); //ok
  2346. ///```
  2347. ///If you want to use `[]` operator with maps, make a copy:
  2348. ///```cpp
  2349. ///jp::MapNtN mn = map_ntn;
  2350. ///mn["word"]; //ok
  2351. ///```
  2352. ///@param mef Callback function.
  2353. ///@return A reference to the calling MatchEvaluator object.
  2354. MatchEvaluator& setCallback(typename MatchEvaluatorCallback<void*, void*, MapNtN const &>::Callback mef){
  2355. callback4 = mef;
  2356. callbackn = 4;
  2357. setNameToNumberMapVector(&vec_ntn);
  2358. return *this;
  2359. }
  2360. ///Clear match data.
  2361. ///It clears all match data from all vectors (without shrinking).
  2362. ///For shrinking the vectors, use `resetMatchData()`
  2363. ///A call to `match()` or nreplace() will be required to produce match data again.
  2364. ///@return A reference to the calling MatchEvaluator object.
  2365. MatchEvaluator& clearMatchData(){
  2366. vec_num.clear();
  2367. vec_nas.clear();
  2368. vec_ntn.clear();
  2369. vec_soff.clear();
  2370. vec_eoff.clear();
  2371. return *this;
  2372. }
  2373. ///Reset match data to initial state.
  2374. ///It deletes all match data from all vectors shrinking their capacity.
  2375. ///A call to `match()` or nreplace() will be required to produce match data again.
  2376. ///@return A reference to the calling MatchEvaluator object.
  2377. MatchEvaluator& resetMatchData(){
  2378. VecNum().swap(vec_num);
  2379. VecNas().swap(vec_nas);
  2380. VecNtN().swap(vec_ntn);
  2381. VecOff().swap(vec_soff);
  2382. VecOff().swap(vec_eoff);
  2383. return *this;
  2384. }
  2385. ///Reset MatchEvaluator to initial state including memory.
  2386. ///@return A reference to the calling MatchEvaluator object.
  2387. MatchEvaluator& reset(){
  2388. RegexMatch::reset();
  2389. resetMatchData();
  2390. init();
  2391. return *this;
  2392. }
  2393. ///Clears MatchEvaluator.
  2394. ///Returns everything to initial state (some memory may retain for further and faster use).
  2395. ///@return A reference to the calling MatchEvaluator object.
  2396. MatchEvaluator& clear(){
  2397. RegexMatch::clear();
  2398. clearMatchData();
  2399. init();
  2400. return *this;
  2401. }
  2402. ///Call RegexMatch::resetErrors().
  2403. ///@return A reference to the calling MatchEvaluator object.
  2404. MatchEvaluator& resetErrors(){
  2405. RegexMatch::resetErrors();
  2406. return *this;
  2407. }
  2408. ///Call RegexMatch::setRegexObject(r).
  2409. ///@param r constant Regex object pointer
  2410. ///@return A reference to the calling MatchEvaluator object.
  2411. MatchEvaluator& setRegexObject (Regex const *r){
  2412. RegexMatch::setRegexObject(r);
  2413. return *this;
  2414. }
  2415. ///Call RegexMatch::setSubject(String const &s).
  2416. ///@param s subject string.
  2417. ///@return A reference to the calling MatchEvaluator object.
  2418. MatchEvaluator& setSubject (String const &s){
  2419. RegexMatch::setSubject(s);
  2420. return *this;
  2421. }
  2422. ///@overload
  2423. ///@param s constant subject string by pointer
  2424. ///@return A reference to the calling MatchEvaluator object.
  2425. MatchEvaluator& setSubject (String const *s){
  2426. RegexMatch::setSubject(s);
  2427. return *this;
  2428. }
  2429. ///Call RegexMatch::setModifier(Modifier const& s).
  2430. ///@param s modifier string.
  2431. ///@return A reference to the calling MatchEvaluator object.
  2432. MatchEvaluator& setModifier (Modifier const& s){
  2433. RegexMatch::setModifier(s);
  2434. return *this;
  2435. }
  2436. ///Call RegexMatch::setModifierTable(ModifierTable const * s).
  2437. ///@param mdt pointer to ModifierTable object.
  2438. ///@return A reference to the calling MatchEvaluator object.
  2439. MatchEvaluator& setModifierTable (ModifierTable const * mdt){
  2440. RegexMatch::setModifierTable(mdt);
  2441. return *this;
  2442. }
  2443. ///Call RegexMatch::setJpcre2Option(Uint x).
  2444. ///@param x JPCRE2 option value.
  2445. ///@return A reference to the calling MatchEvaluator object.
  2446. MatchEvaluator& setJpcre2Option (Uint x){
  2447. RegexMatch::setJpcre2Option(x);
  2448. return *this;
  2449. }
  2450. ///Call RegexMatch::setPcre2Option (Uint x).
  2451. ///@param x PCRE2 option value.
  2452. ///@return A reference to the calling MatchEvaluator object.
  2453. MatchEvaluator& setPcre2Option (Uint x){
  2454. RegexMatch::setPcre2Option(x);
  2455. return *this;
  2456. }
  2457. ///Call RegexMatch::setFindAll(bool x).
  2458. ///@param x true if global match, false otherwise.
  2459. ///@return A reference to the calling MatchEvaluator object.
  2460. MatchEvaluator& setFindAll (bool x){
  2461. RegexMatch::setFindAll(x);
  2462. return *this;
  2463. }
  2464. ///Call RegexMatch::setFindAll().
  2465. ///@return A reference to the calling MatchEvaluator object.
  2466. MatchEvaluator& setFindAll(){
  2467. RegexMatch::setFindAll();
  2468. return *this;
  2469. }
  2470. ///Call RegexMatch::setStartOffset (PCRE2_SIZE offset).
  2471. ///@param offset match start offset in the subject.
  2472. ///@return A reference to the calling MatchEvaluator object.
  2473. MatchEvaluator& setStartOffset (PCRE2_SIZE offset){
  2474. RegexMatch::setStartOffset(offset);
  2475. return *this;
  2476. }
  2477. ///Call RegexMatch::setMatchContext(MatchContext *match_context).
  2478. ///@param match_context pointer to match context.
  2479. ///@return A reference to the calling MatchEvaluator object.
  2480. MatchEvaluator& setMatchContext (MatchContext *match_context){
  2481. RegexMatch::setMatchContext(match_context);
  2482. return *this;
  2483. }
  2484. ///Call RegexMatch::setMatchDataBlock(MatchContext * mdt);
  2485. ///@param mdt pointer to match data block
  2486. ///@return A reference to the calling MatchEvaluator object.
  2487. MatchEvaluator& setMatchDataBlock(MatchData* mdt){
  2488. RegexMatch::setMatchDataBlock(mdt);
  2489. return *this;
  2490. }
  2491. ///Set the buffer size that will be used by pcre2_substitute (replace()).
  2492. ///If buffer size proves to be enough to fit the resultant string
  2493. ///from each match (not the total resultant string), it will yield one less call
  2494. ///to pcre2_substitute for each match.
  2495. ///@param x buffer size.
  2496. ///@return A reference to the calling MatchEvaluator object.
  2497. MatchEvaluator& setBufferSize(PCRE2_SIZE x){
  2498. buffer_size = x;
  2499. return *this;
  2500. }
  2501. ///Get the initial buffer size that is being used by internal function pcre2_substitute
  2502. ///@return buffer_size
  2503. PCRE2_SIZE getBufferSize(){
  2504. return buffer_size;
  2505. }
  2506. ///Call RegexMatch::changeModifier(Modifier const& mod, bool x).
  2507. ///@param mod modifier string.
  2508. ///@param x true (add) or false (remove).
  2509. ///@return A reference to the calling MatchEvaluator object.
  2510. MatchEvaluator& changeModifier (Modifier const& mod, bool x){
  2511. RegexMatch::changeModifier(mod, x);
  2512. return *this;
  2513. }
  2514. ///Call RegexMatch::changeJpcre2Option(Uint opt, bool x).
  2515. ///@param opt JPCRE2 option
  2516. ///@param x true (add) or false (remove).
  2517. ///@return A reference to the calling MatchEvaluator object.
  2518. MatchEvaluator& changeJpcre2Option (Uint opt, bool x){
  2519. RegexMatch::changeJpcre2Option(opt, x);
  2520. return *this;
  2521. }
  2522. ///Call RegexMatch::changePcre2Option(Uint opt, bool x).
  2523. ///@param opt PCRE2 option.
  2524. ///@param x true (add) or false (remove).
  2525. ///@return A reference to the calling MatchEvaluator object.
  2526. MatchEvaluator& changePcre2Option (Uint opt, bool x){
  2527. RegexMatch::changePcre2Option(opt, x);
  2528. return *this;
  2529. }
  2530. ///Call RegexMatch::addModifier(Modifier const& mod).
  2531. ///@param mod modifier string.
  2532. ///@return A reference to the calling MatchEvaluator object.
  2533. MatchEvaluator& addModifier (Modifier const& mod){
  2534. RegexMatch::addModifier(mod);
  2535. return *this;
  2536. }
  2537. ///Call RegexMatch::addJpcre2Option(Uint x).
  2538. ///@param x JPCRE2 option.
  2539. ///@return A reference to the calling MatchEvaluator object.
  2540. MatchEvaluator& addJpcre2Option (Uint x){
  2541. RegexMatch::addJpcre2Option(x);
  2542. return *this;
  2543. }
  2544. ///Call RegexMatch::addPcre2Option(Uint x).
  2545. ///@param x PCRE2 option.
  2546. ///@return A reference to the calling MatchEvaluator object.
  2547. MatchEvaluator& addPcre2Option (Uint x){
  2548. RegexMatch::addPcre2Option(x);
  2549. return *this;
  2550. }
  2551. ///Perform match and return the match count.
  2552. ///This function strips off matching options (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT) that are considered
  2553. ///bad options for replacement operation and then calls
  2554. ///RegexMatch::match() to perform the match.
  2555. ///@return match count.
  2556. SIZE_T match(void){
  2557. //remove bad matching options
  2558. RegexMatch::changePcre2Option(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT, false);
  2559. return RegexMatch::match();
  2560. }
  2561. ///Perform regex replace with this match evaluator.
  2562. ///This is a JPCRE2 native replace function (thus the name nreplace).
  2563. ///It uses the `MatchEvaluatorCallback` function that was set with a constructor or `MatchEvaluator::setCallback()` function
  2564. ///to generate the replacement strings on the fly.
  2565. ///The string returned by the callback function will be treated as literal and will
  2566. ///not go through any further processing.
  2567. ///
  ///This function performs a new match every time it is called, unless boolean `false` is passed as the first argument.
  2569. ///To use existing match data that was created by a previous `MatchEvaluator::nreplace()` or `MatchEvaluator::match()`, call this
  2570. ///function with boolean `false` as the first argument.
  2571. ///
  2572. ///## Complexity
  /// 1. Changes in replace-related options take effect without a re-match.
  /// 2. Changes in match-related options (e.g. start offset) need a re-match to take effect.
  2575. /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error.
  2576. /// 4. If the associated Regex object or subject string changes, a new match must be performed,
  2577. /// trying to use the existing match data in such cases is undefined behavior.
  2578. ///
  2579. ///@param do_match Perform a new matching operation if true, otherwise use existing match data.
  2580. ///@param jro JPCRE2 replace options.
  2581. ///@param counter Pointer to a counter to store the number of replacement done.
  2582. ///@return resultant string after replace.
  2583. ///@see MatchEvaluator.
  2584. ///@see MatchEvaluatorCallback.
  2585. String nreplace(bool do_match=true, Uint jro=0, SIZE_T* counter=0);
  2586. ///PCRE2 compatible replace function that uses this MatchEvaluator.
  2587. ///Performs regex replace with pcre2_substitute function
  2588. ///by generating the replacement strings dynamically with MatchEvaluator callback.
  2589. ///The string returned by callback function is processed by internal pcre2_substitute, thus allowing
  2590. ///all options that are provided by PCRE2 itself.
  2591. ///
  ///This function performs a new match every time it is called, unless boolean `false` is passed as the first argument.
  2593. ///
  2594. ///## Complexity
  /// 1. Changes in replace-related options take effect without a re-match.
  /// 2. Changes in match-related options (e.g. start offset) need a re-match to take effect.
  2597. /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error.
  2598. /// 4. If the associated Regex object or subject string changes, a new match must be performed,
  2599. /// trying to use the existing match data in such cases is undefined behavior.
  2600. ///
  2601. ///@param do_match perform a new match if true, otherwise use existing data.
  2602. ///@param ro replace related PCRE2 options.
  2603. ///@param counter Pointer to a counter to store the number of replacement done.
  2604. ///@return resultant string after replacement.
  2605. String replace(bool do_match=true, Uint ro=0, SIZE_T* counter=0);
  2606. };
  2607. /** Provides public constructors to create RegexReplace objects.
  2608. * Every RegexReplace object should be associated with a Regex object.
  * This class stores a pointer to its associated Regex object, thus when
  * the content of the associated Regex object is changed, there's no need to
  * set the pointer again.
  2612. *
  2613. * Examples:
  2614. *
  2615. * ```cpp
  2616. * jp::Regex re;
  2617. * jp::RegexReplace rr;
  2618. * rr.setRegexObject(&re);
  2619. * rr.replace("subject", "me"); // returns 'subject'
  2620. * re.compile("\\w+");
  2621. * rr.replace(); // replaces 'subject' with 'me' i.e returns 'me'
  2622. * ```
  2623. */
  2624. class RegexReplace {
  2625. private:
  2626. friend class Regex;
  2627. Regex const *re;
  2628. String r_subject;
  2629. String *r_subject_ptr; //preplace method modifies it in-place
  2630. String r_replw;
  2631. String const *r_replw_ptr;
  2632. Uint replace_opts;
  2633. Uint jpcre2_replace_opts;
  2634. PCRE2_SIZE buffer_size;
  2635. PCRE2_SIZE _start_offset;
  2636. MatchData *mdata;
  2637. MatchContext *mcontext;
  2638. ModifierTable const * modtab;
  2639. SIZE_T last_replace_count;
  2640. SIZE_T* last_replace_counter;
  2641. void init_vars() {
  2642. re = 0;
  2643. r_subject_ptr = &r_subject;
  2644. r_replw_ptr = &r_replw;
  2645. replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
  2646. jpcre2_replace_opts = 0;
  2647. buffer_size = 0;
  2648. error_number = 0;
  2649. error_offset = 0;
  2650. _start_offset = 0;
  2651. mdata = 0;
  2652. mcontext = 0;
  2653. modtab = 0;
  2654. last_replace_count = 0;
  2655. last_replace_counter = &last_replace_count;
  2656. }
  2657. void onlyCopy(RegexReplace const &rr){
  2658. re = rr.re; //only pointer should be copied.
  2659. //rr.r_subject_ptr may point to rr.r_subject or other user data
  2660. r_subject_ptr = (rr.r_subject_ptr == &rr.r_subject) ? &r_subject //not rr.r_subject
  2661. : rr.r_subject_ptr; //other user data
  2662. r_replw = rr.r_replw;
  2663. //rr.r_replw_ptr may point to rr.r_replw or other user data
  2664. r_replw_ptr = (rr.r_replw_ptr == &rr.r_replw) ? &r_replw //not rr.r_replw
  2665. : rr.r_replw_ptr; //other user data
  2666. replace_opts = rr.replace_opts;
  2667. jpcre2_replace_opts = rr.jpcre2_replace_opts;
  2668. buffer_size = rr.buffer_size;
  2669. error_number = rr.error_number;
  2670. error_offset = rr.error_offset;
  2671. _start_offset = rr._start_offset;
  2672. mdata = rr.mdata;
  2673. mcontext = rr.mcontext;
  2674. modtab = rr.modtab;
  2675. last_replace_count = rr.last_replace_count;
  2676. last_replace_counter = (rr.last_replace_counter == &rr.last_replace_count) ? &last_replace_count
  2677. : rr.last_replace_counter;
  2678. }
  2679. void deepCopy(RegexReplace const &rr){
  2680. r_subject = rr.r_subject;
  2681. onlyCopy(rr);
  2682. }
  2683. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  2684. void deepMove(RegexReplace& rr){
  2685. r_subject = std::move_if_noexcept(rr.r_subject);
  2686. onlyCopy(rr);
  2687. }
  2688. #endif
  2689. protected:
  2690. int error_number;
  2691. PCRE2_SIZE error_offset;
  2692. public:
  2693. ///Default constructor
  2694. RegexReplace(){
  2695. init_vars();
  2696. }
  2697. ///@overload
  2698. /// ...
  2699. ///Creates a RegexReplace object associating a Regex object.
  2700. ///Regex object is not modified.
  2701. ///@param r pointer to a Regex object
  2702. RegexReplace(Regex const *r) {
  2703. init_vars();
  2704. re = r;
  2705. }
  2706. ///@overload
  2707. ///...
  2708. ///Copy constructor.
  2709. ///@param rr RegexReplace object reference
  2710. RegexReplace(RegexReplace const &rr){
  2711. init_vars();
  2712. deepCopy(rr);
  2713. }
  2714. ///Overloaded Copy assignment operator.
  2715. ///@param rr RegexReplace object reference
  2716. ///@return A reference to the calling RegexReplace object
  2717. RegexReplace& operator=(RegexReplace const &rr){
  2718. if(this == &rr) return *this;
  2719. deepCopy(rr);
  2720. return *this;
  2721. }
  2722. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  2723. ///@overload
  2724. ///...
  2725. ///Move constructor.
  2726. ///This constructor steals resources from the argument.
  2727. ///It leaves the argument in a valid but indeterminate sate.
  2728. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2729. ///@param rr rvalue reference to a RegexReplace object reference
  2730. RegexReplace(RegexReplace&& rr){
  2731. init_vars();
  2732. deepMove(rr);
  2733. }
  2734. ///@overload
  2735. ///...
  2736. ///Overloaded move assignment operator.
  2737. ///This constructor steals resources from the argument.
  2738. ///It leaves the argument in a valid but indeterminate sate.
  2739. ///The indeterminate state can be returned to normal by calling reset() on that object.
  2740. ///@param rr rvalue reference to a RegexReplace object reference
  2741. ///@return A reference to the calling RegexReplace object
  2742. RegexReplace& operator=(RegexReplace&& rr){
  2743. if(this == &rr) return *this;
  2744. deepMove(rr);
  2745. return *this;
  2746. }
  2747. #endif
  2748. virtual ~RegexReplace() {}
  2749. ///Reset all class variables to its default (initial) state including memory.
  2750. ///@return Reference to the calling RegexReplace object.
  2751. RegexReplace& reset() {
  2752. String().swap(r_subject);
  2753. String().swap(r_replw);
  2754. init_vars();
  2755. return *this;
  2756. }
  2757. ///Clear all class variables to its default (initial) state (some memory may retain for further use).
  2758. ///@return Reference to the calling RegexReplace object.
  2759. RegexReplace& clear() {
  2760. r_subject.clear();
  2761. r_replw.clear();
  2762. init_vars();
  2763. return *this;
  2764. }
  2765. ///Reset replace related errors to zero.
  2766. ///@return Reference to the calling RegexReplace object
  2767. ///@see Regex::resetErrors()
  2768. ///@see RegexMatch::resetErrors()
  2769. RegexReplace& resetErrors(){
  2770. error_number = 0;
  2771. error_offset = 0;
  2772. return *this;
  2773. }
  2774. /// Returns the last error number
  2775. ///@return Last error number
  2776. int getErrorNumber() const {
  2777. return error_number;
  2778. }
  2779. /// Returns the last error offset
  2780. ///@return Last error offset
  2781. int getErrorOffset() const {
  2782. return (int)error_offset;
  2783. }
  2784. /// Returns the last error message
  2785. ///@return Last error message
  2786. String getErrorMessage() const {
  2787. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  2788. return select<Char, Map>::getErrorMessage(error_number, error_offset);
  2789. #else
  2790. return select<Char>::getErrorMessage(error_number, error_offset);
  2791. #endif
  2792. }
  2793. /// Get replacement string
  2794. ///@return replacement string
  2795. String getReplaceWith() const {
  2796. return *r_replw_ptr;
  2797. }
  2798. /// Get pointer to replacement string
  2799. ///@return pointer to replacement string
  2800. String const * getReplaceWithPointer() const {
  2801. return r_replw_ptr;
  2802. }
  2803. /// Get subject string
  2804. ///@return subject string
  2805. ///@see RegexMatch::getSubject()
  2806. String getSubject() const {
  2807. return *r_subject_ptr;
  2808. }
  2809. /// Get pointer to subject string
  2810. ///@return Pointer to constant subject string
  2811. ///@see RegexMatch::getSubjectPointer()
  2812. String const * getSubjectPointer() const {
  2813. return r_subject_ptr;
  2814. }
  2815. /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
  2816. ///
  2817. /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized
  2818. /// as long as you don't do that explicitly. Calling RegexReplace::setModifier() will re-set them.
  2819. ///
  2820. /// **Mixed or combined modifier**.
  2821. ///
  2822. /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
  2823. /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
  2824. /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
  2825. /// get set, and when you remove the 'n' modifier (with `RegexReplace::changeModifier()`), both will get removed.
  2826. /// @return Calculated modifier string (std::string)
  2827. ///@see RegexMatch::getModifier()
  2828. ///@see Regex::getModifier()
  2829. std::string getModifier() const {
  2830. return modtab ? modtab->fromReplaceOption(replace_opts, jpcre2_replace_opts)
  2831. : MOD::fromReplaceOption(replace_opts, jpcre2_replace_opts);
  2832. }
  2833. ///Get the modifier table that is set,
  2834. ///@return constant ModifierTable pointer.
  2835. ModifierTable const* getModifierTable(){
  2836. return modtab;
  2837. }
  2838. ///Get start offset.
  2839. ///@return the start offset where matching starts for replace operation
  2840. PCRE2_SIZE getStartOffset() const {
  2841. return _start_offset;
  2842. }
  2843. /// Get PCRE2 option
  2844. ///@return PCRE2 option for replace
  2845. ///@see Regex::getPcre2Option()
  2846. ///@see RegexMatch::getPcre2Option()
  2847. Uint getPcre2Option() const {
  2848. return replace_opts;
  2849. }
  2850. /// Get JPCRE2 option
  2851. ///@return JPCRE2 option for replace
  2852. ///@see Regex::getJpcre2Option()
  2853. ///@see RegexMatch::getJpcre2Option()
  2854. Uint getJpcre2Option() const {
  2855. return jpcre2_replace_opts;
  2856. }
  2857. ///Get a pointer to the associated Regex object.
  2858. ///If no actual Regex object is associated, null is returned
  2859. ///@return A pointer to the associated constant Regex object or null
  2860. Regex const * getRegexObject() const {
  2861. return re;
  2862. }
  2863. ///Return pointer to the match context that was previously set with setMatchContext().
  2864. ///Handling memory is the callers' responsibility.
  2865. ///@return pointer to the match context (default: null).
  2866. MatchContext* getMatchContext(){
  2867. return mcontext;
  2868. }
  2869. ///Get the pointer to the match data block that was set previously with setMatchData()
  2870. ///Handling memory is the callers' responsibility.
  2871. ///@return pointer to the match data (default: null).
  2872. virtual MatchData* getMatchDataBlock(){
  2873. return mdata;
  2874. }
  2875. ///Get the initial buffer size that is being used by internal function pcre2_substitute
  2876. ///@return buffer_size
  2877. PCRE2_SIZE getBufferSize(){
  2878. return buffer_size;
  2879. }
  2880. ///Get the number of replacement in last replace operation.
  2881. ///If you set an external counter with RegexReplace::setReplaceCounter(),
  2882. ///a call to this getter method will dereference the pointer to the external counter
  2883. ///and return the value.
  2884. ///@return Last replace count
  2885. SIZE_T getLastReplaceCount(){
  2886. return *last_replace_counter;
  2887. }
  2888. ///Set an external counter variable to store the replacement count.
  2889. ///This counter will be updated after each replacement operation on this object.
  2890. ///A call to this method will reset the internal counter to 0, thus when you reset the counter
  2891. ///to internal counter (by giving null as param), the previous replace count won't be available.
  2892. ///@param counter Pointer to a counter variable. Null sets the counter to default internal counter.
  2893. ///@return Reference to the calling RegexReplace object.
  2894. RegexReplace& setReplaceCounter(SIZE_T* counter){
  2895. last_replace_count = 0;
  2896. last_replace_counter = counter ? counter : &last_replace_count;
  2897. return *this;
  2898. }
  2899. ///Set the associated Regex object.
  2900. ///Regex object is not modified.
  2901. ///@param r Pointer to a Regex object.
  2902. ///@return Reference to the calling RegexReplace object.
  2903. RegexReplace& setRegexObject(Regex const *r){
  2904. re = r;
  2905. return *this;
  2906. }
  2907. /// Set the subject string for replace.
  2908. ///This makes a copy of the string. If no copy is desired or you are working
  2909. ///with big text, consider passing by pointer.
  2910. ///@param s Subject string
  2911. ///@return Reference to the calling RegexReplace object
  2912. ///@see RegexMatch::setSubject()
  2913. RegexReplace& setSubject(String const &s) {
  2914. r_subject = s;
  2915. r_subject_ptr = &r_subject; //must overwrite
  2916. return *this;
  2917. }
  2918. ///@overload
  2919. ///...
  2920. /// Set pointer to the subject string for replace, null pointer unsets it.
  2921. /// The underlined data is not modified unless RegexReplace::preplace() method is used.
  2922. ///@param s Pointer to subject string
  2923. ///@return Reference to the calling RegexReplace object
  2924. ///@see RegexMatch::setSubject()
  2925. RegexReplace& setSubject(String *s) {
  2926. if(s) r_subject_ptr = s;
  2927. else {
  2928. r_subject.clear();
  2929. r_subject_ptr = &r_subject;
  2930. }
  2931. return *this;
  2932. }
  2933. /// Set the replacement string.
  2934. ///`$` is a special character which implies captured group.
  2935. ///
  2936. ///1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
  2937. ///2. A named substring can be referenced with `${name}`, where 'name' is the group name.
  2938. ///3. A literal `$` can be given as `$$`.
  2939. ///
  2940. ///**Note:** This function makes a copy of the string. If no copy is desired or
  2941. ///you are working with big text, consider passing the string with pointer.
  2942. ///
  2943. ///@param s String to replace with
  2944. ///@return Reference to the calling RegexReplace object
  2945. RegexReplace& setReplaceWith(String const &s) {
  2946. r_replw = s;
  2947. r_replw_ptr = &r_replw; //must overwrite
  2948. return *this;
  2949. }
  2950. ///@overload
  2951. ///...
  2952. ///@param s Pointer to the string to replace with, null pointer unsets it.
  2953. ///@return Reference to the calling RegexReplace object
  2954. RegexReplace& setReplaceWith(String const *s) {
  2955. if(s) r_replw_ptr = s;
  2956. else {
  2957. r_replw.clear();
  2958. r_replw_ptr = &r_replw;
  2959. }
  2960. return *this;
  2961. }
  2962. /// Set the modifier string (resets all JPCRE2 and PCRE2 options) by calling RegexReplace::changeModifier().
  2963. ///@param s Modifier string.
  2964. ///@return Reference to the calling RegexReplace object
  2965. ///@see RegexMatch::setModifier()
  2966. ///@see Regex::setModifier()
  2967. RegexReplace& setModifier(Modifier const& s) {
  2968. replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* must not be initialized to 0 */
  2969. jpcre2_replace_opts = 0;
  2970. return changeModifier(s, true);
  2971. }
  2972. ///Set a custom modifier table to be used.
  2973. ///@param mdt pointer to ModifierTable object.
  2974. /// @return Reference to the calling RegexReplace object.
  2975. RegexReplace& setModifierTable(ModifierTable const * mdt){
  2976. modtab = mdt;
  2977. return *this;
  2978. }
  2979. /// Set the initial buffer size to be allocated for replaced string (used by PCRE2)
  2980. ///@param x Buffer size
  2981. ///@return Reference to the calling RegexReplace object
  2982. RegexReplace& setBufferSize(PCRE2_SIZE x) {
  2983. buffer_size = x;
  2984. return *this;
  2985. }
  2986. ///Set start offset.
  2987. ///Set the offset where matching starts for replace operation
  2988. ///@param start_offset The offset where matching starts for replace operation
  2989. ///@return Reference to the calling RegexReplace object
  2990. RegexReplace& setStartOffset(PCRE2_SIZE start_offset){
  2991. _start_offset = start_offset;
  2992. return *this;
  2993. }
  2994. /// Set JPCRE2 option for replace (overwrite existing option)
  2995. ///@param x Option value
  2996. ///@return Reference to the calling RegexReplace object
  2997. ///@see RegexMatch::setJpcre2Option()
  2998. ///@see Regex::setJpcre2Option()
  2999. RegexReplace& setJpcre2Option(Uint x) {
  3000. jpcre2_replace_opts = x;
  3001. return *this;
  3002. }
  3003. /// Set PCRE2 option replace (overwrite existing option)
  3004. ///@param x Option value
  3005. ///@return Reference to the calling RegexReplace object
  3006. ///@see RegexMatch::setPcre2Option()
  3007. ///@see Regex::setPcre2Option()
  3008. RegexReplace& setPcre2Option(Uint x) {
  3009. replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | x;
  3010. return *this;
  3011. }
  3012. ///Set the match context to be used.
  3013. ///Native PCRE2 API may be used to create match context.
  3014. ///The memory of the match context is not handled by RegexReplace object and not freed.
  3015. ///User will be responsible for freeing memory.
  3016. ///@param match_context Pointer to match context.
  3017. ///@return Reference to the calling RegexReplace object.
  3018. RegexReplace& setMatchContext(MatchContext * match_context){
  3019. mcontext = match_context;
  3020. return *this;
  3021. }
  3022. ///Set the match data block to be used.
  3023. ///Native PCRE2 API may be used to create match data block.
  3024. ///The memory of the match data is not handled by RegexReplace object and not freed.
  3025. ///User will be responsible for creating/freeing memory.
  3026. ///@param match_data Pointer to match data.
  3027. ///@return Reference to the calling RegexReplace object.
  3028. RegexReplace& setMatchDataBlock(MatchData *match_data){
  3029. mdata = match_data;
  3030. return *this;
  3031. }
  3032. /// After a call to this function PCRE2 and JPCRE2 options will be properly set.
  3033. /// This function does not initialize or re-initialize options.
  3034. /// If you want to set options from scratch, initialize them to 0 before calling this function.
  3035. ///
  3036. /// If invalid modifier is detected, then the error number for the RegexReplace
  3037. /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
  3038. /// You can get the message with RegexReplace::getErrorMessage() function.
  3039. /// @param mod Modifier string.
  3040. /// @param x Whether to add or remove option
  3041. /// @return Reference to the RegexReplace object
  3042. /// @see Regex::changeModifier()
  3043. /// @see RegexMatch::changeModifier()
  3044. RegexReplace& changeModifier(Modifier const& mod, bool x){
  3045. modtab ? modtab->toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset)
  3046. : MOD::toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset);
  3047. return *this;
  3048. }
  3049. /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
  3050. /// Add or remove a JPCRE2 option
  3051. /// @param opt JPCRE2 option value
  3052. /// @param x Add the option if it's true, remove otherwise.
  3053. /// @return Reference to the calling RegexReplace object
  3054. /// @see RegexMatch::changeJpcre2Option()
  3055. /// @see Regex::changeJpcre2Option()
  3056. RegexReplace& changeJpcre2Option(Uint opt, bool x) {
  3057. jpcre2_replace_opts = x ? jpcre2_replace_opts | opt : jpcre2_replace_opts & ~opt;
  3058. return *this;
  3059. }
  3060. /// Add or remove a PCRE2 option
  3061. /// @param opt PCRE2 option value
  3062. /// @param x Add the option if it's true, remove otherwise.
  3063. /// @return Reference to the calling RegexReplace object
  3064. /// @see RegexMatch::changePcre2Option()
  3065. /// @see Regex::changePcre2Option()
  3066. RegexReplace& changePcre2Option(Uint opt, bool x) {
  3067. replace_opts = x ? replace_opts | opt : replace_opts & ~opt;
  3068. //replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* It's important, but let user override it. */
  3069. return *this;
  3070. }
  3071. /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
  3072. /// This is just a wrapper of the original function RegexReplace::changeModifier()
  3073. /// provided for convenience.
  3074. /// @param mod Modifier string.
  3075. /// @return Reference to the calling RegexReplace object
  3076. /// @see RegexMatch::addModifier()
  3077. /// @see Regex::addModifier()
  3078. RegexReplace& addModifier(Modifier const& mod){
  3079. return changeModifier(mod, true);
  3080. }
  3081. /// Add specified JPCRE2 option to existing options for replace.
  3082. ///@param x Option value
  3083. ///@return Reference to the calling RegexReplace object
  3084. ///@see RegexMatch::addJpcre2Option()
  3085. ///@see Regex::addJpcre2Option()
  3086. RegexReplace& addJpcre2Option(Uint x) {
  3087. jpcre2_replace_opts |= x;
  3088. return *this;
  3089. }
  3090. /// Add specified PCRE2 option to existing options for replace
  3091. ///@param x Option value
  3092. ///@return Reference to the calling RegexReplace object
  3093. ///@see RegexMatch::addPcre2Option()
  3094. ///@see Regex::addPcre2Option()
  3095. RegexReplace& addPcre2Option(Uint x) {
  3096. replace_opts |= x;
  3097. return *this;
  3098. }
  3099. /// Perform regex replace by retrieving subject string, replacement string, modifier and other options from class variables.
  3100. /// In the replacement string (see RegexReplace::setReplaceWith()) `$` is a special character which implies captured group.
  3101. /// 1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
  3102. /// 2. A named substring can be referenced with `${name}`, where 'name' is the group name.
  3103. /// 3. A literal `$` can be given as `$$`.
  3104. /// 4. Bash like features: ${<n>:-<string>} and ${<n>:+<string1>:<string2>}, where <n> is a group number or name.
  3105. ///
  3106. ///All options supported by pcre2_substitute is available.
  3107. ///
  3108. /// Note: This function calls pcre2_substitute() to do the replacement.
  3109. ///@return Replaced string
  3110. String replace(void);
  3111. /// Perl compatible replace method.
  3112. /// Modifies subject string in-place and returns replace count.
  3113. ///
  3114. /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`.
  3115. /// @return replace count
  3116. SIZE_T preplace(void){
  3117. *r_subject_ptr = replace();
  3118. return *last_replace_counter;
  3119. }
  3120. /// Perl compatible replace method with match evaluator.
  3121. /// Modifies subject string in-place and returns replace count.
  3122. /// MatchEvaluator class does not have a implementation of this replace method, thus it is not possible
  3123. /// to re-use match data with preplace() method.
  3124. /// Re-using match data with preplace doesn't actually make any sense, because new subject will
  3125. /// always require new match data.
  3126. ///
  3127. /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`.
  3128. /// @param me MatchEvaluator object.
  3129. /// @return replace count
  3130. SIZE_T preplace(MatchEvaluator me){
  3131. *r_subject_ptr = me.setRegexObject(getRegexObject())
  3132. .setSubject(r_subject_ptr) //do not use method
  3133. .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0)
  3134. .setMatchContext(getMatchContext())
  3135. .setMatchDataBlock(getMatchDataBlock())
  3136. .setBufferSize(getBufferSize())
  3137. .setStartOffset(getStartOffset())
  3138. .replace(true, getPcre2Option(), last_replace_counter);
  3139. return *last_replace_counter;
  3140. }
  3141. ///JPCRE2 native replace function.
  3142. ///A different name is adopted to
  3143. ///distinguish itself from the regular replace() function which
  3144. ///uses pcre2_substitute() to do the replacement; contrary to that,
  3145. ///it will provide a JPCRE2 native way of replacement operation.
  3146. ///It takes a MatchEvaluator object which provides a callback function that is used
  3147. ///to generate replacement string on the fly. Any replacement string set with
  3148. ///`RegexReplace::setReplaceWith()` function will have no effect.
  3149. ///The string returned by the callback function will be treated as literal and will
  3150. ///not go through any further processing.
  3151. ///
  3152. ///This function works on a copy of the MatchEvaluator, and thus makes no changes
  3153. ///to the original. The copy is modified as below:
  3154. ///
  3155. ///1. Global replacement will set FIND_ALL for match, unset otherwise.
  3156. ///2. Bad matching options such as `PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT` will be removed.
  3157. ///3. subject, start_offset and Regex object will change according to the RegexReplace object.
  3158. ///4. match context, and match data block will be changed according to the RegexReplace object.
  3159. ///
  3160. ///It calls MatchEvaluator::nreplace() on the MatchEvaluator object to perform the replacement.
  3161. ///
  3162. ///It always performs a new match.
  3163. ///@param me A MatchEvaluator object.
  3164. ///@return The resultant string after replacement.
  3165. ///@see MatchEvaluator::nreplace()
  3166. ///@see MatchEvaluator
  3167. ///@see MatchEvaluatorCallback
  3168. String nreplace(MatchEvaluator me){
  3169. return me.setRegexObject(getRegexObject())
  3170. .setSubject(getSubjectPointer())
  3171. .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0)
  3172. .setMatchContext(getMatchContext())
  3173. .setMatchDataBlock(getMatchDataBlock())
  3174. .setStartOffset(getStartOffset())
  3175. .nreplace(true, getJpcre2Option(), last_replace_counter);
  3176. }
  3177. ///PCRE2 compatible replace function that takes a MatchEvaluator.
  3178. ///String returned by callback function is processed by pcre2_substitute,
  3179. ///thus all PCRE2 substitute options are supported by this replace function.
  3180. ///
  3181. ///It always performs a new match.
  3182. ///@param me MatchEvaluator instance, (copied and modified according to this object).
  3183. ///@return resultant string.
  3184. ///@see replace()
  3185. String replace(MatchEvaluator me){
  3186. return me.setRegexObject(getRegexObject())
  3187. .setSubject(getSubjectPointer())
  3188. .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0)
  3189. .setMatchContext(getMatchContext())
  3190. .setMatchDataBlock(getMatchDataBlock())
  3191. .setBufferSize(getBufferSize())
  3192. .setStartOffset(getStartOffset())
  3193. .replace(true, getPcre2Option(), last_replace_counter);
  3194. }
  3195. };
  3196. /** Provides public constructors to create Regex object.
  3197. * Each regex pattern needs an object of this class and each pattern needs to be compiled.
  * Pattern compilation can be done using one of its overloaded constructors or the `Regex::compile()`
  3199. * member function.
  3200. *
  3201. * Examples:
  3202. *
  3203. * ```cpp
  3204. * jp::Regex re; //does not perform a compile
  3205. * re.compile("pattern", "modifier");
  3206. * jp::Regex re2("pattern", "modifier"); //performs a compile
  3207. * ```
  3208. *
  3209. */
  3210. class Regex {
  3211. private:
  3212. friend class RegexMatch;
  3213. friend class RegexReplace;
  3214. friend class MatchEvaluator;
  3215. String pat_str;
  3216. String const *pat_str_ptr;
  3217. Pcre2Code *code;
  3218. Uint compile_opts;
  3219. Uint jpcre2_compile_opts;
  3220. ModifierTable const * modtab;
  3221. CompileContext *ccontext;
  3222. std::vector<unsigned char> tabv;
  3223. void init_vars() {
  3224. jpcre2_compile_opts = 0;
  3225. compile_opts = 0;
  3226. error_number = 0;
  3227. error_offset = 0;
  3228. code = 0;
  3229. pat_str_ptr = &pat_str;
  3230. ccontext = 0;
  3231. modtab = 0;
  3232. }
  3233. void freeRegexMemory(void) {
  3234. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::code_free(code);
  3235. code = 0; //we may use it again
  3236. }
  3237. void freeCompileContext(){
  3238. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_free(ccontext);
  3239. ccontext = 0;
  3240. }
  3241. void onlyCopy(Regex const &r){
  3242. //r.pat_str_ptr may point to other user data
  3243. pat_str_ptr = (r.pat_str_ptr == &r.pat_str) ? &pat_str //not r.pat_str
  3244. : r.pat_str_ptr; //other user data
  3245. compile_opts = r.compile_opts;
  3246. jpcre2_compile_opts = r.jpcre2_compile_opts;
  3247. error_number = r.error_number;
  3248. error_offset = r.error_offset;
  3249. modtab = r.modtab;
  3250. }
  3251. void deepCopy(Regex const &r) {
  3252. pat_str = r.pat_str; //must not use setPattern() here
  3253. onlyCopy(r);
  3254. //copy tables
  3255. tabv = r.tabv;
  3256. //copy ccontext if it's not null
  3257. freeCompileContext();
  3258. ccontext = (r.ccontext) ? Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_copy(r.ccontext) : 0;
  3259. //if tabv is not empty and ccontext is ok (not null) set the table pointer to ccontext
  3260. if(ccontext && !tabv.empty()) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
  3261. //table pointer must be updated in the compiled code itself, jit memory copy is not available.
  3262. //copy is not going to work, we need a recompile.
  3263. //as all vars are already copied, we can just call compile()
  3264. r.code ? compile() //compile frees previous memory.
  3265. : freeRegexMemory();
  3266. }
  #ifdef JPCRE2_USE_MINIMUM_CXX_11
  /// Take over the resources of @p r (only available with C++11 or later).
  /// The pattern text and character tables are moved, the state handled by onlyCopy() is copied,
  /// and the compile context and compiled code pointers are stolen; the corresponding
  /// pointers in @p r are set to null.
  /// @param r Regex object to steal from.
  void deepMove(Regex& r) {
      typedef Pcre2Func<sizeof( Char_T ) * CHAR_BIT> Pcre2;

      pat_str = std::move_if_noexcept(r.pat_str);
      onlyCopy(r);
      // steal the tables
      tabv = std::move_if_noexcept(r.tabv);

      // steal the compile context
      freeCompileContext();
      ccontext = r.ccontext;
      r.ccontext = 0; // must be nulled in the source
      if (ccontext && !tabv.empty()) {
          Pcre2::set_character_tables(ccontext, &tabv[0]);
      }

      // steal the compiled code
      freeRegexMemory();
      code = r.code;
      r.code = 0; // must be nulled in the source
  }
  #endif
  3282. protected:
  3283. int error_number;
  3284. PCRE2_SIZE error_offset;
  3285. public:
  3286. /// Default Constructor.
  3287. /// Initializes all class variables to defaults.
  3288. /// Does not perform any pattern compilation.
  3289. Regex() {
  3290. init_vars();
  3291. }
  3292. ///Compile pattern with initialization.
  3293. /// @param re Pattern string
  3294. Regex(String const &re) {
  3295. init_vars();
  3296. compile(re);
  3297. }
  3298. /// @overload
  3299. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3300. Regex(String const *re) {
  3301. init_vars();
  3302. compile(re);
  3303. }
  3304. ///@overload
  3305. /// @param re Pattern string .
  3306. /// @param mod Modifier string.
  3307. Regex(String const &re, Modifier const& mod) {
  3308. init_vars();
  3309. compile(re, mod);
  3310. }
  3311. ///@overload
  3312. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3313. /// @param mod Modifier string.
  3314. Regex(String const *re, Modifier const& mod) {
  3315. init_vars();
  3316. compile(re, mod);
  3317. }
  3318. ///@overload
  3319. /// @param re Pattern string .
  3320. /// @param po PCRE2 option value
  3321. Regex(String const &re, Uint po) {
  3322. init_vars();
  3323. compile(re, po);
  3324. }
  3325. ///@overload
  3326. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3327. /// @param po PCRE2 option value
  3328. Regex(String const *re, Uint po) {
  3329. init_vars();
  3330. compile(re, po);
  3331. }
  3332. ///@overload
  3333. /// @param re Pattern string .
  3334. /// @param po PCRE2 option value
  3335. /// @param jo JPCRE2 option value
  3336. Regex(String const &re, Uint po, Uint jo) {
  3337. init_vars();
  3338. compile(re, po, jo);
  3339. }
  3340. ///@overload
  3341. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3342. /// @param po PCRE2 option value
  3343. /// @param jo JPCRE2 option value
  3344. Regex(String const *re, Uint po, Uint jo) {
  3345. init_vars();
  3346. compile(re, po, jo);
  3347. }
  3348. /// @overload
  3349. ///...
  3350. /// Copy constructor.
  3351. /// A separate and new compile is performed from the copied options.
  3352. ///
  3353. /// @param r Constant Regex object reference.
  3354. Regex(Regex const &r) {
  3355. init_vars();
  3356. deepCopy(r);
  3357. }
  3358. /// Overloaded assignment operator.
  3359. /// @param r Regex const &
  3360. /// @return *this
  3361. Regex& operator=(Regex const &r) {
  3362. if (this == &r) return *this;
  3363. deepCopy(r);
  3364. return *this;
  3365. }
  3366. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  3367. /// @overload
  3368. ///...
  3369. /// Move constructor.
  3370. ///This constructor steals resources from the argument.
  3371. ///It leaves the argument in a valid but indeterminate sate.
  3372. ///The indeterminate state can be returned to normal by calling reset() on that object.
  3373. /// @param r rvalue reference to a Regex object.
  3374. Regex(Regex&& r) {
  3375. init_vars();
  3376. deepMove(r);
  3377. }
  3378. ///@overload
  3379. ///...
  3380. /// Overloaded move-assignment operator.
  3381. ///This constructor steals resources from the argument.
  3382. ///It leaves the argument in a valid but indeterminate sate.
  3383. ///The indeterminate state can be returned to normal by calling reset() on that object.
  3384. /// @param r Regex&&
  3385. /// @return *this
  3386. Regex& operator=(Regex&& r) {
  3387. if (this == &r) return *this;
  3388. deepMove(r);
  3389. return *this;
  3390. }
  3391. /// Provides boolean check for the status of the object.
  3392. /// This overloaded boolean operator needs to be declared
  3393. /// explicit to prevent implicit conversion and overloading issues.
  3394. ///
  3395. /// We will only enable it if >=C++11 is being used, as the explicit keyword
  3396. /// for a function other than constructor is not supported in older compilers.
  3397. ///
  3398. /// If you are dealing with legacy code/compilers use the Double bang trick mentioned
  3399. /// in Regex::operator!().
  3400. ///
  3401. /// This helps us to check the status of the compiled regex like this:
  3402. ///
  3403. /// ```
  3404. /// jpcre2::select<char>::Regex re("pat", "mod");
  3405. /// if(re) {
  3406. /// std::cout<<"Compile success";
  3407. /// } else {
  3408. /// std::cout<<"Compile failed";
  3409. /// }
  3410. /// ```
  3411. ///@return true if regex compiled successfully, false otherwise.
  3412. ///
  3413. explicit operator bool() const {
  3414. return (code != 0);
  3415. }
  3416. #endif
  3417. /// Provides boolean check for the status of the object.
  3418. /// This is a safe boolean approach (no implicit conversion or overloading).
  3419. /// We don't need the explicit keyword here and thus it's the preferable method
  3420. /// to check for object status that will work well with older compilers.
  3421. /// e.g:
  3422. ///
  3423. /// ```
  3424. /// jpcre2::select<char>::Regex re("pat","mod");
  3425. /// if(!re) {
  3426. /// std::cout<<"Compile failed";
  3427. /// } else {
  3428. /// std::cout<<"Compiled successfully";
  3429. /// }
  3430. /// ```
  3431. /// Double bang trick:
  3432. ///
  3433. /// ```
  3434. /// jpcre2::select<char>::Regex re("pat","mod");
  3435. /// if(!!re) {
  3436. /// std::cout<<"Compiled successfully";
  3437. /// } else {
  3438. /// std::cout<<"Compile failed";
  3439. /// }
  3440. /// ```
  3441. /// @return true if regex compile failed, false otherwise.
  3442. bool operator!() const {
  3443. return (code == 0);
  3444. }
  3445. virtual ~Regex() {
  3446. freeRegexMemory();
  3447. freeCompileContext();
  3448. }
  3449. ///Reset all class variables to its default (initial) state including memory.
  3450. ///@return Reference to the calling Regex object.
  3451. Regex& reset() {
  3452. freeRegexMemory();
  3453. freeCompileContext();
  3454. String().swap(pat_str);
  3455. init_vars();
  3456. return *this;
  3457. }
  3458. ///Clear all class variables to its default (initial) state (some memory may retain for further use).
  3459. ///@return Reference to the calling Regex object.
  3460. Regex& clear() {
  3461. freeRegexMemory();
  3462. freeCompileContext();
  3463. pat_str.clear();
  3464. init_vars();
  3465. return *this;
  3466. }
  3467. ///Reset regex compile related errors to zero.
  3468. ///@return A reference to the Regex object
  3469. ///@see RegexReplace::resetErrors()
  3470. ///@see RegexMatch::resetErrors()
  3471. Regex& resetErrors() {
  3472. error_number = 0;
  3473. error_offset = 0;
  3474. return *this;
  3475. }
  3476. /// Recreate character tables used by PCRE2.
  3477. /// You should call this function after changing the locale to remake the
  3478. /// character tables according to the new locale.
  3479. /// These character tables are used to compile the regex and used by match
  3480. /// and replace operation. A separate call to compile() will be required
  3481. /// to apply the new character tables.
  3482. /// @return Reference to the calling Regex object.
  3483. Regex& resetCharacterTables() {
  3484. const unsigned char* tables = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::maketables(0); //must pass 0, we are using free() to free the tables.
  3485. tabv = std::vector<unsigned char>(tables, tables+1088);
  3486. ::free((void*)tables); //must free memory
  3487. if(!ccontext)
  3488. ccontext = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_create(0);
  3489. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_character_tables(ccontext, &tabv[0]);
  3490. return *this;
  3491. }
  3492. ///Get Pcre2 raw compiled code pointer.
  3493. ///@return pointer to constant pcre2_code or null.
  3494. Pcre2Code const* getPcre2Code() const{
  3495. return code;
  3496. }
  3497. /// Get pattern string
  3498. ///@return pattern string of type jpcre2::select::String
  3499. String getPattern() const {
  3500. return *pat_str_ptr;
  3501. }
  3502. /// Get pointer to pattern string
  3503. ///@return Pointer to constant pattern string
  3504. String const * getPatternPointer() const {
  3505. return pat_str_ptr;
  3506. }
  3507. ///Get number of captures from compiled code.
  3508. ///@return New line option value or 0.
  3509. Uint getNumCaptures() {
  3510. if(!code) return 0;
  3511. Uint numCaptures = 0;
  3512. int ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(code, PCRE2_INFO_CAPTURECOUNT, &numCaptures);
  3513. if(ret < 0) error_number = ret;
  3514. return numCaptures;
  3515. }
  3516. /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
  3517. ///
  3518. /// **Mixed or combined modifier**.
  3519. ///
  3520. /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
  3521. /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
  3522. /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
  3523. /// get set, and when you remove the 'n' modifier (with `Regex::changeModifier()`), both will get removed.
  3524. ///@tparam Char_T Character type
  3525. ///@return Calculated modifier string (std::string)
  3526. ///@see RegexMatch::getModifier()
  3527. ///@see RegexReplace::getModifier()
  3528. std::string getModifier() const {
  3529. return modtab ? modtab->fromCompileOption(compile_opts, jpcre2_compile_opts)
  3530. : MOD::fromCompileOption(compile_opts, jpcre2_compile_opts);
  3531. }
  3532. /// Get PCRE2 option
  3533. /// @return Compile time PCRE2 option value
  3534. ///@see RegexReplace::getPcre2Option()
  3535. ///@see RegexMatch::getPcre2Option()
  3536. Uint getPcre2Option() const {
  3537. return compile_opts;
  3538. }
  3539. /// Get JPCRE2 option
  3540. /// @return Compile time JPCRE2 option value
  3541. ///@see RegexReplace::getJpcre2Option()
  3542. ///@see RegexMatch::getJpcre2Option()
  3543. Uint getJpcre2Option() const {
  3544. return jpcre2_compile_opts;
  3545. }
  3546. /// Returns the last error number
  3547. ///@return Last error number
  3548. int getErrorNumber() const {
  3549. return error_number;
  3550. }
  3551. /// Returns the last error offset
  3552. ///@return Last error offset
  3553. int getErrorOffset() const {
  3554. return (int)error_offset;
  3555. }
  3556. /// Returns the last error message
  3557. ///@return Last error message
  3558. String getErrorMessage() const {
  3559. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  3560. return select<Char, Map>::getErrorMessage(error_number, error_offset);
  3561. #else
  3562. return select<Char>::getErrorMessage(error_number, error_offset);
  3563. #endif
  3564. }
  3565. ///Get new line convention from compiled code.
  3566. ///@return New line option value or 0.
  3567. ///```
  3568. ///PCRE2_NEWLINE_CR Carriage return only
  3569. ///PCRE2_NEWLINE_LF Linefeed only
  3570. ///PCRE2_NEWLINE_CRLF CR followed by LF only
  3571. ///PCRE2_NEWLINE_ANYCRLF Any of the above
  3572. ///PCRE2_NEWLINE_ANY Any Unicode newline sequence
  3573. ///```
  3574. Uint getNewLine() {
  3575. if(!code) return 0;
  3576. Uint newline = 0;
  3577. int ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(code, PCRE2_INFO_NEWLINE, &newline);
  3578. if(ret < 0) error_number = ret;
  3579. return newline;
  3580. }
  3581. ///Get the modifier table that is set,
  3582. ///@return constant ModifierTable pointer.
  3583. ModifierTable const* getModifierTable(){
  3584. return modtab;
  3585. }
  3586. ///Set new line convention.
  3587. ///@param value New line option value.
  3588. ///```
  3589. ///PCRE2_NEWLINE_CR Carriage return only
  3590. ///PCRE2_NEWLINE_LF Linefeed only
  3591. ///PCRE2_NEWLINE_CRLF CR followed by LF only
  3592. ///PCRE2_NEWLINE_ANYCRLF Any of the above
  3593. ///PCRE2_NEWLINE_ANY Any Unicode newline sequence
  3594. ///```
  3595. ///@return Reference to the calling Regex object
  3596. Regex& setNewLine(Uint value){
  3597. if(!ccontext)
  3598. ccontext = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile_context_create(0);
  3599. int ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::set_newline(ccontext, value);
  3600. if(ret < 0) error_number = ret;
  3601. return *this;
  3602. }
  3603. /// Set the pattern string to compile
  3604. /// @param re Pattern string
  3605. /// @return Reference to the calling Regex object.
  3606. Regex& setPattern(String const &re) {
  3607. pat_str = re;
  3608. pat_str_ptr = &pat_str; //must overwrite
  3609. return *this;
  3610. }
  3611. /// @overload
  3612. /// @param re Pattern string pointer, null pointer will unset it.
  3613. /// @return Reference to the calling Regex object.
  3614. Regex& setPattern(String const *re) {
  3615. if(re) pat_str_ptr = re;
  3616. else {
  3617. pat_str.clear();
  3618. pat_str_ptr = &pat_str;
  3619. }
  3620. return *this;
  3621. }
  3622. /// set the modifier (resets all JPCRE2 and PCRE2 options) by calling Regex::changeModifier().
  3623. /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier and sets
  3624. /// equivalent PCRE2 and JPCRE2 options.
  3625. /// @param x Modifier string.
  3626. /// @return Reference to the calling Regex object.
  3627. /// @see RegexMatch::setModifier()
  3628. /// @see RegexReplace::setModifier()
  3629. Regex& setModifier(Modifier const& x) {
  3630. compile_opts = 0;
  3631. jpcre2_compile_opts = 0;
  3632. return changeModifier(x, true);
  3633. }
  3634. ///Set a custom modifier table to be used.
  3635. ///@param mdt pointer to ModifierTable object.
  3636. /// @return Reference to the calling Regex object.
  3637. Regex& setModifierTable(ModifierTable const * mdt){
  3638. modtab = mdt;
  3639. return *this;
  3640. }
  3641. /// Set JPCRE2 option for compile (overwrites existing option)
  3642. /// @param x Option value
  3643. /// @return Reference to the calling Regex object.
  3644. /// @see RegexMatch::setJpcre2Option()
  3645. /// @see RegexReplace::setJpcre2Option()
  3646. Regex& setJpcre2Option(Uint x) {
  3647. jpcre2_compile_opts = x;
  3648. return *this;
  3649. }
  3650. /// Set PCRE2 option for compile (overwrites existing option)
  3651. /// @param x Option value
  3652. /// @return Reference to the calling Regex object.
  3653. /// @see RegexMatch::setPcre2Option()
  3654. /// @see RegexReplace::setPcre2Option()
  3655. Regex& setPcre2Option(Uint x) {
  3656. compile_opts = x;
  3657. return *this;
  3658. }
  3659. /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
  3660. /// This function does not initialize or re-initialize options.
  3661. /// If you want to set options from scratch, initialize them to 0 before calling this function.
  3662. ///
  3663. /// If invalid modifier is detected, then the error number for the Regex
  3664. /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
  3665. /// You can get the message with Regex::getErrorMessage() function.
  3666. /// @param mod Modifier string.
  3667. /// @param x Whether to add or remove option
  3668. /// @return Reference to the calling Regex object
  3669. /// @see RegexMatch::changeModifier()
  3670. /// @see RegexReplace::changeModifier()
  3671. Regex& changeModifier(Modifier const& mod, bool x){
  3672. modtab ? modtab->toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset)
  3673. : MOD::toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset);
  3674. return *this;
  3675. }
  3676. /// Add or remove a JPCRE2 option
  3677. /// @param opt JPCRE2 option value
  3678. /// @param x Add the option if it's true, remove otherwise.
  3679. /// @return Reference to the calling Regex object
  3680. /// @see RegexMatch::changeJpcre2Option()
  3681. /// @see RegexReplace::changeJpcre2Option()
  3682. Regex& changeJpcre2Option(Uint opt, bool x) {
  3683. jpcre2_compile_opts = x ? jpcre2_compile_opts | opt : jpcre2_compile_opts & ~opt;
  3684. return *this;
  3685. }
  3686. /// Add or remove a PCRE2 option
  3687. /// @param opt PCRE2 option value
  3688. /// @param x Add the option if it's true, remove otherwise.
  3689. /// @return Reference to the calling Regex object
  3690. /// @see RegexMatch::changePcre2Option()
  3691. /// @see RegexReplace::changePcre2Option()
  3692. Regex& changePcre2Option(Uint opt, bool x) {
  3693. compile_opts = x ? compile_opts | opt : compile_opts & ~opt;
  3694. return *this;
  3695. }
  3696. /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
  3697. /// This is just a wrapper of the original function Regex::changeModifier()
  3698. /// provided for convenience.
  3699. /// @param mod Modifier string.
  3700. /// @return Reference to the calling Regex object
  3701. /// @see RegexMatch::addModifier()
  3702. /// @see RegexReplace::addModifier()
  3703. Regex& addModifier(Modifier const& mod){
  3704. return changeModifier(mod, true);
  3705. }
  3706. /// Add option to existing JPCRE2 options for compile
  3707. /// @param x Option value
  3708. /// @return Reference to the calling Regex object
  3709. /// @see RegexMatch::addJpcre2Option()
  3710. /// @see RegexReplace::addJpcre2Option()
  3711. Regex& addJpcre2Option(Uint x) {
  3712. jpcre2_compile_opts |= x;
  3713. return *this;
  3714. }
  3715. /// Add option to existing PCRE2 options for compile
  3716. /// @param x Option value
  3717. /// @return Reference to the calling Regex object
  3718. /// @see RegexMatch::addPcre2Option()
  3719. /// @see RegexReplace::addPcre2Option()
  3720. Regex& addPcre2Option(Uint x) {
  3721. compile_opts |= x;
  3722. return *this;
  3723. }
  ///Compile the pattern using the state currently held by this Regex object
  ///(pattern string, PCRE2 options, JPCRE2 options and compile context).
  ///Defined out of class. Previously compiled code is released first; on failure
  ///the error number and error offset are recorded on the object.
  ///@see Regex::compile(String const &re, Uint po, Uint jo)
  ///@see Regex::compile(String const &re, Uint po)
  ///@see Regex::compile(String const &re, Modifier mod)
  ///@see Regex::compile(String const &re)
  void compile(void);
  3730. ///@overload
  3731. ///...
  3732. /// Set the specified parameters, then compile the pattern using information from class variables.
  3733. /// @param re Pattern string
  3734. /// @param po PCRE2 option
  3735. /// @param jo JPCRE2 option
  3736. void compile(String const &re, Uint po, Uint jo) {
  3737. setPattern(re).setPcre2Option(po).setJpcre2Option(jo);
  3738. compile();
  3739. }
  3740. ///@overload
  3741. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3742. /// @param po PCRE2 option
  3743. /// @param jo JPCRE2 option
  3744. void compile(String const *re, Uint po, Uint jo) {
  3745. setPattern(re).setPcre2Option(po).setJpcre2Option(jo);
  3746. compile();
  3747. }
  3748. ///@overload
  3749. /// @param re Pattern string
  3750. /// @param po PCRE2 option
  3751. void compile(String const &re, Uint po) {
  3752. setPattern(re).setPcre2Option(po);
  3753. compile();
  3754. }
  3755. ///@overload
  3756. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3757. /// @param po PCRE2 option
  3758. void compile(String const *re, Uint po) {
  3759. setPattern(re).setPcre2Option(po);
  3760. compile();
  3761. }
  3762. /// @overload
  3763. /// @param re Pattern string
  3764. /// @param mod Modifier string.
  3765. void compile(String const &re, Modifier const& mod) {
  3766. setPattern(re).setModifier(mod);
  3767. compile();
  3768. }
  3769. ///@overload
  3770. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3771. /// @param mod Modifier string.
  3772. void compile(String const *re, Modifier const& mod) {
  3773. setPattern(re).setModifier(mod);
  3774. compile();
  3775. }
  3776. ///@overload
  3777. /// @param re Pattern string .
  3778. void compile(String const &re) {
  3779. setPattern(re);
  3780. compile();
  3781. }
  3782. ///@overload
  3783. /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
  3784. void compile(String const *re) {
  3785. setPattern(re);
  3786. compile();
  3787. }
  3788. ///Returns a default constructed RegexMatch object by value.
  3789. ///This object is initialized with the same modifier table
  3790. ///as this Regex object.
  3791. ///@return RegexMatch object.
  3792. RegexMatch initMatch(){
  3793. RegexMatch rm(this);
  3794. rm.setModifierTable(modtab);
  3795. return rm;
  3796. }
  3797. ///Synonym for initMatch()
  3798. ///@return RegexMatch object by value.
  3799. RegexMatch getMatchObject(){
  3800. return initMatch();
  3801. }
  3802. /// Perform regex match and return match count using a temporary match object.
  3803. /// This temporary match object will get available options from this Regex object,
  3804. /// that includes modifier table.
  3805. /// @param s Subject string .
  3806. /// @param mod Modifier string.
  3807. /// @param start_offset Offset from where matching will start in the subject string.
  3808. /// @return Match count
  3809. /// @see RegexMatch::match()
  3810. SIZE_T match(String const &s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
  3811. return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match();
  3812. }
  3813. ///@overload
  3814. ///...
  3815. ///@param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
  3816. ///@param mod Modifier string.
  3817. ///@param start_offset Offset from where matching will start in the subject string.
  3818. ///@return Match count
  3819. SIZE_T match(String const *s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
  3820. return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match();
  3821. }
  3822. ///@overload
  3823. ///...
  3824. /// @param s Subject string .
  3825. /// @param start_offset Offset from where matching will start in the subject string.
  3826. /// @return Match count
  3827. /// @see RegexMatch::match()
  3828. SIZE_T match(String const &s, PCRE2_SIZE start_offset=0) {
  3829. return initMatch().setStartOffset(start_offset).setSubject(s).match();
  3830. }
  3831. ///@overload
  3832. ///...
  3833. /// @param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
  3834. /// @param start_offset Offset from where matching will start in the subject string.
  3835. /// @return Match count
  3836. /// @see RegexMatch::match()
  3837. SIZE_T match(String const *s, PCRE2_SIZE start_offset=0) {
  3838. return initMatch().setStartOffset(start_offset).setSubject(s).match();
  3839. }
  3840. ///Returns a default constructed RegexReplace object by value.
  3841. ///This object is initialized with the same modifier table as this Regex object.
  3842. ///@return RegexReplace object.
  3843. RegexReplace initReplace(){
  3844. RegexReplace rr(this);
  3845. rr.setModifierTable(modtab);
  3846. return rr;
  3847. }
  3848. ///Synonym for initReplace()
  3849. ///@return RegexReplace object.
  3850. RegexReplace getReplaceObject(){
  3851. return initReplace();
  3852. }
  3853. /// Perform regex replace and return the replaced string using a temporary replace object.
  3854. /// This temporary replace object will get available options from this Regex object,
  3855. /// that includes modifier table.
  3856. /// @param mains Subject string.
  3857. /// @param repl String to replace with
  3858. /// @param mod Modifier string.
  3859. ///@param counter Pointer to a counter to store the number of replacement done.
  3860. /// @return Resultant string after regex replace
  3861. /// @see RegexReplace::replace()
  3862. String replace(String const &mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
  3863. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3864. }
  3865. ///@overload
  3866. /// @param mains Pointer to subject string
  3867. /// @param repl String to replace with
  3868. /// @param mod Modifier string.
  3869. ///@param counter Pointer to a counter to store the number of replacement done.
  3870. /// @return Resultant string after regex replace
  3871. /// @see RegexReplace::replace()
  3872. String replace(String *mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
  3873. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3874. }
  3875. ///@overload
  3876. ///...
  3877. /// @param mains Subject string
  3878. /// @param repl Pointer to string to replace with
  3879. /// @param mod Modifier string.
  3880. ///@param counter Pointer to a counter to store the number of replacement done.
  3881. /// @return Resultant string after regex replace
  3882. /// @see RegexReplace::replace()
  3883. String replace(String const &mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
  3884. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3885. }
  3886. ///@overload
  3887. ///...
  3888. /// @param mains Pointer to subject string
  3889. /// @param repl Pointer to string to replace with
  3890. /// @param mod Modifier string.
  3891. ///@param counter Pointer to a counter to store the number of replacement done.
  3892. /// @return Resultant string after regex replace
  3893. /// @see RegexReplace::replace()
  3894. String replace(String *mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
  3895. return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
  3896. }
  3897. /// Perl compatible replace method.
  3898. /// Modifies subject string in-place and returns replace count.
  3899. ///
  3900. /// It's a shorthand method to `RegexReplace::preplace()`.
  3901. /// @param mains Pointer to subject string.
  3902. /// @param repl Replacement string (string to replace with).
  3903. /// @param mod Modifier string.
  3904. /// @return replace count.
  3905. SIZE_T preplace(String * mains, String const& repl, Modifier const& mod=""){
  3906. SIZE_T counter = 0;
  3907. if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3908. return counter;
  3909. }
  3910. /// @overload
  3911. ///
  3912. /// Perl compatible replace method.
  3913. /// Modifies subject string in-place and returns replace count.
  3914. ///
  3915. /// It's a shorthand method to `RegexReplace::preplace()`.
  3916. /// @param mains Pointer to subject string.
  3917. /// @param repl Pointer to replacement string (string to replace with).
  3918. /// @param mod Modifier string.
  3919. /// @return replace count.
  3920. SIZE_T preplace(String * mains, String const* repl, Modifier const& mod=""){
  3921. SIZE_T counter = 0;
  3922. if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3923. return counter;
  3924. }
  3925. /// @overload
  3926. ///
  3927. /// Perl compatible replace method.
  3928. /// Returns replace count and discards subject string.
  3929. ///
  3930. /// It's a shorthand method to `RegexReplace::preplace()`.
  3931. /// @param mains Subject string.
  3932. /// @param repl Replacement string (string to replace with).
  3933. /// @param mod Modifier string.
  3934. /// @return replace count.
  3935. SIZE_T preplace(String const& mains, String const& repl, Modifier const& mod=""){
  3936. SIZE_T counter = 0;
  3937. initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3938. return counter;
  3939. }
  3940. /// @overload
  3941. ///
  3942. /// Perl compatible replace method.
  3943. /// Returns replace count and discards subject string.
  3944. ///
  3945. /// It's a shorthand method to `RegexReplace::preplace()`.
  3946. /// @param mains Subject string.
  3947. /// @param repl Pointer to replacement string (string to replace with).
  3948. /// @param mod Modifier string.
  3949. /// @return replace count.
  3950. SIZE_T preplace(String const& mains, String const* repl, Modifier const& mod=""){
  3951. SIZE_T counter = 0;
  3952. initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
  3953. return counter;
  3954. }
  3955. };
  private:
  //prevent object instantiation of select class
  //The default constructor, copy constructor, move constructor (C++11 builds only)
  //and destructor are all declared private here, so code outside the class cannot
  //create, copy, move or destroy a select object.
  //NOTE(review): no definitions for these are visible in this part of the file.
  select();
  select(select const &);
  #ifdef JPCRE2_USE_MINIMUM_CXX_11
  select(select&&);
  #endif
  ~select();
  3964. };//struct select
  3965. }//jpcre2 namespace
  3966. inline void jpcre2::ModifierTable::parseModifierTable(std::string& tabjs, VecOpt& tabjv,
  3967. std::string& tab_s, VecOpt& tab_v,
  3968. std::string const& tabs, VecOpt const& tabv){
  3969. SIZE_T n = tabs.length();
  3970. JPCRE2_ASSERT(n == tabv.size(), ("ValueError: Could not set Modifier table.\
  3971. Modifier character and value tables are not of the same size (" + _tostdstring(n) + " == " + _tostdstring(tabv.size()) + ").").c_str());
  3972. tabjs.clear();
  3973. tab_s.clear(); tab_s.reserve(n);
  3974. tabjv.clear();
  3975. tab_v.clear(); tab_v.reserve(n);
  3976. for(SIZE_T i=0;i<n;++i){
  3977. switch(tabv[i]){
  3978. case JIT_COMPILE:
  3979. case FIND_ALL: //JPCRE2 options are unique, so it's not necessary to check if it's compile or replace or match.
  3980. tabjs.push_back(tabs[i]); tabjv.push_back(tabv[i]);break;
  3981. default: tab_s.push_back(tabs[i]); tab_v.push_back(tabv[i]); break;
  3982. }
  3983. }
  3984. }
  3985. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  3986. template<typename Char_T, template<typename...> class Map>
  3987. void jpcre2::select<Char_T, Map>::Regex::compile() {
  3988. #else
  3989. template<typename Char_T>
  3990. void jpcre2::select<Char_T>::Regex::compile() {
  3991. #endif
  3992. //Get c_str of pattern
  3993. Pcre2Sptr c_pattern = (Pcre2Sptr) pat_str_ptr->c_str();
  3994. int err_number = 0;
  3995. PCRE2_SIZE err_offset = 0;
  3996. /**************************************************************************
  3997. * Compile the regular expression pattern, and handle
  3998. * any errors that are detected.
  3999. *************************************************************************/
  4000. //first release any previous memory
  4001. freeRegexMemory();
  4002. code = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::compile( c_pattern, /* the pattern */
  4003. PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
  4004. compile_opts, /* default options */
  4005. &err_number, /* for error number */
  4006. &err_offset, /* for error offset */
  4007. ccontext); /* use compile context */
  4008. if (code == 0) {
  4009. /* Compilation failed */
  4010. //must not free regex memory, the only function has that right is the destructor
  4011. error_number = err_number;
  4012. error_offset = err_offset;
  4013. return;
  4014. } else if ((jpcre2_compile_opts & JIT_COMPILE) != 0) {
  4015. ///perform JIT compilation it it's enabled
  4016. int jit_ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::jit_compile(code, PCRE2_JIT_COMPLETE);
  4017. if(jit_ret < 0) error_number = jit_ret;
  4018. }
  4019. //everything's OK
  4020. }
  4021. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  4022. template<typename Char_T, template<typename...> class Map>
  4023. typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) {
  4024. #else
  4025. template<typename Char_T>
  4026. typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) {
  4027. #endif
  4028. if(counter) *counter = 0;
  4029. replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
  4030. replace_opts &= ~PCRE2_SUBSTITUTE_GLOBAL;
  4031. Regex const * re = RegexMatch::getRegexObject();
  4032. // If re or re->code is null, return the subject string unmodified.
  4033. if (!re || re->code == 0)
  4034. return RegexMatch::getSubject();
  4035. Pcre2Sptr r_subject_ptr = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str();
  4036. //~ SIZE_T totlen = RegexMatch::getSubjectPointer()->length();
  4037. if(do_match) match();
  4038. SIZE_T mcount = vec_soff.size();
  4039. // if mcount is 0, return the subject string. (there's no need to worry about re)
  4040. if(!mcount) return RegexMatch::getSubject();
  4041. SIZE_T current_offset = 0; //needs to be zero, not start_offset, because it's from where unmatched parts will be copied.
  4042. String res, tmp;
  4043. //A check, this check is not fullproof.
  4044. SIZE_T last = vec_eoff.size();
  4045. last = (last>0)?last-1:0;
  4046. JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\
  4047. If you are using existing match data, try a new match.");
  4048. //loop through the matches
  4049. for(SIZE_T i=0;i<mcount;++i){
  4050. //first copy the unmatched part.
  4051. //Matches that use \K to end before they start are not supported.
  4052. if(vec_soff[i] < current_offset || vec_eoff[i] < vec_soff[i]){
  4053. RegexMatch::error_number = PCRE2_ERROR_BADSUBSPATTERN;
  4054. return RegexMatch::getSubject();
  4055. } else {
  4056. //~ res += RegexMatch::getSubject().substr(current_offset, vec_soff[i]-current_offset);
  4057. res += String(r_subject_ptr+current_offset, r_subject_ptr+vec_soff[i]);
  4058. }
  4059. //now process the matched part
  4060. switch(callbackn){
  4061. case 0: tmp = callback0((void*)0, (void*)0, (void*)0); break;
  4062. case 1: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount, "VecNum");
  4063. tmp = callback1(vec_num[i], (void*)0, (void*)0); break;
  4064. case 2: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount, "VecNas");
  4065. tmp = callback2((void*)0, vec_nas[i], (void*)0); break;
  4066. case 3: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount, "VecNum or VecNas");
  4067. tmp = callback3(vec_num[i], vec_nas[i], (void*)0); break;
  4068. case 4: JPCRE2_VECTOR_DATA_ASSERT(vec_ntn.size() == mcount, "VecNtn");
  4069. tmp = callback4((void*)0, (void*)0, vec_ntn[i]); break;
  4070. case 5: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_ntn.size() == mcount, "VecNum or VecNtn");
  4071. tmp = callback5(vec_num[i], (void*)0, vec_ntn[i]); break;
  4072. case 6: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNas or VecNtn");
  4073. tmp = callback6((void*)0, vec_nas[i], vec_ntn[i]); break;
  4074. case 7: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNum\n or VecNas or VecNtn");
  4075. tmp = callback7(vec_num[i], vec_nas[i], vec_ntn[i]); break;
  4076. default: JPCRE2_ASSERT(2 == 1, "Invalid callbackn. Please file a bug report (must include the line number from below)."); break;
  4077. }
  4078. //reset the current offset
  4079. current_offset = vec_eoff[i];
  4080. //second part
  4081. ///the matched part is the subject
  4082. //~ Pcre2Sptr subject = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str();
  4083. //substr(vec_soff[i], vec_eoff[i] - vec_soff[i]).c_str();
  4084. Pcre2Sptr subject = r_subject_ptr + vec_soff[i];
  4085. PCRE2_SIZE subject_length = vec_eoff[i] - vec_soff[i];
  4086. ///the string returned from the callback is the replacement string.
  4087. Pcre2Sptr replace = (Pcre2Sptr) tmp.c_str();
  4088. PCRE2_SIZE replace_length = tmp.length();
  4089. bool retry = true;
  4090. int ret = 0;
  4091. PCRE2_SIZE outlengthptr = 0;
  4092. Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4093. while (true) {
  4094. ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::substitute(
  4095. re->code, /*Points to the compiled pattern*/
  4096. subject, /*Points to the subject string*/
  4097. subject_length, /*Length of the subject string*/
  4098. 0, /*Offset in the subject at which to start matching*/ //must be zero
  4099. replace_opts, /*Option bits*/
  4100. RegexMatch::mdata, /*Points to a match data block, or is NULL*/
  4101. RegexMatch::mcontext, /*Points to a match context, or is NULL*/
  4102. replace, /*Points to the replacement string*/
  4103. replace_length, /*Length of the replacement string*/
  4104. output_buffer, /*Points to the output buffer*/
  4105. &outlengthptr /*Points to the length of the output buffer*/
  4106. );
  4107. if (ret < 0) {
  4108. //Handle errors
  4109. if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0
  4110. && ret == (int) PCRE2_ERROR_NOMEMORY && retry) {
  4111. retry = false;
  4112. /// If initial #buffer_size wasn't big enough for resultant string,
  4113. /// we will try once more with a new buffer size adjusted to the length of the resultant string.
  4114. delete[] output_buffer;
  4115. output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4116. // Go and try to perform the substitute again
  4117. continue;
  4118. } else {
  4119. RegexMatch::error_number = ret;
  4120. delete[] output_buffer;
  4121. return RegexMatch::getSubject();
  4122. }
  4123. }
  4124. //If everything's ok exit the loop
  4125. break;
  4126. }
  4127. res += String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) );
  4128. delete[] output_buffer;
  4129. if(counter) *counter += ret;
  4130. //if FIND_ALL is not set, single match will be performed
  4131. if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break;
  4132. }
  4133. //All matched parts have been dealt with.
  4134. //now copy rest of the string from current_offset
  4135. res += RegexMatch::getSubject().substr(current_offset, String::npos);
  4136. return res;
  4137. }
  4138. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  4139. template<typename Char_T, template<typename...> class Map>
  4140. typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){
  4141. #else
  4142. template<typename Char_T>
  4143. typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){
  4144. #endif
  4145. if(counter) *counter = 0;
  4146. if(do_match) match();
  4147. SIZE_T mcount = vec_soff.size();
  4148. // if mcount is 0, return the subject string. (there's no need to worry about re)
  4149. if(!mcount) return RegexMatch::getSubject();
  4150. SIZE_T current_offset = 0; //no need for worrying about start offset, it's handled by match and we get valid offsets out of it.
  4151. String res;
  4152. //A check, this check is not fullproof
  4153. SIZE_T last = vec_eoff.size();
  4154. last = (last>0)?last-1:0;
  4155. JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\
  4156. If you are using existing match data, try a new match.");
  4157. //loop through the matches
  4158. for(SIZE_T i=0;i<mcount;++i){
  4159. //first copy the unmatched part.
  4160. //Matches that use \K to end before they start are not supported.
  4161. if(vec_soff[i] < current_offset){
  4162. RegexMatch::error_number = PCRE2_ERROR_BADSUBSPATTERN;
  4163. return RegexMatch::getSubject();
  4164. } else {
  4165. res += RegexMatch::getSubject().substr(current_offset, vec_soff[i]-current_offset);
  4166. }
  4167. //now process the matched part
  4168. switch(callbackn){
  4169. case 0: res += callback0((void*)0, (void*)0, (void*)0); break;
  4170. case 1: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount, "VecNum");
  4171. res += callback1(vec_num[i], (void*)0, (void*)0); break;
  4172. case 2: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount, "VecNas");
  4173. res += callback2((void*)0, vec_nas[i], (void*)0); break;
  4174. case 3: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount, "VecNum or VecNas");
  4175. res += callback3(vec_num[i], vec_nas[i], (void*)0); break;
  4176. case 4: JPCRE2_VECTOR_DATA_ASSERT(vec_ntn.size() == mcount, "VecNtn");
  4177. res += callback4((void*)0, (void*)0, vec_ntn[i]); break;
  4178. case 5: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_ntn.size() == mcount, "VecNum or VecNtn");
  4179. res += callback5(vec_num[i], (void*)0, vec_ntn[i]); break;
  4180. case 6: JPCRE2_VECTOR_DATA_ASSERT(vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNas or VecNtn");
  4181. res += callback6((void*)0, vec_nas[i], vec_ntn[i]); break;
  4182. case 7: JPCRE2_VECTOR_DATA_ASSERT(vec_num.size() == mcount && vec_nas.size() == mcount && vec_ntn.size() == mcount, "VecNum\n or VecNas or VecNtn");
  4183. res += callback7(vec_num[i], vec_nas[i], vec_ntn[i]); break;
  4184. default: JPCRE2_ASSERT(2 == 1, "Invalid callbackn. Please file a bug report (must include the line number from below)."); break;
  4185. }
  4186. //reset the current offset
  4187. current_offset = vec_eoff[i];
  4188. if(counter) *counter += 1;
  4189. //if FIND_ALL is not set, single match will be performd
  4190. if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break;
  4191. }
  4192. //All matched parts have been dealt with.
  4193. //now copy rest of the string from current_offset
  4194. res += RegexMatch::getSubject().substr(current_offset, String::npos);
  4195. return res;
  4196. }
  4197. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  4198. template<typename Char_T, template<typename...> class Map>
  4199. typename jpcre2::select<Char_T, Map>::String jpcre2::select<Char_T, Map>::RegexReplace::replace() {
  4200. #else
  4201. template<typename Char_T>
  4202. typename jpcre2::select<Char_T>::String jpcre2::select<Char_T>::RegexReplace::replace() {
  4203. #endif
  4204. *last_replace_counter = 0;
  4205. // If re or re->code is null, return the subject string unmodified.
  4206. if (!re || re->code == 0)
  4207. return *r_subject_ptr;
  4208. Pcre2Sptr subject = (Pcre2Sptr) r_subject_ptr->c_str();
  4209. PCRE2_SIZE subject_length = r_subject_ptr->length();
  4210. Pcre2Sptr replace = (Pcre2Sptr) r_replw_ptr->c_str();
  4211. PCRE2_SIZE replace_length = r_replw_ptr->length();
  4212. PCRE2_SIZE outlengthptr = (PCRE2_SIZE) buffer_size;
  4213. bool retry = true;
  4214. int ret = 0;
  4215. Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4216. while (true) {
  4217. ret = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::substitute(
  4218. re->code, /*Points to the compiled pattern*/
  4219. subject, /*Points to the subject string*/
  4220. subject_length, /*Length of the subject string*/
  4221. _start_offset, /*Offset in the subject at which to start matching*/
  4222. replace_opts, /*Option bits*/
  4223. mdata, /*Points to a match data block, or is NULL*/
  4224. mcontext, /*Points to a match context, or is NULL*/
  4225. replace, /*Points to the replacement string*/
  4226. replace_length, /*Length of the replacement string*/
  4227. output_buffer, /*Points to the output buffer*/
  4228. &outlengthptr /*Points to the length of the output buffer*/
  4229. );
  4230. if (ret < 0) {
  4231. //Handle errors
  4232. if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0
  4233. && ret == (int) PCRE2_ERROR_NOMEMORY && retry) {
  4234. retry = false;
  4235. /// If initial #buffer_size wasn't big enough for resultant string,
  4236. /// we will try once more with a new buffer size adjusted to the length of the resultant string.
  4237. delete[] output_buffer;
  4238. output_buffer = new Pcre2Uchar[outlengthptr + 1]();
  4239. // Go and try to perform the substitute again
  4240. continue;
  4241. } else {
  4242. error_number = ret;
  4243. delete[] output_buffer;
  4244. return *r_subject_ptr;
  4245. }
  4246. }
  4247. //If everything's ok exit the loop
  4248. break;
  4249. }
  4250. *last_replace_counter += ret;
  4251. String result = String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) );
  4252. delete[] output_buffer;
  4253. return result;
  4254. }
  4255. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  4256. template<typename Char_T, template<typename...> class Map>
  4257. bool jpcre2::select<Char_T, Map>::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) {
  4258. #else
  4259. template<typename Char_T>
  4260. bool jpcre2::select<Char_T>::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) {
  4261. #endif
  4262. NumSub num_sub;
  4263. uint32_t rcu = rc;
  4264. num_sub.reserve(rcu); //we know exactly how many elements it will have.
  4265. uint32_t i;
  4266. for (i = 0u; i < ovector_count; i++) {
  4267. if (ovector[2*i] != PCRE2_UNSET)
  4268. num_sub.push_back(String((Char*)(subject + ovector[2*i]), ovector[2*i+1] - ovector[2*i]));
  4269. else
  4270. #ifdef JPCRE2_UNSET_CAPTURES_NULL
  4271. num_sub.push_back(std::nullopt);
  4272. #else
  4273. num_sub.push_back(String());
  4274. #endif
  4275. }
  4276. vec_num->push_back(num_sub); //this function shouldn't be called if this vector is null
  4277. return true;
  4278. }
  4279. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  4280. template<typename Char_T, template<typename...> class Map>
  4281. bool jpcre2::select<Char_T, Map>::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size,
  4282. Pcre2Sptr name_table,
  4283. Pcre2Sptr subject, PCRE2_SIZE* ovector ) {
  4284. #else
  4285. template<typename Char_T>
  4286. bool jpcre2::select<Char_T>::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size,
  4287. Pcre2Sptr name_table,
  4288. Pcre2Sptr subject, PCRE2_SIZE* ovector ) {
  4289. #endif
  4290. Pcre2Sptr tabptr = name_table;
  4291. String key;
  4292. MapNas map_nas;
  4293. MapNtN map_ntn;
  4294. for (int i = 0; i < namecount; i++) {
  4295. int n;
  4296. if(sizeof( Char_T ) * CHAR_BIT == 8){
  4297. n = (int)((tabptr[0] << 8) | tabptr[1]);
  4298. key = toString((Char*) (tabptr + 2));
  4299. }
  4300. else{
  4301. n = (int)tabptr[0];
  4302. key = toString((Char*) (tabptr + 1));
  4303. }
  4304. //Use of tabptr is finished for this iteration, let's increment it now.
  4305. tabptr += name_entry_size;
  4306. String value((Char*)(subject + ovector[2*n]), ovector[2*n+1] - ovector[2*n]); //n, not i.
  4307. if(vec_nas) map_nas[key] = value;
  4308. if(vec_ntn) map_ntn[key] = n;
  4309. }
  4310. //push the maps into vectors:
  4311. if(vec_nas) vec_nas->push_back(map_nas);
  4312. if(vec_ntn) vec_ntn->push_back(map_ntn);
  4313. return true;
  4314. }
  4315. #ifdef JPCRE2_USE_MINIMUM_CXX_11
  4316. template<typename Char_T, template<typename...> class Map>
  4317. jpcre2::SIZE_T jpcre2::select<Char_T, Map>::RegexMatch::match() {
  4318. #else
  4319. template<typename Char_T>
  4320. jpcre2::SIZE_T jpcre2::select<Char_T>::RegexMatch::match() {
  4321. #endif
  4322. // If re or re->code is null, return 0 as the match count
  4323. if (!re || re->code == 0)
  4324. return 0;
  4325. Pcre2Sptr subject = (Pcre2Sptr) m_subject_ptr->c_str();
  4326. Pcre2Sptr name_table = 0;
  4327. int crlf_is_newline = 0;
  4328. int namecount = 0;
  4329. int name_entry_size = 0;
  4330. int rc = 0;
  4331. uint32_t ovector_count = 0;
  4332. int utf = 0;
  4333. SIZE_T count = 0;
  4334. Uint option_bits;
  4335. Uint newline = 0;
  4336. PCRE2_SIZE *ovector = 0;
  4337. SIZE_T subject_length = 0;
  4338. MatchData *match_data = 0;
  4339. subject_length = m_subject_ptr->length();
  4340. bool mdc = false; //mdata created.
  4341. if (vec_num) vec_num->clear();
  4342. if (vec_nas) vec_nas->clear();
  4343. if (vec_ntn) vec_ntn->clear();
  4344. if(vec_soff) vec_soff->clear();
  4345. if(vec_eoff) vec_eoff->clear();
  4346. /* Using this function ensures that the block is exactly the right size for
  4347. the number of capturing parentheses in the pattern. */
  4348. if(mdata) match_data = mdata;
  4349. else {
  4350. match_data = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_create_from_pattern(re->code, 0);
  4351. mdc = true;
  4352. }
  4353. rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match( re->code, /* the compiled pattern */
  4354. subject, /* the subject string */
  4355. subject_length, /* the length of the subject */
  4356. _start_offset, /* start at offset 'start_offset' in the subject */
  4357. match_opts, /* default options */
  4358. match_data, /* block for storing the result */
  4359. mcontext); /* use default match context */
  4360. /* Matching failed: handle error cases */
  4361. if (rc < 0) {
  4362. if(mdc)
  4363. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data); /* Release memory used for the match */
  4364. //must not free code. This function has no right to modify regex
  4365. switch (rc) {
  4366. case PCRE2_ERROR_NOMATCH:
  4367. return count;
  4368. /*
  4369. Handle other special cases if you like
  4370. */
  4371. default:;
  4372. }
  4373. error_number = rc;
  4374. return count;
  4375. }
  4376. ++count; //Increment the counter
  4377. /* Match succeded. Get a pointer to the output vector, where string offsets are
  4378. stored. */
  4379. ovector = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_pointer(match_data);
  4380. ovector_count = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::get_ovector_count(match_data);
  4381. /************************************************************************//*
  4382. * We have found the first match within the subject string. If the output *
  4383. * vector wasn't big enough, say so. Then output any substrings that were *
  4384. * captured. *
  4385. *************************************************************************/
  4386. /* The output vector wasn't big enough. This should not happen, because we used
  4387. pcre2_match_data_create_from_pattern() above. */
  4388. if (rc == 0) {
  4389. //ovector was not big enough for all the captured substrings;
  4390. error_number = (int)ERROR::INSUFFICIENT_OVECTOR;
  4391. rc = ovector_count;
  4392. // TODO: We may throw exception at this point.
  4393. }
  4394. //match succeeded at offset ovector[0]
  4395. if(vec_soff) vec_soff->push_back(ovector[0]);
  4396. if(vec_eoff) vec_eoff->push_back(ovector[1]);
  4397. // Get numbered substrings if vec_num isn't null
  4398. if (vec_num) { //must do null check
  4399. if(!getNumberedSubstrings(rc, subject, ovector, ovector_count))
  4400. return count;
  4401. }
  4402. //get named substrings if either vec_nas or vec_ntn is given.
  4403. if (vec_nas || vec_ntn) {
  4404. /* See if there are any named substrings, and if so, show them by name. First
  4405. we have to extract the count of named parentheses from the pattern. */
  4406. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code, /* the compiled pattern */
  4407. PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
  4408. &namecount); /* where to put the answer */
  4409. if (namecount <= 0); /*No named substrings*/
  4410. else {
  4411. /* Before we can access the substrings, we must extract the table for
  4412. translating names to numbers, and the size of each entry in the table. */
  4413. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code, /* the compiled pattern */
  4414. PCRE2_INFO_NAMETABLE, /* address of the table */
  4415. &name_table); /* where to put the answer */
  4416. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info( re->code, /* the compiled pattern */
  4417. PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
  4418. &name_entry_size); /* where to put the answer */
  4419. /* Now we can scan the table and, for each entry, print the number, the name,
  4420. and the substring itself. In the 8-bit library the number is held in two
  4421. bytes, most significant first. */
  4422. // Get named substrings if vec_nas isn't null.
  4423. // Get name to number map if vec_ntn isn't null.
  4424. }
  4425. //the following must be outside the above if-else
  4426. if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector))
  4427. return count;
  4428. }
  4429. /***********************************************************************//*
  4430. * If the "g" modifier was given, we want to continue *
  4431. * to search for additional matches in the subject string, in a similar *
  4432. * way to the /g option in Perl. This turns out to be trickier than you *
  4433. * might think because of the possibility of matching an empty string. *
  4434. * What happens is as follows: *
  4435. * *
  4436. * If the previous match was NOT for an empty string, we can just start *
  4437. * the next match at the end of the previous one. *
  4438. * *
  4439. * If the previous match WAS for an empty string, we can't do that, as it *
  4440. * would lead to an infinite loop. Instead, a call of pcre2_match() is *
  4441. * made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
  4442. * first of these tells PCRE2 that an empty string at the start of the *
  4443. * subject is not a valid match; other possibilities must be tried. The *
  4444. * second flag restricts PCRE2 to one match attempt at the initial string *
  4445. * position. If this match succeeds, an alternative to the empty string *
  4446. * match has been found, and we can print it and proceed round the loop, *
  4447. * advancing by the length of whatever was found. If this match does not *
  4448. * succeed, we still stay in the loop, advancing by just one character. *
  4449. * In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be *
  4450. * more than one byte. *
  4451. * *
  4452. * However, there is a complication concerned with newlines. When the *
  4453. * newline convention is such that CRLF is a valid newline, we must *
  4454. * advance by two characters rather than one. The newline convention can *
  4455. * be set in the regex by (*CR), etc.; if not, we must find the default. *
  4456. *************************************************************************/
  4457. if ((jpcre2_match_opts & FIND_ALL) == 0) {
  4458. if(mdc)
  4459. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data); /* Release the memory that was used */
  4460. // Must not free code. This function has no right to modify regex.
  4461. return count; /* Exit the program. */
  4462. }
  4463. /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
  4464. sequence. First, find the options with which the regex was compiled and extract
  4465. the UTF state. */
  4466. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(re->code, PCRE2_INFO_ALLOPTIONS, &option_bits);
  4467. utf = ((option_bits & PCRE2_UTF) != 0);
  4468. /* Now find the newline convention and see whether CRLF is a valid newline
  4469. sequence. */
  4470. (void) Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::pattern_info(re->code, PCRE2_INFO_NEWLINE, &newline);
  4471. crlf_is_newline = newline == PCRE2_NEWLINE_ANY
  4472. || newline == PCRE2_NEWLINE_CRLF
  4473. || newline == PCRE2_NEWLINE_ANYCRLF;
  4474. /** We got the first match. Now loop for second and subsequent matches. */
  4475. for (;;) {
  4476. Uint options = match_opts; /* Normally no options */
  4477. PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
  4478. /* If the previous match was for an empty string, we are finished if we are
  4479. at the end of the subject. Otherwise, arrange to run another match at the
  4480. same point to see if a non-empty match can be found. */
  4481. if (ovector[0] == ovector[1]) {
  4482. if (ovector[0] == subject_length)
  4483. break;
  4484. options |= PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
  4485. }
  4486. /// Run the next matching operation */
  4487. rc = Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match( re->code, /* the compiled pattern */
  4488. subject, /* the subject string */
  4489. subject_length, /* the length of the subject */
  4490. start_offset, /* starting offset in the subject */
  4491. options, /* options */
  4492. match_data, /* block for storing the result */
  4493. mcontext); /* use match context */
  4494. /* This time, a result of NOMATCH isn't an error. If the value in "options"
  4495. is zero, it just means we have found all possible matches, so the loop ends.
  4496. Otherwise, it means we have failed to find a non-empty-string match at a
  4497. point where there was a previous empty-string match. In this case, we do what
  4498. Perl does: advance the matching position by one character, and continue. We
  4499. do this by setting the "end of previous match" offset, because that is picked
  4500. up at the top of the loop as the point at which to start again.
  4501. There are two complications: (a) When CRLF is a valid newline sequence, and
  4502. the current position is just before it, advance by an extra byte. (b)
  4503. Otherwise we must ensure that we skip an entire UTF character if we are in
  4504. UTF mode. */
  4505. if (rc == PCRE2_ERROR_NOMATCH) {
  4506. if (options == 0)
  4507. break; /* All matches found */
  4508. ovector[1] = start_offset + 1; /* Advance one code unit */
  4509. if (crlf_is_newline && /* If CRLF is newline & */
  4510. start_offset < subject_length - 1 && /* we are at CRLF, */
  4511. subject[start_offset] == '\r' && subject[start_offset + 1] == '\n')
  4512. ovector[1] += 1; /* Advance by one more. */
  4513. else if (utf) { /* advance a whole UTF (8 or 16), for UTF-32, it's not needed */
  4514. while (ovector[1] < subject_length) {
  4515. if(sizeof( Char_T ) * CHAR_BIT == 8 && (subject[ovector[1]] & 0xc0) != 0x80) break;
  4516. else if(sizeof( Char_T ) * CHAR_BIT == 16 && (subject[ovector[1]] & 0xfc00) != 0xdc00) break;
  4517. else if(sizeof( Char_T ) * CHAR_BIT == 32) break; //must be else if
  4518. ovector[1] += 1;
  4519. }
  4520. }
  4521. continue; /* Go round the loop again */
  4522. }
  4523. /* Other matching errors are not recoverable. */
  4524. if (rc < 0) {
  4525. if(mdc)
  4526. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data);
  4527. // Must not free code. This function has no right to modify regex.
  4528. error_number = rc;
  4529. return count;
  4530. }
  4531. /* match succeeded */
  4532. ++count; //Increment the counter
  4533. if (rc == 0) {
  4534. /* The match succeeded, but the output vector wasn't big enough. This
  4535. should not happen. */
  4536. error_number = (int)ERROR::INSUFFICIENT_OVECTOR;
  4537. rc = ovector_count;
  4538. // TODO: We may throw exception at this point.
  4539. }
  4540. //match succeded at ovector[0]
  4541. if(vec_soff) vec_soff->push_back(ovector[0]);
  4542. if(vec_eoff) vec_eoff->push_back(ovector[1]);
  4543. /* As before, get substrings stored in the output vector by number, and then
  4544. also any named substrings. */
  4545. // Get numbered substrings if vec_num isn't null
  4546. if (vec_num) { //must do null check
  4547. if(!getNumberedSubstrings(rc, subject, ovector, ovector_count))
  4548. return count;
  4549. }
  4550. if (vec_nas || vec_ntn) {
  4551. //must call this whether we have named substrings or not:
  4552. if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector))
  4553. return count;
  4554. }
  4555. } /* End of loop to find second and subsequent matches */
  4556. if(mdc)
  4557. Pcre2Func<sizeof( Char_T ) * CHAR_BIT>::match_data_free(match_data);
  4558. // Must not free code. This function has no right to modify regex.
  4559. return count;
  4560. }
  4561. #undef JPCRE2_VECTOR_DATA_ASSERT
  4562. #undef JPCRE2_UNUSED
  4563. #undef JPCRE2_USE_MINIMUM_CXX_11
  4564. //some macro documentation for doxygen
  4565. #ifdef __DOXYGEN__
  4566. #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  4567. #define JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  4568. #endif
  4569. #ifndef JPCRE2_NDEBUG
  4570. #define JPCRE2_NDEBUG
  4571. #endif
  4572. ///@def JPCRE2_USE_FUNCTION_POINTER_CALLBACK
  4573. ///Use function pointer in all cases for MatchEvaluatorCallback function.
  4574. ///By default function pointer is used for callback in MatchEvaluator when using <C++11 compiler, but for
  4575. ///`>=C++11` compiler `std::function` instead of function pointer is used.
  4576. ///If this macro is defined before including jpcre2.hpp, function pointer will be used in all cases.
  4577. ///If you are using lambda function with captures, stick with `std::function`, on the other hand, if
  4578. ///you are using older compilers, you might want to use function pointer instead.
  4579. ///
  4580. ///For example, with gcc-4.7, `std::function` will give compile error in C++11 mode, in such cases where full C++11
  4581. ///support is not available, use function pointer.
  4582. ///@def JPCRE2_ASSERT(cond, msg)
  4583. ///Macro to call `jpcre2::jassert()` with file path and line number.
  4584. ///When `NDEBUG` or `JPCRE2_NDEBUG` is defined before including this header, this macro will
  4585. ///be defined as `((void)0)` thus eliminating this assertion.
  4586. ///@param cond condition (boolean)
  4587. ///@param msg message
  4588. ///@def JPCRE2_NDEBUG
  4589. ///Macro to remove debug codes.
  4590. ///Using this macro is discouraged even in production mode but provided for completeness.
  4591. ///You should not use this macro to bypass any error in your program.
  4592. ///Define this macro before including this header if you want to remove debug codes included in this library.
  4593. ///
  4594. ///Using the standard `NDEBUG` macro will have the same effect,
  4595. ///but it is recommended that you use `JPCRE2_NDEBUG` to strip out debug codes specifically for this library.
  4596. ///@def JPCRE2_UNSET_CAPTURES_NULL
  4597. ///Define to change the type of NumSub so that captures are recorded
  4598. ///with std::optional. It is undefined by default. This feature requires C++17.
  4599. #endif
  4600. #endif