105-Upgrade-PCRE-to-PCRE2.patch 15 KB

  1. From 32f944b9a06fb2be4cd50da2434f2fd4b4decede Mon Sep 17 00:00:00 2001
  2. From: sbwml <[email protected]>
  3. Date: Thu, 1 Feb 2024 21:21:56 +0800
  4. Subject: [PATCH] Upgrade PCRE to PCRE2
  5. Signed-off-by: sbwml <[email protected]>
  6. ---
  7. configure.ac | 8 +--
  8. m4/pcre.m4 | 152 ------------------------------------------
  9. m4/pcre2.m4 | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++
  10. src/rule.c | 47 ++++++++++---
  11. src/rule.h | 22 +++++--
  12. 5 files changed, 238 insertions(+), 172 deletions(-)
  13. delete mode 100644 m4/pcre.m4
  14. create mode 100644 m4/pcre2.m4
  15. --- a/configure.ac
  16. +++ b/configure.ac
  17. @@ -20,10 +20,10 @@ AC_DISABLE_STATIC
  18. AC_DISABLE_SHARED
  19. LT_INIT([dlopen])
  20. -dnl Check for pcre library
  21. -TS_CHECK_PCRE
  22. -if test "x${enable_pcre}" != "xyes"; then
  23. - AC_MSG_ERROR([Cannot find pcre library. Configure --with-pcre=DIR])
  24. +dnl Check for pcre2 library
  25. +TS_CHECK_PCRE2
  26. +if test "x${enable_pcre2}" != "xyes"; then
  27. + AC_MSG_ERROR([Cannot find pcre2 library. Configure --with-pcre2=DIR])
  28. fi
  29. dnl Checks for using shared libraries from system
  30. --- a/m4/pcre.m4
  31. +++ /dev/null
  32. @@ -1,152 +0,0 @@
  33. -dnl -------------------------------------------------------- -*- autoconf -*-
  34. -dnl Licensed to the Apache Software Foundation (ASF) under one or more
  35. -dnl contributor license agreements. See the NOTICE file distributed with
  36. -dnl this work for additional information regarding copyright ownership.
  37. -dnl The ASF licenses this file to You under the Apache License, Version 2.0
  38. -dnl (the "License"); you may not use this file except in compliance with
  39. -dnl the License. You may obtain a copy of the License at
  40. -dnl
  41. -dnl http://www.apache.org/licenses/LICENSE-2.0
  42. -dnl
  43. -dnl Unless required by applicable law or agreed to in writing, software
  44. -dnl distributed under the License is distributed on an "AS IS" BASIS,
  45. -dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  46. -dnl See the License for the specific language governing permissions and
  47. -dnl limitations under the License.
  48. -
  49. -dnl
  50. -dnl TS_ADDTO(variable, value)
  51. -dnl
  52. -dnl Add value to variable
  53. -dnl
  54. -AC_DEFUN([TS_ADDTO], [
  55. - if test "x$$1" = "x"; then
  56. - test "x$verbose" = "xyes" && echo " setting $1 to \"$2\""
  57. - $1="$2"
  58. - else
  59. - ats_addto_bugger="$2"
  60. - for i in $ats_addto_bugger; do
  61. - ats_addto_duplicate="0"
  62. - for j in $$1; do
  63. - if test "x$i" = "x$j"; then
  64. - ats_addto_duplicate="1"
  65. - break
  66. - fi
  67. - done
  68. - if test $ats_addto_duplicate = "0"; then
  69. - test "x$verbose" = "xyes" && echo " adding \"$i\" to $1"
  70. - $1="$$1 $i"
  71. - fi
  72. - done
  73. - fi
  74. -])dnl
  75. -
  76. -dnl
  77. -dnl TS_ADDTO_RPATH(path)
  78. -dnl
  79. -dnl Adds path to variable with the '-rpath' directive.
  80. -dnl
  81. -AC_DEFUN([TS_ADDTO_RPATH], [
  82. - AC_MSG_NOTICE([adding $1 to RPATH])
  83. - TS_ADDTO(LIBTOOL_LINK_FLAGS, [-R$1])
  84. -])dnl
  85. -
  86. -dnl
  87. -dnl pcre.m4: Trafficserver's pcre autoconf macros
  88. -dnl
  89. -
  90. -dnl
  91. -dnl TS_CHECK_PCRE: look for pcre libraries and headers
  92. -dnl
  93. -AC_DEFUN([TS_CHECK_PCRE], [
  94. -enable_pcre=no
  95. -AC_ARG_WITH(pcre, [AC_HELP_STRING([--with-pcre=DIR],[use a specific pcre library])],
  96. -[
  97. - if test "x$withval" != "xyes" && test "x$withval" != "x"; then
  98. - pcre_base_dir="$withval"
  99. - if test "$withval" != "no"; then
  100. - enable_pcre=yes
  101. - case "$withval" in
  102. - *":"*)
  103. - pcre_include="`echo $withval |sed -e 's/:.*$//'`"
  104. - pcre_ldflags="`echo $withval |sed -e 's/^.*://'`"
  105. - AC_MSG_CHECKING(checking for pcre includes in $pcre_include libs in $pcre_ldflags )
  106. - ;;
  107. - *)
  108. - pcre_include="$withval/include"
  109. - pcre_ldflags="$withval/lib"
  110. - AC_MSG_CHECKING(checking for pcre includes in $withval)
  111. - ;;
  112. - esac
  113. - fi
  114. - fi
  115. -],
  116. -[
  117. - AC_CHECK_PROG(PCRE_CONFIG, pcre-config, pcre-config)
  118. - if test "x$PCRE_CONFIG" != "x"; then
  119. - enable_pcre=yes
  120. - pcre_base_dir="`$PCRE_CONFIG --prefix`"
  121. - pcre_include="`$PCRE_CONFIG --cflags | sed -es/-I//`"
  122. - pcre_ldflags="`$PCRE_CONFIG --libs | sed -es/-lpcre// -es/-L//`"
  123. - fi
  124. -])
  125. -
  126. -if test "x$pcre_base_dir" = "x"; then
  127. - AC_MSG_CHECKING([for pcre location])
  128. - AC_CACHE_VAL(ats_cv_pcre_dir,[
  129. - for dir in /usr/local /usr ; do
  130. - if test -d $dir && ( test -f $dir/include/pcre.h || test -f $dir/include/pcre/pcre.h ); then
  131. - ats_cv_pcre_dir=$dir
  132. - break
  133. - fi
  134. - done
  135. - ])
  136. - pcre_base_dir=$ats_cv_pcre_dir
  137. - if test "x$pcre_base_dir" = "x"; then
  138. - enable_pcre=no
  139. - AC_MSG_RESULT([not found])
  140. - else
  141. - enable_pcre=yes
  142. - pcre_include="$pcre_base_dir/include"
  143. - pcre_ldflags="$pcre_base_dir/lib"
  144. - AC_MSG_RESULT([$pcre_base_dir])
  145. - fi
  146. -else
  147. - AC_MSG_CHECKING(for pcre headers in $pcre_include)
  148. - if test -d $pcre_include && test -d $pcre_ldflags && ( test -f $pcre_include/pcre.h || test -f $pcre_include/pcre/pcre.h ); then
  149. - AC_MSG_RESULT([ok])
  150. - else
  151. - AC_MSG_RESULT([not found])
  152. - fi
  153. -fi
  154. -
  155. -pcreh=0
  156. -pcre_pcreh=0
  157. -if test "$enable_pcre" != "no"; then
  158. - saved_ldflags=$LDFLAGS
  159. - saved_cppflags=$CFLAGS
  160. - pcre_have_headers=0
  161. - pcre_have_libs=0
  162. - if test "$pcre_base_dir" != "/usr"; then
  163. - TS_ADDTO(CFLAGS, [-I${pcre_include}])
  164. - TS_ADDTO(CFLAGS, [-DPCRE_STATIC])
  165. - TS_ADDTO(LDFLAGS, [-L${pcre_ldflags}])
  166. - TS_ADDTO_RPATH(${pcre_ldflags})
  167. - fi
  168. - AC_SEARCH_LIBS([pcre_exec], [pcre], [pcre_have_libs=1])
  169. - if test "$pcre_have_libs" != "0"; then
  170. - AC_CHECK_HEADERS(pcre.h, [pcre_have_headers=1])
  171. - AC_CHECK_HEADERS(pcre/pcre.h, [pcre_have_headers=1])
  172. - fi
  173. - if test "$pcre_have_headers" != "0"; then
  174. - AC_DEFINE(HAVE_LIBPCRE,1,[Compiling with pcre support])
  175. - AC_SUBST(LIBPCRE, [-lpcre])
  176. - else
  177. - enable_pcre=no
  178. - CFLAGS=$saved_cppflags
  179. - LDFLAGS=$saved_ldflags
  180. - fi
  181. -fi
  182. -AC_SUBST(pcreh)
  183. -AC_SUBST(pcre_pcreh)
  184. -])
  185. --- /dev/null
  186. +++ b/m4/pcre2.m4
  187. @@ -0,0 +1,181 @@
  188. +dnl -------------------------------------------------------- -*- autoconf -*-
  189. +dnl Licensed to the Apache Software Foundation (ASF) under one or more
  190. +dnl contributor license agreements. See the NOTICE file distributed with
  191. +dnl this work for additional information regarding copyright ownership.
  192. +dnl The ASF licenses this file to You under the Apache License, Version 2.0
  193. +dnl (the "License"); you may not use this file except in compliance with
  194. +dnl the License. You may obtain a copy of the License at
  195. +dnl
  196. +dnl http://www.apache.org/licenses/LICENSE-2.0
  197. +dnl
  198. +dnl Unless required by applicable law or agreed to in writing, software
  199. +dnl distributed under the License is distributed on an "AS IS" BASIS,
  200. +dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  201. +dnl See the License for the specific language governing permissions and
  202. +dnl limitations under the License.
  203. +
  204. +dnl Modified by Syrone Wong <[email protected]> to support pcre2 8bit variant only
  205. +
  206. +dnl
  207. +dnl TS_ADDTO(variable, value)
  208. +dnl
  209. +dnl Set variable to value if it is empty; otherwise append each word of value not already in it
  210. +dnl
  211. +AC_DEFUN([TS_ADDTO], [
  212. + if test "x$$1" = "x"; then
  213. + test "x$verbose" = "xyes" && echo " setting $1 to \"$2\""
  214. + $1="$2"
  215. + else
  216. + ats_addto_bugger="$2"
  217. + for i in $ats_addto_bugger; do
  218. + ats_addto_duplicate="0"
  219. + for j in $$1; do
  220. + if test "x$i" = "x$j"; then
  221. + ats_addto_duplicate="1"
  222. + break
  223. + fi
  224. + done
  225. + if test $ats_addto_duplicate = "0"; then
  226. + test "x$verbose" = "xyes" && echo " adding \"$i\" to $1"
  227. + $1="$$1 $i"
  228. + fi
  229. + done
  230. + fi
  231. +])dnl
  232. +
  233. +dnl
  234. +dnl TS_ADDTO_RPATH(path)
  235. +dnl
  236. +dnl Prints a notice and adds '-R<path>' to LIBTOOL_LINK_FLAGS via TS_ADDTO.
  237. +dnl
  238. +AC_DEFUN([TS_ADDTO_RPATH], [
  239. + AC_MSG_NOTICE([adding $1 to RPATH])
  240. + TS_ADDTO(LIBTOOL_LINK_FLAGS, [-R$1])
  241. +])dnl
  242. +
  243. +dnl
  244. +dnl pcre2.m4: Trafficserver's pcre2 autoconf macros
  245. +dnl
  246. +
  247. +dnl
  248. +dnl TS_CHECK_PCRE2: look for the 8-bit pcre2 library and headers; leaves enable_pcre2 set to yes or no
  249. +dnl
  250. +AC_DEFUN([TS_CHECK_PCRE2], [
  251. +enable_pcre2=no
  252. +AC_ARG_WITH(pcre2, [AS_HELP_STRING([--with-pcre2=DIR],[use a specific pcre2 library])],
  253. +[
  254. + if test "x$withval" != "xyes" && test "x$withval" != "x"; then
  255. + pcre2_base_dir="$withval"
  256. + if test "$withval" != "no"; then
  257. + enable_pcre2=yes
  258. + case "$withval" in
  259. + *":"*)
  260. + pcre2_include="`echo $withval |sed -e 's/:.*$//'`"
  261. + pcre2_ldflags="`echo $withval |sed -e 's/^.*://'`"
  262. + AC_MSG_NOTICE([using pcre2 includes in $pcre2_include, libs in $pcre2_ldflags])
  263. + ;;
  264. + *)
  265. + pcre2_include="$withval/include"
  266. + pcre2_ldflags="$withval/lib"
  267. + AC_MSG_NOTICE([using pcre2 installation in $withval])
  268. + ;;
  269. + esac
  270. + fi
  271. + fi
  272. +],
  273. +[
  274. + AC_CHECK_PROG(PCRE2_CONFIG, pcre2-config, pcre2-config)
  275. + if test "x$PCRE2_CONFIG" != "x"; then
  276. + enable_pcre2=yes
  277. + pcre2_base_dir="`$PCRE2_CONFIG --prefix`"
  278. + pcre2_include="`$PCRE2_CONFIG --cflags | sed -es/-I//`"
  279. + pcre2_ldflags="`$PCRE2_CONFIG --libs8 | sed -es/-lpcre2-8// -es/-L//`"
  280. + fi
  281. +])
  282. +
  283. +if test "x$pcre2_base_dir" = "x"; then
  284. + AC_MSG_CHECKING([for pcre2 location])
  285. + AC_CACHE_VAL(ats_cv_pcre2_dir,[
  286. + for dir in /usr/local /usr ; do
  287. + if test -d $dir && ( test -f $dir/include/pcre2.h || test -f $dir/include/pcre2/pcre2.h ); then
  288. + ats_cv_pcre2_dir=$dir
  289. + break
  290. + fi
  291. + done
  292. + ])
  293. + pcre2_base_dir=$ats_cv_pcre2_dir
  294. + if test "x$pcre2_base_dir" = "x"; then
  295. + enable_pcre2=no
  296. + AC_MSG_RESULT([not found])
  297. + else
  298. + enable_pcre2=yes
  299. + pcre2_include="$pcre2_base_dir/include"
  300. + pcre2_ldflags="$pcre2_base_dir/lib"
  301. + AC_MSG_RESULT([$pcre2_base_dir])
  302. + fi
  303. +else
  304. + AC_MSG_CHECKING(for pcre2 headers in $pcre2_include)
  305. + if test -d $pcre2_include && test -d $pcre2_ldflags && ( test -f $pcre2_include/pcre2.h || test -f $pcre2_include/pcre2/pcre2.h ); then
  306. + AC_MSG_RESULT([ok])
  307. + else
  308. + AC_MSG_RESULT([not found])
  309. + fi
  310. +fi
  311. +
  312. +pcre2h=0
  313. +pcre2_pcre2h=0
  314. +if test "$enable_pcre2" != "no"; then
  315. + saved_ldflags=$LDFLAGS
  316. + saved_cflags=$CFLAGS
  317. + pcre2_have_headers=0
  318. + pcre2_have_libs=0
  319. + if test "$pcre2_base_dir" != "/usr"; then
  320. + TS_ADDTO(CFLAGS, [-I${pcre2_include}])
  321. + TS_ADDTO(CFLAGS, [-DPCRE2_STATIC])
  322. + TS_ADDTO(LDFLAGS, [-L${pcre2_ldflags}])
  323. + TS_ADDTO_RPATH(${pcre2_ldflags})
  324. + fi
  325. + AC_SEARCH_LIBS([pcre2_match_8], [pcre2-8], [pcre2_have_libs=1])
  326. + if test "$pcre2_have_libs" != "0"; then
  327. + AC_MSG_CHECKING([pcre2.h])
  328. + AC_COMPILE_IFELSE(
  329. + [AC_LANG_PROGRAM(
  330. + [[
  331. +#define PCRE2_CODE_UNIT_WIDTH 8
  332. +#include <pcre2.h>
  333. + ]],
  334. + [[
  335. + ]]
  336. + )],
  337. + [pcre2_have_headers=1
  338. + AC_MSG_RESULT([ok])],
  339. + [AC_MSG_RESULT([not found])]
  340. + )
  341. +
  342. + AC_MSG_CHECKING([pcre2/pcre2.h])
  343. + AC_COMPILE_IFELSE(
  344. + [AC_LANG_PROGRAM(
  345. + [[
  346. +#define PCRE2_CODE_UNIT_WIDTH 8
  347. +#include <pcre2/pcre2.h>
  348. + ]],
  349. + [[
  350. + ]]
  351. + )],
  352. + [pcre2_have_headers=1
  353. + AC_MSG_RESULT([ok])],
  354. + [AC_MSG_RESULT([not found])]
  355. + )
  356. + fi
  357. + if test "$pcre2_have_headers" != "0"; then
  358. + AC_DEFINE(HAVE_LIBPCRE2,1,[Compiling with pcre2 support])
  359. + AC_SUBST(LIBPCRE2, [-lpcre2-8])
  360. + else
  361. + enable_pcre2=no
  362. + CFLAGS=$saved_cflags
  363. + LDFLAGS=$saved_ldflags
  364. + fi
  365. +fi
  366. +AC_SUBST(pcre2h)
  367. +AC_SUBST(pcre2_pcre2h)
  368. +])
  369. --- a/src/rule.c
  370. +++ b/src/rule.c
  371. @@ -82,14 +82,28 @@ int
  372. init_rule(rule_t *rule)
  373. {
  374. if (rule->pattern_re == NULL) {
  375. - const char *reerr;
  376. - int reerroffset;
  377. + int errornumber;
  378. + PCRE2_SIZE erroroffset;
  379. + rule->pattern_re = pcre2_compile(
  380. + (PCRE2_SPTR)rule->pattern, /* the pattern */
  381. + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
  382. + 0, /* default options */
  383. + &errornumber, /* for error number */
  384. + &erroroffset, /* for error offset */
  385. + NULL); /* use default compile context */
  386. - rule->pattern_re =
  387. - pcre_compile(rule->pattern, 0, &reerr, &reerroffset, NULL);
  388. if (rule->pattern_re == NULL) {
  389. - LOGE("Regex compilation of \"%s\" failed: %s, offset %d",
  390. - rule->pattern, reerr, reerroffset);
  391. + PCRE2_UCHAR errbuffer[512]; /* receives the text for errornumber */
  392. + pcre2_get_error_message(errornumber, errbuffer, sizeof(errbuffer));
  393. + LOGE("Regex compilation of \"%s\" failed at offset %lu: %s",
  394. + rule->pattern, (unsigned long)erroroffset, (const char *)errbuffer);
  395. + return 0;
  396. + }
  397. + rule->pattern_re_match_data = pcre2_match_data_create_from_pattern(rule->pattern_re, NULL);
  398. + if (rule->pattern_re_match_data == NULL) {
  399. + ERROR("PCRE2: the memory for the block could not be obtained");
  400. + pcre2_code_free(rule->pattern_re); /* drop the pattern so a later init_rule() retries instead of skipping */
  401. + rule->pattern_re = NULL;
  402. return 0;
  403. }
  404. }
  405. @@ -109,8 +123,15 @@ lookup_rule(const struct cork_dllist *ru
  406. cork_dllist_foreach_void(rules, curr, next) {
  407. rule_t *rule = cork_container_of(curr, rule_t, entries);
  408. - if (pcre_exec(rule->pattern_re, NULL,
  409. - name, name_len, 0, 0, NULL, 0) >= 0)
  410. + if (pcre2_match(
  411. + rule->pattern_re, /* the compiled pattern */
  412. + (PCRE2_SPTR)name, /* the subject string */
  413. + name_len, /* the length of the subject */
  414. + 0, /* start at offset 0 in the subject */
  415. + 0, /* default options */
  416. + rule->pattern_re_match_data, /* block for storing the result */
  417. + NULL /* use default match context */
  418. + ) >= 0)
  419. return rule;
  420. }
  421. @@ -131,7 +152,13 @@ free_rule(rule_t *rule)
  422. return;
  423. ss_free(rule->pattern);
  424. - if (rule->pattern_re != NULL)
  425. - pcre_free(rule->pattern_re);
  426. + if (rule->pattern_re != NULL) {
  427. + pcre2_code_free(rule->pattern_re); /* release the compiled pattern */
  428. + rule->pattern_re = NULL;
  429. + }
  430. + if (rule->pattern_re_match_data != NULL) {
  431. + pcre2_match_data_free(rule->pattern_re_match_data); /* release the match data block */
  432. + rule->pattern_re_match_data = NULL;
  433. + }
  434. ss_free(rule);
  435. }
  436. --- a/src/rule.h
  437. +++ b/src/rule.h
  438. @@ -33,17 +33,27 @@
  439. #include <libcork/ds.h>
  440. -#ifdef HAVE_PCRE_H
  441. -#include <pcre.h>
  442. -#elif HAVE_PCRE_PCRE_H
  443. -#include <pcre/pcre.h>
  444. -#endif
  445. +/*
  446. + * The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
  447. + * For a program that uses only one code unit width, setting it to 8, 16, or 32
  448. + * makes it possible to use generic function names such as pcre2_compile()
  449. + * instead of the width-suffixed ones.
  450. + *
  451. + * Rule patterns are stored as plain char strings (see rule_t.pattern), so the
  452. + * 8-bit variant is selected here; it is also the only variant m4/pcre2.m4
  453. + * checks for and links against (-lpcre2-8).
  454. + */
  455. +/* patterns are byte strings, so the 8-bit code unit width is all that is needed */
  456. +#define PCRE2_CODE_UNIT_WIDTH 8
  457. +
  458. +#include <pcre2.h>
  459. typedef struct rule {
  460. char *pattern;
  461. /* Runtime fields */
  462. - pcre *pattern_re;
  463. + pcre2_code *pattern_re; /* compiled form of pattern, set by init_rule() */
  464. + pcre2_match_data *pattern_re_match_data; /* match block created from pattern_re, passed to pcre2_match() */
  465. struct cork_dllist_item entries;
  466. } rule_t;