RegularExpression.hxx.in 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
  3. // Original Copyright notice:
  4. // Copyright (C) 1991 Texas Instruments Incorporated.
  5. //
  6. // Permission is granted to any individual or institution to use, copy, modify,
  7. // and distribute this software, provided that this complete copyright and
  8. // permission notice is maintained, intact, in all copies and supporting
  9. // documentation.
  10. //
  11. // Texas Instruments Incorporated provides this software "as is" without
  12. // express or implied warranty.
  13. //
  14. // Created: MNF 06/13/89 Initial Design and Implementation
  15. // Updated: LGO 08/09/89 Inherit from Generic
  16. // Updated: MBN 09/07/89 Added conditional exception handling
  17. // Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place!
  18. // Updated: DLS 03/22/91 New lite version
  19. //
  20. #ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx
  21. #define @KWSYS_NAMESPACE@_RegularExpression_hxx
  22. #include <@KWSYS_NAMESPACE@/Configure.h>
  23. #include <@KWSYS_NAMESPACE@/Configure.hxx>
  24. #include <string>
  25. /* Disable useless Borland warnings. KWSys tries not to force things
  26. on its includers, but there is no choice here. */
  27. #if defined(__BORLANDC__)
  28. #pragma warn - 8027 /* function not inlined. */
  29. #endif
  30. namespace @KWSYS_NAMESPACE@ {
  31. /** \class RegularExpression
  32. * \brief Implements pattern matching with regular expressions.
  33. *
  34. * This is the header file for the regular expression class. An object of
  35. * this class contains a regular expression, in a special "compiled" format.
  36. * This compiled format consists of several slots all kept as the objects
  37. * private data. The RegularExpression class provides a convenient way to
  38. * represent regular expressions. It makes it easy to search for the same
  39. * regular expression in many different strings without having to compile a
  40. * string to regular expression format more than necessary.
  41. *
  42. * This class implements pattern matching via regular expressions.
  43. * A regular expression allows a programmer to specify complex
  44. * patterns that can be searched for and matched against the
  45. * character string of a string object. In its simplest form, a
  46. * regular expression is a sequence of characters used to
  47. * search for exact character matches. However, many times the
  48. * exact sequence to be found is not known, or only a match at
   * the beginning or end of a string is desired. The RegularExpression
   * class implements regular expression pattern matching as it is
   * found and implemented in many UNIX commands and utilities.
  53. *
  54. * Example: The perl code
  55. *
  56. * $filename =~ m"([a-z]+)\.cc";
  57. * print $1;
  58. *
  59. * Is written as follows in C++
  60. *
  61. * RegularExpression re("([a-z]+)\\.cc");
  62. * re.find(filename);
  63. * cerr << re.match(1);
  64. *
  65. *
   * The regular expression class provides a convenient mechanism
   * for specifying and manipulating regular expressions. The
   * regular expression object allows specification of such patterns
   * by using the following regular expression metacharacters:
  71. *
  72. * ^ Matches at beginning of a line
  73. *
  74. * $ Matches at end of a line
  75. *
  76. * . Matches any single character
  77. *
  78. * [ ] Matches any character(s) inside the brackets
  79. *
  80. * [^ ] Matches any character(s) not inside the brackets
  81. *
  82. * - Matches any character in range on either side of a dash
  83. *
  84. * * Matches preceding pattern zero or more times
  85. *
  86. * + Matches preceding pattern one or more times
  87. *
  88. * ? Matches preceding pattern zero or once only
  89. *
  90. * () Saves a matched expression and uses it in a later match
  91. *
   * Note that more than one of these metacharacters can be used
   * in a single regular expression in order to create complex
   * search patterns. For example, the pattern [^ab1-9] says to
   * match any single character that is not an "a", a "b", or a
   * digit in the range one through nine.
  98. *
   * There are four constructors for RegularExpression. One just creates an
   * empty RegularExpression object. Two others create a RegularExpression
   * object and initialize it with a regular expression that is given in the
   * form of a char* or a std::string. The last takes a reference to a
   * RegularExpression object as an argument and creates an object
   * initialized with the information from the given RegularExpression object.
  105. *
  106. * The find member function finds the first occurrence of the regular
  107. * expression of that object in the string given to find as an argument. Find
  108. * returns a boolean, and if true, mutates the private data appropriately.
  109. * Find sets pointers to the beginning and end of the thing last found, they
  110. * are pointers into the actual string that was searched. The start and end
  111. * member functions return indices into the searched string that correspond
  112. * to the beginning and end pointers respectively. The compile member
  113. * function takes a char* and puts the compiled version of the char* argument
   * into the object's private data fields. The == and != operators only
   * check to see if the compiled regular expression is the same, and the
   * deep_equal function also checks to see if the start and end pointers are
   * the same. The is_valid function returns false if program is set to NULL
   * (i.e. there is no valid compiled expression). The set_invalid function
   * sets the program to NULL (Warning: this deletes the compiled expression).
   * The following examples may help clarify regular expression usage:
  121. *
  122. * * The regular expression "^hello" matches a "hello" only at the
  123. * beginning of a line. It would match "hello there" but not "hi,
  124. * hello there".
  125. *
  126. * * The regular expression "long$" matches a "long" only at the end
  127. * of a line. It would match "so long\0", but not "long ago".
  128. *
  129. * * The regular expression "t..t..g" will match anything that has a
  130. * "t" then any two characters, another "t", any two characters and
  131. * then a "g". It will match "testing", or "test again" but would
  132. * not match "toasting"
  133. *
  134. * * The regular expression "[1-9ab]" matches any number one through
  135. * nine, and the characters "a" and "b". It would match "hello 1"
  136. * or "begin", but would not match "no-match".
  137. *
  138. * * The regular expression "[^1-9ab]" matches any character that is
  139. * not a number one through nine, or an "a" or "b". It would NOT
  140. * match "hello 1" or "begin", but would match "no-match".
  141. *
  142. * * The regular expression "br* " matches something that begins with
  143. * a "b", is followed by zero or more "r"s, and ends in a space. It
  144. * would match "brrrrr ", and "b ", but would not match "brrh ".
  145. *
  146. * * The regular expression "br+ " matches something that begins with
  147. * a "b", is followed by one or more "r"s, and ends in a space. It
  148. * would match "brrrrr ", and "br ", but would not match "b " or
  149. * "brrh ".
  150. *
  151. * * The regular expression "br? " matches something that begins with
  152. * a "b", is followed by zero or one "r"s, and ends in a space. It
  153. * would match "br ", and "b ", but would not match "brrrr " or
  154. * "brrh ".
  155. *
   * * The regular expression "(..p)b" matches something ending with pb
   *   and beginning with whatever the two characters before the first p
   *   encountered in the line were. It would find "repb" in "rep drepa
   *   qrepb". The regular expression "(..p)a" would find "repa qrepb"
   *   in "rep drepa qrepb".
  161. *
   * * The regular expression "d(..p)" matches something ending with p,
   *   beginning with d, and having two characters in between that are
   *   the same as the two characters before the first p encountered in
   *   the line. It would match "drepa qrepb" in "rep drepa qrepb".
  166. *
  167. */
  168. class @KWSYS_NAMESPACE@_EXPORT RegularExpression
  169. {
  170. public:
  171. /**
  172. * Instantiate RegularExpression with program=NULL.
  173. */
  174. inline RegularExpression();
  175. /**
  176. * Instantiate RegularExpression with compiled char*.
  177. */
  178. inline RegularExpression(char const*);
  179. /**
  180. * Instantiate RegularExpression as a copy of another regular expression.
  181. */
  182. RegularExpression(RegularExpression const&);
  183. /**
  184. * Instantiate RegularExpression with compiled string.
  185. */
  186. inline RegularExpression(std::string const&);
  187. /**
  188. * Destructor.
  189. */
  190. inline ~RegularExpression();
  191. /**
  192. * Compile a regular expression into internal code
  193. * for later pattern matching.
  194. */
  195. bool compile(char const*);
  196. /**
  197. * Compile a regular expression into internal code
  198. * for later pattern matching.
  199. */
  200. inline bool compile(std::string const&);
  201. /**
  202. * Matches the regular expression to the given string.
  203. * Returns true if found, and sets start and end indexes accordingly.
  204. */
  205. bool find(char const*);
  206. /**
  207. * Matches the regular expression to the given std string.
  208. * Returns true if found, and sets start and end indexes accordingly.
  209. */
  210. inline bool find(std::string const&);
  211. /**
  212. * Index to start of first find.
  213. */
  214. inline std::string::size_type start() const;
  215. /**
  216. * Index to end of first find.
  217. */
  218. inline std::string::size_type end() const;
  219. /**
  220. * Copy the given regular expression.
  221. */
  222. RegularExpression& operator=(const RegularExpression& rxp);
  223. /**
  224. * Returns true if two regular expressions have the same
  225. * compiled program for pattern matching.
  226. */
  227. bool operator==(RegularExpression const&) const;
  228. /**
  229. * Returns true if two regular expressions have different
  230. * compiled program for pattern matching.
  231. */
  232. inline bool operator!=(RegularExpression const&) const;
  233. /**
  234. * Returns true if have the same compiled regular expressions
  235. * and the same start and end pointers.
  236. */
  237. bool deep_equal(RegularExpression const&) const;
  238. /**
  239. * True if the compiled regexp is valid.
  240. */
  241. inline bool is_valid() const;
  242. /**
  243. * Marks the regular expression as invalid.
  244. */
  245. inline void set_invalid();
  246. /**
  247. * Destructor.
  248. */
  249. // awf added
  250. std::string::size_type start(int n) const;
  251. std::string::size_type end(int n) const;
  252. std::string match(int n) const;
  253. enum
  254. {
  255. NSUBEXP = 10
  256. };
  257. private:
  258. const char* startp[NSUBEXP];
  259. const char* endp[NSUBEXP];
  260. char regstart; // Internal use only
  261. char reganch; // Internal use only
  262. const char* regmust; // Internal use only
  263. std::string::size_type regmlen; // Internal use only
  264. char* program;
  265. int progsize;
  266. const char* searchstring;
  267. };
  268. /**
  269. * Create an empty regular expression.
  270. */
  271. inline RegularExpression::RegularExpression()
  272. {
  273. this->program = 0;
  274. }
  275. /**
  276. * Creates a regular expression from string s, and
  277. * compiles s.
  278. */
  279. inline RegularExpression::RegularExpression(const char* s)
  280. {
  281. this->program = 0;
  282. if (s) {
  283. this->compile(s);
  284. }
  285. }
  286. /**
  287. * Creates a regular expression from string s, and
  288. * compiles s.
  289. */
  290. inline RegularExpression::RegularExpression(const std::string& s)
  291. {
  292. this->program = 0;
  293. this->compile(s);
  294. }
  295. /**
  296. * Destroys and frees space allocated for the regular expression.
  297. */
  298. inline RegularExpression::~RegularExpression()
  299. {
  300. //#ifndef _WIN32
  301. delete[] this->program;
  302. //#endif
  303. }
  304. /**
  305. * Compile a regular expression into internal code
  306. * for later pattern matching.
  307. */
  308. inline bool RegularExpression::compile(std::string const& s)
  309. {
  310. return this->compile(s.c_str());
  311. }
  312. /**
  313. * Matches the regular expression to the given std string.
  314. * Returns true if found, and sets start and end indexes accordingly.
  315. */
  316. inline bool RegularExpression::find(std::string const& s)
  317. {
  318. return this->find(s.c_str());
  319. }
  320. /**
  321. * Set the start position for the regular expression.
  322. */
  323. inline std::string::size_type RegularExpression::start() const
  324. {
  325. return static_cast<std::string::size_type>(this->startp[0] - searchstring);
  326. }
  327. /**
  328. * Returns the start/end index of the last item found.
  329. */
  330. inline std::string::size_type RegularExpression::end() const
  331. {
  332. return static_cast<std::string::size_type>(this->endp[0] - searchstring);
  333. }
  334. /**
  335. * Returns true if two regular expressions have different
  336. * compiled program for pattern matching.
  337. */
  338. inline bool RegularExpression::operator!=(const RegularExpression& r) const
  339. {
  340. return (!(*this == r));
  341. }
  342. /**
  343. * Returns true if a valid regular expression is compiled
  344. * and ready for pattern matching.
  345. */
  346. inline bool RegularExpression::is_valid() const
  347. {
  348. return (this->program != 0);
  349. }
  350. inline void RegularExpression::set_invalid()
  351. {
  352. //#ifndef _WIN32
  353. delete[] this->program;
  354. //#endif
  355. this->program = 0;
  356. }
  357. /**
  358. * Return start index of nth submatch. start(0) is the start of the full match.
  359. */
  360. inline std::string::size_type RegularExpression::start(int n) const
  361. {
  362. return static_cast<std::string::size_type>(this->startp[n] - searchstring);
  363. }
  364. /**
  365. * Return end index of nth submatch. end(0) is the end of the full match.
  366. */
  367. inline std::string::size_type RegularExpression::end(int n) const
  368. {
  369. return static_cast<std::string::size_type>(this->endp[n] - searchstring);
  370. }
  371. /**
  372. * Return nth submatch as a string.
  373. */
  374. inline std::string RegularExpression::match(int n) const
  375. {
  376. if (this->startp[n] == 0) {
  377. return std::string("");
  378. } else {
  379. return std::string(this->startp[n], static_cast<std::string::size_type>(
  380. this->endp[n] - this->startp[n]));
  381. }
  382. }
  383. } // namespace @KWSYS_NAMESPACE@
  384. #endif