RegularExpression.hxx.in 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /*============================================================================
  2. KWSys - Kitware System Library
  3. Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
  4. Distributed under the OSI-approved BSD License (the "License");
  5. see accompanying file Copyright.txt for details.
  6. This software is distributed WITHOUT ANY WARRANTY; without even the
  7. implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  8. See the License for more information.
  9. ============================================================================*/
  10. // Original Copyright notice:
  11. // Copyright (C) 1991 Texas Instruments Incorporated.
  12. //
  13. // Permission is granted to any individual or institution to use, copy, modify,
  14. // and distribute this software, provided that this complete copyright and
  15. // permission notice is maintained, intact, in all copies and supporting
  16. // documentation.
  17. //
  18. // Texas Instruments Incorporated provides this software "as is" without
  19. // express or implied warranty.
  20. //
  21. // Created: MNF 06/13/89 Initial Design and Implementation
  22. // Updated: LGO 08/09/89 Inherit from Generic
  23. // Updated: MBN 09/07/89 Added conditional exception handling
  24. // Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place!
  25. // Updated: DLS 03/22/91 New lite version
  26. //
  27. #ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx
  28. #define @KWSYS_NAMESPACE@_RegularExpression_hxx
  29. #include <@KWSYS_NAMESPACE@/Configure.h>
  30. #include <@KWSYS_NAMESPACE@/Configure.hxx>
  31. #include <@KWSYS_NAMESPACE@/stl/string>
  32. /* Define this macro temporarily to keep the code readable. */
  33. #if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
  34. # define kwsys_stl @KWSYS_NAMESPACE@_stl
  35. #endif
  36. /* Disable useless Borland warnings. KWSys tries not to force things
  37. on its includers, but there is no choice here. */
  38. #if defined(__BORLANDC__)
  39. # pragma warn -8027 /* function not inlined. */
  40. #endif
  41. namespace @KWSYS_NAMESPACE@
  42. {
  43. /** \class RegularExpression
  44. * \brief Implements pattern matching with regular expressions.
  45. *
  46. * This is the header file for the regular expression class. An object of
  47. * this class contains a regular expression, in a special "compiled" format.
  48. * This compiled format consists of several slots all kept as the object's
  49. * private data. The RegularExpression class provides a convenient way to
  50. * represent regular expressions. It makes it easy to search for the same
  51. * regular expression in many different strings without having to compile a
  52. * string to regular expression format more than necessary.
  53. *
  54. * This class implements pattern matching via regular expressions.
  55. * A regular expression allows a programmer to specify complex
  56. * patterns that can be searched for and matched against the
  57. * character string of a string object. In its simplest form, a
  58. * regular expression is a sequence of characters used to
  59. * search for exact character matches. However, many times the
  60. * exact sequence to be found is not known, or only a match at
  61. * the beginning or end of a string is desired. The RegularExpression
  62. * class implements regular expression pattern
  63. * matching as is found and implemented in many UNIX commands
  64. * and utilities.
  65. *
  66. * Example: The perl code
  67. *
  68. * $filename =~ m"([a-z]+)\.cc";
  69. * print $1;
  70. *
  71. * Is written as follows in C++
  72. *
  73. * RegularExpression re("([a-z]+)\\.cc");
  74. * re.find(filename);
  75. * cerr << re.match(1);
  76. *
  77. *
  78. * The regular expression class provides a convenient mechanism
  79. * for specifying and manipulating regular expressions. The
  80. * regular expression object allows specification of such
  81. * patterns by using the following regular expression
  82. * metacharacters:
  83. *
  84. * ^ Matches at beginning of a line
  85. *
  86. * $ Matches at end of a line
  87. *
  88. * . Matches any single character
  89. *
  90. * [ ] Matches any character(s) inside the brackets
  91. *
  92. * [^ ] Matches any character(s) not inside the brackets
  93. *
  94. * - Matches any character in range on either side of a dash
  95. *
  96. * * Matches preceding pattern zero or more times
  97. *
  98. * + Matches preceding pattern one or more times
  99. *
  100. * ? Matches preceding pattern zero or once only
  101. *
  102. * () Saves a matched expression and uses it in a later match
  103. *
  104. * Note that more than one of these metacharacters can be used
  105. * in a single regular expression in order to create complex
  106. * search patterns. For example, the pattern [^ab1-9] says to
  107. * match any single character that is not an "a", a "b", or
  108. * a digit in the range one
  109. * through nine.
  110. *
  111. * There are four constructors for RegularExpression. One just creates an
  112. * empty RegularExpression object. Two others create a RegularExpression
  113. * object and initialize it with a regular expression that is given in the
  114. * form of a char* or of a string. The fourth takes a reference to a
  115. * RegularExpression object as an argument and creates an object initialized
  116. * with the information from the given RegularExpression object.
  117. *
  118. * The find member function finds the first occurrence of the regular
  119. * expression of that object in the string given to find as an argument. Find
  120. * returns a boolean, and if true, mutates the private data appropriately.
  121. * Find sets pointers to the beginning and end of the thing last found; they
  122. * are pointers into the actual string that was searched. The start and end
  123. * member functions return indices into the searched string that correspond
  124. * to the beginning and end pointers respectively. The compile member
  125. * function takes a char* and puts the compiled version of the char* argument
  126. * into the object's private data fields. The == and != operators only check
  127. * to see if the compiled regular expression is the same, and the
  128. * deep_equal function also checks to see if the start and end pointers are
  129. * the same. The is_valid function returns false if program is set to NULL,
  130. * (i.e. there is no valid compiled expression). The set_invalid function sets
  131. * the program to NULL (Warning: this deletes the compiled expression). The
  132. * following examples may help clarify regular expression usage:
  133. *
  134. * * The regular expression "^hello" matches a "hello" only at the
  135. * beginning of a line. It would match "hello there" but not "hi,
  136. * hello there".
  137. *
  138. * * The regular expression "long$" matches a "long" only at the end
  139. * of a line. It would match "so long\0", but not "long ago".
  140. *
  141. * * The regular expression "t..t..g" will match anything that has a
  142. * "t" then any two characters, another "t", any two characters and
  143. * then a "g". It will match "testing", or "test again" but would
  144. * not match "toasting"
  145. *
  146. * * The regular expression "[1-9ab]" matches any number one through
  147. * nine, and the characters "a" and "b". It would match "hello 1"
  148. * or "begin", but would not match "no-match".
  149. *
  150. * * The regular expression "[^1-9ab]" matches any character that is
  151. * not a number one through nine, or an "a" or "b". It would NOT
  152. * match "hello 1" or "begin", but would match "no-match".
  153. *
  154. * * The regular expression "br* " matches something that begins with
  155. * a "b", is followed by zero or more "r"s, and ends in a space. It
  156. * would match "brrrrr ", and "b ", but would not match "brrh ".
  157. *
  158. * * The regular expression "br+ " matches something that begins with
  159. * a "b", is followed by one or more "r"s, and ends in a space. It
  160. * would match "brrrrr ", and "br ", but would not match "b " or
  161. * "brrh ".
  162. *
  163. * * The regular expression "br? " matches something that begins with
  164. * a "b", is followed by zero or one "r"s, and ends in a space. It
  165. * would match "br ", and "b ", but would not match "brrrr " or
  166. * "brrh ".
  167. *
  168. * * The regular expression "(..p)b" matches something ending with pb
  169. * and beginning with whatever the two characters before the first p
  170. * encountered in the line were. It would find "repb" in "rep drepa
  171. * qrepb". The regular expression "(..p)a" would find "repa qrepb"
  172. * in "rep drepa qrepb"
  173. *
  174. * * The regular expression "d(..p)" matches something ending with p,
  175. * beginning with d, and having two characters in between that are
  176. * the same as the two characters before the first p encountered in
  177. * the line. It would match "drepa qrepb" in "rep drepa qrepb".
  178. *
  179. */
  180. class @KWSYS_NAMESPACE@_EXPORT RegularExpression
  181. {
  182. public:
  183. /**
  184. * Instantiate RegularExpression with program=NULL.
  185. */
  186. inline RegularExpression ();
  187. /**
  188. * Instantiate RegularExpression with compiled char*.
  189. */
  190. inline RegularExpression (char const*);
  191. /**
  192. * Instantiate RegularExpression as a copy of another regular expression.
  193. */
  194. RegularExpression (RegularExpression const&);
  195. /**
  196. * Instantiate RegularExpression with compiled string.
  197. */
  198. inline RegularExpression (kwsys_stl::string const&);
  199. /**
  200. * Destructor.
  201. */
  202. inline ~RegularExpression();
  203. /**
  204. * Compile a regular expression into internal code
  205. * for later pattern matching.
  206. */
  207. bool compile (char const*);
  208. /**
  209. * Compile a regular expression into internal code
  210. * for later pattern matching.
  211. */
  212. inline bool compile (kwsys_stl::string const&);
  213. /**
  214. * Matches the regular expression to the given string.
  215. * Returns true if found, and sets start and end indexes accordingly.
  216. */
  217. bool find (char const*);
  218. /**
  219. * Matches the regular expression to the given std string.
  220. * Returns true if found, and sets start and end indexes accordingly.
  221. */
  222. inline bool find (kwsys_stl::string const&);
  223. /**
  224. * Index to start of first find.
  225. */
  226. inline kwsys_stl::string::size_type start() const;
  227. /**
  228. * Index to end of first find.
  229. */
  230. inline kwsys_stl::string::size_type end() const;
  231. /**
  232. * Copy the given regular expression.
  233. */
  234. RegularExpression& operator= (const RegularExpression& rxp);
  235. /**
  236. * Returns true if two regular expressions have the same
  237. * compiled program for pattern matching.
  238. */
  239. bool operator== (RegularExpression const&) const;
  240. /**
  241. * Returns true if two regular expressions have different
  242. * compiled program for pattern matching.
  243. */
  244. inline bool operator!= (RegularExpression const&) const;
  245. /**
  246. * Returns true if have the same compiled regular expressions
  247. * and the same start and end pointers.
  248. */
  249. bool deep_equal (RegularExpression const&) const;
  250. /**
  251. * True if the compiled regexp is valid.
  252. */
  253. inline bool is_valid() const;
  254. /**
  255. * Marks the regular expression as invalid.
  256. */
  257. inline void set_invalid();
  258. /**
  259. * Destructor.
  260. */
  261. // awf added
  262. kwsys_stl::string::size_type start(int n) const;
  263. kwsys_stl::string::size_type end(int n) const;
  264. kwsys_stl::string match(int n) const;
  265. enum { NSUBEXP = 10 };
  266. private:
  267. const char* startp[NSUBEXP];
  268. const char* endp[NSUBEXP];
  269. char regstart; // Internal use only
  270. char reganch; // Internal use only
  271. const char* regmust; // Internal use only
  272. kwsys_stl::string::size_type regmlen; // Internal use only
  273. char* program;
  274. int progsize;
  275. const char* searchstring;
  276. };
  277. /**
  278. * Create an empty regular expression.
  279. */
  280. inline RegularExpression::RegularExpression ()
  281. {
  282. this->program = 0;
  283. }
  284. /**
  285. * Creates a regular expression from string s, and
  286. * compiles s.
  287. */
  288. inline RegularExpression::RegularExpression (const char* s)
  289. {
  290. this->program = 0;
  291. if ( s )
  292. {
  293. this->compile(s);
  294. }
  295. }
  296. /**
  297. * Creates a regular expression from string s, and
  298. * compiles s.
  299. */
  300. inline RegularExpression::RegularExpression (const kwsys_stl::string& s)
  301. {
  302. this->program = 0;
  303. this->compile(s);
  304. }
  305. /**
  306. * Destroys and frees space allocated for the regular expression.
  307. */
  308. inline RegularExpression::~RegularExpression ()
  309. {
  310. //#ifndef WIN32
  311. delete [] this->program;
  312. //#endif
  313. }
  314. /**
  315. * Compile a regular expression into internal code
  316. * for later pattern matching.
  317. */
  318. inline bool RegularExpression::compile (kwsys_stl::string const& s)
  319. {
  320. return this->compile(s.c_str());
  321. }
  322. /**
  323. * Matches the regular expression to the given std string.
  324. * Returns true if found, and sets start and end indexes accordingly.
  325. */
  326. inline bool RegularExpression::find (kwsys_stl::string const& s)
  327. {
  328. return this->find(s.c_str());
  329. }
  330. /**
  331. * Set the start position for the regular expression.
  332. */
  333. inline kwsys_stl::string::size_type RegularExpression::start () const
  334. {
  335. return static_cast<kwsys_stl::string::size_type>(
  336. this->startp[0] - searchstring);
  337. }
  338. /**
  339. * Returns the start/end index of the last item found.
  340. */
  341. inline kwsys_stl::string::size_type RegularExpression::end () const
  342. {
  343. return static_cast<kwsys_stl::string::size_type>(
  344. this->endp[0] - searchstring);
  345. }
  346. /**
  347. * Returns true if two regular expressions have different
  348. * compiled program for pattern matching.
  349. */
  350. inline bool RegularExpression::operator!= (const RegularExpression& r) const
  351. {
  352. return(!(*this == r));
  353. }
  354. /**
  355. * Returns true if a valid regular expression is compiled
  356. * and ready for pattern matching.
  357. */
  358. inline bool RegularExpression::is_valid () const
  359. {
  360. return (this->program != 0);
  361. }
  362. inline void RegularExpression::set_invalid ()
  363. {
  364. //#ifndef WIN32
  365. delete [] this->program;
  366. //#endif
  367. this->program = 0;
  368. }
  369. /**
  370. * Return start index of nth submatch. start(0) is the start of the full match.
  371. */
  372. inline kwsys_stl::string::size_type RegularExpression::start(int n) const
  373. {
  374. return static_cast<kwsys_stl::string::size_type>(
  375. this->startp[n] - searchstring);
  376. }
  377. /**
  378. * Return end index of nth submatch. end(0) is the end of the full match.
  379. */
  380. inline kwsys_stl::string::size_type RegularExpression::end(int n) const
  381. {
  382. return static_cast<kwsys_stl::string::size_type>(
  383. this->endp[n] - searchstring);
  384. }
  385. /**
  386. * Return nth submatch as a string.
  387. */
  388. inline kwsys_stl::string RegularExpression::match(int n) const
  389. {
  390. if (this->startp[n]==0)
  391. {
  392. return kwsys_stl::string("");
  393. }
  394. else
  395. {
  396. return kwsys_stl::string(this->startp[n],
  397. static_cast<kwsys_stl::string::size_type>(
  398. this->endp[n] - this->startp[n]));
  399. }
  400. }
  401. } // namespace @KWSYS_NAMESPACE@
  402. /* Undefine temporary macro. */
  403. #if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
  404. # undef kwsys_stl
  405. #endif
  406. #endif