RegularExpression.hxx.in 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. /*=========================================================================
  2. Program: KWSys - Kitware System Library
  3. Module: $RCSfile$
  4. Language: C++
  5. Date: $Date$
  6. Version: $Revision$
  7. Copyright (c) 2002 Kitware, Inc., Insight Consortium. All rights reserved.
  8. See http://www.cmake.org/HTML/Copyright.html for details.
  9. This software is distributed WITHOUT ANY WARRANTY; without even
  10. the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  11. PURPOSE. See the above copyright notices for more information.
  12. =========================================================================*/
  13. // Original Copyright notice:
  14. // Copyright (C) 1991 Texas Instruments Incorporated.
  15. //
  16. // Permission is granted to any individual or institution to use, copy, modify,
  17. // and distribute this software, provided that this complete copyright and
  18. // permission notice is maintained, intact, in all copies and supporting
  19. // documentation.
  20. //
  21. // Texas Instruments Incorporated provides this software "as is" without
  22. // express or implied warranty.
  23. //
  24. // Created: MNF 06/13/89 Initial Design and Implementation
  25. // Updated: LGO 08/09/89 Inherit from Generic
  26. // Updated: MBN 09/07/89 Added conditional exception handling
  27. // Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place!
  28. // Updated: DLS 03/22/91 New lite version
  29. //
  30. #ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx
  31. #define @KWSYS_NAMESPACE@_RegularExpression_hxx
  32. #include <@KWSYS_NAMESPACE@/Configure.hxx>
  33. #include <@KWSYS_NAMESPACE@/std/string>
  34. /* Define this macro temporarily to keep the code readable. */
  35. #if !defined (KWSYS_NAMESPACE) && !defined(@KWSYS_NAMESPACE@_NAME_IS_KWSYS)
  36. # define kwsys_std @KWSYS_NAMESPACE@_std
  37. #endif
  38. namespace @KWSYS_NAMESPACE@
  39. {
  /**
   * Number of match slots kept by a RegularExpression.  It is the length of
   * the startp/endp arrays; slot 0 describes the full match and the
   * remaining slots describe submatches.
   */
  int const RegularExpressionNSUBEXP = 10;
  41. /** \class RegularExpression
  42. * \brief Implements pattern matching with regular expressions.
  43. *
  44. * This is the header file for the regular expression class. An object of
  45. * this class contains a regular expression, in a special "compiled" format.
  46. * This compiled format consists of several slots all kept as the objects
  47. * private data. The RegularExpression class provides a convenient way to
  48. * represent regular expressions. It makes it easy to search for the same
  49. * regular expression in many different strings without having to compile a
  50. * string to regular expression format more than necessary.
  51. *
  52. * This class implements pattern matching via regular expressions.
  53. * A regular expression allows a programmer to specify complex
  54. * patterns that can be searched for and matched against the
  55. * character string of a string object. In its simplest form, a
  56. * regular expression is a sequence of characters used to
  57. * search for exact character matches. However, many times the
  58. * exact sequence to be found is not known, or only a match at
  59. * the beginning or end of a string is desired. The RegularExpression
  60. * class implements regular expression pattern
  61. * matching as is found and implemented in many UNIX commands
  62. * and utilities.
  63. *
  64. * Example: The perl code
  65. *
  66. * $filename =~ m"([a-z]+)\.cc";
  67. * print $1;
  68. *
  69. * Is written as follows in C++
  70. *
  71. * RegularExpression re("([a-z]+)\\.cc");
  72. * re.find(filename);
  73. * cerr << re.match(1);
  74. *
  75. *
  76. * The regular expression class provides a convenient mechanism
  77. * for specifying and manipulating regular expressions. The
  78. * regular expression object allows specification of such pat-
  79. * terns by using the following regular expression metacharac-
  80. * ters:
  81. *
  82. * ^ Matches at beginning of a line
  83. *
  84. * $ Matches at end of a line
  85. *
  86. * . Matches any single character
  87. *
  88. * [ ] Matches any character(s) inside the brackets
  89. *
  90. * [^ ] Matches any character(s) not inside the brackets
  91. *
  92. * - Matches any character in range on either side of a dash
  93. *
  94. * * Matches preceding pattern zero or more times
  95. *
  96. * + Matches preceding pattern one or more times
  97. *
  98. * ? Matches preceding pattern zero or once only
  99. *
  100. * () Saves a matched expression and uses it in a later match
  101. *
  102. * Note that more than one of these metacharacters can be used
  103. * in a single regular expression in order to create complex
  104. * search patterns. For example, the pattern [^ab1-9] says to
  105. * match any single character that is not an "a", not a "b",
  106. * and not one of the digits one
  107. * through nine.
  108. *
  109. * There are three constructors for RegularExpression. One just creates an
  110. * empty RegularExpression object. Another creates a RegularExpression
  111. * object and initializes it with a regular expression that is given in the
  112. * form of a char*. The third takes a reference to a RegularExpression
  113. * object as an argument and creates an object initialized with the
  114. * information from the given RegularExpression object.
  115. *
  116. * The find member function finds the first occurrence of the regular
  117. * expression of that object in the string given to find as an argument. Find
  118. * returns a boolean, and if true, mutates the private data appropriately.
  119. * Find sets pointers to the beginning and end of the thing last found, they
  120. * are pointers into the actual string that was searched. The start and end
  121. * member functions return indices into the searched string that correspond
  122. * to the beginning and end pointers respectively. The compile member
  123. * function takes a char* and puts the compiled version of the char* argument
  124. * into the object's private data fields. The == and != operators only check
  125. * to see if the compiled regular expression is the same, and the
  126. * deep_equal function also checks to see if the start and end pointers are
  127. * the same. The is_valid function returns false if program is set to NULL,
  128. * (i.e. there is no valid compiled expression). The set_invalid function sets
  129. * the program to NULL (Warning: this deletes the compiled expression). The
  130. * following examples may help clarify regular expression usage:
  131. *
  132. * * The regular expression "^hello" matches a "hello" only at the
  133. * beginning of a line. It would match "hello there" but not "hi,
  134. * hello there".
  135. *
  136. * * The regular expression "long$" matches a "long" only at the end
  137. * of a line. It would match "so long\0", but not "long ago".
  138. *
  139. * * The regular expression "t..t..g" will match anything that has a
  140. * "t" then any two characters, another "t", any two characters and
  141. * then a "g". It will match "testing", or "test again" but would
  142. * not match "toasting"
  143. *
  144. * * The regular expression "[1-9ab]" matches any number one through
  145. * nine, and the characters "a" and "b". It would match "hello 1"
  146. * or "begin", but would not match "no-match".
  147. *
  148. * * The regular expression "[^1-9ab]" matches any character that is
  149. * not a number one through nine, or an "a" or "b". It would NOT
  150. * match "hello 1" or "begin", but would match "no-match".
  151. *
  152. * * The regular expression "br* " matches something that begins with
  153. * a "b", is followed by zero or more "r"s, and ends in a space. It
  154. * would match "brrrrr ", and "b ", but would not match "brrh ".
  155. *
  156. * * The regular expression "br+ " matches something that begins with
  157. * a "b", is followed by one or more "r"s, and ends in a space. It
  158. * would match "brrrrr ", and "br ", but would not match "b " or
  159. * "brrh ".
  160. *
  161. * * The regular expression "br? " matches something that begins with
  162. * a "b", is followed by zero or one "r"s, and ends in a space. It
  163. * would match "br ", and "b ", but would not match "brrrr " or
  164. * "brrh ".
  165. *
  166. * * The regular expression "(..p)b" matches something ending with pb
  167. * and beginning with whatever the two characters before the first p
  168. * encountered in the line were. It would find "repb" in "rep drepa
  169. * qrepb". The regular expression "(..p)a" would find "repa qrepb"
  170. * in "rep drepa qrepb"
  171. *
  172. * * The regular expression "d(..p)" matches something ending with p,
  173. * beginning with d, and having two characters in between that are
  174. * the same as the two characters before the first p encountered in
  175. * the line. It would match "drepa qrepb" in "rep drepa qrepb".
  176. *
  177. */
  178. class RegularExpression
  179. {
  180. public:
  181. /**
  182. * Instantiate RegularExpression with program=NULL.
  183. */
  184. inline RegularExpression ();
  185. /**
  186. * Instantiate RegularExpression with compiled char*.
  187. */
  188. inline RegularExpression (char const*);
  189. /**
  190. * Instantiate RegularExpression as a copy of another regular expression.
  191. */
  192. RegularExpression (RegularExpression const&);
  193. /**
  194. * Destructor.
  195. */
  196. inline ~RegularExpression();
  197. /**
  198. * Compile a regular expression into internal code
  199. * for later pattern matching.
  200. */
  201. bool compile (char const*);
  202. /**
  203. * Matches the regular expression to the given string.
  204. * Returns true if found, and sets start and end indexes accordingly.
  205. */
  206. bool find (char const*);
  207. /**
  208. * Matches the regular expression to the given std string.
  209. * Returns true if found, and sets start and end indexes accordingly.
  210. */
  211. bool find (kwsys_std::string const&);
  212. /**
  213. * Index to start of first find.
  214. */
  215. inline kwsys_std::string::size_type start() const;
  216. /**
  217. * Index to end of first find.
  218. */
  219. inline kwsys_std::string::size_type end() const;
  220. /**
  221. * Returns true if two regular expressions have the same
  222. * compiled program for pattern matching.
  223. */
  224. bool operator== (RegularExpression const&) const;
  225. /**
  226. * Returns true if two regular expressions have different
  227. * compiled program for pattern matching.
  228. */
  229. inline bool operator!= (RegularExpression const&) const;
  230. /**
  231. * Returns true if have the same compiled regular expressions
  232. * and the same start and end pointers.
  233. */
  234. bool deep_equal (RegularExpression const&) const;
  235. /**
  236. * True if the compiled regexp is valid.
  237. */
  238. inline bool is_valid() const;
  239. /**
  240. * Marks the regular expression as invalid.
  241. */
  242. inline void set_invalid();
  243. /**
  244. * Destructor.
  245. */
  246. // awf added
  247. kwsys_std::string::size_type start(int n) const;
  248. kwsys_std::string::size_type end(int n) const;
  249. kwsys_std::string match(int n) const;
  250. private:
  251. const char* startp[RegularExpressionNSUBEXP];
  252. const char* endp[RegularExpressionNSUBEXP];
  253. char regstart; // Internal use only
  254. char reganch; // Internal use only
  255. const char* regmust; // Internal use only
  256. int regmlen; // Internal use only
  257. char* program;
  258. int progsize;
  259. const char* searchstring;
  260. };
  261. /**
  262. * Create an empty regular expression.
  263. */
  264. inline RegularExpression::RegularExpression ()
  265. {
  266. this->program = 0;
  267. }
  268. /**
  269. * Creates a regular expression from string s, and
  270. * compiles s.
  271. */
  272. inline RegularExpression::RegularExpression (const char* s)
  273. {
  274. this->program = 0;
  275. if ( s )
  276. {
  277. this->compile(s);
  278. }
  279. }
  280. /**
  281. * Destroys and frees space allocated for the regular expression.
  282. */
  283. inline RegularExpression::~RegularExpression ()
  284. {
  285. //#ifndef WIN32
  286. delete [] this->program;
  287. //#endif
  288. }
  289. /**
  290. * Set the start position for the regular expression.
  291. */
  292. inline kwsys_std::string::size_type RegularExpression::start () const
  293. {
  294. return(this->startp[0] - searchstring);
  295. }
  296. /**
  297. * Returns the start/end index of the last item found.
  298. */
  299. inline kwsys_std::string::size_type RegularExpression::end () const
  300. {
  301. return(this->endp[0] - searchstring);
  302. }
  303. /**
  304. * Returns true if two regular expressions have different
  305. * compiled program for pattern matching.
  306. */
  307. inline bool RegularExpression::operator!= (const RegularExpression& r) const
  308. {
  309. return(!(*this == r));
  310. }
  311. /**
  312. * Returns true if a valid regular expression is compiled
  313. * and ready for pattern matching.
  314. */
  315. inline bool RegularExpression::is_valid () const
  316. {
  317. return (this->program != 0);
  318. }
  319. inline void RegularExpression::set_invalid ()
  320. {
  321. //#ifndef WIN32
  322. delete [] this->program;
  323. //#endif
  324. this->program = 0;
  325. }
  326. /**
  327. * Return start index of nth submatch. start(0) is the start of the full match.
  328. */
  329. inline kwsys_std::string::size_type RegularExpression::start(int n) const
  330. {
  331. return this->startp[n] - searchstring;
  332. }
  333. /**
  334. * Return end index of nth submatch. end(0) is the end of the full match.
  335. */
  336. inline kwsys_std::string::size_type RegularExpression::end(int n) const
  337. {
  338. return this->endp[n] - searchstring;
  339. }
  340. /**
  341. * Return nth submatch as a string.
  342. */
  343. inline kwsys_std::string RegularExpression::match(int n) const
  344. {
  345. return kwsys_std::string(this->startp[n], this->endp[n] - this->startp[n]);
  346. }
  347. } // namespace @KWSYS_NAMESPACE@
  348. /* Undefine temporary macro. */
  349. #if !defined (KWSYS_NAMESPACE) && !defined(@KWSYS_NAMESPACE@_NAME_IS_KWSYS)
  350. # undef kwsys_std
  351. #endif
  352. #endif