cmRegularExpression.h 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /*=========================================================================
  2. Program: Insight Segmentation & Registration Toolkit
  3. Module: $RCSfile$
  4. Language: C++
  5. Date: $Date$
  6. Version: $Revision$
  7. Copyright (c) 2000 National Library of Medicine
  8. All rights reserved.
  9. See COPYRIGHT.txt for copyright details.
  10. =========================================================================*/
  11. /// Original Copyright notice:
  12. // Copyright (C) 1991 Texas Instruments Incorporated.
  13. //
  14. // Permission is granted to any individual or institution to use, copy, modify,
  15. // and distribute this software, provided that this complete copyright and
  16. // permission notice is maintained, intact, in all copies and supporting
  17. // documentation.
  18. //
  19. // Texas Instruments Incorporated provides this software "as is" without
  20. // express or implied warranty.
  21. //
  22. // .LIBRARY vbl
  23. // .HEADER Basics Package
  24. // .INCLUDE cmRegularExpression.h
  25. // .FILE cmRegularExpression.cxx
  26. //
  27. #ifndef cmRegularExpression_h
  28. #define cmRegularExpression_h
  29. #include "cmStandardIncludes.h"
  30. const int NSUBEXP = 10;
  31. //: Pattern matching with regular expressions
  32. // A regular expression allows a programmer to specify complex
  33. // patterns that can be searched for and matched against the
  34. // character string of a string object. In its simplest form, a
  35. // regular expression is a sequence of characters used to
  36. // search for exact character matches. However, many times the
  37. // exact sequence to be found is not known, or only a match at
  38. // the beginning or end of a string is desired. The vbl regu-
  39. // lar expression class implements regular expression pattern
  40. // matching as is found and implemented in many UNIX commands
  41. // and utilities.
  42. //
  43. // Example: The perl code
  44. //
  45. // $filename =~ m"([a-z]+)\.cc";
  46. // print $1;
  47. //
  48. // Is written as follows in C++
  49. //
  50. // cmRegularExpression re("([a-z]+)\\.cc");
  51. // re.find(filename);
  52. // std::cerr << re.match(1);
  53. //
  54. //
  55. // The regular expression class provides a convenient mechanism
  56. // for specifying and manipulating regular expressions. The
  57. // regular expression object allows specification of such pat-
  58. // terns by using the following regular expression metacharac-
  59. // ters:
  60. //
  61. // ^ Matches at beginning of a line
  62. //
  63. // $ Matches at end of a line
  64. //
  65. // . Matches any single character
  66. //
  67. // [ ] Matches any character(s) inside the brackets
  68. //
  69. // [^ ] Matches any character(s) not inside the brackets
  70. //
  71. // - Matches any character in range on either side of a dash
  72. //
  73. // * Matches preceding pattern zero or more times
  74. //
  75. // + Matches preceding pattern one or more times
  76. //
  77. // ? Matches preceding pattern zero or once only
  78. //
  79. // () Saves a matched expression and uses it in a later match
  80. //
  81. // Note that more than one of these metacharacters can be used
  82. // in a single regular expression in order to create complex
  83. // search patterns. For example, the pattern [^ab1-9] matches
  84. // any single character other than "a", "b", or one of the
  85. // digits one through nine; it does not describe a sequence
  86. // of characters.
  87. //
  88. class cmRegularExpression {
  89. public:
  90. inline cmRegularExpression (); // cmRegularExpression with program=NULL
  91. inline cmRegularExpression (char const*); // cmRegularExpression with compiled char*
  92. cmRegularExpression (cmRegularExpression const&); // Copy constructor
  93. inline ~cmRegularExpression(); // Destructor
  94. void compile (char const*); // Compiles char* --> regexp
  95. bool find (char const*); // true if regexp in char* arg
  96. bool find (std::string const&); // true if regexp in char* arg
  97. inline long start() const; // Index to start of first find
  98. inline long end() const; // Index to end of first find
  99. bool operator== (cmRegularExpression const&) const; // Equality operator
  100. inline bool operator!= (cmRegularExpression const&) const; // Inequality operator
  101. bool deep_equal (cmRegularExpression const&) const; // Same regexp and state?
  102. inline bool is_valid() const; // true if compiled regexp
  103. inline void set_invalid(); // Invalidates regexp
  104. // awf added
  105. int start(int n) const;
  106. int end(int n) const;
  107. std::string match(int n) const;
  108. private:
  109. const char* startp[NSUBEXP];
  110. const char* endp[NSUBEXP];
  111. char regstart; // Internal use only
  112. char reganch; // Internal use only
  113. const char* regmust; // Internal use only
  114. int regmlen; // Internal use only
  115. char* program;
  116. int progsize;
  117. const char* searchstring;
  118. };
  119. // cmRegularExpression -- Creates an empty regular expression.
  120. inline cmRegularExpression::cmRegularExpression () {
  121. this->program = NULL;
  122. }
  123. // cmRegularExpression -- Creates a regular expression from string s, and
  124. // compiles s.
  125. inline cmRegularExpression::cmRegularExpression (const char* s) {
  126. this->program = NULL;
  127. compile(s);
  128. }
  129. // ~cmRegularExpression -- Frees space allocated for regular expression.
  130. inline cmRegularExpression::~cmRegularExpression () {
  131. //#ifndef WIN32
  132. delete [] this->program;
  133. //#endif
  134. }
  135. // Start --
  136. inline long cmRegularExpression::start () const {
  137. return(this->startp[0] - searchstring);
  138. }
  139. // End -- Returns the start/end index of the last item found.
  140. inline long cmRegularExpression::end () const {
  141. return(this->endp[0] - searchstring);
  142. }
  143. // operator!= //
  144. inline bool cmRegularExpression::operator!= (const cmRegularExpression& r) const {
  145. return(!(*this == r));
  146. }
  147. // is_valid -- Returns true if a valid regular expression is compiled
  148. // and ready for pattern matching.
  149. inline bool cmRegularExpression::is_valid () const {
  150. return (this->program != NULL);
  151. }
  152. // set_invalid -- Invalidates regular expression.
  153. inline void cmRegularExpression::set_invalid () {
  154. //#ifndef WIN32
  155. delete [] this->program;
  156. //#endif
  157. this->program = NULL;
  158. }
  159. // -- Return start index of nth submatch. start(0) is the start of the full match.
  160. inline int cmRegularExpression::start(int n) const
  161. {
  162. return this->startp[n] - searchstring;
  163. }
  164. // -- Return end index of nth submatch. end(0) is the end of the full match.
  165. inline int cmRegularExpression::end(int n) const
  166. {
  167. return this->endp[n] - searchstring;
  168. }
  169. // -- Return nth submatch as a string.
  170. inline std::string cmRegularExpression::match(int n) const
  171. {
  172. return std::string(this->startp[n], this->endp[n] - this->startp[n]);
  173. }
  174. #endif // cmRegularExpression_h