Glob.cxx 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. /*============================================================================
  2. KWSys - Kitware System Library
  3. Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
  4. Distributed under the OSI-approved BSD License (the "License");
  5. see accompanying file Copyright.txt for details.
  6. This software is distributed WITHOUT ANY WARRANTY; without even the
  7. implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  8. See the License for more information.
  9. ============================================================================*/
  10. #include "kwsysPrivate.h"
  11. #include KWSYS_HEADER(Glob.hxx)
  12. #include KWSYS_HEADER(Configure.hxx)
  13. #include KWSYS_HEADER(RegularExpression.hxx)
  14. #include KWSYS_HEADER(SystemTools.hxx)
  15. #include KWSYS_HEADER(Directory.hxx)
  16. #include KWSYS_HEADER(stl/string)
  17. #include KWSYS_HEADER(stl/vector)
  18. #include KWSYS_HEADER(stl/algorithm)
  19. // Work-around CMake dependency scanning limitation. This must
  20. // duplicate the above list of headers.
  21. #if 0
  22. # include "Glob.hxx.in"
  23. # include "Directory.hxx.in"
  24. # include "Configure.hxx.in"
  25. # include "RegularExpression.hxx.in"
  26. # include "SystemTools.hxx.in"
  27. # include "kwsys_stl.hxx.in"
  28. # include "kwsys_stl_string.hxx.in"
  29. # include "kwsys_stl_vector.hxx.in"
  30. # include "kwsys_stl_algorithm.hxx.in"
  31. #endif
  32. #include <ctype.h>
  33. #include <stdio.h>
  34. #include <string.h>
  35. namespace KWSYS_NAMESPACE
  36. {
  37. #if defined(_WIN32) || defined(__APPLE__) || defined(__CYGWIN__)
  38. // On Windows and apple, no difference between lower and upper case
  39. # define KWSYS_GLOB_CASE_INDEPENDENT
  40. #endif
  41. #if defined(_WIN32) || defined(__CYGWIN__)
  42. // Handle network paths
  43. # define KWSYS_GLOB_SUPPORT_NETWORK_PATHS
  44. #endif
  45. //----------------------------------------------------------------------------
  46. class GlobInternals
  47. {
  48. public:
  49. kwsys_stl::vector<kwsys_stl::string> Files;
  50. kwsys_stl::vector<kwsys::RegularExpression> Expressions;
  51. };
  52. //----------------------------------------------------------------------------
  53. Glob::Glob()
  54. {
  55. this->Internals = new GlobInternals;
  56. this->Recurse = false;
  57. this->Relative = "";
  58. this->RecurseThroughSymlinks = true;
  59. // RecurseThroughSymlinks is true by default for backwards compatibility,
  60. // not because it's a good idea...
  61. this->FollowedSymlinkCount = 0;
  62. // Keep separate variables for directory listing for back compatibility
  63. this->ListDirs = true;
  64. this->RecurseListDirs = false;
  65. }
  66. //----------------------------------------------------------------------------
  67. Glob::~Glob()
  68. {
  69. delete this->Internals;
  70. }
  71. //----------------------------------------------------------------------------
  72. kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
  73. {
  74. return this->Internals->Files;
  75. }
  76. //----------------------------------------------------------------------------
  77. kwsys_stl::string Glob::PatternToRegex(const kwsys_stl::string& pattern,
  78. bool require_whole_string,
  79. bool preserve_case)
  80. {
  81. // Incrementally build the regular expression from the pattern.
  82. kwsys_stl::string regex = require_whole_string? "^" : "";
  83. kwsys_stl::string::const_iterator pattern_first = pattern.begin();
  84. kwsys_stl::string::const_iterator pattern_last = pattern.end();
  85. for(kwsys_stl::string::const_iterator i = pattern_first;
  86. i != pattern_last; ++i)
  87. {
  88. int c = *i;
  89. if(c == '*')
  90. {
  91. // A '*' (not between brackets) matches any string.
  92. // We modify this to not match slashes since the orignal glob
  93. // pattern documentation was meant for matching file name
  94. // components separated by slashes.
  95. regex += "[^/]*";
  96. }
  97. else if(c == '?')
  98. {
  99. // A '?' (not between brackets) matches any single character.
  100. // We modify this to not match slashes since the orignal glob
  101. // pattern documentation was meant for matching file name
  102. // components separated by slashes.
  103. regex += "[^/]";
  104. }
  105. else if(c == '[')
  106. {
  107. // Parse out the bracket expression. It begins just after the
  108. // opening character.
  109. kwsys_stl::string::const_iterator bracket_first = i+1;
  110. kwsys_stl::string::const_iterator bracket_last = bracket_first;
  111. // The first character may be complementation '!' or '^'.
  112. if(bracket_last != pattern_last &&
  113. (*bracket_last == '!' || *bracket_last == '^'))
  114. {
  115. ++bracket_last;
  116. }
  117. // If the next character is a ']' it is included in the brackets
  118. // because the bracket string may not be empty.
  119. if(bracket_last != pattern_last && *bracket_last == ']')
  120. {
  121. ++bracket_last;
  122. }
  123. // Search for the closing ']'.
  124. while(bracket_last != pattern_last && *bracket_last != ']')
  125. {
  126. ++bracket_last;
  127. }
  128. // Check whether we have a complete bracket string.
  129. if(bracket_last == pattern_last)
  130. {
  131. // The bracket string did not end, so it was opened simply by
  132. // a '[' that is supposed to be matched literally.
  133. regex += "\\[";
  134. }
  135. else
  136. {
  137. // Convert the bracket string to its regex equivalent.
  138. kwsys_stl::string::const_iterator k = bracket_first;
  139. // Open the regex block.
  140. regex += "[";
  141. // A regex range complement uses '^' instead of '!'.
  142. if(k != bracket_last && *k == '!')
  143. {
  144. regex += "^";
  145. ++k;
  146. }
  147. // Convert the remaining characters.
  148. for(; k != bracket_last; ++k)
  149. {
  150. // Backslashes must be escaped.
  151. if(*k == '\\')
  152. {
  153. regex += "\\";
  154. }
  155. // Store this character.
  156. regex += *k;
  157. }
  158. // Close the regex block.
  159. regex += "]";
  160. // Jump to the end of the bracket string.
  161. i = bracket_last;
  162. }
  163. }
  164. else
  165. {
  166. // A single character matches itself.
  167. int ch = c;
  168. if(!(('a' <= ch && ch <= 'z') ||
  169. ('A' <= ch && ch <= 'Z') ||
  170. ('0' <= ch && ch <= '9')))
  171. {
  172. // Escape the non-alphanumeric character.
  173. regex += "\\";
  174. }
  175. #if defined(KWSYS_GLOB_CASE_INDEPENDENT)
  176. else
  177. {
  178. // On case-insensitive systems file names are converted to lower
  179. // case before matching.
  180. if(!preserve_case)
  181. {
  182. ch = tolower(ch);
  183. }
  184. }
  185. #endif
  186. (void)preserve_case;
  187. // Store the character.
  188. regex.append(1, static_cast<char>(ch));
  189. }
  190. }
  191. if(require_whole_string)
  192. {
  193. regex += "$";
  194. }
  195. return regex;
  196. }
  197. //----------------------------------------------------------------------------
  198. bool Glob::RecurseDirectory(kwsys_stl::string::size_type start,
  199. const kwsys_stl::string& dir, GlobMessages* messages)
  200. {
  201. kwsys::Directory d;
  202. if ( !d.Load(dir) )
  203. {
  204. return true;
  205. }
  206. unsigned long cc;
  207. kwsys_stl::string realname;
  208. kwsys_stl::string fname;
  209. for ( cc = 0; cc < d.GetNumberOfFiles(); cc ++ )
  210. {
  211. fname = d.GetFile(cc);
  212. if ( fname == "." || fname == ".." )
  213. {
  214. continue;
  215. }
  216. if ( start == 0 )
  217. {
  218. realname = dir + fname;
  219. }
  220. else
  221. {
  222. realname = dir + "/" + fname;
  223. }
  224. #if defined( KWSYS_GLOB_CASE_INDEPENDENT )
  225. // On Windows and apple, no difference between lower and upper case
  226. fname = kwsys::SystemTools::LowerCase(fname);
  227. #endif
  228. bool isDir = kwsys::SystemTools::FileIsDirectory(realname);
  229. bool isSymLink = kwsys::SystemTools::FileIsSymlink(realname);
  230. if ( isDir && (!isSymLink || this->RecurseThroughSymlinks) )
  231. {
  232. if (isSymLink)
  233. {
  234. ++this->FollowedSymlinkCount;
  235. kwsys_stl::string realPathErrorMessage;
  236. kwsys_stl::string canonicalPath(SystemTools::GetRealPath(dir,
  237. &realPathErrorMessage));
  238. if(!realPathErrorMessage.empty())
  239. {
  240. if(messages)
  241. {
  242. messages->push_back(Message(
  243. Glob::error, "Canonical path generation from path '"
  244. + dir + "' failed! Reason: '" + realPathErrorMessage + "'"));
  245. }
  246. return false;
  247. }
  248. if(kwsys_stl::find(this->VisitedSymlinks.begin(),
  249. this->VisitedSymlinks.end(),
  250. canonicalPath) == this->VisitedSymlinks.end())
  251. {
  252. if(this->RecurseListDirs)
  253. {
  254. // symlinks are treated as directories
  255. this->AddFile(this->Internals->Files, realname);
  256. }
  257. this->VisitedSymlinks.push_back(canonicalPath);
  258. if(!this->RecurseDirectory(start+1, realname, messages))
  259. {
  260. this->VisitedSymlinks.pop_back();
  261. return false;
  262. }
  263. this->VisitedSymlinks.pop_back();
  264. }
  265. // else we have already visited this symlink - prevent cyclic recursion
  266. else if(messages)
  267. {
  268. kwsys_stl::string message;
  269. for(kwsys_stl::vector<kwsys_stl::string>::const_iterator
  270. pathIt = kwsys_stl::find(this->VisitedSymlinks.begin(),
  271. this->VisitedSymlinks.end(),
  272. canonicalPath);
  273. pathIt != this->VisitedSymlinks.end(); ++pathIt)
  274. {
  275. message += *pathIt + "\n";
  276. }
  277. message += canonicalPath + "/" + fname;
  278. messages->push_back(Message(Glob::cyclicRecursion, message));
  279. }
  280. }
  281. else
  282. {
  283. if(this->RecurseListDirs)
  284. {
  285. this->AddFile(this->Internals->Files, realname);
  286. }
  287. if(!this->RecurseDirectory(start+1, realname, messages))
  288. {
  289. return false;
  290. }
  291. }
  292. }
  293. else
  294. {
  295. if ( !this->Internals->Expressions.empty() &&
  296. this->Internals->Expressions.rbegin()->find(fname) )
  297. {
  298. this->AddFile(this->Internals->Files, realname);
  299. }
  300. }
  301. }
  302. return true;
  303. }
  304. //----------------------------------------------------------------------------
  305. void Glob::ProcessDirectory(kwsys_stl::string::size_type start,
  306. const kwsys_stl::string& dir, GlobMessages* messages)
  307. {
  308. //kwsys_ios::cout << "ProcessDirectory: " << dir << kwsys_ios::endl;
  309. bool last = ( start == this->Internals->Expressions.size()-1 );
  310. if ( last && this->Recurse )
  311. {
  312. this->RecurseDirectory(start, dir, messages);
  313. return;
  314. }
  315. if ( start >= this->Internals->Expressions.size() )
  316. {
  317. return;
  318. }
  319. kwsys::Directory d;
  320. if ( !d.Load(dir) )
  321. {
  322. return;
  323. }
  324. unsigned long cc;
  325. kwsys_stl::string realname;
  326. kwsys_stl::string fname;
  327. for ( cc = 0; cc < d.GetNumberOfFiles(); cc ++ )
  328. {
  329. fname = d.GetFile(cc);
  330. if ( fname == "." || fname == ".." )
  331. {
  332. continue;
  333. }
  334. if ( start == 0 )
  335. {
  336. realname = dir + fname;
  337. }
  338. else
  339. {
  340. realname = dir + "/" + fname;
  341. }
  342. #if defined(KWSYS_GLOB_CASE_INDEPENDENT)
  343. // On case-insensitive file systems convert to lower case for matching.
  344. fname = kwsys::SystemTools::LowerCase(fname);
  345. #endif
  346. //kwsys_ios::cout << "Look at file: " << fname << kwsys_ios::endl;
  347. //kwsys_ios::cout << "Match: "
  348. // << this->Internals->TextExpressions[start].c_str() << kwsys_ios::endl;
  349. //kwsys_ios::cout << "Real name: " << realname << kwsys_ios::endl;
  350. if( (!last && !kwsys::SystemTools::FileIsDirectory(realname))
  351. || (!this->ListDirs && last &&
  352. kwsys::SystemTools::FileIsDirectory(realname)) )
  353. {
  354. continue;
  355. }
  356. if ( this->Internals->Expressions[start].find(fname) )
  357. {
  358. if ( last )
  359. {
  360. this->AddFile(this->Internals->Files, realname);
  361. }
  362. else
  363. {
  364. this->ProcessDirectory(start+1, realname, messages);
  365. }
  366. }
  367. }
  368. }
  369. //----------------------------------------------------------------------------
  370. bool Glob::FindFiles(const kwsys_stl::string& inexpr, GlobMessages* messages)
  371. {
  372. kwsys_stl::string cexpr;
  373. kwsys_stl::string::size_type cc;
  374. kwsys_stl::string expr = inexpr;
  375. this->Internals->Expressions.clear();
  376. this->Internals->Files.clear();
  377. if ( !kwsys::SystemTools::FileIsFullPath(expr) )
  378. {
  379. expr = kwsys::SystemTools::GetCurrentWorkingDirectory();
  380. expr += "/" + inexpr;
  381. }
  382. kwsys_stl::string fexpr = expr;
  383. kwsys_stl::string::size_type skip = 0;
  384. kwsys_stl::string::size_type last_slash = 0;
  385. for ( cc = 0; cc < expr.size(); cc ++ )
  386. {
  387. if ( cc > 0 && expr[cc] == '/' && expr[cc-1] != '\\' )
  388. {
  389. last_slash = cc;
  390. }
  391. if ( cc > 0 &&
  392. (expr[cc] == '[' || expr[cc] == '?' || expr[cc] == '*') &&
  393. expr[cc-1] != '\\' )
  394. {
  395. break;
  396. }
  397. }
  398. if ( last_slash > 0 )
  399. {
  400. //kwsys_ios::cout << "I can skip: " << fexpr.substr(0, last_slash)
  401. // << kwsys_ios::endl;
  402. skip = last_slash;
  403. }
  404. if ( skip == 0 )
  405. {
  406. #if defined( KWSYS_GLOB_SUPPORT_NETWORK_PATHS )
  407. // Handle network paths
  408. if ( expr[0] == '/' && expr[1] == '/' )
  409. {
  410. int cnt = 0;
  411. for ( cc = 2; cc < expr.size(); cc ++ )
  412. {
  413. if ( expr[cc] == '/' )
  414. {
  415. cnt ++;
  416. if ( cnt == 2 )
  417. {
  418. break;
  419. }
  420. }
  421. }
  422. skip = int(cc + 1);
  423. }
  424. else
  425. #endif
  426. // Handle drive letters on Windows
  427. if ( expr[1] == ':' && expr[0] != '/' )
  428. {
  429. skip = 2;
  430. }
  431. }
  432. if ( skip > 0 )
  433. {
  434. expr = expr.substr(skip);
  435. }
  436. cexpr = "";
  437. for ( cc = 0; cc < expr.size(); cc ++ )
  438. {
  439. int ch = expr[cc];
  440. if ( ch == '/' )
  441. {
  442. if ( !cexpr.empty() )
  443. {
  444. this->AddExpression(cexpr);
  445. }
  446. cexpr = "";
  447. }
  448. else
  449. {
  450. cexpr.append(1, static_cast<char>(ch));
  451. }
  452. }
  453. if ( !cexpr.empty() )
  454. {
  455. this->AddExpression(cexpr);
  456. }
  457. // Handle network paths
  458. if ( skip > 0 )
  459. {
  460. this->ProcessDirectory(0, fexpr.substr(0, skip) + "/", messages);
  461. }
  462. else
  463. {
  464. this->ProcessDirectory(0, "/", messages);
  465. }
  466. return true;
  467. }
  468. //----------------------------------------------------------------------------
  469. void Glob::AddExpression(const kwsys_stl::string& expr)
  470. {
  471. this->Internals->Expressions.push_back(
  472. kwsys::RegularExpression(
  473. this->PatternToRegex(expr)));
  474. }
  475. //----------------------------------------------------------------------------
  476. void Glob::SetRelative(const char* dir)
  477. {
  478. if ( !dir )
  479. {
  480. this->Relative = "";
  481. return;
  482. }
  483. this->Relative = dir;
  484. }
  485. //----------------------------------------------------------------------------
  486. const char* Glob::GetRelative()
  487. {
  488. if ( this->Relative.empty() )
  489. {
  490. return 0;
  491. }
  492. return this->Relative.c_str();
  493. }
  494. //----------------------------------------------------------------------------
  495. void Glob::AddFile(kwsys_stl::vector<kwsys_stl::string>& files, const kwsys_stl::string& file)
  496. {
  497. if ( !this->Relative.empty() )
  498. {
  499. files.push_back(kwsys::SystemTools::RelativePath(this->Relative, file));
  500. }
  501. else
  502. {
  503. files.push_back(file);
  504. }
  505. }
  506. } // namespace KWSYS_NAMESPACE