/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file LICENSE.rst or https://cmake.org/licensing for details.  */
  3. #pragma once
  4. #include "cmConfigure.h" // IWYU pragma: keep
  5. #include <cctype>
  6. #include <cstring>
  7. #include <initializer_list>
  8. #include <iterator>
  9. #include <numeric>
  10. #include <sstream>
  11. #include <string>
  12. #include <utility>
  13. #include <vector>
  14. #include <cm/string_view>
  15. #include "cmRange.h"
  16. #include "cmValue.h"
  17. /** String range type. */
  18. using cmStringRange = cmRange<std::vector<std::string>::const_iterator>;
  /**
   * Returns the length of a literal string.
   *
   * The result is the array extent @a N minus one, i.e. the literal's length
   * without its terminating NUL.  Usable in constant expressions.
   */
  template <size_t N>
  constexpr size_t cmStrLen(char const (&)[N])
  {
    return N - 1;
  }
  25. /** Callable string comparison struct. */
  26. struct cmStrCmp
  27. {
  28. cmStrCmp(std::string str)
  29. : Test_(std::move(str))
  30. {
  31. }
  32. bool operator()(cm::string_view sv) const { return this->Test_ == sv; }
  33. private:
  34. std::string const Test_;
  35. };
  36. /**
  37. * Test if two strings are identical, ignoring case.
  38. *
  39. * Note that this is not guaranteed to work correctly on non-ASCII strings.
  40. */
  41. bool cmStrCaseEq(cm::string_view a, cm::string_view b);
  /** Returns true if the character @a ch is a whitespace character. */
  inline bool cmIsSpace(char ch)
  {
    // isspace takes 'int' but documents that the value must be representable
    // by 'unsigned char', or be EOF.  Cast to 'unsigned char' to avoid sign
    // extension while converting to 'int'.
    // isspace returns an 'int'; compare against zero to make the conversion
    // to 'bool' explicit.
    return std::isspace(static_cast<unsigned char>(ch)) != 0;
  }
  50. /** Returns a string that has whitespace removed from the start and the end. */
  51. std::string cmTrimWhitespace(cm::string_view str);
  52. /**
  53. * Returns a string view that has whitespace removed from the start and the
  54. * end.
  55. */
  56. cm::string_view cmStripWhitespace(cm::string_view str);
  57. /** Returns a string that has quotes removed from the start and the end. */
  58. std::string cmRemoveQuotes(cm::string_view str);
  59. /** Escape quotes in a string. */
  60. std::string cmEscapeQuotes(cm::string_view str);
  61. /** Joins elements of a range with separator into a single string. */
  62. template <typename Range>
  63. std::string cmJoin(Range const& rng, cm::string_view separator)
  64. {
  65. if (rng.empty()) {
  66. return std::string();
  67. }
  68. std::ostringstream os;
  69. auto it = rng.begin();
  70. auto const end = rng.end();
  71. os << *it;
  72. while (++it != end) {
  73. os << separator << *it;
  74. }
  75. return os.str();
  76. }
  77. /** Generic function to join strings range with separator
  78. * and initial leading string into a single string.
  79. */
  80. template <typename Range>
  81. std::string cmJoinStrings(Range const& rng, cm::string_view separator,
  82. cm::string_view initial)
  83. {
  84. if (rng.empty()) {
  85. return { std::begin(initial), std::end(initial) };
  86. }
  87. std::string result;
  88. result.reserve(std::accumulate(
  89. std::begin(rng), std::end(rng),
  90. initial.size() + (rng.size() - 1) * separator.size(),
  91. [](std::size_t sum, typename Range::value_type const& item) {
  92. return sum + item.size();
  93. }));
  94. result.append(std::begin(initial), std::end(initial));
  95. auto begin = std::begin(rng);
  96. auto end = std::end(rng);
  97. result += *begin;
  98. for (++begin; begin != end; ++begin) {
  99. result.append(std::begin(separator), std::end(separator));
  100. result += *begin;
  101. }
  102. return result;
  103. }
  104. /**
  105. * Faster overloads for std::string ranges.
  106. * If @a initial is provided, it prepends the resulted string without
  107. * @a separator between them.
  108. */
  109. std::string cmJoin(std::vector<std::string> const& rng,
  110. cm::string_view separator, cm::string_view initial = {});
  111. std::string cmJoin(cmStringRange rng, cm::string_view separator,
  112. cm::string_view initial = {});
  /** Selects what a tokenizer produces when the input contains no tokens. */
  enum class cmTokenizerMode
  {
    /// Backward-compatible behavior: when no tokens are found in the
    /// input text, return one empty token in the result container
    /// (vector).
    Legacy,
    /// The new behavior is to return an empty vector.
    New
  };
  122. /**
  123. * \brief A generic version of a tokenizer.
  124. *
  125. * Extract tokens from the input string separated by any
  126. * of the characters in `sep` and assign them to the
  127. * given output iterator.
  128. *
  129. * The `mode` parameter defines the behavior in the case when
  130. * no tokens have found in the input text.
  131. *
  132. */
  133. template <typename StringT, typename OutIt, typename Sep = char>
  134. void cmTokenize(OutIt outIt, cm::string_view str, Sep sep,
  135. cmTokenizerMode mode)
  136. {
  137. auto hasTokens = false;
  138. // clang-format off
  139. for (auto start = str.find_first_not_of(sep)
  140. , end = str.find_first_of(sep, start)
  141. ; start != cm::string_view::npos
  142. ; start = str.find_first_not_of(sep, end)
  143. , end = str.find_first_of(sep, start)
  144. , hasTokens = true
  145. ) {
  146. *outIt++ = StringT{ str.substr(start, end - start) };
  147. }
  148. // clang-format on
  149. if (!hasTokens && mode == cmTokenizerMode::Legacy) {
  150. *outIt = StringT{};
  151. }
  152. }
  153. /**
  154. * \brief Extract tokens that are separated by any of the
  155. * characters in `sep`.
  156. *
  157. * Backward compatible signature.
  158. *
  159. * \return A vector of strings.
  160. */
  161. template <typename Sep = char>
  162. std::vector<std::string> cmTokenize(
  163. cm::string_view str, Sep sep, cmTokenizerMode mode = cmTokenizerMode::Legacy)
  164. {
  165. using StringType = std::string;
  166. std::vector<StringType> tokens;
  167. cmTokenize<StringType>(std::back_inserter(tokens), str, sep, mode);
  168. return tokens;
  169. }
  170. /**
  171. * \brief Extract tokens that are separated by any of the
  172. * characters in `sep`.
  173. *
  174. * \return A vector of string views.
  175. */
  176. template <typename Sep = char>
  177. std::vector<cm::string_view> cmTokenizedView(
  178. cm::string_view str, Sep sep, cmTokenizerMode mode = cmTokenizerMode::Legacy)
  179. {
  180. using StringType = cm::string_view;
  181. std::vector<StringType> tokens;
  182. cmTokenize<StringType>(std::back_inserter(tokens), str, sep, mode);
  183. return tokens;
  184. }
  185. /** Concatenate string pieces into a single string. */
  186. std::string cmCatViews(
  187. std::initializer_list<std::pair<cm::string_view, std::string*>> views);
  188. /** Utility class for cmStrCat. */
  189. class cmAlphaNum
  190. {
  191. public:
  192. cmAlphaNum(cm::string_view view)
  193. : View_(view)
  194. {
  195. }
  196. cmAlphaNum(std::string const& str)
  197. : View_(str)
  198. {
  199. }
  200. cmAlphaNum(std::string&& str)
  201. : RValueString_(&str)
  202. {
  203. }
  204. cmAlphaNum(char const* str)
  205. : View_(str ? cm::string_view(str) : cm::string_view())
  206. {
  207. }
  208. cmAlphaNum(char ch)
  209. : View_(this->Digits_, 1)
  210. {
  211. this->Digits_[0] = ch;
  212. }
  213. cmAlphaNum(int val);
  214. cmAlphaNum(unsigned int val);
  215. cmAlphaNum(long int val);
  216. cmAlphaNum(unsigned long int val);
  217. cmAlphaNum(long long int val);
  218. cmAlphaNum(unsigned long long int val);
  219. cmAlphaNum(float val);
  220. cmAlphaNum(double val);
  221. cmAlphaNum(cmValue value)
  222. : View_(*value)
  223. {
  224. }
  225. cm::string_view View() const
  226. {
  227. if (this->RValueString_) {
  228. return *this->RValueString_;
  229. }
  230. return this->View_;
  231. }
  232. std::string* RValueString() const { return this->RValueString_; }
  233. private:
  234. std::string* RValueString_ = nullptr;
  235. cm::string_view View_;
  236. char Digits_[32];
  237. };
  238. /** Concatenate string pieces and numbers into a single string. */
  239. template <typename A, typename B, typename... AV>
  240. inline std::string cmStrCat(A&& a, B&& b, AV&&... args)
  241. {
  242. static auto const makePair =
  243. [](cmAlphaNum const& arg) -> std::pair<cm::string_view, std::string*> {
  244. return { arg.View(), arg.RValueString() };
  245. };
  246. return cmCatViews({ makePair(std::forward<A>(a)),
  247. makePair(std::forward<B>(b)),
  248. makePair(std::forward<AV>(args))... });
  249. }
  250. /** Joins wrapped elements of a range with separator into a single string. */
  251. template <typename Range>
  252. std::string cmWrap(cm::string_view prefix, Range const& rng,
  253. cm::string_view suffix, cm::string_view sep)
  254. {
  255. if (rng.empty()) {
  256. return std::string();
  257. }
  258. return cmCatViews({ { prefix, nullptr },
  259. { cmJoin(rng,
  260. cmCatViews({ { suffix, nullptr },
  261. { sep, nullptr },
  262. { prefix, nullptr } })),
  263. nullptr },
  264. { suffix, nullptr } });
  265. }
  266. /** Joins wrapped elements of a range with separator into a single string. */
  267. template <typename Range>
  268. std::string cmWrap(char prefix, Range const& rng, char suffix,
  269. cm::string_view sep)
  270. {
  271. return cmWrap(cm::string_view(&prefix, 1), rng, cm::string_view(&suffix, 1),
  272. sep);
  273. }
  274. /** Returns true if string @a str starts with the character @a prefix. */
  275. inline bool cmHasPrefix(cm::string_view str, char prefix)
  276. {
  277. return !str.empty() && (str.front() == prefix);
  278. }
  279. /** Returns true if string @a str starts with string @a prefix. */
  280. inline bool cmHasPrefix(cm::string_view str, cm::string_view prefix)
  281. {
  282. return str.compare(0, prefix.size(), prefix) == 0;
  283. }
  284. /** Returns true if string @a str starts with string @a prefix. */
  285. inline bool cmHasPrefix(cm::string_view str, cmValue prefix)
  286. {
  287. if (!prefix) {
  288. return false;
  289. }
  290. return str.compare(0, prefix->size(), *prefix) == 0;
  291. }
  292. /** Returns true if string @a str starts with string @a prefix. */
  293. template <size_t N>
  294. inline bool cmHasLiteralPrefix(cm::string_view str, char const (&prefix)[N])
  295. {
  296. return cmHasPrefix(str, cm::string_view(prefix, N - 1));
  297. }
  298. /** Returns true if string @a str ends with the character @a suffix. */
  299. inline bool cmHasSuffix(cm::string_view str, char suffix)
  300. {
  301. return !str.empty() && (str.back() == suffix);
  302. }
  303. /** Returns true if string @a str ends with string @a suffix. */
  304. inline bool cmHasSuffix(cm::string_view str, cm::string_view suffix)
  305. {
  306. return str.size() >= suffix.size() &&
  307. str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
  308. }
  309. /** Returns true if string @a str ends with string @a suffix. */
  310. inline bool cmHasSuffix(cm::string_view str, cmValue suffix)
  311. {
  312. if (!suffix) {
  313. return false;
  314. }
  315. return str.size() >= suffix->size() &&
  316. str.compare(str.size() - suffix->size(), suffix->size(), *suffix) == 0;
  317. }
  318. /** Returns true if string @a str ends with string @a suffix. */
  319. template <size_t N>
  320. inline bool cmHasLiteralSuffix(cm::string_view str, char const (&suffix)[N])
  321. {
  322. return cmHasSuffix(str, cm::string_view(suffix, N - 1));
  323. }
  324. /** Removes an existing suffix character of from the string @a str. */
  325. inline void cmStripSuffixIfExists(std::string& str, char suffix)
  326. {
  327. if (cmHasSuffix(str, suffix)) {
  328. str.pop_back();
  329. }
  330. }
  331. /** Removes an existing suffix string of from the string @a str. */
  332. inline void cmStripSuffixIfExists(std::string& str, cm::string_view suffix)
  333. {
  334. if (cmHasSuffix(str, suffix)) {
  335. str.resize(str.size() - suffix.size());
  336. }
  337. }
  /** Converts a string to long. Expects that the whole string is an integer. */
  bool cmStrToLong(char const* str, long* value);
  bool cmStrToLong(std::string const& str, long* value);

  /** Converts a string to unsigned long. Expects that the whole string is an
   * integer. */
  bool cmStrToULong(char const* str, unsigned long* value);
  bool cmStrToULong(std::string const& str, unsigned long* value);

  /** Converts a string to long long. Expects that the whole string
   * is an integer. */
  bool cmStrToLongLong(char const* str, long long* value);
  bool cmStrToLongLong(std::string const& str, long long* value);

  /** Converts a string to unsigned long long. Expects that the whole string
   * is an integer. */
  bool cmStrToULongLong(char const* str, unsigned long long* value);
  bool cmStrToULongLong(std::string const& str, unsigned long long* value);