TextOperations.h 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. /*
  2. * TextOperations.h, part of VCMI engine
  3. *
  4. * Authors: listed in file AUTHORS in main folder
  5. *
  6. * License: GNU General Public License v2.0 or later
  7. * Full text of license available in license.txt file, in main folder
  8. *
  9. */
  10. #pragma once
#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstddef>
#include <iterator>
#include <limits>
#include <stdexcept>
#include <string>
#include <type_traits>

#include <boost/lexical_cast.hpp>
  12. VCMI_LIB_NAMESPACE_BEGIN
  13. /// Namespace that provides utilities for unicode support (UTF-8)
  14. namespace TextOperations
  15. {
  16. /// returns 32-bit UTF codepoint for UTF-8 character symbol
  17. uint32_t DLL_LINKAGE getUnicodeCodepoint(const char *data, size_t maxSize);
  18. /// returns 32-bit UTF codepoint for character symbol in selected single-byte encoding
  19. uint32_t DLL_LINKAGE getUnicodeCodepoint(char data, const std::string & encoding );
  20. /// returns length (in bytes) of UTF-8 character starting from specified character
  21. size_t DLL_LINKAGE getUnicodeCharacterSize(char firstByte);
  22. /// test if character is a valid UTF-8 symbol
  23. /// maxSize - maximum number of bytes this symbol may consist from ( = remainder of string)
  24. bool DLL_LINKAGE isValidUnicodeCharacter(const char * character, size_t maxSize);
  25. /// returns true if text contains valid ASCII-string
  26. /// Note that since UTF-8 extends ASCII, any ASCII string is also UTF-8 string
  27. bool DLL_LINKAGE isValidASCII(const std::string & text);
  28. bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
  29. /// test if text contains valid UTF-8 sequence
  30. bool DLL_LINKAGE isValidUnicodeString(std::string_view text);
  31. bool DLL_LINKAGE isValidUnicodeString(const char * data, size_t size);
  32. /// converts text to UTF-8 from specified encoding or from one specified in settings
  33. std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
  34. /// converts text from unicode to specified encoding or to one specified in settings
  35. /// NOTE: usage of these functions should be avoided if possible
  36. std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
  37. ///delete specified amount of UTF-8 characters from right
  38. DLL_LINKAGE void trimRightUnicode(std::string & text, size_t amount = 1);
  39. /// give back amount of unicode characters
  40. size_t DLL_LINKAGE getUnicodeCharactersCount(std::string_view text);
  41. /// converts number into string using metric system prefixes, e.g. 'k' or 'M' to keep resulting strings within specified size
  42. /// Note that resulting string may have more symbols than digits: minus sign and prefix symbol
  43. template<typename Arithmetic>
  44. inline std::string formatMetric(Arithmetic number, int maxDigits);
  45. template<typename Arithmetic>
  46. inline Arithmetic parseMetric(const std::string &text);
  47. /// replaces all symbols that normally need escaping with appropriate escape sequences
  48. std::string escapeString(std::string input);
  49. /// get formatted DateTime depending on the language selected
  50. DLL_LINKAGE std::string getFormattedDateTimeLocal(std::time_t dt);
  51. /// get formatted current DateTime depending on the language selected
  52. /// timeOffset - optional parameter to modify current time by specified time in seconds
  53. DLL_LINKAGE std::string getCurrentFormattedDateTimeLocal(std::chrono::seconds timeOffset = {});
  54. /// get formatted time (without date)
  55. DLL_LINKAGE std::string getFormattedTimeLocal(std::time_t dt);
  56. /// get formatted time (without date)
  57. /// timeOffset - optional parameter to modify current time by specified time in seconds
  58. DLL_LINKAGE std::string getCurrentFormattedTimeLocal(std::chrono::seconds timeOffset = {});
  59. /// Algorithm for detection of typos in words
  60. /// Determines how 'different' two strings are - how many changes must be done to turn one string into another one
  61. /// https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
  62. DLL_LINKAGE int getLevenshteinDistance(std::string_view s, std::string_view t);
  63. /// Compares two strings using locale-aware collation based on the selected game language.
  64. DLL_LINKAGE bool compareLocalizedStrings(std::string_view str1, std::string_view str2);
  65. /// Check if texts have similarity when typing into search boxes
  66. /// 0 -> Exact match or starts with typed-in text, 1 -> Close match or substring match,
  67. /// other values = Levenshtein distance, returns std::nullopt for unrelated word (bad match).
  68. DLL_LINKAGE std::optional<int> textSearchSimilarityScore(const std::string & s, const std::string & t);
  69. /// This function is mainly used to avoid garbled text when reading or writing files
  70. /// with non-ASCII (e.g. Chinese) characters in the path, especially on Windows.
  71. /// Call this before passing the path to file I/O functions that take std::string.
  72. DLL_LINKAGE std::string filesystemPathToUtf8(const boost::filesystem::path& path);
  73. // Used for handling paths with non-ASCII characters.
  74. DLL_LINKAGE boost::filesystem::path Utf8TofilesystemPath(const std::string& path);
  75. /// Strip out unwanted characters from map name
  76. DLL_LINKAGE std::string convertMapName(std::string input);
  77. };
  78. template<typename Arithmetic>
  79. inline std::string TextOperations::formatMetric(Arithmetic number, int maxDigits)
  80. {
  81. Arithmetic max = std::pow(10, maxDigits);
  82. if (std::abs(number) < max)
  83. return std::to_string(number);
  84. std::string symbols = " kMGTPE";
  85. auto iter = symbols.begin();
  86. while (std::abs(number) >= max)
  87. {
  88. number /= 1000;
  89. iter++;
  90. assert(iter != symbols.end());//should be enough even for int64
  91. }
  92. return std::to_string(number) + *iter;
  93. }
  94. template<typename Arithmetic>
  95. inline Arithmetic TextOperations::parseMetric(const std::string &text)
  96. {
  97. if (text.empty())
  98. return 0;
  99. // Trim whitespace
  100. std::string trimmed = text;
  101. trimmed.erase(trimmed.begin(), std::find_if(trimmed.begin(), trimmed.end(), [](unsigned char ch){ return !std::isspace(ch); }));
  102. trimmed.erase(std::find_if(trimmed.rbegin(), trimmed.rend(), [](unsigned char ch){ return !std::isspace(ch); }).base(), trimmed.end());
  103. // Check if last character is a metric suffix
  104. char last = trimmed.back();
  105. int power = 0; // number of *1000 multiplications
  106. switch (std::toupper(last))
  107. {
  108. case 'K': power = 1; break;
  109. case 'M': power = 2; break;
  110. case 'G': power = 3; break;
  111. case 'T': power = 4; break;
  112. case 'P': power = 5; break;
  113. case 'E': power = 6; break;
  114. default: power = 0; break; // no suffix
  115. }
  116. std::string numberPart = trimmed;
  117. if (power > 0)
  118. numberPart.pop_back();
  119. // Remove any non-digit or minus sign (same spirit as your numberFilter)
  120. numberPart.erase(std::remove_if(numberPart.begin(), numberPart.end(), [](char c)
  121. {
  122. return !(std::isdigit(static_cast<unsigned char>(c)) || c == '-');
  123. }), numberPart.end());
  124. if (numberPart.empty() || (numberPart == "-"))
  125. return 0;
  126. try
  127. {
  128. Arithmetic value = std::stoll(numberPart);
  129. for (int i = 0; i < power; ++i)
  130. {
  131. // Multiply by 1000, check for overflow if desired
  132. if (value > std::numeric_limits<Arithmetic>::max() / 1000)
  133. return std::numeric_limits<Arithmetic>::max();
  134. if (value < std::numeric_limits<Arithmetic>::min() / 1000)
  135. return std::numeric_limits<Arithmetic>::min();
  136. value *= static_cast<Arithmetic>(1000);
  137. }
  138. return value;
  139. }
  140. catch (std::invalid_argument &)
  141. {
  142. return 0;
  143. }
  144. }
  145. VCMI_LIB_NAMESPACE_END