TextOperations.h 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. /*
  2. * TextOperations.h, part of VCMI engine
  3. *
  4. * Authors: listed in file AUTHORS in main folder
  5. *
  6. * License: GNU General Public License v2.0 or later
  7. * Full text of license available in license.txt file, in main folder
  8. *
  9. */
  10. #pragma once
  11. VCMI_LIB_NAMESPACE_BEGIN
  12. /// Namespace that provides utilities for unicode support (UTF-8)
  13. namespace TextOperations
  14. {
  15. /// returns 32-bit UTF codepoint for UTF-8 character symbol
  16. uint32_t DLL_LINKAGE getUnicodeCodepoint(const char *data, size_t maxSize);
  17. /// returns 32-bit UTF codepoint for character symbol in selected single-byte encoding
  18. uint32_t DLL_LINKAGE getUnicodeCodepoint(char data, const std::string & encoding );
  19. /// returns length (in bytes) of UTF-8 character starting from specified character
  20. size_t DLL_LINKAGE getUnicodeCharacterSize(char firstByte);
  21. /// test if character is a valid UTF-8 symbol
  22. /// maxSize - maximum number of bytes this symbol may consist from ( = remainder of string)
  23. bool DLL_LINKAGE isValidUnicodeCharacter(const char * character, size_t maxSize);
  24. /// returns true if text contains valid ASCII-string
  25. /// Note that since UTF-8 extends ASCII, any ASCII string is also UTF-8 string
  26. bool DLL_LINKAGE isValidASCII(const std::string & text);
  27. bool DLL_LINKAGE isValidASCII(const char * data, size_t size);
  28. /// test if text contains valid UTF-8 sequence
  29. bool DLL_LINKAGE isValidUnicodeString(std::string_view text);
  30. bool DLL_LINKAGE isValidUnicodeString(const char * data, size_t size);
  31. /// converts text to UTF-8 from specified encoding or from one specified in settings
  32. std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);
  33. /// converts text from unicode to specified encoding or to one specified in settings
  34. /// NOTE: usage of these functions should be avoided if possible
  35. std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);
  36. ///delete specified amount of UTF-8 characters from right
  37. DLL_LINKAGE void trimRightUnicode(std::string & text, size_t amount = 1);
  38. /// give back amount of unicode characters
  39. size_t DLL_LINKAGE getUnicodeCharactersCount(std::string_view text);
  40. /// converts number into string using metric system prefixes, e.g. 'k' or 'M' to keep resulting strings within specified size
  41. /// Note that resulting string may have more symbols than digits: minus sign and prefix symbol
  42. template<typename Arithmetic>
  43. inline std::string formatMetric(Arithmetic number, int maxDigits);
  44. /// replaces all symbols that normally need escaping with appropriate escape sequences
  45. std::string escapeString(std::string input);
  46. /// get formatted DateTime depending on the language selected
  47. DLL_LINKAGE std::string getFormattedDateTimeLocal(std::time_t dt);
  48. /// get formatted current DateTime depending on the language selected
  49. /// timeOffset - optional parameter to modify current time by specified time in seconds
  50. DLL_LINKAGE std::string getCurrentFormattedDateTimeLocal(std::chrono::seconds timeOffset = {});
  51. /// get formatted time (without date)
  52. DLL_LINKAGE std::string getFormattedTimeLocal(std::time_t dt);
  53. /// get formatted time (without date)
  54. /// timeOffset - optional parameter to modify current time by specified time in seconds
  55. DLL_LINKAGE std::string getCurrentFormattedTimeLocal(std::chrono::seconds timeOffset = {});
  56. /// Algorithm for detection of typos in words
  57. /// Determines how 'different' two strings are - how many changes must be done to turn one string into another one
  58. /// https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
  59. DLL_LINKAGE int getLevenshteinDistance(std::string_view s, std::string_view t);
  60. /// Compares two strings using locale-aware collation based on the selected game language.
  61. DLL_LINKAGE bool compareLocalizedStrings(std::string_view str1, std::string_view str2);
  62. /// Check if texts have similarity when typing into search boxes
  63. /// 0 -> Exact match or starts with typed-in text, 1 -> Close match or substring match,
  64. /// other values = Levenshtein distance, returns std::nullopt for unrelated word (bad match).
  65. DLL_LINKAGE std::optional<int> textSearchSimilarityScore(const std::string & s, const std::string & t);
  66. /// This function is mainly used to avoid garbled text when reading or writing files
  67. /// with non-ASCII (e.g. Chinese) characters in the path, especially on Windows.
  68. /// Call this before passing the path to file I/O functions that take std::string.
  69. DLL_LINKAGE std::string filesystemPathToUtf8(const boost::filesystem::path& path);
  70. // Used for handling paths with non-ASCII characters.
  71. DLL_LINKAGE boost::filesystem::path Utf8TofilesystemPath(const std::string& path);
  72. };
  73. template<typename Arithmetic>
  74. inline std::string TextOperations::formatMetric(Arithmetic number, int maxDigits)
  75. {
  76. Arithmetic max = std::pow(10, maxDigits);
  77. if (std::abs(number) < max)
  78. return std::to_string(number);
  79. std::string symbols = " kMGTPE";
  80. auto iter = symbols.begin();
  81. while (std::abs(number) >= max)
  82. {
  83. number /= 1000;
  84. iter++;
  85. assert(iter != symbols.end());//should be enough even for int64
  86. }
  87. return std::to_string(number) + *iter;
  88. }
  89. VCMI_LIB_NAMESPACE_END