testEncoding.cxx 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
  3. #include "kwsysPrivate.h"
  4. #if defined(_MSC_VER)
  5. #pragma warning(disable : 4786)
  6. #endif
  7. #include KWSYS_HEADER(Encoding.hxx)
  8. #include KWSYS_HEADER(Encoding.h)
  9. #include <algorithm>
  10. #include <iostream>
  11. #include <locale.h>
  12. #include <stdlib.h>
  13. #include <string.h>
  14. // Work-around CMake dependency scanning limitation. This must
  15. // duplicate the above list of headers.
  16. #if 0
  17. #include "Encoding.h.in"
  18. #include "Encoding.hxx.in"
  19. #endif
  20. //----------------------------------------------------------------------------
  21. static const unsigned char helloWorldStrings[][32] = {
  22. // English
  23. { 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', 0 },
  24. // Japanese
  25. { 0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81,
  26. 0xA1, 0xE3, 0x81, 0xAF, 0xE4, 0xB8, 0x96, 0xE7, 0x95, 0x8C, 0 },
  27. // Arabic
  28. { 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7, 0x20, 0xD8,
  29. 0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x85, 0 },
  30. // Yiddish
  31. { 0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90, 0x20, 0xD7,
  32. 0x95, 0xD7, 0x95, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x98, 0 },
  33. // Russian
  34. { 0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5,
  35. 0xD1, 0x82, 0x20, 0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80, 0 },
  36. // Latin
  37. { 0x4D, 0x75, 0x6E, 0x64, 0x75, 0x73, 0x20, 0x73, 0x61, 0x6C, 0x76, 0x65,
  38. 0 },
  39. // Swahili
  40. { 0x68, 0x75, 0x6A, 0x61, 0x6D, 0x62, 0x6F, 0x20, 0x44, 0x75, 0x6E, 0x69,
  41. 0x61, 0 },
  42. // Icelandic
  43. { 0x48, 0x61, 0x6C, 0x6C, 0xC3, 0xB3, 0x20, 0x68, 0x65, 0x69, 0x6D, 0x75,
  44. 0x72, 0 },
  45. { 0 }
  46. };
  47. //----------------------------------------------------------------------------
  48. static int testHelloWorldEncoding()
  49. {
  50. int ret = 0;
  51. for (int i = 0; helloWorldStrings[i][0] != 0; i++) {
  52. std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
  53. std::cout << str << std::endl;
  54. std::wstring wstr = kwsys::Encoding::ToWide(str);
  55. std::string str2 = kwsys::Encoding::ToNarrow(wstr);
  56. wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
  57. char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
  58. if (!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str()))) {
  59. std::cout << "converted string was different: " << str2 << std::endl;
  60. std::cout << "converted string was different: " << c_str2 << std::endl;
  61. ret++;
  62. }
  63. free(c_wstr);
  64. free(c_str2);
  65. }
  66. return ret;
  67. }
  68. static int testRobustEncoding()
  69. {
  70. // test that the conversion functions handle invalid
  71. // unicode correctly/gracefully
  72. int ret = 0;
  73. char cstr[] = { (char)-1, 0 };
  74. // this conversion could fail
  75. std::wstring wstr = kwsys::Encoding::ToWide(cstr);
  76. wstr = kwsys::Encoding::ToWide(NULL);
  77. if (wstr != L"") {
  78. const wchar_t* wcstr = wstr.c_str();
  79. std::cout << "ToWide(NULL) returned";
  80. for (size_t i = 0; i < wstr.size(); i++) {
  81. std::cout << " " << std::hex << (int)wcstr[i];
  82. }
  83. std::cout << std::endl;
  84. ret++;
  85. }
  86. wstr = kwsys::Encoding::ToWide("");
  87. if (wstr != L"") {
  88. const wchar_t* wcstr = wstr.c_str();
  89. std::cout << "ToWide(\"\") returned";
  90. for (size_t i = 0; i < wstr.size(); i++) {
  91. std::cout << " " << std::hex << (int)wcstr[i];
  92. }
  93. std::cout << std::endl;
  94. ret++;
  95. }
  96. #ifdef _WIN32
  97. // 16 bit wchar_t - we make an invalid surrogate pair
  98. wchar_t cwstr[] = { 0xD801, 0xDA00, 0 };
  99. // this conversion could fail
  100. std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
  101. #endif
  102. std::string str = kwsys::Encoding::ToNarrow(NULL);
  103. if (str != "") {
  104. std::cout << "ToNarrow(NULL) returned " << str << std::endl;
  105. ret++;
  106. }
  107. str = kwsys::Encoding::ToNarrow(L"");
  108. if (wstr != L"") {
  109. std::cout << "ToNarrow(\"\") returned " << str << std::endl;
  110. ret++;
  111. }
  112. return ret;
  113. }
  114. static int testWithNulls()
  115. {
  116. int ret = 0;
  117. std::vector<std::string> strings;
  118. strings.push_back(std::string("ab") + '\0' + 'c');
  119. strings.push_back(std::string("d") + '\0' + '\0' + 'e');
  120. strings.push_back(std::string() + '\0' + 'f');
  121. strings.push_back(std::string() + '\0' + '\0' + "gh");
  122. strings.push_back(std::string("ij") + '\0');
  123. strings.push_back(std::string("k") + '\0' + '\0');
  124. strings.push_back(std::string("\0\0\0\0", 4) + "lmn" +
  125. std::string("\0\0\0\0", 4));
  126. for (std::vector<std::string>::iterator it = strings.begin();
  127. it != strings.end(); ++it) {
  128. std::wstring wstr = kwsys::Encoding::ToWide(*it);
  129. std::string str = kwsys::Encoding::ToNarrow(wstr);
  130. std::string s(*it);
  131. std::replace(s.begin(), s.end(), '\0', ' ');
  132. std::cout << "'" << s << "' (" << it->size() << ")" << std::endl;
  133. if (str != *it) {
  134. std::replace(str.begin(), str.end(), '\0', ' ');
  135. std::cout << "string with null was different: '" << str << "' ("
  136. << str.size() << ")" << std::endl;
  137. ret++;
  138. }
  139. }
  140. return ret;
  141. }
  142. static int testCommandLineArguments()
  143. {
  144. int status = 0;
  145. char const* argv[2] = { "./app.exe", (char const*)helloWorldStrings[1] };
  146. kwsys::Encoding::CommandLineArguments args(2, argv);
  147. kwsys::Encoding::CommandLineArguments arg2 =
  148. kwsys::Encoding::CommandLineArguments(args);
  149. char const* const* u8_argv = args.argv();
  150. for (int i = 0; i < args.argc(); i++) {
  151. char const* u8_arg = u8_argv[i];
  152. if (strcmp(argv[i], u8_arg) != 0) {
  153. std::cout << "argv[" << i << "] " << argv[i] << " != " << u8_arg
  154. << std::endl;
  155. status++;
  156. }
  157. }
  158. kwsys::Encoding::CommandLineArguments args3 =
  159. kwsys::Encoding::CommandLineArguments::Main(2, argv);
  160. return status;
  161. }
  162. //----------------------------------------------------------------------------
  163. int testEncoding(int, char* [])
  164. {
  165. const char* loc = setlocale(LC_ALL, "");
  166. if (loc) {
  167. std::cout << "Locale: " << loc << std::endl;
  168. } else {
  169. std::cout << "Locale: None" << std::endl;
  170. }
  171. int ret = 0;
  172. ret |= testHelloWorldEncoding();
  173. ret |= testRobustEncoding();
  174. ret |= testCommandLineArguments();
  175. ret |= testWithNulls();
  176. return ret;
  177. }