testEncoding.cxx 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
  3. #include "kwsysPrivate.h"
  4. #if defined(_MSC_VER)
  5. #pragma warning(disable : 4786)
  6. #endif
  7. #include KWSYS_HEADER(Encoding.hxx)
  8. #include KWSYS_HEADER(Encoding.h)
  9. #include <algorithm>
  10. #include <iostream>
  11. #include <locale.h>
  12. #include <stdlib.h>
  13. #include <string.h>
  14. // Work-around CMake dependency scanning limitation. This must
  15. // duplicate the above list of headers.
  16. #if 0
  17. #include "Encoding.h.in"
  18. #include "Encoding.hxx.in"
  19. #endif
  // UTF-8 encoded greetings in several languages, used by the tests below as
  // conversion fixtures.  Each row holds at most 32 bytes, is NUL-terminated,
  // and the table ends with a sentinel row whose first byte is 0.
  static const unsigned char helloWorldStrings[][32] = {
    // English: "Hello World"
    { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x00 },
    // Japanese
    { 0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1,
      0xE3, 0x81, 0xAF, 0xE4, 0xB8, 0x96, 0xE7, 0x95, 0x8C, 0x00 },
    // Arabic
    { 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7, 0x20, 0xD8,
      0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x85, 0x00 },
    // Yiddish
    { 0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90, 0x20, 0xD7, 0x95, 0xD7,
      0x95, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x98, 0x00 },
    // Russian
    { 0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, 0xD1, 0x82,
      0x20, 0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80, 0x00 },
    // Latin: "Mundus salve"
    { 'M', 'u', 'n', 'd', 'u', 's', ' ', 's', 'a', 'l', 'v', 'e', 0x00 },
    // Swahili: "hujambo Dunia"
    { 'h', 'u', 'j', 'a', 'm', 'b', 'o', ' ', 'D', 'u', 'n', 'i', 'a', 0x00 },
    // Icelandic (the two hex bytes are the UTF-8 encoding of o-acute)
    { 'H', 'a', 'l', 'l', 0xC3, 0xB3, ' ', 'h', 'e', 'i', 'm', 'u', 'r',
      0x00 },
    // Sentinel row: terminates iteration over the table.
    { 0x00 }
  };
  46. static int testHelloWorldEncoding()
  47. {
  48. int ret = 0;
  49. for (int i = 0; helloWorldStrings[i][0] != 0; i++) {
  50. std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
  51. std::cout << str << std::endl;
  52. std::wstring wstr = kwsys::Encoding::ToWide(str);
  53. std::string str2 = kwsys::Encoding::ToNarrow(wstr);
  54. wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
  55. char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
  56. if (!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str()))) {
  57. std::cout << "converted string was different: " << str2 << std::endl;
  58. std::cout << "converted string was different: " << c_str2 << std::endl;
  59. ret++;
  60. }
  61. free(c_wstr);
  62. free(c_str2);
  63. }
  64. return ret;
  65. }
  66. static int testRobustEncoding()
  67. {
  68. // test that the conversion functions handle invalid
  69. // unicode correctly/gracefully
  70. int ret = 0;
  71. char cstr[] = { (char)-1, 0 };
  72. // this conversion could fail
  73. std::wstring wstr = kwsys::Encoding::ToWide(cstr);
  74. wstr = kwsys::Encoding::ToWide(NULL);
  75. if (wstr != L"") {
  76. const wchar_t* wcstr = wstr.c_str();
  77. std::cout << "ToWide(NULL) returned";
  78. for (size_t i = 0; i < wstr.size(); i++) {
  79. std::cout << " " << std::hex << (int)wcstr[i];
  80. }
  81. std::cout << std::endl;
  82. ret++;
  83. }
  84. wstr = kwsys::Encoding::ToWide("");
  85. if (wstr != L"") {
  86. const wchar_t* wcstr = wstr.c_str();
  87. std::cout << "ToWide(\"\") returned";
  88. for (size_t i = 0; i < wstr.size(); i++) {
  89. std::cout << " " << std::hex << (int)wcstr[i];
  90. }
  91. std::cout << std::endl;
  92. ret++;
  93. }
  94. #ifdef _WIN32
  95. // 16 bit wchar_t - we make an invalid surrogate pair
  96. wchar_t cwstr[] = { 0xD801, 0xDA00, 0 };
  97. // this conversion could fail
  98. std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
  99. #endif
  100. std::string str = kwsys::Encoding::ToNarrow(NULL);
  101. if (str != "") {
  102. std::cout << "ToNarrow(NULL) returned " << str << std::endl;
  103. ret++;
  104. }
  105. str = kwsys::Encoding::ToNarrow(L"");
  106. if (wstr != L"") {
  107. std::cout << "ToNarrow(\"\") returned " << str << std::endl;
  108. ret++;
  109. }
  110. return ret;
  111. }
  112. static int testWithNulls()
  113. {
  114. int ret = 0;
  115. std::vector<std::string> strings;
  116. strings.push_back(std::string("ab") + '\0' + 'c');
  117. strings.push_back(std::string("d") + '\0' + '\0' + 'e');
  118. strings.push_back(std::string() + '\0' + 'f');
  119. strings.push_back(std::string() + '\0' + '\0' + "gh");
  120. strings.push_back(std::string("ij") + '\0');
  121. strings.push_back(std::string("k") + '\0' + '\0');
  122. strings.push_back(std::string("\0\0\0\0", 4) + "lmn" +
  123. std::string("\0\0\0\0", 4));
  124. for (std::vector<std::string>::iterator it = strings.begin();
  125. it != strings.end(); ++it) {
  126. std::wstring wstr = kwsys::Encoding::ToWide(*it);
  127. std::string str = kwsys::Encoding::ToNarrow(wstr);
  128. std::string s(*it);
  129. std::replace(s.begin(), s.end(), '\0', ' ');
  130. std::cout << "'" << s << "' (" << it->size() << ")" << std::endl;
  131. if (str != *it) {
  132. std::replace(str.begin(), str.end(), '\0', ' ');
  133. std::cout << "string with null was different: '" << str << "' ("
  134. << str.size() << ")" << std::endl;
  135. ret++;
  136. }
  137. }
  138. return ret;
  139. }
  140. static int testCommandLineArguments()
  141. {
  142. int status = 0;
  143. char const* argv[2] = { "./app.exe", (char const*)helloWorldStrings[1] };
  144. kwsys::Encoding::CommandLineArguments args(2, argv);
  145. kwsys::Encoding::CommandLineArguments arg2 =
  146. kwsys::Encoding::CommandLineArguments(args);
  147. char const* const* u8_argv = args.argv();
  148. for (int i = 0; i < args.argc(); i++) {
  149. char const* u8_arg = u8_argv[i];
  150. if (strcmp(argv[i], u8_arg) != 0) {
  151. std::cout << "argv[" << i << "] " << argv[i] << " != " << u8_arg
  152. << std::endl;
  153. status++;
  154. }
  155. }
  156. kwsys::Encoding::CommandLineArguments args3 =
  157. kwsys::Encoding::CommandLineArguments::Main(2, argv);
  158. return status;
  159. }
  // On Windows, check kwsys::Encoding::ToWindowsExtendedPath against a table
  // of input paths and their expected wide results.  On other platforms the
  // test is a no-op.
  //
  // Returns the number of inputs whose conversion differed from the
  // expectation (always 0 when _WIN32 is not defined).
  static int testToWindowsExtendedPath()
  {
  #ifdef _WIN32
    struct PathCase
    {
      const char* input;       // narrow path handed to the converter
      const wchar_t* expected; // wide path the converter must return
    };
    static const PathCase cases[] = {
      { "L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
        L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      { "L:/Local Mojo/Hex Power Pack/Iffy Voodoo",
        L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      { "\\\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
        L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      { "//Foo/Local Mojo/Hex Power Pack/Iffy Voodoo",
        L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
      { "//", L"//" },
      { "\\\\.\\", L"\\\\.\\" },
      { "\\\\.\\X", L"\\\\.\\X" },
      { "\\\\.\\X:", L"\\\\?\\X:" },
      { "\\\\.\\X:\\", L"\\\\?\\X:\\" },
      { "NUL", L"\\\\.\\NUL" }
    };

    int failures = 0;
    for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); ++k) {
      if (kwsys::Encoding::ToWindowsExtendedPath(cases[k].input) !=
          cases[k].expected) {
        std::cout << "Problem with ToWindowsExtendedPath "
                  << "\"" << cases[k].input << "\"" << std::endl;
        ++failures;
      }
    }
    return failures;
  #else
    return 0;
  #endif
  }
  231. int testEncoding(int, char* [])
  232. {
  233. const char* loc = setlocale(LC_ALL, "");
  234. if (loc) {
  235. std::cout << "Locale: " << loc << std::endl;
  236. } else {
  237. std::cout << "Locale: None" << std::endl;
  238. }
  239. int ret = 0;
  240. ret |= testHelloWorldEncoding();
  241. ret |= testRobustEncoding();
  242. ret |= testCommandLineArguments();
  243. ret |= testWithNulls();
  244. ret |= testToWindowsExtendedPath();
  245. return ret;
  246. }