testEncoding.cxx 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
  3. #include "kwsysPrivate.h"
  4. #if defined(_MSC_VER)
  5. #pragma warning(disable : 4786)
  6. #endif
  7. #include KWSYS_HEADER(Encoding.hxx)
  8. #include KWSYS_HEADER(Encoding.h)
  9. #include <iostream>
  10. #include <locale.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. // Work-around CMake dependency scanning limitation. This must
  14. // duplicate the above list of headers.
  15. #if 0
  16. #include "Encoding.h.in"
  17. #include "Encoding.hxx.in"
  18. #endif
  19. //----------------------------------------------------------------------------
  // Sample greetings stored as NUL-terminated UTF-8 byte sequences, one
  // language per row.  Each row is a fixed 32-byte buffer; bytes that are
  // not listed are zero-initialized.  The table is terminated by a row whose
  // first byte is 0.
  static const unsigned char helloWorldStrings[][32] = {
    // English: "Hello World"
    { 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', 0 },

    // Japanese (7 three-byte characters)
    { 0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1,
      0xE3, 0x81, 0xAF, 0xE4, 0xB8, 0x96, 0xE7, 0x95, 0x8C, 0 },

    // Arabic (two words separated by an ASCII space, 0x20)
    { 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7, 0x20,
      0xD8, 0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x85,
      0 },

    // Yiddish (two words separated by an ASCII space, 0x20)
    { 0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90, 0x20,
      0xD7, 0x95, 0xD7, 0x95, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x98, 0 },

    // Russian (two words separated by an ASCII space, 0x20)
    { 0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, 0xD1, 0x82,
      0x20, 0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80, 0 },

    // Latin: "Mundus salve" (pure ASCII)
    { 0x4D, 0x75, 0x6E, 0x64, 0x75, 0x73, 0x20,
      0x73, 0x61, 0x6C, 0x76, 0x65, 0 },

    // Swahili: "hujambo Dunia" (pure ASCII)
    { 0x68, 0x75, 0x6A, 0x61, 0x6D, 0x62, 0x6F, 0x20,
      0x44, 0x75, 0x6E, 0x69, 0x61, 0 },

    // Icelandic (0xC3 0xB3 is the only multi-byte character)
    { 0x48, 0x61, 0x6C, 0x6C, 0xC3, 0xB3, 0x20,
      0x68, 0x65, 0x69, 0x6D, 0x75, 0x72, 0 },

    // Sentinel row: a leading 0 byte marks the end of the table.
    { 0 }
  };
  46. //----------------------------------------------------------------------------
  47. static int testHelloWorldEncoding()
  48. {
  49. int ret = 0;
  50. for (int i = 0; helloWorldStrings[i][0] != 0; i++) {
  51. std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
  52. std::cout << str << std::endl;
  53. std::wstring wstr = kwsys::Encoding::ToWide(str);
  54. std::string str2 = kwsys::Encoding::ToNarrow(wstr);
  55. wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
  56. char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
  57. if (!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str()))) {
  58. std::cout << "converted string was different: " << str2 << std::endl;
  59. std::cout << "converted string was different: " << c_str2 << std::endl;
  60. ret++;
  61. }
  62. free(c_wstr);
  63. free(c_str2);
  64. }
  65. return ret;
  66. }
  67. static int testRobustEncoding()
  68. {
  69. // test that the conversion functions handle invalid
  70. // unicode correctly/gracefully
  71. int ret = 0;
  72. char cstr[] = { (char)-1, 0 };
  73. // this conversion could fail
  74. std::wstring wstr = kwsys::Encoding::ToWide(cstr);
  75. wstr = kwsys::Encoding::ToWide(NULL);
  76. if (wstr != L"") {
  77. const wchar_t* wcstr = wstr.c_str();
  78. std::cout << "ToWide(NULL) returned";
  79. for (size_t i = 0; i < wstr.size(); i++) {
  80. std::cout << " " << std::hex << (int)wcstr[i];
  81. }
  82. std::cout << std::endl;
  83. ret++;
  84. }
  85. wstr = kwsys::Encoding::ToWide("");
  86. if (wstr != L"") {
  87. const wchar_t* wcstr = wstr.c_str();
  88. std::cout << "ToWide(\"\") returned";
  89. for (size_t i = 0; i < wstr.size(); i++) {
  90. std::cout << " " << std::hex << (int)wcstr[i];
  91. }
  92. std::cout << std::endl;
  93. ret++;
  94. }
  95. #ifdef _WIN32
  96. // 16 bit wchar_t - we make an invalid surrogate pair
  97. wchar_t cwstr[] = { 0xD801, 0xDA00, 0 };
  98. // this conversion could fail
  99. std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
  100. #endif
  101. std::string str = kwsys::Encoding::ToNarrow(NULL);
  102. if (str != "") {
  103. std::cout << "ToNarrow(NULL) returned " << str << std::endl;
  104. ret++;
  105. }
  106. str = kwsys::Encoding::ToNarrow(L"");
  107. if (wstr != L"") {
  108. std::cout << "ToNarrow(\"\") returned " << str << std::endl;
  109. ret++;
  110. }
  111. return ret;
  112. }
  113. static int testCommandLineArguments()
  114. {
  115. int status = 0;
  116. char const* argv[2] = { "./app.exe", (char const*)helloWorldStrings[1] };
  117. kwsys::Encoding::CommandLineArguments args(2, argv);
  118. kwsys::Encoding::CommandLineArguments arg2 =
  119. kwsys::Encoding::CommandLineArguments(args);
  120. char const* const* u8_argv = args.argv();
  121. for (int i = 0; i < args.argc(); i++) {
  122. char const* u8_arg = u8_argv[i];
  123. if (strcmp(argv[i], u8_arg) != 0) {
  124. std::cout << "argv[" << i << "] " << argv[i] << " != " << u8_arg
  125. << std::endl;
  126. status++;
  127. }
  128. }
  129. kwsys::Encoding::CommandLineArguments args3 =
  130. kwsys::Encoding::CommandLineArguments::Main(2, argv);
  131. return status;
  132. }
  133. //----------------------------------------------------------------------------
  134. int testEncoding(int, char* [])
  135. {
  136. const char* loc = setlocale(LC_ALL, "");
  137. if (loc) {
  138. std::cout << "Locale: " << loc << std::endl;
  139. } else {
  140. std::cout << "Locale: None" << std::endl;
  141. }
  142. int ret = 0;
  143. ret |= testHelloWorldEncoding();
  144. ret |= testRobustEncoding();
  145. ret |= testCommandLineArguments();
  146. return ret;
  147. }