testEncoding.cxx 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. /*============================================================================
  2. KWSys - Kitware System Library
  3. Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
  4. Distributed under the OSI-approved BSD License (the "License");
  5. see accompanying file Copyright.txt for details.
  6. This software is distributed WITHOUT ANY WARRANTY; without even the
  7. implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  8. See the License for more information.
  9. ============================================================================*/
  10. #include "kwsysPrivate.h"
  11. #if defined(_MSC_VER)
  12. # pragma warning (disable:4786)
  13. #endif
  14. #include KWSYS_HEADER(Encoding.hxx)
  15. #include KWSYS_HEADER(Encoding.h)
  16. #include <iostream>
  17. #include <locale.h>
  18. #include <string.h>
  19. #include <stdlib.h>
  20. // Work-around CMake dependency scanning limitation. This must
  21. // duplicate the above list of headers.
  22. #if 0
  23. # include "Encoding.hxx.in"
  24. # include "Encoding.h.in"
  25. #endif
  26. //----------------------------------------------------------------------------
  // Table of "hello world" style greetings stored as NUL-terminated byte
  // strings (non-ASCII entries are spelled out as UTF-8 byte sequences).
  // Each row holds at most 32 bytes including the terminator.  The final
  // row starts with a 0 byte and acts as the end-of-table sentinel.
  static const unsigned char helloWorldStrings[][32] =
  {
    // English
    {'H','e','l','l','o',' ','W','o','r','l','d',0},

    // Japanese
    {0xE3, 0x81, 0x93,  0xE3, 0x82, 0x93,  0xE3, 0x81, 0xAB,
     0xE3, 0x81, 0xA1,  0xE3, 0x81, 0xAF,  0xE4, 0xB8, 0x96,
     0xE7, 0x95, 0x8C,  0},

    // Arabic
    {0xD9, 0x85,  0xD8, 0xB1,  0xD8, 0xAD,  0xD8, 0xA8,  0xD8, 0xA7,
     0x20,
     0xD8, 0xA7,  0xD9, 0x84,  0xD8, 0xB9,  0xD8, 0xA7,  0xD9, 0x84,
     0xD9, 0x85,  0},

    // Yiddish
    {0xD7, 0x94,  0xD7, 0xA2,  0xD7, 0x9C,  0xD7, 0x90,
     0x20,
     0xD7, 0x95,  0xD7, 0x95,  0xD7, 0xA2,  0xD7, 0x9C,  0xD7, 0x98,
     0},

    // Russian
    {0xD0, 0xBF,  0xD1, 0x80,  0xD0, 0xB8,  0xD0, 0xB2,  0xD0, 0xB5,
     0xD1, 0x82,
     0x20,
     0xD0, 0xBC,  0xD0, 0xB8,  0xD1, 0x80,  0},

    // Latin
    {'M','u','n','d','u','s',' ','s','a','l','v','e',0},

    // Swahili
    {'h','u','j','a','m','b','o',' ','D','u','n','i','a',0},

    // Icelandic (0xC3 0xB3 is the only non-ASCII character)
    {'H','a','l','l', 0xC3, 0xB3, ' ','h','e','i','m','u','r',0},

    // Sentinel: first byte 0 terminates iteration over the table.
    {0}
  };
  56. //----------------------------------------------------------------------------
  57. static int testHelloWorldEncoding()
  58. {
  59. int ret = 0;
  60. for(int i=0; helloWorldStrings[i][0] != 0; i++)
  61. {
  62. std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
  63. std::cout << str << std::endl;
  64. std::wstring wstr = kwsys::Encoding::ToWide(str);
  65. std::string str2 = kwsys::Encoding::ToNarrow(wstr);
  66. wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
  67. char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
  68. if(!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str())))
  69. {
  70. std::cout << "converted string was different: " << str2 << std::endl;
  71. std::cout << "converted string was different: " << c_str2 << std::endl;
  72. ret++;
  73. }
  74. free(c_wstr);
  75. free(c_str2);
  76. }
  77. return ret;
  78. }
  79. static int testRobustEncoding()
  80. {
  81. // test that the conversion functions handle invalid
  82. // unicode correctly/gracefully
  83. int ret = 0;
  84. char cstr[] = {(char)-1, 0};
  85. // this conversion could fail
  86. std::wstring wstr = kwsys::Encoding::ToWide(cstr);
  87. wstr = kwsys::Encoding::ToWide(NULL);
  88. if(wstr != L"")
  89. {
  90. const wchar_t* wcstr = wstr.c_str();
  91. std::cout << "ToWide(NULL) returned";
  92. for(size_t i=0; i<wstr.size(); i++)
  93. {
  94. std::cout << " " << std::hex << (int)wcstr[i];
  95. }
  96. std::cout << std::endl;
  97. ret++;
  98. }
  99. wstr = kwsys::Encoding::ToWide("");
  100. if(wstr != L"")
  101. {
  102. const wchar_t* wcstr = wstr.c_str();
  103. std::cout << "ToWide(\"\") returned";
  104. for(size_t i=0; i<wstr.size(); i++)
  105. {
  106. std::cout << " " << std::hex << (int)wcstr[i];
  107. }
  108. std::cout << std::endl;
  109. ret++;
  110. }
  111. #ifdef _WIN32
  112. // 16 bit wchar_t - we make an invalid surrogate pair
  113. wchar_t cwstr[] = {0xD801, 0xDA00, 0};
  114. // this conversion could fail
  115. std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
  116. #endif
  117. std::string str = kwsys::Encoding::ToNarrow(NULL);
  118. if(str != "")
  119. {
  120. std::cout << "ToNarrow(NULL) returned " << str << std::endl;
  121. ret++;
  122. }
  123. str = kwsys::Encoding::ToNarrow(L"");
  124. if(wstr != L"")
  125. {
  126. std::cout << "ToNarrow(\"\") returned " << str << std::endl;
  127. ret++;
  128. }
  129. return ret;
  130. }
  131. static int testCommandLineArguments()
  132. {
  133. int status = 0;
  134. char const* argv[2] = {
  135. "./app.exe",
  136. (char const*)helloWorldStrings[1]
  137. };
  138. kwsys::Encoding::CommandLineArguments args(2, argv);
  139. kwsys::Encoding::CommandLineArguments arg2 =
  140. kwsys::Encoding::CommandLineArguments(args);
  141. char const* const* u8_argv = args.argv();
  142. for(int i=0; i<args.argc(); i++)
  143. {
  144. char const* u8_arg = u8_argv[i];
  145. if(strcmp(argv[i], u8_arg) != 0)
  146. {
  147. std::cout << "argv[" << i << "] " << argv[i] << " != "
  148. << u8_arg << std::endl;
  149. status++;
  150. }
  151. }
  152. kwsys::Encoding::CommandLineArguments args3 =
  153. kwsys::Encoding::CommandLineArguments::Main(2, argv);
  154. return status;
  155. }
  156. //----------------------------------------------------------------------------
  157. int testEncoding(int, char*[])
  158. {
  159. const char* loc = setlocale(LC_ALL, "");
  160. if(loc)
  161. {
  162. std::cout << "Locale: " << loc << std::endl;
  163. }
  164. else
  165. {
  166. std::cout << "Locale: None" << std::endl;
  167. }
  168. int ret = 0;
  169. ret |= testHelloWorldEncoding();
  170. ret |= testRobustEncoding();
  171. ret |= testCommandLineArguments();
  172. return ret;
  173. }