testUTF8.cxx 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. /*============================================================================
  2. CMake - Cross Platform Makefile Generator
  3. Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
  4. Distributed under the OSI-approved BSD License (the "License");
  5. see accompanying file Copyright.txt for details.
  6. This software is distributed WITHOUT ANY WARRANTY; without even the
  7. implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  8. See the License for more information.
  9. ============================================================================*/
  10. #include <cm_utf8.h>
  11. #include <stdio.h>
  12. #include <string.h>
  /* Fixed-size buffer for one test sequence: room for four encoded bytes
     plus a terminating NUL.  */
  typedef char test_utf8_char[5];

  /* Print the first four bytes of `c` to stdout in the form
     "[0xAA,0xBB,0xCC,0xDD]".  No trailing newline is written so callers
     can append their own text on the same line.  */
  static void test_utf8_char_print(test_utf8_char const c)
  {
    /* Read the bytes as unsigned so values >= 0x80 are not sign-extended
       when they are widened for printf.  */
    unsigned char const* d = reinterpret_cast<unsigned char const*>(c);
    /* The %X conversion consumes an unsigned int, so convert each byte to
       exactly that type instead of int.  */
    printf("[0x%02X,0x%02X,0x%02X,0x%02X]", static_cast<unsigned int>(d[0]),
           static_cast<unsigned int>(d[1]), static_cast<unsigned int>(d[2]),
           static_cast<unsigned int>(d[3]));
  }
  20. struct test_utf8_entry
  21. {
  22. int n;
  23. test_utf8_char str;
  24. unsigned int chr;
  25. };
  26. static test_utf8_entry const good_entry[] = {
  27. { 1, "\x20\x00\x00\x00", 0x0020 }, /* Space. */
  28. { 2, "\xC2\xA9\x00\x00", 0x00A9 }, /* Copyright. */
  29. { 3, "\xE2\x80\x98\x00", 0x2018 }, /* Open-single-quote. */
  30. { 3, "\xE2\x80\x99\x00", 0x2019 }, /* Close-single-quote. */
  31. { 4, "\xF0\xA3\x8E\xB4", 0x233B4 }, /* Example from RFC 3629. */
  32. { 0, { 0, 0, 0, 0, 0 }, 0 }
  33. };
  34. static test_utf8_char const bad_chars[] = {
  35. "\x80\x00\x00\x00", "\xC0\x00\x00\x00", "\xE0\x00\x00\x00",
  36. "\xE0\x80\x80\x00", "\xF0\x80\x80\x80", { 0, 0, 0, 0, 0 }
  37. };
  38. static void report_good(bool passed, test_utf8_char const c)
  39. {
  40. printf("%s: decoding good ", passed ? "pass" : "FAIL");
  41. test_utf8_char_print(c);
  42. printf(" (%s) ", c);
  43. }
  44. static void report_bad(bool passed, test_utf8_char const c)
  45. {
  46. printf("%s: decoding bad ", passed ? "pass" : "FAIL");
  47. test_utf8_char_print(c);
  48. printf(" ");
  49. }
  50. static bool decode_good(test_utf8_entry const entry)
  51. {
  52. unsigned int uc;
  53. if (const char* e =
  54. cm_utf8_decode_character(entry.str, entry.str + 4, &uc)) {
  55. int used = static_cast<int>(e - entry.str);
  56. if (uc != entry.chr) {
  57. report_good(false, entry.str);
  58. printf("expected 0x%04X, got 0x%04X\n", entry.chr, uc);
  59. return false;
  60. }
  61. if (used != entry.n) {
  62. report_good(false, entry.str);
  63. printf("had %d bytes, used %d\n", entry.n, used);
  64. return false;
  65. }
  66. report_good(true, entry.str);
  67. printf("got 0x%04X\n", uc);
  68. return true;
  69. }
  70. report_good(false, entry.str);
  71. printf("failed\n");
  72. return false;
  73. }
  74. static bool decode_bad(test_utf8_char const s)
  75. {
  76. unsigned int uc = 0xFFFFu;
  77. const char* e = cm_utf8_decode_character(s, s + 4, &uc);
  78. if (e) {
  79. report_bad(false, s);
  80. printf("expected failure, got 0x%04X\n", uc);
  81. return false;
  82. }
  83. report_bad(true, s);
  84. printf("failed as expected\n");
  85. return true;
  86. }
  87. int testUTF8(int /*unused*/, char* /*unused*/ [])
  88. {
  89. int result = 0;
  90. for (test_utf8_entry const* e = good_entry; e->n; ++e) {
  91. if (!decode_good(*e)) {
  92. result = 1;
  93. }
  94. }
  95. for (test_utf8_char const* c = bad_chars; (*c)[0]; ++c) {
  96. if (!decode_bad(*c)) {
  97. result = 1;
  98. }
  99. }
  100. return result;
  101. }