testUTF8.cxx 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. /*============================================================================
  2. CMake - Cross Platform Makefile Generator
  3. Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
  4. Distributed under the OSI-approved BSD License (the "License");
  5. see accompanying file Copyright.txt for details.
  6. This software is distributed WITHOUT ANY WARRANTY; without even the
  7. implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  8. See the License for more information.
  9. ============================================================================*/
  10. #include <cm_utf8.h>
  11. #include <string.h>
  12. #include <stdio.h>
  13. typedef char test_utf8_char[5];
  14. static void test_utf8_char_print(test_utf8_char const c)
  15. {
  16. unsigned char const* d = reinterpret_cast<unsigned char const*>(c);
  17. printf("[0x%02X,0x%02X,0x%02X,0x%02X]",
  18. (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
  19. }
  20. struct test_utf8_entry
  21. {
  22. int n;
  23. test_utf8_char str;
  24. unsigned int chr;
  25. };
  26. static test_utf8_entry const good_entry[] = {
  27. {1, "\x20\x00\x00\x00", 0x0020}, /* Space. */
  28. {2, "\xC2\xA9\x00\x00", 0x00A9}, /* Copyright. */
  29. {3, "\xE2\x80\x98\x00", 0x2018}, /* Open-single-quote. */
  30. {3, "\xE2\x80\x99\x00", 0x2019}, /* Close-single-quote. */
  31. {4, "\xF0\xA3\x8E\xB4", 0x233B4}, /* Example from RFC 3629. */
  32. {0, {0,0,0,0,0}, 0}
  33. };
  34. static test_utf8_char const bad_chars[] = {
  35. "\x80\x00\x00\x00",
  36. "\xC0\x00\x00\x00",
  37. "\xE0\x00\x00\x00",
  38. "\xE0\x80\x80\x00",
  39. "\xF0\x80\x80\x80",
  40. {0,0,0,0,0}
  41. };
  42. static void report_good(bool passed, test_utf8_char const c)
  43. {
  44. printf("%s: decoding good ", passed?"pass":"FAIL");
  45. test_utf8_char_print(c);
  46. printf(" (%s) ", c);
  47. }
  48. static void report_bad(bool passed, test_utf8_char const c)
  49. {
  50. printf("%s: decoding bad ", passed?"pass":"FAIL");
  51. test_utf8_char_print(c);
  52. printf(" ");
  53. }
  54. static bool decode_good(test_utf8_entry const entry)
  55. {
  56. unsigned int uc;
  57. if(const char* e = cm_utf8_decode_character(entry.str, entry.str+4, &uc))
  58. {
  59. int used = static_cast<int>(e-entry.str);
  60. if(uc != entry.chr)
  61. {
  62. report_good(false, entry.str);
  63. printf("expected 0x%04X, got 0x%04X\n", entry.chr, uc);
  64. return false;
  65. }
  66. if(used != entry.n)
  67. {
  68. report_good(false, entry.str);
  69. printf("had %d bytes, used %d\n", entry.n, used);
  70. return false;
  71. }
  72. report_good(true, entry.str);
  73. printf("got 0x%04X\n", uc);
  74. return true;
  75. }
  76. report_good(false, entry.str);
  77. printf("failed\n");
  78. return false;
  79. }
  80. static bool decode_bad(test_utf8_char const s)
  81. {
  82. unsigned int uc = 0xFFFFu;
  83. const char* e = cm_utf8_decode_character(s, s+4, &uc);
  84. if(e)
  85. {
  86. report_bad(false, s);
  87. printf("expected failure, got 0x%04X\n", uc);
  88. return false;
  89. }
  90. report_bad(true, s);
  91. printf("failed as expected\n");
  92. return true;
  93. }
  94. int testUTF8(int, char*[])
  95. {
  96. int result = 0;
  97. for(test_utf8_entry const* e = good_entry; e->n; ++e)
  98. {
  99. if(!decode_good(*e))
  100. {
  101. result = 1;
  102. }
  103. }
  104. for(test_utf8_char const* c = bad_chars; (*c)[0]; ++c)
  105. {
  106. if(!decode_bad(*c))
  107. {
  108. result = 1;
  109. }
  110. }
  111. return result;
  112. }