find-font-iconv.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. #include <iconv.h>
  2. #include <errno.h>
  3. #include "find-font.h"
  /*
   * One row of the Macintosh-platform lookup table: pairs an encoding ID and
   * a language ID with the character-set name used as the iconv source
   * charset for that combination.
   *
   * The ID fields use uint16_t so they have the same type as the arguments
   * get_mac_code() compares them against.
   */
  struct mac_font_mapping {
      uint16_t encoding_id;  /* encoding ID this row applies to */
      uint16_t language_id;  /* exact language ID, or TT_MAC_LANGID_ANY */
      const char *code_page; /* charset name returned for a matching row */
  };
  9. #define TT_MAC_LANGID_ANY 0xFFFF
  10. static const struct mac_font_mapping mac_codes[] = {
  11. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_ENGLISH, "macintosh"},
  12. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_ICELANDIC,"x-mac-icelandic"},
  13. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_TURKISH, "x-mac-ce"},
  14. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_POLISH, "x-mac-ce"},
  15. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_ROMANIAN, "x-mac-romanian"},
  16. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_CZECH, "x-mac-ce"},
  17. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_SLOVAK, "x-mac-ce"},
  18. {TT_MAC_ID_ROMAN, TT_MAC_LANGID_ANY, "macintosh"},
  19. {TT_MAC_ID_JAPANESE, TT_MAC_LANGID_JAPANESE, "Shift_JIS"},
  20. {TT_MAC_ID_JAPANESE, TT_MAC_LANGID_ANY, "Shift_JIS"},
  21. {TT_MAC_ID_KOREAN, TT_MAC_LANGID_KOREAN, "EUC-KR"},
  22. {TT_MAC_ID_KOREAN, TT_MAC_LANGID_ANY, "EUC-KR"},
  23. {TT_MAC_ID_ARABIC, TT_MAC_LANGID_ARABIC, "x-mac-arabic"},
  24. {TT_MAC_ID_ARABIC, TT_MAC_LANGID_URDU, "x-mac-farsi"},
  25. {TT_MAC_ID_ARABIC, TT_MAC_LANGID_FARSI, "x-mac-farsi"},
  26. {TT_MAC_ID_ARABIC, TT_MAC_LANGID_ANY, "x-mac-arabic"},
  27. {TT_MAC_ID_HEBREW, TT_MAC_LANGID_HEBREW, "x-mac-hebrew"},
  28. {TT_MAC_ID_HEBREW, TT_MAC_LANGID_ANY, "x-mac-hebrew"},
  29. {TT_MAC_ID_GREEK, TT_MAC_LANGID_ANY, "x-mac-greek"},
  30. {TT_MAC_ID_RUSSIAN, TT_MAC_LANGID_ANY, "x-mac-cyrillic"},
  31. {TT_MAC_ID_DEVANAGARI, TT_MAC_LANGID_ANY, "x-mac-devanagari"},
  32. {TT_MAC_ID_GURMUKHI, TT_MAC_LANGID_ANY, "x-mac-gurmukhi"},
  33. {TT_MAC_ID_GUJARATI, TT_MAC_LANGID_ANY, "x-mac-gujarati"},
  34. {
  35. TT_MAC_ID_TRADITIONAL_CHINESE,
  36. TT_MAC_LANGID_CHINESE_SIMPLIFIED,
  37. "Big5"
  38. },
  39. {
  40. TT_MAC_ID_TRADITIONAL_CHINESE,
  41. TT_MAC_LANGID_ANY,
  42. "Big5"
  43. },
  44. {
  45. TT_MAC_ID_SIMPLIFIED_CHINESE,
  46. TT_MAC_LANGID_CHINESE_SIMPLIFIED,
  47. "GB2312"
  48. },
  49. {
  50. TT_MAC_ID_SIMPLIFIED_CHINESE,
  51. TT_MAC_LANGID_ANY,
  52. "GB2312"
  53. }
  54. };
  /*
   * Charset names for ISO-platform name records, indexed by encoding ID.
   * A NULL slot means no charset is mapped for that encoding ID.
   */
  const char *iso_codes[] = {
      [0] = "us-ascii",
      [1] = NULL,
      [2] = "iso-8859-1",
  };
  /*
   * Charset names for Microsoft-platform name records, indexed by encoding
   * ID. A NULL slot means no charset is mapped for that encoding ID; slots
   * 5 through 9 are not listed and are therefore NULL as well.
   */
  const char *ms_codes[] = {
      [0] = "UTF-16BE",
      [1] = "UTF-16BE",
      [2] = "Shift_JIS",
      [3] = NULL,
      [4] = "Big5",
      [10] = "UTF-16BE",
  };
  73. static const size_t mac_code_count = sizeof(mac_codes) / sizeof(mac_codes[0]);
  74. static const size_t iso_code_count = sizeof(iso_codes) / sizeof(iso_codes[0]);
  75. static const size_t ms_code_count = sizeof(ms_codes) / sizeof(ms_codes[0]);
  76. static const char *get_mac_code(uint16_t encoding_id, uint16_t language_id)
  77. {
  78. for (size_t i = 0; i < mac_code_count; i++) {
  79. const struct mac_font_mapping *mac_code = &mac_codes[i];
  80. if (mac_code->encoding_id == encoding_id &&
  81. mac_code->language_id == language_id)
  82. return mac_code->code_page;
  83. }
  84. return NULL;
  85. }
  86. static const char *get_code_page_for_font(uint16_t platform_id,
  87. uint16_t encoding_id, uint16_t language_id)
  88. {
  89. const char *ret;
  90. switch (platform_id) {
  91. case TT_PLATFORM_APPLE_UNICODE:
  92. return "UTF-16BE";
  93. case TT_PLATFORM_MACINTOSH:
  94. ret = get_mac_code(encoding_id, language_id);
  95. if (!ret)
  96. ret = get_mac_code(encoding_id, TT_MAC_LANGID_ANY);
  97. return ret;
  98. case TT_PLATFORM_ISO:
  99. if (encoding_id < iso_code_count)
  100. return iso_codes[encoding_id];
  101. break;
  102. case TT_PLATFORM_MICROSOFT:
  103. if (encoding_id < ms_code_count)
  104. return ms_codes[encoding_id];
  105. break;
  106. }
  107. return NULL;
  108. }
  109. char *sfnt_name_to_utf8(FT_SfntName *sfnt_name)
  110. {
  111. const char *charset = get_code_page_for_font(sfnt_name->platform_id,
  112. sfnt_name->encoding_id, sfnt_name->language_id);
  113. char utf8[256];
  114. char *conv_in, *conv_out;
  115. size_t in_len, out_len;
  116. if (!charset) {
  117. blog(LOG_DEBUG, "invalid character set found, "
  118. "platform_id: %d, encoding_id: %d, "
  119. "language_id: %d",
  120. sfnt_name->platform_id,
  121. sfnt_name->encoding_id,
  122. sfnt_name->language_id);
  123. return NULL;
  124. }
  125. iconv_t ic = iconv_open("UTF-8", charset);
  126. if (ic == (iconv_t)-1) {
  127. blog(LOG_DEBUG, "couldn't intialize font code page "
  128. "conversion: '%s' to 'utf-8': errno = %d",
  129. charset, (int)errno);
  130. return NULL;
  131. }
  132. conv_in = (char*)sfnt_name->string;
  133. conv_out = utf8;
  134. in_len = sfnt_name->string_len;
  135. out_len = 256;
  136. size_t n = iconv(ic, &conv_in, &in_len, &conv_out, &out_len);
  137. if (n == (size_t)-1) {
  138. blog(LOG_WARNING, "couldn't convert font name text: errno = %d",
  139. (int)errno);
  140. iconv_close(ic);
  141. return NULL;
  142. }
  143. iconv_close(ic);
  144. *conv_out = 0;
  145. return bstrdup(utf8);
  146. }