subuniutil.cpp 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #include "subuniutil.h"
  2. // Copied: 2-8-2005
  3. // From: secuniutil.c
  4. unsigned long
  5. utf8getcc( const char** src )
  6. {
  7. register unsigned long c;
  8. register const unsigned char* s = (const unsigned char*)*src;
  9. switch (UTF8len [(*s >> 2) & 0x3F]) {
  10. case 0: /* erroneous: s points to the middle of a character. */
  11. c = (*s++) & 0x3F; goto more5;
  12. case 1: c = (*s++); break;
  13. case 2: c = (*s++) & 0x1F; goto more1;
  14. case 3: c = (*s++) & 0x0F; goto more2;
  15. case 4: c = (*s++) & 0x07; goto more3;
  16. case 5: c = (*s++) & 0x03; goto more4;
  17. case 6: c = (*s++) & 0x01; goto more5;
  18. more5: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  19. more4: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  20. more3: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  21. more2: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  22. more1: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  23. break;
  24. }
  25. *src = (const char*)s;
  26. return c;
  27. }
  28. //
  /*
   * Convert the 0-terminated UTF-8 string 'buf' to 0-terminated UCS-2 and
   * write the result into 'uni', truncated (if necessary) to fit in
   * uni[0 .. inUnilen-1].
   *
   * Parameters:
   *   buf      - UTF-8 input; decoded one character at a time by utf8getcc().
   *   uni      - destination array with room for at least inUnilen elements.
   *   inUnilen - capacity of 'uni' in wchar_t elements (NOT bytes).
   *
   * Returns 'uni'.  When buf or uni is NULL, or inUnilen is not positive,
   * nothing is written and 'uni' is returned unchanged.
   *
   * Decoded values of 0xFFFE and above are stored as U+FFFD (REPLACEMENT
   * CHARACTER).
   *
   * XXX This function should be named UTF8ToUnicode.
   * XXX inUnilen should be size_t, not int.
   */
  wchar_t *
  ASCIIToUnicode( const char *buf, wchar_t *uni, int inUnilen )
  {
      size_t unilen;
      size_t i;

      /* Reject a non-positive capacity before converting it to size_t: a
         negative value would otherwise turn into a huge unsigned bound. */
      if (inUnilen <= 0 || !buf || !uni) {
          return uni;
      }
      unilen = (size_t)inUnilen;

      for (i = 0; i < unilen; ++i) {
          unsigned long c = utf8getcc( &buf );
          if (c >= 0xfffeUL) {
              c = 0xfffdUL; /* REPLACEMENT CHARACTER */
          }
          uni[i] = (wchar_t)c;
          if (uni[i] == 0) {
              break;      /* terminator copied; conversion complete */
          }
      }

      /* The loop filled the whole array without reaching the terminator:
         overwrite the last element so the result is still 0-terminated. */
      if (i >= unilen) {
          uni[unilen - 1] = 0;
      }
      return uni;
  }
  /*
   * Convert the 0-terminated UTF-8 string 'buf' to a newly allocated,
   * 0-terminated wide string.
   *
   * The conversion goes through a fixed 1024-element stack buffer, so the
   * result holds at most 1023 characters plus the terminator; longer input
   * is truncated.  A NULL 'buf' yields an empty string.
   *
   * Returns the pointer produced by _wcsdup(): the caller owns it and must
   * release it with free().  The result is NULL if the allocation fails.
   */
  wchar_t *
  StrToUnicode( const char *buf )
  {
      wchar_t unibuf[1024];

      /* ASCIIToUnicode() leaves the buffer untouched when buf is NULL, so
         make sure it always holds a valid (empty) string. */
      unibuf[0] = 0;

      /* Pass the capacity in elements, not bytes: sizeof(unibuf) alone is
         1024 * sizeof(wchar_t) and would let the conversion write past the
         end of the array. */
      ASCIIToUnicode( buf, unibuf, (int)(sizeof(unibuf) / sizeof(unibuf[0])) );
      return _wcsdup( unibuf );
  }
  57. // End Copy