subuniutil.cpp 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. /** BEGIN COPYRIGHT BLOCK
  2. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  3. * Copyright (C) 2005 Red Hat, Inc.
  4. * All rights reserved.
  5. * END COPYRIGHT BLOCK **/
  6. #include "subuniutil.h"
  7. // Copied: 2-8-2005
  8. // From: secuniutil.c
  9. unsigned long
  10. utf8getcc( const char** src )
  11. {
  12. register unsigned long c;
  13. register const unsigned char* s = (const unsigned char*)*src;
  14. switch (UTF8len [(*s >> 2) & 0x3F]) {
  15. case 0: /* erroneous: s points to the middle of a character. */
  16. c = (*s++) & 0x3F; goto more5;
  17. case 1: c = (*s++); break;
  18. case 2: c = (*s++) & 0x1F; goto more1;
  19. case 3: c = (*s++) & 0x0F; goto more2;
  20. case 4: c = (*s++) & 0x07; goto more3;
  21. case 5: c = (*s++) & 0x03; goto more4;
  22. case 6: c = (*s++) & 0x01; goto more5;
  23. more5: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  24. more4: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  25. more3: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  26. more2: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  27. more1: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
  28. break;
  29. }
  30. *src = (const char*)s;
  31. return c;
  32. }
  33. //
  /*
   * Convert the 0-terminated UTF-8 string 'buf' to a 0-terminated wide
   * string, written into 'uni' and truncated (if necessary) to fit in
   * elements 0..inUnilen-1.
   *
   * buf:      0-terminated UTF-8 input.
   * uni:      output array with room for at least inUnilen wchar_t elements.
   * inUnilen: capacity of 'uni' in wchar_t ELEMENTS (not bytes).
   *
   * Returns 'uni'.  If buf or uni is NULL, or inUnilen is not positive,
   * nothing is written.
   *
   * Every decoded value at or above 0xFFFE is replaced by U+FFFD
   * (REPLACEMENT CHARACTER), so each stored element fits in 16 bits.
   */
  /* XXX This function should be named UTF8ToUnicode */
  /* XXX unilen should be size_t, not int */
  wchar_t *
  ASCIIToUnicode( const char *buf, wchar_t *uni, int inUnilen )
  {
      size_t unilen;
      size_t i;

      /* Reject non-positive lengths before converting to size_t: a negative
         value would otherwise become a huge unsigned bound. */
      if (inUnilen <= 0 || !buf || !uni) {
          return uni;
      }
      unilen = (size_t)inUnilen;

      for (i = 0; i < unilen; ++i) {
          unsigned long c = utf8getcc( &buf );
          if (c >= 0xfffeUL) c = 0xfffdUL; /* REPLACEMENT CHARACTER */
          uni[i] = (wchar_t)c;
          if (uni[i] == 0) {
              return uni; /* terminator copied; output is complete */
          }
      }

      /* Output filled up before the input ended: truncate. */
      uni[unilen - 1] = 0;
      return uni;
  }
  /*
   * Convert the 0-terminated UTF-8 string 'buf' to a newly allocated
   * 0-terminated wide string.
   *
   * The conversion goes through a fixed 1024-element stack buffer, so the
   * result is truncated to at most 1023 characters plus the terminator.
   * A NULL 'buf' yields a copy of the empty string.
   *
   * Returns the pointer produced by _wcsdup(); the caller owns it and must
   * release it with free().  The result is NULL if _wcsdup() fails.
   */
  wchar_t *
  StrToUnicode( const char *buf )
  {
      wchar_t unibuf[1024];

      /* ASCIIToUnicode writes nothing when buf is NULL; make sure the
         buffer is a valid (empty) string in that case. */
      unibuf[0] = 0;

      /* Pass the capacity in elements, not bytes: sizeof(unibuf) alone is
         sizeof(wchar_t) times too large and allows writes past the array. */
      ASCIIToUnicode( buf, unibuf, (int)(sizeof(unibuf) / sizeof(unibuf[0])) );
      return _wcsdup( unibuf );
  }
  62. // End Copy