Fzutf8.cpp 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. // MP: Renamed to avoid clashes with utf8.c from PuTTY
  2. #include "stdafx.h"
  3. #include "utf8.h"
  4. // Check for valid UTF-8 string. Code taken from the examples in RFC 2640
  5. int utf8_valid(const unsigned char *buf, unsigned int len)
  6. {
  7. const unsigned char *endbuf = buf + len;
  8. unsigned char byte2mask=0x00, c;
  9. int trailing = 0; // trailing (continuation) bytes to follow
  10. while (buf != endbuf)
  11. {
  12. c = *buf++;
  13. if (trailing)
  14. if ((c&0xC0) == 0x80) // Does trailing byte follow UTF-8 format?
  15. {if (byte2mask) // Need to check 2nd byte for proper range?
  16. if (c&byte2mask) // Are appropriate bits set?
  17. byte2mask=0x00;
  18. else
  19. return 0;
  20. trailing--; }
  21. else
  22. return 0;
  23. else
  24. if ((c&0x80) == 0x00) continue; // valid 1 byte UTF-8
  25. else if ((c&0xE0) == 0xC0) // valid 2 byte UTF-8
  26. if (c&0x1E) // Is UTF-8 byte in
  27. // proper range?
  28. trailing =1;
  29. else
  30. return 0;
  31. else if ((c&0xF0) == 0xE0) // valid 3 byte UTF-8
  32. {if (!(c&0x0F)) // Is UTF-8 byte in
  33. // proper range?
  34. byte2mask=0x20; // If not set mask
  35. // to check next byte
  36. trailing = 2;}
  37. else if ((c&0xF8) == 0xF0) // valid 4 byte UTF-8
  38. {if (!(c&0x07)) // Is UTF-8 byte in
  39. // proper range?
  40. byte2mask=0x30; // If not set mask
  41. // to check next byte
  42. trailing = 3;}
  43. else if ((c&0xFC) == 0xF8) // valid 5 byte UTF-8
  44. {if (!(c&0x03)) // Is UTF-8 byte in
  45. // proper range?
  46. byte2mask=0x38; // If not set mask
  47. // to check next byte
  48. trailing = 4;}
  49. else if ((c&0xFE) == 0xFC) // valid 6 byte UTF-8
  50. {if (!(c&0x01)) // Is UTF-8 byte in
  51. // proper range?
  52. byte2mask=0x3C; // If not set mask
  53. // to check next byte
  54. trailing = 5;}
  55. else return 0;
  56. }
  57. return trailing == 0;
  58. }