utf8.h 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. /**********************************************************************************************/
  2. /* The MIT License */
  3. /* */
  4. /* Copyright 2016-2017 Twitch Interactive, Inc. or its affiliates. All Rights Reserved. */
  5. /* */
  6. /* Permission is hereby granted, free of charge, to any person obtaining a copy */
  7. /* of this software and associated documentation files (the "Software"), to deal */
  8. /* in the Software without restriction, including without limitation the rights */
  9. /* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
  10. /* copies of the Software, and to permit persons to whom the Software is */
  11. /* furnished to do so, subject to the following conditions: */
  12. /* */
  13. /* The above copyright notice and this permission notice shall be included in */
  14. /* all copies or substantial portions of the Software. */
  15. /* */
  16. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
  17. /* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
  18. /* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
  19. /* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
  20. /* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
  21. /* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN */
  22. /* THE SOFTWARE. */
  23. /**********************************************************************************************/
  24. #ifndef LIBCAPTION_UTF8_H
  25. #define LIBCAPTION_UTF8_H
  26. #ifdef __cplusplus
  27. extern "C" {
  28. #endif
  29. #include <inttypes.h>
  30. #include <stddef.h>
  31. // These types exist to make the code more self-documenting.
  32. // A utf8_char_t pointer is a null-terminated string of UTF-8 encoded chars.
  33. //
  34. // utf8_size_t is the length of a string in chars;
  35. // size_t is a length in bytes.
  36. typedef char utf8_char_t;
  37. typedef size_t utf8_size_t;
  38. /*! \brief Skips continuation bytes (per the original note; the implementation is not in this header).
  39. \param c pointer into a UTF-8 encoded string
  40. \return a pointer into the same string - presumably the start of the next character; TODO confirm
  41. */
  42. const utf8_char_t* utf8_char_next(const utf8_char_t* c);
  43. /*! \brief Returns the length of the char in bytes.
  44. \param c pointer to the char to measure
  45. \return the length of the char in bytes
  46. */
  47. size_t utf8_char_length(const utf8_char_t* c);
  48. /*! \brief Tests whether the first character is white space.
  49. \param c pointer to the string whose first character is tested
  50. \return 1 if the first character is white space (the value returned otherwise is not stated here)
  51. */
  52. int utf8_char_whitespace(const utf8_char_t* c);
  53. /*! \brief Returns the length of the string in bytes.
  54. \param data the string to measure
  55. \param size the number of characters to count (0 to count until the null terminator)
  56. \return the length of the string in bytes
  57. */
  58. size_t utf8_string_length(const utf8_char_t* data, utf8_size_t size);
  59. /*! \brief NOTE(review): undocumented in the original; by its name, presumably copies one UTF-8 character from src to dst - confirm.
  60. \param dst destination (presumably must have room for one encoded character - TODO confirm)
  61. \param src source character; the meaning of the size_t return value is not stated here */
  62. size_t utf8_char_copy(utf8_char_t* dst, const utf8_char_t* src);
  63. /*! \brief Returns the number of UTF-8 characters in a string, given the number of bytes.
  64. \param data the string whose characters are counted
  65. \param size the number of bytes to examine; pass 0 to count until the null terminator
  66. \return the number of UTF-8 characters
  67. */
  68. utf8_size_t utf8_char_count(const char* data, size_t size);
  69. /*! \brief Returns the length of the line in bytes, trimming non-printable characters at the end.
  70. \param data the line to measure
  71. \param charcters (sic) presumably the number of characters to consider - TODO confirm
  72. \note NOTE(review): the return type is utf8_size_t (a length in chars) but the text above says bytes - confirm which applies. */
  73. utf8_size_t utf8_trimmed_length(const utf8_char_t* data, utf8_size_t charcters);
  74. /*! \brief Returns the length in bytes of the line, including the newline character(s).
  75. \details Auto-detects between Windows (CRLF), Unix (LF), Mac (CR) and RISC OS (LFCR) line endings.
  76. \param data the line to measure
  77. \return the length of the line in bytes, including its line ending
  78. */
  79. size_t utf8_line_length(const utf8_char_t* data);
  80. /*! \brief Returns the number of chars to include before a split.
  81. \param data the text to split
  82. \param size presumably the maximum number of chars allowed before the split - TODO confirm
  83. \return the number of chars to include before the split */
  84. utf8_size_t utf8_wrap_length(const utf8_char_t* data, utf8_size_t size);
  85. /*! \brief Returns the number of new lines in the string.
  86. \param data the string to scan
  87. \return the number of new lines in the string
  88. */
  89. int utf8_line_count(const utf8_char_t* data);
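  90. /*! \brief NOTE(review): by its name, utf8_load_text_file loads a text file; the implementation is not visible in this header.
  91. \param path presumably the path of the file to load - confirm
  92. \param size in/out: in is the max size, out is the size read
  93. \return a utf8_char_t pointer; the previous text ("number of new lines in the string") did not match the signature - TODO confirm
  94. */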
  /* 50 MiB. By its name, a default maximum file size (no use is visible in this
     header - TODO confirm where it applies). The definition is a parenthesized
     constant expression so the macro can be used anywhere a value is expected;
     a stray '=' and trailing ';' in the macro body would make it unusable. */
  #define UFTF_DEFAULT_MAX_FILE_SIZE (50 * 1024 * 1024)
  96. utf8_char_t* utf8_load_text_file(const char* path, size_t* size); // NOTE(review): ownership of the returned pointer is not documented in this header - confirm who frees it
  97. /*! \brief Compares 2 strings up to max len (the name suggests a bounded substring search - TODO confirm which).
  98. \param string1,string2 the two strings involved (their exact roles are not stated here)
  99. \param len the max length
  100. \note NOTE(review): the #ifndef below only skips this declaration when strnstr is defined as a macro. */
  101. #ifndef strnstr
  102. char* strnstr(const char* string1, const char* string2, size_t len);
  103. #endif
  104. #ifdef __cplusplus
  105. }
  106. #endif
  107. #endif