Codepoint.cs 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. using System.Runtime.CompilerServices;
  2. using Avalonia.Utilities;
  namespace Avalonia.Media.TextFormatting.Unicode
  {
      /// <summary>
      /// An immutable wrapper around a single Unicode codepoint value that exposes
      /// the properties looked up through <c>UnicodeData</c> for that value.
      /// </summary>
      public readonly struct Codepoint
      {
          private readonly uint _value;

          /// <summary>
          /// The replacement codepoint (U+FFFD) that is used for non supported values.
          /// </summary>
          public static readonly Codepoint ReplacementCodepoint = new Codepoint('\uFFFD');

          /// <summary>
          /// Initializes a new <see cref="Codepoint"/> that wraps <paramref name="value"/>.
          /// </summary>
          /// <param name="value">The raw codepoint value. It is stored as-is, without validation.</param>
          public Codepoint(uint value)
          {
              _value = value;
          }

          /// <summary>
          /// Gets the codepoint's value.
          /// </summary>
          public uint Value => _value;

          /// <summary>
          /// Gets the <see cref="Unicode.GeneralCategory"/>.
          /// </summary>
          public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(_value);

          /// <summary>
          /// Gets the <see cref="Unicode.Script"/>.
          /// </summary>
          public Script Script => UnicodeData.GetScript(_value);

          /// <summary>
          /// Gets the <see cref="Unicode.BidiClass"/>.
          /// </summary>
          public BidiClass BiDiClass => UnicodeData.GetBiDiClass(_value);

          /// <summary>
          /// Gets the <see cref="Unicode.BidiPairedBracketType"/>.
          /// </summary>
          public BidiPairedBracketType PairedBracketType => UnicodeData.GetBiDiPairedBracketType(_value);

          /// <summary>
          /// Gets the <see cref="Unicode.LineBreakClass"/>.
          /// </summary>
          public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(_value);

          /// <summary>
          /// Gets the <see cref="Unicode.GraphemeBreakClass"/>.
          /// </summary>
          public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(_value);

          /// <summary>
          /// Gets a value indicating whether this <see cref="Codepoint"/> is a break char.
          /// </summary>
          /// <value>
          /// <c>true</c> if the value is one of U+000A, U+000B, U+000C, U+000D, U+0085,
          /// U+2028 or U+2029; otherwise, <c>false</c>.
          /// </value>
          public bool IsBreakChar
          {
              get
              {
                  switch (_value)
                  {
                      case '\u000A':
                      case '\u000B':
                      case '\u000C':
                      case '\u000D':
                      case '\u0085':
                      case '\u2028':
                      case '\u2029':
                          return true;
                      default:
                          return false;
                  }
              }
          }

          /// <summary>
          /// Gets a value indicating whether this <see cref="Codepoint"/> is treated as white space.
          /// </summary>
          /// <value>
          /// <c>true</c> if <see cref="GeneralCategory"/> is Control, NonspacingMark, Format,
          /// SpaceSeparator or SpacingMark; otherwise, <c>false</c>.
          /// </value>
          public bool IsWhiteSpace
          {
              get
              {
                  // NOTE(review): this is broader than a plain "space separator" test because
                  // the mark and format categories are included - presumably intentional; confirm
                  // against callers before narrowing it.
                  switch (GeneralCategory)
                  {
                      case GeneralCategory.Control:
                      case GeneralCategory.NonspacingMark:
                      case GeneralCategory.Format:
                      case GeneralCategory.SpaceSeparator:
                      case GeneralCategory.SpacingMark:
                          return true;
                      default:
                          return false;
                  }
              }
          }

          /// <summary>
          /// Gets the canonical representation of a given codepoint.
          /// <see href="http://www.unicode.org/L2/L2013/13123-norm-and-bpa.pdf"/>
          /// </summary>
          /// <param name="codePoint">The code point to be mapped.</param>
          /// <returns>
          /// U+2329 for U+3008, U+232A for U+3009; any other <paramref name="codePoint"/>
          /// is returned unchanged.
          /// </returns>
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          internal static Codepoint GetCanonicalType(Codepoint codePoint)
          {
              switch (codePoint._value)
              {
                  case 0x3008:
                      return new Codepoint(0x2329);
                  case 0x3009:
                      return new Codepoint(0x232A);
                  default:
                      return codePoint;
              }
          }

          /// <summary>
          /// Gets the codepoint representing the bracket pairing for this instance.
          /// </summary>
          /// <param name="codepoint">
          /// When this method returns <see langword="true"/>, contains the codepoint representing
          /// the bracket pairing for this instance; otherwise, the default <see cref="Codepoint"/>.
          /// This parameter is passed uninitialized.
          /// </param>
          /// <returns><see langword="true"/> if this instance has a bracket pairing; otherwise, <see langword="false"/>.</returns>
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          public bool TryGetPairedBracket(out Codepoint codepoint)
          {
              if (PairedBracketType == BidiPairedBracketType.None)
              {
                  codepoint = default;
                  return false;
              }

              codepoint = UnicodeData.GetBiDiPairedBracket(_value);
              return true;
          }

          /// <summary>
          /// Converts a <see cref="Codepoint"/> to its value cast to <see cref="int"/>.
          /// </summary>
          /// <param name="codepoint">The codepoint to convert.</param>
          public static implicit operator int(Codepoint codepoint)
          {
              return (int)codepoint._value;
          }

          /// <summary>
          /// Converts a <see cref="Codepoint"/> to its <see cref="uint"/> value.
          /// </summary>
          /// <param name="codepoint">The codepoint to convert.</param>
          public static implicit operator uint(Codepoint codepoint)
          {
              return codepoint._value;
          }

          /// <summary>
          /// Reads the <see cref="Codepoint"/> at the specified position.
          /// </summary>
          /// <param name="text">The buffer to read from.</param>
          /// <param name="index">The index to read at.</param>
          /// <param name="count">
          /// The count of characters that make up the returned codepoint: 2 when a valid
          /// surrogate pair was decoded, otherwise 1. When <paramref name="index"/> addresses
          /// the low surrogate of a pair, the pair starts at <c>index - 1</c>.
          /// </param>
          /// <returns>
          /// The decoded codepoint, or <see cref="ReplacementCodepoint"/> when
          /// <paramref name="index"/> is outside of <paramref name="text"/> or addresses
          /// a surrogate that is not part of a valid pair.
          /// </returns>
          public static Codepoint ReadAt(ReadOnlySlice<char> text, int index, out int count)
          {
              count = 1;

              // Treat a negative index like an index past the end instead of letting it
              // reach the indexer.
              if (index < 0 || index >= text.Length)
              {
                  return ReplacementCodepoint;
              }

              var code = text[index];

              if (char.IsHighSurrogate(code))
              {
                  // A high surrogate is only valid when directly followed by a low surrogate.
                  if (index + 1 < text.Length)
                  {
                      var low = text[index + 1];

                      if (char.IsLowSurrogate(low))
                      {
                          count = 2;
                          return FromSurrogatePair(code, low);
                      }
                  }

                  return ReplacementCodepoint;
              }

              if (char.IsLowSurrogate(code))
              {
                  // The index points at the second half of a pair: look back for the high surrogate.
                  if (index > 0)
                  {
                      var hi = text[index - 1];

                      if (char.IsHighSurrogate(hi))
                      {
                          count = 2;
                          return FromSurrogatePair(hi, code);
                      }
                  }

                  return ReplacementCodepoint;
              }

              return new Codepoint(code);
          }

          /// <summary>
          /// Returns <see langword="true"/> if <paramref name="cp"/> is between
          /// <paramref name="lowerBound"/> and <paramref name="upperBound"/>, inclusive.
          /// </summary>
          /// <param name="cp">The codepoint to test.</param>
          /// <param name="lowerBound">The inclusive lower bound of the range.</param>
          /// <param name="upperBound">The inclusive upper bound of the range.</param>
          /// <returns><see langword="true"/> if the value lies within the range; otherwise, <see langword="false"/>.</returns>
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          public static bool IsInRangeInclusive(Codepoint cp, uint lowerBound, uint upperBound)
          {
              // Single unsigned comparison: a value below lowerBound wraps around to a large
              // number (in the default unchecked context) and therefore fails the test.
              return (cp._value - lowerBound) <= (upperBound - lowerBound);
          }

          /// <summary>
          /// Combines an already validated UTF-16 surrogate pair into a single codepoint.
          /// </summary>
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          private static Codepoint FromSurrogatePair(char hi, char low)
          {
              return new Codepoint((uint)((hi - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000));
          }
      }
  }