Codepoint.cs 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Runtime.CompilerServices;
  4. namespace Avalonia.Media.TextFormatting.Unicode
  5. {
  6. public readonly record struct Codepoint
  7. {
  8. private readonly uint _value;
  9. /// <summary>
  10. /// The replacement codepoint that is used for non supported values.
  11. /// </summary>
  12. public static readonly Codepoint ReplacementCodepoint = new Codepoint('\uFFFD');
  13. public Codepoint(uint value)
  14. {
  15. _value = value;
  16. }
  17. /// <summary>
  18. /// Get the codepoint's value.
  19. /// </summary>
  20. public uint Value => _value;
  21. /// <summary>
  22. /// Gets the <see cref="Unicode.GeneralCategory"/>.
  23. /// </summary>
  24. public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(_value);
  25. /// <summary>
  26. /// Gets the <see cref="Unicode.Script"/>.
  27. /// </summary>
  28. public Script Script => UnicodeData.GetScript(_value);
  29. /// <summary>
  30. /// Gets the <see cref="Unicode.BidiClass"/>.
  31. /// </summary>
  32. public BidiClass BiDiClass => UnicodeData.GetBiDiClass(_value);
  33. /// <summary>
  34. /// Gets the <see cref="Unicode.BidiPairedBracketType"/>.
  35. /// </summary>
  36. public BidiPairedBracketType PairedBracketType => UnicodeData.GetBiDiPairedBracketType(_value);
  37. /// <summary>
  38. /// Gets the <see cref="Unicode.LineBreakClass"/>.
  39. /// </summary>
  40. public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(_value);
  41. /// <summary>
  42. /// Gets the <see cref="GraphemeBreakClass"/>.
  43. /// </summary>
  44. public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(_value);
  45. /// <summary>
  46. /// Determines whether this <see cref="Codepoint"/> is a break char.
  47. /// </summary>
  48. /// <returns>
  49. /// <c>true</c> if [is break character]; otherwise, <c>false</c>.
  50. /// </returns>
  51. public bool IsBreakChar
  52. {
  53. get
  54. {
  55. switch (_value)
  56. {
  57. case '\u000A':
  58. case '\u000B':
  59. case '\u000C':
  60. case '\u000D':
  61. case '\u0085':
  62. case '\u2028':
  63. case '\u2029':
  64. return true;
  65. default:
  66. return false;
  67. }
  68. }
  69. }
  70. /// <summary>
  71. /// Determines whether this <see cref="Codepoint"/> is white space.
  72. /// </summary>
  73. /// <returns>
  74. /// <c>true</c> if [is whitespace]; otherwise, <c>false</c>.
  75. /// </returns>
  76. public bool IsWhiteSpace
  77. {
  78. get
  79. {
  80. switch (GeneralCategory)
  81. {
  82. case GeneralCategory.Control:
  83. case GeneralCategory.NonspacingMark:
  84. case GeneralCategory.Format:
  85. case GeneralCategory.SpaceSeparator:
  86. case GeneralCategory.SpacingMark:
  87. return true;
  88. }
  89. return false;
  90. }
  91. }
  92. /// <summary>
  93. /// Gets the canonical representation of a given codepoint.
  94. /// <see href="https://www.unicode.org/L2/L2013/13123-norm-and-bpa.pdf"/>
  95. /// </summary>
  96. /// <param name="codePoint">The code point to be mapped.</param>
  97. /// <returns>The mapped canonical code point, or the passed <paramref name="codePoint"/>.</returns>
  98. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  99. internal static Codepoint GetCanonicalType(Codepoint codePoint)
  100. {
  101. if (codePoint._value == 0x3008)
  102. {
  103. return new Codepoint(0x2329);
  104. }
  105. if (codePoint._value == 0x3009)
  106. {
  107. return new Codepoint(0x232A);
  108. }
  109. return codePoint;
  110. }
  111. /// <summary>
  112. /// Gets the codepoint representing the bracket pairing for this instance.
  113. /// </summary>
  114. /// <param name="codepoint">
  115. /// When this method returns, contains the codepoint representing the bracket pairing for this instance;
  116. /// otherwise, the default value for the type of the <paramref name="codepoint"/> parameter.
  117. /// This parameter is passed uninitialized.
  118. /// .</param>
  119. /// <returns><see langword="true"/> if this instance has a bracket pairing; otherwise, <see langword="false"/></returns>
  120. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  121. public bool TryGetPairedBracket(out Codepoint codepoint)
  122. {
  123. if (PairedBracketType == BidiPairedBracketType.None)
  124. {
  125. codepoint = default;
  126. return false;
  127. }
  128. codepoint = UnicodeData.GetBiDiPairedBracket(_value);
  129. return true;
  130. }
  131. public static implicit operator int(Codepoint codepoint)
  132. {
  133. return (int)codepoint._value;
  134. }
  135. public static implicit operator uint(Codepoint codepoint)
  136. {
  137. return codepoint._value;
  138. }
  139. /// <summary>
  140. /// Reads the <see cref="Codepoint"/> at specified position.
  141. /// </summary>
  142. /// <param name="text">The buffer to read from.</param>
  143. /// <param name="index">The index to read at.</param>
  144. /// <param name="count">The count of character that were read.</param>
  145. /// <returns></returns>
  146. public static Codepoint ReadAt(ReadOnlySpan<char> text, int index, out int count)
  147. {
  148. count = 1;
  149. if (index >= text.Length)
  150. {
  151. return ReplacementCodepoint;
  152. }
  153. var code = text[index];
  154. ushort hi, low;
  155. //# High surrogate
  156. if (0xD800 <= code && code <= 0xDBFF)
  157. {
  158. hi = code;
  159. if (index + 1 == text.Length)
  160. {
  161. return ReplacementCodepoint;
  162. }
  163. low = text[index + 1];
  164. if (0xDC00 <= low && low <= 0xDFFF)
  165. {
  166. count = 2;
  167. return new Codepoint((uint)((hi - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000));
  168. }
  169. return ReplacementCodepoint;
  170. }
  171. //# Low surrogate
  172. if (0xDC00 <= code && code <= 0xDFFF)
  173. {
  174. if (index == 0)
  175. {
  176. return ReplacementCodepoint;
  177. }
  178. hi = text[index - 1];
  179. low = code;
  180. if (0xD800 <= hi && hi <= 0xDBFF)
  181. {
  182. count = 2;
  183. return new Codepoint((uint)((hi - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000));
  184. }
  185. return ReplacementCodepoint;
  186. }
  187. return new Codepoint(code);
  188. }
  189. /// <summary>
  190. /// Returns <see langword="true"/> if <paramref name="cp"/> is between
  191. /// <paramref name="lowerBound"/> and <paramref name="upperBound"/>, inclusive.
  192. /// </summary>
  193. [MethodImpl(MethodImplOptions.AggressiveInlining)]
  194. public static bool IsInRangeInclusive(Codepoint cp, uint lowerBound, uint upperBound)
  195. => (cp._value - lowerBound) <= (upperBound - lowerBound);
  196. }
  197. }