Browse Source

Merge pull request #5410 from Gillibald/fixes/LineBreakEnumerator

Replace LineBreakEnumerator implementation
Benedikt Stebner 4 years ago
parent
commit
6ff478bdb1

+ 0 - 56
src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs

@@ -1,56 +0,0 @@
-namespace Avalonia.Media.TextFormatting.Unicode
-{
-    internal static class BreakPairTable
-    {
-        private static readonly byte[][] s_breakPairTable = 
-            {
-             new byte[] {4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4},
-             new byte[] {0,4,4,1,1,4,4,4,4,1,1,0,0,0,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,4,4,4,4,1,1,1,1,1,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {4,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,0,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,1,0,1,1,0,0,4,2,4,1,1,1,1,1,0,1,1,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,0,1,4,4,4,0,0,1,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,0,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,4,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,1,1,1,1,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,1,0,0,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,1,1,0},
-             new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-             new byte[] {0,4,4,1,1,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
-        };
-
-        public static PairBreakType Map(LineBreakClass first, LineBreakClass second)
-        {
-            return (PairBreakType)s_breakPairTable[(int)first][(int)second];
-        }
-    }
-
-    internal enum PairBreakType : byte
-    {
-        DI = 0, // Direct break opportunity
-        IN = 1, // Indirect break opportunity
-        CI = 2, // Indirect break opportunity for combining marks
-        CP = 3, // Prohibited break for combining marks
-        PR = 4 // Prohibited break
-    }
-}

+ 15 - 12
src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs

@@ -9,37 +9,40 @@ namespace Avalonia.Media.TextFormatting.Unicode
         /// </summary>
         public static readonly Codepoint ReplacementCodepoint = new Codepoint('\uFFFD');
 
-        private readonly int _value;
-
         public Codepoint(int value)
         {
-            _value = value;
+            Value = value;
         }
 
+        /// <summary>
+        /// Gets the codepoint's value.
+        /// </summary>
+        public int Value { get; }
+
         /// <summary>
         /// Gets the <see cref="Unicode.GeneralCategory"/>.
         /// </summary>
-        public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(_value);
+        public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(Value);
 
         /// <summary>
         /// Gets the <see cref="Unicode.Script"/>.
         /// </summary>
-        public Script Script => UnicodeData.GetScript(_value);
+        public Script Script => UnicodeData.GetScript(Value);
 
         /// <summary>
         /// Gets the <see cref="Unicode.BiDiClass"/>.
         /// </summary>
-        public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(_value);
+        public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(Value);
 
         /// <summary>
         /// Gets the <see cref="Unicode.LineBreakClass"/>.
         /// </summary>
-        public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(_value);
+        public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(Value);
 
         /// <summary>
         /// Gets the <see cref="GraphemeBreakClass"/>.
         /// </summary>
-        public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(_value);
+        public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(Value);
 
         /// <summary>
         /// Determines whether this <see cref="Codepoint"/> is a break char.
@@ -51,7 +54,7 @@ namespace Avalonia.Media.TextFormatting.Unicode
         {
             get
             {
-                switch (_value)
+                switch (Value)
                 {
                     case '\u000A':
                     case '\u000B':
@@ -93,12 +96,12 @@ namespace Avalonia.Media.TextFormatting.Unicode
 
         public static implicit operator int(Codepoint codepoint)
         {
-            return codepoint._value;
+            return codepoint.Value;
         }
 
         public static implicit operator uint(Codepoint codepoint)
         {
-            return (uint)codepoint._value;
+            return (uint)codepoint.Value;
         }
 
         /// <summary>
@@ -112,7 +115,7 @@ namespace Avalonia.Media.TextFormatting.Unicode
         {
             count = 1;
 
-            if (index > text.Length)
+            if (index >= text.Length)
             {
                 return ReplacementCodepoint;
             }

+ 394 - 146
src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs

@@ -1,160 +1,460 @@
-// RichTextKit
-// Copyright © 2019 Topten Software. All Rights Reserved.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License"); you may 
-// not use this product except in compliance with the License. You may obtain 
-// a copy of the License at
-// 
-// http://www.apache.org/licenses/LICENSE-2.0
-// 
-// Unless required by applicable law or agreed to in writing, software 
-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
-// License for the specific language governing permissions and limitations 
-// under the License.
-//
-// Ported from: https://github.com/foliojs/linebreak
-// Copied from: https://github.com/toptensoftware/RichTextKit
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+// Ported from: https://github.com/SixLabors/Fonts/
 
 using Avalonia.Utilities;
 
 namespace Avalonia.Media.TextFormatting.Unicode
 {
     /// <summary>
-    /// Implementation of the Unicode Line Break Algorithm
+    /// Implementation of the Unicode Line Break Algorithm. UAX:14
+    /// <see href="https://www.unicode.org/reports/tr14/tr14-37.html"/>
     /// </summary>
     public ref struct LineBreakEnumerator
     {
-        // State
         private readonly ReadOnlySlice<char> _text;
-        private int _pos;
-        private int _lastPos;
-        private LineBreakClass? _curClass;
-        private LineBreakClass? _nextClass;
+        private int _position;
+        private int _lastPosition;
+        private LineBreakClass _currentClass;
+        private LineBreakClass _nextClass;
+        private bool _first;
+        private int _alphaNumericCount;
+        private bool _lb8a;
+        private bool _lb21a;
+        private bool _lb22ex;
+        private bool _lb24ex;
+        private bool _lb25ex;
+        private bool _lb30;
+        private int _lb30a;
+        private bool _lb31;
 
         public LineBreakEnumerator(ReadOnlySlice<char> text)
+            : this()
         {
             _text = text;
-            _pos = 0;
-            _lastPos = 0;
-            _curClass = null;
-            _nextClass = null;
-            Current = default;
+            _position = 0;
+            _currentClass = LineBreakClass.Unknown;
+            _nextClass = LineBreakClass.Unknown;
+            _first = true;
+            _lb8a = false;
+            _lb21a = false;
+            _lb22ex = false;
+            _lb24ex = false;
+            _lb25ex = false;
+            _alphaNumericCount = 0;
+            _lb31 = false;
+            _lb30 = false;
+            _lb30a = 0;
         }
-
+        
         public LineBreak Current { get; private set; }
-
+        
         public bool MoveNext()
         {
-            // get the first char if we're at the beginning of the string
-            if (!_curClass.HasValue)
+            // Get the first char if we're at the beginning of the string.
+            if (_first)
             {
-                _curClass = PeekCharClass() == LineBreakClass.Space ? LineBreakClass.WordJoiner : MapFirst(ReadCharClass());
+                var firstClass = NextCharClass();
+                _first = false;
+                _currentClass = MapFirst(firstClass);
+                _nextClass = firstClass;
+                _lb8a = firstClass == LineBreakClass.ZWJ;
+                _lb30a = 0;
             }
 
-            while (_pos < _text.Length)
+            while (_position < _text.Length)
             {
-                _lastPos = _pos;
+                _lastPosition = _position;
                 var lastClass = _nextClass;
-                _nextClass = ReadCharClass();
+                _nextClass = NextCharClass();
 
-                // explicit newline
-                if (_curClass.HasValue && (_curClass == LineBreakClass.MandatoryBreak || _curClass == LineBreakClass.CarriageReturn && _nextClass != LineBreakClass.LineFeed))
+                // Explicit newline
+                switch (_currentClass)
                 {
-                    _curClass = MapFirst(MapClass(_nextClass.Value));
-                    Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true);
+                    case LineBreakClass.MandatoryBreak:
+                    case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed:
+                    {
+                        _currentClass = MapFirst(_nextClass);
+                        Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, true);
+                        return true;
+                    }
+                }
+
+                var shouldBreak = GetSimpleBreak() ?? (bool?)GetPairTableBreak(lastClass);
+
+                // Rule LB8a
+                _lb8a = _nextClass == LineBreakClass.ZWJ;
+
+                if (shouldBreak.Value)
+                {
+                    Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition);
                     return true;
                 }
+            }
 
-                // handle classes not handled by the pair table
-                LineBreakClass? cur = null;
-                switch (_nextClass.Value)
+            if (_position >= _text.Length)
+            {
+                if (_lastPosition < _text.Length)
                 {
-                    case LineBreakClass.Space:
-                        cur = _curClass;
-                        break;
+                    _lastPosition = _text.Length;
+
+                    var required = false;
+
+                    switch (_currentClass)
+                    {
+                        case LineBreakClass.MandatoryBreak:
+                        case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed:
+                            required = true;
+                            break;
+                    }
+
+                    Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, required);
+                    return true;
+                }
+            }
+
+            Current = default;
+            
+            return false;
+        }
+
+        /// <summary>
+        /// Resolves the line break class of a code point to the class the break rules operate on (rule LB1).
+        /// </summary>
+        /// <param name="cp">The code point to classify.</param>
+        /// <returns>
+        /// Alphabetic for Ambiguous, Surrogate and Unknown; CombiningMark or Alphabetic for ComplexContext
+        /// (depending on the general category); Nonstarter for ConditionalJapaneseStarter;
+        /// otherwise the code point's own line break class.
+        /// </returns>
+        private static LineBreakClass MapClass(Codepoint cp)
+        {
+            // Hard-coded special case: code point 327685 (0x50005) is always classified as Alphabetic.
+            // NOTE(review): the reason for this override is not visible here - confirm it is still needed.
+            if (cp.Value == 327685)
+            {
+                return LineBreakClass.Alphabetic;
+            }
+            
+            // LB 1
+            // ==========================================
+            // Resolved Original    General_Category
+            // ==========================================
+            // AL       AI, SG, XX  Any
+            // CM       SA          Only Mn or Mc
+            // AL       SA          Any except Mn and Mc
+            // NS       CJ          Any
+            switch (cp.LineBreakClass)
+            {
+                case LineBreakClass.Ambiguous:
+                case LineBreakClass.Surrogate:
+                case LineBreakClass.Unknown:
+                    return LineBreakClass.Alphabetic;
 
+                case LineBreakClass.ComplexContext:
+                    // SA resolves to CM only for nonspacing (Mn) or spacing (Mc) marks, otherwise to AL.
+                    return cp.GeneralCategory == GeneralCategory.NonspacingMark || cp.GeneralCategory == GeneralCategory.SpacingMark
+                        ? LineBreakClass.CombiningMark
+                        : LineBreakClass.Alphabetic;
+
+                case LineBreakClass.ConditionalJapaneseStarter:
+                    return LineBreakClass.Nonstarter;
+
+                default:
+                    // Every other class is used as-is.
+                    return cp.LineBreakClass;
+            }
+        }
+
+        /// <summary>
+        /// Maps a line break class to the class used as the "current" class at the start of the text
+        /// or directly after an explicit newline.
+        /// </summary>
+        /// <param name="c">The already resolved line break class.</param>
+        /// <returns>
+        /// MandatoryBreak for LineFeed and NextLine, WordJoiner for Space, otherwise <paramref name="c"/> unchanged.
+        /// </returns>
+        private static LineBreakClass MapFirst(LineBreakClass c)
+        {
+            switch (c)
+            {
+                case LineBreakClass.LineFeed:
+                case LineBreakClass.NextLine:
+                    return LineBreakClass.MandatoryBreak;
+
+                case LineBreakClass.Space:
+                    return LineBreakClass.WordJoiner;
+
+                default:
+                    return c;
+            }
+        }
+
+        /// <summary>
+        /// Determines whether a line break class counts as alphanumeric, i.e. is
+        /// Alphabetic, HebrewLetter or Numeric.
+        /// </summary>
+        /// <param name="cls">The line break class to test.</param>
+        /// <returns><c>true</c> for Alphabetic, HebrewLetter and Numeric; otherwise <c>false</c>.</returns>
+        private static bool IsAlphaNumeric(LineBreakClass cls)
+            => cls == LineBreakClass.Alphabetic
+            || cls == LineBreakClass.HebrewLetter
+            || cls == LineBreakClass.Numeric;
+
+        /// <summary>
+        /// Returns the resolved line break class of the code point at the current position
+        /// without advancing the position.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): at the end of the text this presumably classifies the replacement
+        /// code point returned by Codepoint.ReadAt - confirm against that method.
+        /// </remarks>
+        private LineBreakClass PeekNextCharClass()
+        {
+            // The consumed length is discarded because this is a look-ahead only.
+            var cp = Codepoint.ReadAt(_text, _position, out _);
+            
+            return MapClass(cp);
+        }
+
+        // Get the class of the character at the current position and advance past it
+        private LineBreakClass NextCharClass()
+        {
+            var cp = Codepoint.ReadAt(_text, _position, out var count);
+            var cls = MapClass(cp);
+            _position += count;
+
+            // Keep track of alphanumeric + any combining marks.
+            // This is used for LB22 and LB30.
+            if (IsAlphaNumeric(_currentClass) || _alphaNumericCount > 0 && cls == LineBreakClass.CombiningMark)
+            {
+                _alphaNumericCount++;
+            }
+
+            // Track combining mark exceptions. LB22
+            if (cls == LineBreakClass.CombiningMark)
+            {
+                switch (_currentClass)
+                {
                     case LineBreakClass.MandatoryBreak:
+                    case LineBreakClass.ContingentBreak:
+                    case LineBreakClass.Exclamation:
                     case LineBreakClass.LineFeed:
                     case LineBreakClass.NextLine:
-                        cur = LineBreakClass.MandatoryBreak;
-                        break;
-
+                    case LineBreakClass.Space:
+                    case LineBreakClass.ZWSpace:
                     case LineBreakClass.CarriageReturn:
-                        cur = LineBreakClass.CarriageReturn;
+                        _lb22ex = true;
                         break;
+                }
+            }
 
+            // Track combining mark exceptions. LB31
+            if (_first && cls == LineBreakClass.CombiningMark)
+            {
+                _lb31 = true;
+            }
+
+            if (cls == LineBreakClass.CombiningMark)
+            {
+                switch (_currentClass)
+                {
+                    case LineBreakClass.MandatoryBreak:
                     case LineBreakClass.ContingentBreak:
-                        cur = LineBreakClass.BreakAfter;
+                    case LineBreakClass.Exclamation:
+                    case LineBreakClass.LineFeed:
+                    case LineBreakClass.NextLine:
+                    case LineBreakClass.Space:
+                    case LineBreakClass.ZWSpace:
+                    case LineBreakClass.CarriageReturn:
+                    case LineBreakClass.ZWJ:
+                        _lb31 = true;
                         break;
                 }
+            }
+
+            if (_first
+                && (cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space))
+            {
+                _lb31 = true;
+            }
+
+            if (_currentClass == LineBreakClass.Alphabetic && 
+                (cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space))
+            {
+                _lb31 = true;
+            }
+
+            // Reset LB31 if the character just read is U+0028 (Left Parenthesis)
+            if (_lb31
+                && _currentClass != LineBreakClass.PostfixNumeric
+                && _currentClass != LineBreakClass.PrefixNumeric
+                && cls == LineBreakClass.OpenPunctuation && cp.Value == 0x0028)
+            {
+                _lb31 = false;
+            }
+
+            // Rule LB24
+            if (_first && (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.CloseParenthesis))
+            {
+                _lb24ex = true;
+            }
 
-                if (cur != null)
+            // Rule LB25
+            if (_first
+                && (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.InfixNumeric || cls == LineBreakClass.BreakSymbols))
+            {
+                _lb25ex = true;
+            }
+
+            if (cls == LineBreakClass.Space || cls == LineBreakClass.WordJoiner || cls == LineBreakClass.Alphabetic)
+            {
+                var next = PeekNextCharClass();
+                if (next == LineBreakClass.ClosePunctuation || next == LineBreakClass.InfixNumeric || next == LineBreakClass.BreakSymbols)
                 {
-                    _curClass = cur;
+                    _lb25ex = true;
+                }
+            }
+
+            // Alphanumerics (plus any combining marks) can break before OP, except for:
+            // - U+0028 (Left Parenthesis)
+            // - U+005B (Left Square Bracket)
+            // - U+007B (Left Curly Bracket)
+            // See the custom columns|rules in the pair table linked below.
+            // https://www.unicode.org/Public/13.0.0/ucd/auxiliary/LineBreakTest.html
+            _lb30 = _alphaNumericCount > 0
+                && cls == LineBreakClass.OpenPunctuation
+                && cp.Value != 0x0028
+                && cp.Value != 0x005B
+                && cp.Value != 0x007B;
+
+            return cls;
+        }
+
+        private bool? GetSimpleBreak()
+        {
+            // handle classes not handled by the pair table
+            switch (_nextClass)
+            {
+                case LineBreakClass.Space:
+                    return false;
 
-                    if (_nextClass.Value == LineBreakClass.MandatoryBreak)
+                case LineBreakClass.MandatoryBreak:
+                case LineBreakClass.LineFeed:
+                case LineBreakClass.NextLine:
+                    _currentClass = LineBreakClass.MandatoryBreak;
+                    return false;
+
+                case LineBreakClass.CarriageReturn:
+                    _currentClass = LineBreakClass.CarriageReturn;
+                    return false;
+            }
+
+            return null;
+        }
+
+        private bool GetPairTableBreak(LineBreakClass lastClass)
+        {
+            // If not handled already, use the pair table
+            bool shouldBreak = false;
+            switch (LineBreakPairTable.Table[(int)_currentClass][(int)_nextClass])
+            {
+                case LineBreakPairTable.DIBRK: // Direct break
+                    shouldBreak = true;
+                    break;
+
+                // TODO: Rewrite this so that it defaults to true and rules are set as exceptions.
+                case LineBreakPairTable.INBRK: // Possible indirect break
+
+                    // LB31
+                    if (_lb31 && _nextClass == LineBreakClass.OpenPunctuation)
                     {
-                        _lastPos = _pos;
-                        Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true);
-                        return true;
+                        shouldBreak = true;
+                        _lb31 = false;
+                        break;
                     }
 
-                    continue;
-                }
-
-                // if not handled already, use the pair table
-                var shouldBreak = false;
-                switch (BreakPairTable.Map(_curClass.Value,_nextClass.Value))
-                {
-                    case PairBreakType.DI: // Direct break
+                    // LB30
+                    if (_lb30)
+                    {
                         shouldBreak = true;
+                        _lb30 = false;
+                        _alphaNumericCount = 0;
                         break;
+                    }
 
-                    case PairBreakType.IN: // possible indirect break
-                        shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space;
+                    // LB25
+                    if (_lb25ex && (_nextClass == LineBreakClass.PrefixNumeric || _nextClass == LineBreakClass.Numeric))
+                    {
+                        shouldBreak = true;
+                        _lb25ex = false;
                         break;
+                    }
 
-                    case PairBreakType.CI:
-                        shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space;
-                        if (!shouldBreak)
-                        {
-                            continue;
-                        }
+                    // LB24
+                    if (_lb24ex && (_nextClass == LineBreakClass.PostfixNumeric || _nextClass == LineBreakClass.PrefixNumeric))
+                    {
+                        shouldBreak = true;
+                        _lb24ex = false;
                         break;
+                    }
+
+                    // LB18
+                    shouldBreak = lastClass == LineBreakClass.Space;
+                    break;
+
+                case LineBreakPairTable.CIBRK:
+                    shouldBreak = lastClass == LineBreakClass.Space;
+                    if (!shouldBreak)
+                    {
+                        return false;
+                    }
 
-                    case PairBreakType.CP: // prohibited for combining marks
-                        if (!lastClass.HasValue || lastClass.Value != LineBreakClass.Space)
+                    break;
+
+                case LineBreakPairTable.CPBRK: // prohibited for combining marks
+                    if (lastClass != LineBreakClass.Space)
+                    {
+                        return false;
+                    }
+
+                    break;
+
+                case LineBreakPairTable.PRBRK:
+                    break;
+            }
+
+            // Rule LB22
+            if (_nextClass == LineBreakClass.Inseparable)
+            {
+                switch (lastClass)
+                {
+                    case LineBreakClass.MandatoryBreak:
+                    case LineBreakClass.ContingentBreak:
+                    case LineBreakClass.Exclamation:
+                    case LineBreakClass.LineFeed:
+                    case LineBreakClass.NextLine:
+                    case LineBreakClass.Space:
+                    case LineBreakClass.ZWSpace:
+
+                        // Allow break
+                        break;
+                    case LineBreakClass.CombiningMark:
+                        if (_lb22ex)
                         {
-                            continue;
+                            // Allow break
+                            _lb22ex = false;
+                            break;
                         }
+
+                        shouldBreak = false;
+                        break;
+                    default:
+                        shouldBreak = false;
                         break;
                 }
+            }
 
-                _curClass = _nextClass;
+            if (_lb8a)
+            {
+                shouldBreak = false;
+            }
 
-                if (shouldBreak)
-                {
-                    Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos);
-                    return true;
-                }
+            // Rule LB21a
+            if (_lb21a && (_currentClass == LineBreakClass.Hyphen || _currentClass == LineBreakClass.BreakAfter))
+            {
+                shouldBreak = false;
+                _lb21a = false;
+            }
+            else
+            {
+                _lb21a = _currentClass == LineBreakClass.HebrewLetter;
             }
 
-            if (_pos >= _text.Length)
+            // Rule LB30a
+            if (_currentClass == LineBreakClass.RegionalIndicator)
             {
-                if (_lastPos < _text.Length)
+                _lb30a++;
+                if (_lb30a == 2 && _nextClass == LineBreakClass.RegionalIndicator)
                 {
-                    _lastPos = _text.Length;
-                    var cls = Codepoint.ReadAt(_text, _text.Length - 1, out _).LineBreakClass;
-                    bool required = cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn;
-                    Current = new LineBreak(FindPriorNonWhitespace(_text.Length), _text.Length, required);
-                    return true;
+                    shouldBreak = true;
+                    _lb30a = 0;
                 }
             }
+            else
+            {
+                _lb30a = 0;
+            }
 
-            return false;
-        }
+            _currentClass = _nextClass;
 
+            return shouldBreak;
+        }
+        
         private int FindPriorNonWhitespace(int from)
         {
             if (from > 0)
@@ -163,7 +463,8 @@ namespace Avalonia.Media.TextFormatting.Unicode
 
                 var cls = cp.LineBreakClass;
 
-                if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn)
+                if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed ||
+                    cls == LineBreakClass.CarriageReturn)
                 {
                     from -= count;
                 }
@@ -184,61 +485,8 @@ namespace Avalonia.Media.TextFormatting.Unicode
                     break;
                 }
             }
-            return from;
-        }
 
-        // Get the next character class
-        private LineBreakClass ReadCharClass()
-        {
-            var cp = Codepoint.ReadAt(_text, _pos, out var count);
-
-            _pos += count;
-
-            return MapClass(cp.LineBreakClass);
-        }
-
-        private LineBreakClass PeekCharClass()
-        {
-            return MapClass(Codepoint.ReadAt(_text, _pos, out _).LineBreakClass);
-        }
-
-        private static LineBreakClass MapClass(LineBreakClass c)
-        {
-            switch (c)
-            {
-                case LineBreakClass.Ambiguous:
-                    return LineBreakClass.Alphabetic;
-
-                case LineBreakClass.ComplexContext:
-                case LineBreakClass.Surrogate:
-                case LineBreakClass.Unknown:
-                    return LineBreakClass.Alphabetic;
-
-                case LineBreakClass.ConditionalJapaneseStarter:
-                    return LineBreakClass.Nonstarter;
-
-                default:
-                    return c;
-            }
-        }
-
-        private static LineBreakClass MapFirst(LineBreakClass c)
-        {
-            switch (c)
-            {
-                case LineBreakClass.LineFeed:
-                case LineBreakClass.NextLine:
-                    return LineBreakClass.MandatoryBreak;
-
-                case LineBreakClass.ContingentBreak:
-                    return LineBreakClass.BreakAfter;
-
-                case LineBreakClass.Space:
-                    return LineBreakClass.WordJoiner;
-
-                default:
-                    return c;
-            }
+            return from;
         }
     }
 }

+ 74 - 0
src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs

@@ -0,0 +1,74 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+// Ported from: https://github.com/SixLabors/Fonts/
+
+namespace Avalonia.Media.TextFormatting.Unicode
+{
+    /// <summary>
+    /// Holds the line break pair table: a square lookup of break actions between
+    /// pairs of line break classes, together with the action codes stored in it.
+    /// </summary>
+    internal static class LineBreakPairTable
+    {
+        /// <summary>
+        /// Direct break opportunity
+        /// </summary>
+        public const byte DIBRK = 0;
+
+        /// <summary>
+        /// Indirect break opportunity
+        /// </summary>
+        public const byte INBRK = 1;
+
+        /// <summary>
+        /// Indirect break opportunity for combining marks
+        /// </summary>
+        public const byte CIBRK = 2;
+
+        /// <summary>
+        /// Prohibited break for combining marks
+        /// </summary>
+        public const byte CPBRK = 3;
+
+        /// <summary>
+        /// Prohibited break
+        /// </summary>
+        public const byte PRBRK = 4;
+
+        /// <summary>
+        /// The pair table itself: 33 rows of 33 action codes each. Every row is labelled
+        /// with its line break class in the trailing comment, and the columns follow the
+        /// class order given in the header comment (OP, CL, CP, ... ZWJ, CB).
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): presumably indexed as Table[class before][class after], as in the
+        /// UAX #14 example table - confirm against the code that reads this table.
+        /// </remarks>
+        // Based on example pair table from https://www.unicode.org/reports/tr14/tr14-37.html#Table2
+        // - ZWJ special processing for LB8a
+        // - CB manually added as per Rule LB20
+        public static byte[][] Table { get; } = {
+              // .         OP     CL     CP     QU     GL     NS     EX     SY     IS     PR     PO     NU     AL     HL     ID     IN     HY     BA     BB     B2     ZW     CM     WJ     H2     H3     JL     JV     JT     RI     EB     EM     ZWJ    CB
+              new[] { PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, CPBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK }, // OP
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CL
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CP
+              new[] { PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // QU
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // GL
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NS
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EX
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // SY
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IS
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK }, // PR
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // PO
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NU
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // AL
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HL
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ID
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IN
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HY
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // BA
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK }, // BB
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // B2
+              new[] { DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK }, // ZW
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CM
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // WJ
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H2
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H3
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JL
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JV
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JT
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, DIBRK }, // RI
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK }, // EB
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EM
+              new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ZWJ
+              new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK } // CB
+        };
+    }
+}

+ 2 - 2
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt

@@ -1,7 +1,7 @@
 	OP	CL	CP	QU	GL	NS	EX	SY	IS	PR	PO	NU	AL	HL	ID	IN	HY	BA	BB	B2	ZW	CM	WJ	H2	H3	JL	JV	JT	RI	EB	EM	ZWJ	CB
 OP	^	^	^	^	^	^	^	^	^	^	^	^	^	^	^	^	^	^	^	^	^	@	^	^	^	^	^	^	^	^	^	^	^
-CL	_	^	^	%	%	^	^	^	^	%	%	_	_	_	_	^	%	%	_	_	^	#	^	_	_	_	_	_	_	_	_	%	_
-CP	_	^	^	%	%	^	^	^	^	%	%	%	%	%	_	^	%	%	_	_	^	#	^	_	_	_	_	_	_	_	_	%	_
+CL	_	^	^	%	%	^	^	^	^	%	%	_	_	_	_	_	%	%	_	_	^	#	^	_	_	_	_	_	_	_	_	%	_
+CP	_	^	^	%	%	^	^	^	^	%	%	%	%	%	_	_	%	%	_	_	^	#	^	_	_	_	_	_	_	_	_	%	_
 QU	^	^	^	%	%	%	^	^	^	%	%	%	%	%	%	%	%	%	%	%	^	#	^	%	%	%	%	%	%	%	%	%	%
 GL	%	^	^	%	%	%	^	^	^	%	%	%	%	%	%	%	%	%	%	%	^	#	^	%	%	%	%	%	%	%	%	%	%
 NS	_	^	^	%	%	%	^	^	^	_	_	_	_	_	_	_	%	%	_	_	^	#	^	_	_	_	_	_	_	_	_	%	_

+ 259 - 0
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs

@@ -0,0 +1,259 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Net.Http;
+using Avalonia.Media.TextFormatting.Unicode;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
+{
+    public class LineBreakEnumeratorTests
+    {
+        // Receives the diagnostic report that ShouldFindBreaks writes for a failing case.
+        private readonly ITestOutputHelper _outputHelper;
+
+        /// <summary>
+        /// Stores the output helper handed to the test class so tests can log through it.
+        /// </summary>
+        public LineBreakEnumeratorTests(ITestOutputHelper outputHelper) => _outputHelper = outputHelper;
+        
+        /// <summary>
+        /// Steps through every break reported for a short Latin text containing a CRLF and
+        /// checks each wrap position together with its required flag.
+        /// </summary>
+        [Fact]
+        public void BasicLatinTest()
+        {
+            var lineBreaker = new LineBreakEnumerator("Hello World\r\nThis is a test.".AsMemory());
+
+            // Expected breaks in order; the two arrays are paired index for index.
+            var expectedWraps = new[] { 6, 13, 18, 21, 23, 28 };
+            var expectedRequired = new[] { false, true, false, false, false, false };
+
+            for (var i = 0; i < expectedWraps.Length; i++)
+            {
+                Assert.True(lineBreaker.MoveNext());
+                Assert.Equal(expectedWraps[i], lineBreaker.Current.PositionWrap);
+
+                if (expectedRequired[i])
+                {
+                    Assert.True(lineBreaker.Current.Required);
+                }
+                else
+                {
+                    Assert.False(lineBreaker.Current.Required);
+                }
+            }
+
+            // No further break may be reported once the text is exhausted.
+            Assert.False(lineBreaker.MoveNext());
+        }
+
+
+        /// <summary>
+        /// Checks the wrap and measure positions of the first four breaks reported for a
+        /// text that starts with one space and ends with three.
+        /// </summary>
+        [Fact]
+        public void ForwardTextWithOuterWhitespace()
+        {
+            var positionsF = GetBreaks(new LineBreakEnumerator(" Apples Pears Bananas   ".AsMemory()));
+
+            // Expected values per break; the two arrays are paired index for index.
+            var expectedWraps = new[] { 1, 8, 14, 24 };
+            var expectedMeasures = new[] { 0, 7, 13, 21 };
+
+            for (var i = 0; i < expectedWraps.Length; i++)
+            {
+                Assert.Equal(expectedWraps[i], positionsF[i].PositionWrap);
+                Assert.Equal(expectedMeasures[i], positionsF[i].PositionMeasure);
+            }
+        }
+
+        /// <summary>
+        /// Advances the enumerator until it reports no further break and returns every
+        /// break it produced, in the order reported.
+        /// </summary>
+        private static List<LineBreak> GetBreaks(LineBreakEnumerator lineBreaker)
+        {
+            var collected = new List<LineBreak>();
+
+            for (var hasBreak = lineBreaker.MoveNext(); hasBreak; hasBreak = lineBreaker.MoveNext())
+            {
+                collected.Add(lineBreaker.Current);
+            }
+
+            return collected;
+        }
+
+        /// <summary>
+        /// Checks the wrap and measure positions of the first three breaks reported for a
+        /// text of three space-separated words without surrounding whitespace.
+        /// </summary>
+        [Fact]
+        public void ForwardTest()
+        {
+            var positionsF = GetBreaks(new LineBreakEnumerator("Apples Pears Bananas".AsMemory()));
+
+            // Expected values per break; the two arrays are paired index for index.
+            var expectedWraps = new[] { 7, 13, 20 };
+            var expectedMeasures = new[] { 6, 12, 20 };
+
+            for (var i = 0; i < expectedWraps.Length; i++)
+            {
+                Assert.Equal(expectedWraps[i], positionsF[i].PositionWrap);
+                Assert.Equal(expectedMeasures[i], positionsF[i].PositionMeasure);
+            }
+        }
+
+        /// <summary>
+        /// Builds a string from the given code points, enumerates its line breaks and
+        /// compares the reported wrap positions with the expected break points. On a
+        /// mismatch a diagnostic report is written to the test output before failing.
+        /// </summary>
+        /// <param name="lineNumber">Line number reported in the diagnostic output.</param>
+        /// <param name="codePoints">Code points that make up the text under test.</param>
+        /// <param name="breakPoints">Expected wrap positions, in order.</param>
+        [Theory(Skip = "Only run when the Unicode spec changes.")]
+        [ClassData(typeof(LineBreakTestDataGenerator))]
+        public void ShouldFindBreaks(int lineNumber, int[] codePoints, int[] breakPoints)
+        {
+            var text = string.Join(null, codePoints.Select(char.ConvertFromUtf32));
+
+            var foundBreaks = GetBreaks(new LineBreakEnumerator(text.AsMemory()))
+                .Select(lineBreak => lineBreak.PositionWrap)
+                .ToList();
+
+            // Same count and same position at every index.
+            var pass = foundBreaks.SequenceEqual(breakPoints);
+
+            if (pass)
+            {
+                return;
+            }
+
+            _outputHelper.WriteLine($"Failed test on line {lineNumber}");
+            _outputHelper.WriteLine("");
+            _outputHelper.WriteLine($"    Code Points: {string.Join(" ", codePoints)}");
+            _outputHelper.WriteLine($"Expected Breaks: {string.Join(" ", breakPoints)}");
+            _outputHelper.WriteLine($"  Actual Breaks: {string.Join(" ", foundBreaks)}");
+            _outputHelper.WriteLine($"           Text: {text}");
+            _outputHelper.WriteLine($"     Char Props: {string.Join(" ", codePoints.Select(x => new Codepoint(x).LineBreakClass))}");
+            _outputHelper.WriteLine("");
+
+            Assert.True(pass);
+        }
+
+        /// <summary>
+        /// Theory data source that downloads <c>auxiliary/LineBreakTest.txt</c> from the
+        /// location given by <c>UnicodeDataGenerator.Ucd</c> and yields one
+        /// <c>{ lineNumber, int[] codePoints, int[] breakPoints }</c> entry per data line.
+        /// Yields nothing when the download does not succeed.
+        /// </summary>
+        private class LineBreakTestDataGenerator : IEnumerable<object[]>
+        {
+            private readonly List<object[]> _testData;
+
+            public LineBreakTestDataGenerator()
+            {
+                _testData = GenerateTestData();
+            }
+
+            public IEnumerator<object[]> GetEnumerator()
+            {
+                return _testData.GetEnumerator();
+            }
+
+            IEnumerator IEnumerable.GetEnumerator()
+            {
+                return GetEnumerator();
+            }
+
+            /// <summary>
+            /// Downloads the test file and parses every non-blank, non-comment line.
+            /// </summary>
+            /// <returns>The parsed cases; empty when the HTTP request is not successful.</returns>
+            private static List<object[]> GenerateTestData()
+            {
+                var tests = new List<object[]>();
+
+                // The base is a URL, not a file system path: Path.Combine would join with '\'
+                // on Windows when the base has no trailing slash, so join with '/' explicitly.
+                var url = UnicodeDataGenerator.Ucd.TrimEnd('/') + "/auxiliary/LineBreakTest.txt";
+
+                using (var client = new HttpClient())
+                using (var result = client.GetAsync(url).GetAwaiter().GetResult())
+                {
+                    if (!result.IsSuccessStatusCode)
+                    {
+                        // Deliberate best effort: report no cases instead of throwing.
+                        return tests;
+                    }
+
+                    using (var stream = result.Content.ReadAsStreamAsync().GetAwaiter().GetResult())
+                    using (var reader = new StreamReader(stream))
+                    {
+                        var lineNumber = 0;
+                        string line;
+
+                        while ((line = reader.ReadLine()) != null)
+                        {
+                            // Count every physical line so the number matches the file.
+                            lineNumber++;
+
+                            // Drop the trailing comment and surrounding white space.
+                            line = line.Split('#')[0].Trim();
+
+                            // Ignore blank/comment only lines
+                            if (line.Length == 0)
+                            {
+                                continue;
+                            }
+
+                            tests.Add(ParseTest(lineNumber, line));
+                        }
+                    }
+                }
+
+                return tests;
+            }
+
+            /// <summary>
+            /// Parses one comment-free data line made of hexadecimal code points separated
+            /// by '×' (no break) and '÷' (break) markers.
+            /// </summary>
+            /// <param name="lineNumber">Line number of <paramref name="line"/> in the file.</param>
+            /// <param name="line">The trimmed data line.</param>
+            /// <returns><c>{ lineNumber, int[] codePoints, int[] breakPoints }</c>.</returns>
+            /// <exception cref="FormatException">The line contains an unexpected character.</exception>
+            private static object[] ParseTest(int lineNumber, string line)
+            {
+                var codePoints = new List<int>();
+                var breakPoints = new List<int>();
+
+                // Length in UTF-16 code units of all code points read so far; break
+                // positions are expressed as offsets into the resulting string.
+                var utf16Length = 0;
+                var p = 0;
+
+                while (p < line.Length)
+                {
+                    var ch = line[p];
+
+                    // White space and the "no break" marker carry no information.
+                    if (char.IsWhiteSpace(ch) || ch == '×')
+                    {
+                        p++;
+                        continue;
+                    }
+
+                    if (ch == '÷')
+                    {
+                        breakPoints.Add(utf16Length);
+                        p++;
+                        continue;
+                    }
+
+                    var codePointPos = p;
+
+                    while (p < line.Length && IsHexDigit(line[p]))
+                    {
+                        p++;
+                    }
+
+                    if (p == codePointPos)
+                    {
+                        throw new FormatException(
+                            $"Unexpected character '{ch}' on line {lineNumber} of LineBreakTest.txt.");
+                    }
+
+                    var codePoint = Convert.ToInt32(line.Substring(codePointPos, p - codePointPos), 16);
+
+                    codePoints.Add(codePoint);
+
+                    // Code points above U+FFFF take two UTF-16 code units (a surrogate pair).
+                    utf16Length += codePoint > ushort.MaxValue ? 2 : 1;
+                }
+
+                return new object[] { lineNumber, codePoints.ToArray(), breakPoints.ToArray() };
+            }
+
+            /// <summary>
+            /// Returns true for ASCII hexadecimal digits only. char.IsDigit is avoided
+            /// because it also accepts non-ASCII decimal digits that Convert.ToInt32 rejects.
+            /// </summary>
+            private static bool IsHexDigit(char ch)
+            {
+                return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
+            }
+        }
+    }
+}

+ 0 - 56
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakerTests.cs

@@ -1,56 +0,0 @@
-using System;
-using Avalonia.Media.TextFormatting.Unicode;
-using Avalonia.Utilities;
-using Xunit;
-
-namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
-{
-    public class LineBreakerTests
-    {
-        [Fact]
-        public void Should_Split_Text_By_Explicit_Breaks()
-        {
-            //ABC [0 3]
-            //DEF\r[4 7]
-            //\r[8]
-            //Hello\r\n[9 15]
-            const string text = "ABC DEF\r\rHELLO\r\n";
-
-            var buffer = new ReadOnlySlice<char>(text.AsMemory());
-
-            var lineBreaker = new LineBreakEnumerator(buffer);
-
-            var current = 0;
-
-            Assert.True(lineBreaker.MoveNext());
-
-            var a = text.Substring(current, lineBreaker.Current.PositionMeasure - current + 1);
-
-            Assert.Equal("ABC ", a);
-
-            current += a.Length;
-
-            Assert.True(lineBreaker.MoveNext());
-
-            var b = text.Substring(current, lineBreaker.Current.PositionMeasure - current + 1);
-
-            Assert.Equal("DEF\r", b);
-
-            current += b.Length;
-
-            Assert.True(lineBreaker.MoveNext());
-
-            var c = text.Substring(current, lineBreaker.Current.PositionMeasure - current + 1);
-
-            Assert.Equal("\r", c);
-
-            current += c.Length;
-
-            Assert.True(lineBreaker.MoveNext());
-
-            var d = text.Substring(current, text.Length - current);
-
-            Assert.Equal("HELLO\r\n", d);
-        }
-    }
-}

+ 1 - 24
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/UnicodeDataGeneratorTests.cs

@@ -1,6 +1,4 @@
-using System;
-using Avalonia.Media.TextFormatting.Unicode;
-using Xunit;
+using Xunit;
 
 namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
 {
@@ -15,26 +13,5 @@ namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
         {
             UnicodeDataGenerator.Execute();
         }
-        [Theory(Skip = "Only run when we update the trie.")]
-        [ClassData(typeof(LineBreakTestDataGenerator))]
-
-        public void Should_Enumerate_LineBreaks(string text, int expectedLength)
-        {
-            var textMemory = text.AsMemory();
-
-            var enumerator = new LineBreakEnumerator(textMemory);
-
-            Assert.True(enumerator.MoveNext());
-
-            Assert.Equal(expectedLength, enumerator.Current.PositionWrap);
-        }
-
-        private class LineBreakTestDataGenerator : TestDataGenerator
-        {
-            public LineBreakTestDataGenerator()
-                : base("auxiliary/LineBreakTest.txt")
-            {
-            }
-        }
     }
 }